## Data flow to MySQL

In [1]:
import getpass
import os
import sys, traceback

import mysql.connector as mdb
from mysql.connector import Error

In [2]:
def open_database(host, db_name, username, password):
    """Open a MySQL connection and print the server version and current schema.

    Parameters
    ----------
    host, db_name, username, password
        Forwarded to ``mdb.connect`` as ``host``, ``database``, ``user`` and
        ``password``.

    Returns
    -------
    The connection object, or ``None`` when connecting raised ``Error``
    (the error is printed, not re-raised).
    """
    # Bound before the try block so the final return cannot hit an unbound
    # local when mdb.connect() itself raises.
    connection = None
    try:
        connection = mdb.connect(host=host,
                                 database=db_name,
                                 user=username,
                                 password=password)
        if connection.is_connected():
            db_Info = connection.get_server_info()
            print("Connected to MySQL database... MySQL Server version on ",db_Info)
            cursor = connection.cursor()
            try:
                cursor.execute("select database();")
                record = cursor.fetchone()
                print ("Your connected to - ", record)
            finally:
                # Release the cursor even if the probe query fails.
                cursor.close()
    except Error as e :
        print ("Error while connecting to MySQL", e)
    return connection

In [3]:
def close_database(connection):
    """Close ``connection`` if it is currently connected.

    ``None`` is accepted and ignored, so the function can be called on a
    connection that was never established.
    """
    if connection is not None and connection.is_connected():
        connection.close()
        print("MySQL connection is closed")

In [4]:
# Connection settings come from the environment so that no credential is
# stored in the notebook; the password is prompted for when MYSQL_PASSWORD
# is not set.
conn = open_database(
    os.environ.get('MYSQL_HOST', 'localhost'),
    os.environ.get('MYSQL_DATABASE', 'caselaw'),
    os.environ.get('MYSQL_USER', 'root'),
    os.environ.get('MYSQL_PASSWORD') or getpass.getpass('MySQL password: '),
)

Connected to MySQL database... MySQL Server version on  8.0.15
Your connected to -  ('caselaw',)


In [5]:
# Smoke test: read a single row from `case` and print it.
query = """SELECT * FROM caselaw.case LIMIT 1;"""
cursor = conn.cursor(buffered=True)
try:
    cursor.execute(query)
    records = cursor.fetchall()
finally:
    # The cursor is only needed for this one query; release it either way.
    cursor.close()
for row in records:
    print(row)

---
### Datasets

In [6]:
import pandas as pd
import numpy as np
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

In [7]:
# List the files available in the data dump directory.
!ls ../data_dump

case.csv                               cases_metadata.csv
case_opinion_from_advocate_general.csv countries.csv
caselaw.zip                            legal_intelligence_cases.csv
caselawDB.sql                          legislation_citations.csv
caselaw_citations.csv


In [8]:
def _read_dump(file_name):
    """Read one CSV from ../data_dump, replacing missing values with the string 'NULL'."""
    return pd.read_csv("../data_dump/" + file_name).replace(np.nan, 'NULL', regex=True)


df_case = _read_dump("case.csv")
df_country = _read_dump("countries.csv")
# Legal Intelligence cases are additionally de-duplicated on their ECLI.
df_li_cases = _read_dump("legal_intelligence_cases.csv").drop_duplicates(subset ="ecli")
df_case_opinion = _read_dump("case_opinion_from_advocate_general.csv")
df_case_citation = _read_dump("caselaw_citations.csv")
df_legislation_citation = _read_dump("legislation_citations.csv")

In [9]:
# Legal Intelligence ECLIs are written with '_' as separator; rewrite them with
# ':' so they can be matched against the case ECLIs. Done on the whole column
# at once rather than row by row with iterrows().
df_li_cases['ecli'] = df_li_cases['ecli'].str.replace('_', ':', regex=False)

---
### Sample to run the procedure

In [10]:
sample_size = 50000
# ECLI numbers chosen for the sample. The size is capped at the number of
# available cases because Series.sample raises when n exceeds the population
# (sampling without replacement).
sample = list(
    df_case['case_id'].sample(n=min(sample_size, len(df_case)), random_state=18)
)

# NOTE: case opinions and cases seem to share no ECLIs -- the opinion sample
# below comes out empty.

## 0: case sample
df_case = df_case[df_case['case_id'].isin(sample)]

## 1: legislation citations sample
df_legislation_citation = df_legislation_citation[df_legislation_citation['source_ecli'].isin(sample)]

## 2: case citations sample
df_case_citation = df_case_citation[df_case_citation['source_ecli'].isin(sample)]

## 3: case opinions sample
df_case_opinion = df_case_opinion[df_case_opinion['case_id'].isin(sample)]

## 4: li cases sample
df_li_cases = df_li_cases[df_li_cases['ecli'].isin(sample)]

# Row counts of the five sampled frames, in the order built above.
print(len(df_case), len(df_legislation_citation), len(df_case_citation), len(df_case_opinion), len(df_li_cases))

50000 410 15645 0 10


---
### Utils

In [11]:
def to_tuples(df):
    """Return the rows of ``df`` as a list of plain tuples (index excluded).

    Rows are produced column-wise with ``itertuples`` instead of ``df.values``:
    ``df.values`` first coerces the whole frame to one dtype and, for
    all-numeric frames, yields numpy scalars (e.g. ``numpy.int64``) that then
    have to be cast by hand before being used as query parameters.
    """
    return list(df.itertuples(index=False, name=None))

In [12]:
def clean_table_sql(table_name):
    """Delete every row of ``table_name`` and reset its AUTO_INCREMENT to 1.

    Uses the notebook-level ``conn``. On ``Error`` the transaction is rolled
    back and the error plus traceback are printed; nothing is re-raised.
    """
    # Identifiers cannot be bound as parameters, so quote the name with
    # backticks and double any backtick it contains.
    quoted = "`{}`".format(table_name.replace("`", "``"))
    cursor = None
    try:
        cursor = conn.cursor(buffered=True)
        cursor.execute("DELETE FROM {};".format(quoted))
        cursor.execute("ALTER TABLE {} AUTO_INCREMENT = 1;".format(quoted))
        conn.commit()
    except Error as error :
        conn.rollback()
        print("Failed to delete MySQL table {}".format(error))
        traceback.print_exc(file=sys.stdout)
    finally:
        # Release the cursor on both the success and the failure path.
        if cursor is not None:
            cursor.close()

In [13]:
def get_parent_ids(table, column_table, df, column_df):
    """Look up the ``id`` of the ``table`` row matching each value of ``df[column_df]``.

    Parameters
    ----------
    table : name of the database table to search.
    column_table : column of ``table`` compared against the values.
    df : pandas DataFrame holding the values to look up.
    column_df : column of ``df`` whose values are looked up, in row order.

    Returns
    -------
    list with one id per row of ``df`` (the first match when several exist).

    Raises
    ------
    LookupError
        When a value has no matching row in ``table``.
    """
    pid = []
    # Repeated values are common in the looked-up columns; remember each
    # answer so every distinct value is queried only once.
    found = {}
    # The value is bound as a parameter so quotes inside it cannot break the
    # statement; identifiers cannot be bound and are formatted in as before.
    query = "SELECT id FROM `{}` WHERE {} = %s".format(table, column_table)
    cursor = conn.cursor()
    try:
        for data in df[column_df]:
            if data not in found:
                # numpy scalars are unwrapped to plain Python values for binding.
                param = data.item() if hasattr(data, 'item') else data
                cursor.execute(query, (param,))
                records = cursor.fetchall()
                if len(records) == 0:
                    raise LookupError(
                        "no row in `{}` with {} = {!r}".format(table, column_table, data))
                found[data] = records[0][0]
            pid.append(found[data])
    finally:
        cursor.close()
    return pid

---
### Tables Hierarchies

1. Court
2. Case
3. Case Opinion
4. LI Case
5. Subject
6. Case subject
7. Country
8. Case country
9. Case citation
10. Legislation citation
11. Case related decision


In [14]:
# Empty every table, walking the hierarchy listed above from bottom to top.
for table_to_clean in ('case_related_decision',
                       'legislation_citation',
                       'case_citation',
                       'case_country',
                       'country',
                       'case_subject',
                       'subject',
                       'legal_intelligence_case',
                       'case_opinion',
                       'case',
                       'court'):
    clean_table_sql(table_to_clean)

---
### Courts

In [15]:
# Start from an empty frame; its columns are filled in by the following cells.
court = pd.DataFrame()

In [16]:
# One entry per distinct authority, with every '"' in the name swapped for '-'.
courts_list = [authority.replace('"','-') for authority in df_case['authority'].unique()]
court['name'] = courts_list

In [17]:
# Remaining court attributes are filled with the 'NULL' placeholder.
for placeholder_column in ('type', 'level', 'country', 'language',
                           'jurisdiction', 'law_area', 'authority_level'):
    court.loc[:, placeholder_column] = 'NULL'

In [19]:
#court.loc[810,'name']#.head(2)

In [20]:
# Rows of `court` as tuples, ready to be inserted.
tuples = to_tuples(df=court)

In [21]:
# Insert the courts. Values are bound as parameters instead of being spliced
# into the SQL text, so names containing quotes or backslashes cannot break
# the statement.
cursor = None
try:
    cursor = conn.cursor()
    query = ("INSERT INTO `court`"
             "(name, type, level, country, language, jurisdiction, law_area, authority_level)"
             "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
    for data in tuples:
        # The name is stored as given. The 'NULL' placeholders of the other
        # columns used to be spliced in unquoted, i.e. as SQL NULL; binding
        # None keeps that.
        params = (data[0],) + tuple(None if value == 'NULL' else value
                                    for value in data[1:])
        cursor.execute(query, params)
    conn.commit()
    print('court added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

court added


In [22]:
# Number of court rows built above.
len(court)

250

---
### Case

In [23]:
# Column names used for the `case` frame, in the order the source columns are
# selected in the next cell.
case_columns = ['date', 'description', 'language', 'venue', 'abstract',
                'procedure_type', 'lodge_date', 'link', 'ecli']
case = pd.DataFrame()

In [24]:
# Select the source columns and rename them to the `case` column names
# ('alternative_sources' -> 'link', 'case_id' -> 'ecli').
temp = df_case[['date',
                'description',
                'language',
                'venue',
                'abstract',
                'procedure_type',
                'lodge_date',
                'alternative_sources',
                'case_id']].copy()
temp.columns = case_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
case = pd.concat([case, temp], ignore_index=True)

In [25]:
# Court names were stored with '"' replaced by '-', so the authorities are
# normalised the same way before the lookup; otherwise a name containing '"'
# would never match its court row.
parents_ids = get_parent_ids(
    'court', 'name',
    df_case['authority'].str.replace('"', '-', regex=False).to_frame('authority'),
    'authority')

In [26]:
# 'NULL' dates are replaced by this fixed placeholder; anything else is parsed
# one value at a time, unparseable values becoming NaT.
placeholder_date = pd.to_datetime('1900-01-01 00:00:00')

case['name'] = 'NULL'
case['court_id'] = parents_ids
for date_column in ('date', 'lodge_date'):
    case[date_column] = [
        placeholder_date if raw == 'NULL' else pd.to_datetime(raw, errors='coerce')
        for raw in case[date_column]
    ]

In [27]:
# Preview the first rows of the `case` frame.
case.head()

Unnamed: 0,date,description,language,venue,abstract,procedure_type,lodge_date,link,ecli,name,court_id
0,1913-01-10,,NL,,,,1900-01-01,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:GHAMS:1913:11,,1
1,1913-02-21,,NL,,,,1900-01-01,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:HR:1913:36,,2
2,1913-03-07,,NL,,,,1900-01-01,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:HR:1913:204,,2
3,1913-02-05,,NL,,,,1900-01-01,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:RBZWO:1913:7,,3
4,1913-05-02,,NL,,,,1900-01-01,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:GHAMS:1913:14,,1


In [28]:
# Rows of `case` as tuples, ready to be inserted.
tuples = to_tuples(df=case)

In [29]:
# Insert the cases in a single transaction.
cursor = None
try:
    cursor = conn.cursor(buffered=True)
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `case`"
             "(date, description, language, venue, abstract, procedure_type, lodge_date, link, ecli, name, court_id)"
             "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

case added


---
### Case opinion advocate general

In [30]:
# Column names used for the `case_opinion` frame, in the order the source
# columns are selected in the next cell.
case_opinion_columns = ['date', 'case_number', 'description', 'language',
                        'country', 'venue', 'abstract', 'procedure_type',
                        'authority', 'ecli']
case_opinion = pd.DataFrame()

In [31]:
# Select the source columns and rename them to the `case_opinion` column names
# ('case_id' -> 'ecli').
temp = df_case_opinion[['date',
                        'case_number',
                        'description',
                        'language',
                        'country',
                        'venue',
                        'abstract',
                        'procedure_type',
                        'authority',
                        'case_id']].copy()
temp.columns = case_opinion_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
case_opinion = pd.concat([case_opinion, temp], ignore_index=True)

In [32]:
# 'NULL' dates are replaced by a fixed placeholder; anything else is parsed one
# value at a time, unparseable values becoming NaT.
placeholder_date = pd.to_datetime('1900-01-01 00:00:00')
case_opinion['date'] = [
    placeholder_date if raw == 'NULL' else pd.to_datetime(raw, errors='coerce')
    for raw in case_opinion['date']
]

In [33]:
# Preview the `case_opinion` frame.
case_opinion.head(2)

Unnamed: 0,date,case_number,description,language,country,venue,abstract,procedure_type,authority,ecli


In [34]:
# Rows of `case_opinion` as tuples, ready to be inserted.
tuples = to_tuples(df=case_opinion)

In [35]:
# Insert the case opinions in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `case_opinion`"
             "(date, case_number, description, language, country, venue, abstract, procedure_type, authority, ecli)"
             "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case_opinion added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

case_opinion added


---
### Legal Intelligence Cases

In [36]:
# Column names used for the `legal_intelligence_case` frame, in the order the
# source columns are selected in the next cell.
legal_intelligence_case_columns = [
    'ecli', 'name', 'date', 'abstract', 'subject', 'link',
    'DisplayTitle', 'OriginalUrl', 'Jurisidiction', 'DocumentType',
    'CaseNumber', 'PublicationNumber', 'IssueNumber', 'lodge_date',
    'DateAdded', 'Sources', 'UrlWithAutoLogOnToken', 'court',
    'DisplaySubtitle',
]
legal_intelligence_case = pd.DataFrame()

In [37]:
# Select the source columns and rename them to the `legal_intelligence_case`
# column names (e.g. 'Title' -> 'name', 'LawArea' -> 'subject', 'Url' -> 'link',
# 'case_number' -> 'CaseNumber', 'authority' -> 'court').
temp = df_li_cases[['ecli',
                    'Title',
                    'date',
                    'abstract',
                    'LawArea',
                    'Url',
                    'DisplayTitle',
                    'OriginalUrl',
                    'Jurisdiction',
                    'DocumentType',
                    'case_number',
                    'PublicationNumber',
                    'IssueNumber',
                    'lodge_date',
                    'DateAdded',
                    'Sources',
                    'UrlWithAutoLogOnToken',
                    'authority',
                    'DisplaySubtitle']].copy()
temp.columns = legal_intelligence_case_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
legal_intelligence_case = pd.concat([legal_intelligence_case, temp], ignore_index=True)

In [38]:
# Preview the first two Legal Intelligence rows.
legal_intelligence_case.head(2)

Unnamed: 0,ecli,name,date,abstract,subject,link,DisplayTitle,OriginalUrl,Jurisidiction,DocumentType,CaseNumber,PublicationNumber,IssueNumber,lodge_date,DateAdded,Sources,UrlWithAutoLogOnToken,court,DisplaySubtitle
0,ECLI:NL:RVS:1996:BL2445,Casus Onder meer een milieuvereniging komt in ...,19960101,Casus Onder meer een milieuvereniging komt in ...,['Ruimtelijk Bestuursrecht/Milieurecht/Energie...,http://www.legalintelligence.com/documents/496...,Casus Onder meer een milieuvereniging komt in ...,https://www.navigator.nl/#/document/da26a4d31f...,Nederland,Rechtspraak,E03941877,,,19960426,20110520,Milieurecht Totaal,http://www.legalintelligence.com/documents/496...,Raad van State,Milieurecht Totaal - Module-serie - Module Mil...
1,ECLI:NL:HR:1996:ZC2221,INVORDERING Ketenaansprakelijkheid. Aanneming ...,19960101,Aan Osman E (E) is over de jaren 1988 t/m 1990...,"['Belastingrecht', 'Onbekend']",http://www.legalintelligence.com/documents/519...,"V-N 1996/4960, 13 - Met noot - Redactie - Hoge...",https://www.navigator.nl/#/document/inod72bcbb...,Nederland,Rechtspraak,16137,"V-N 1996/4960, 13",VN/1996/000062,19961206,20110623,VN,http://www.legalintelligence.com/documents/519...,Hoge Raad,INVORDERING Ketenaansprakelijkheid. Aanneming ...


In [39]:
def _parse_li_date(value):
    """Parse one Legal Intelligence date value.

    The preview above shows these columns holding numbers such as 19960101
    (yyyymmdd). ``pd.to_datetime`` reads a bare integer as nanoseconds since
    the epoch, which would turn every such date into 1970-01-01, so numeric
    values are parsed with an explicit ``%Y%m%d`` format. Strings are parsed
    as before; 'NULL' maps to the 1900-01-01 placeholder and unparseable
    values to NaT.
    """
    if isinstance(value, str):
        if value == 'NULL':
            return pd.to_datetime('1900-01-01 00:00:00')
        return pd.to_datetime(value, errors='coerce')
    try:
        return pd.to_datetime(str(int(value)), format='%Y%m%d', errors='coerce')
    except (TypeError, ValueError, OverflowError):
        return pd.NaT


# The source frame has the same row order as legal_intelligence_case, which was
# built from it with a fresh index.
legal_intelligence_case['date'] = [_parse_li_date(i) for i in df_li_cases['date']]
legal_intelligence_case['lodge_date'] = [_parse_li_date(i) for i in df_li_cases['lodge_date']]
legal_intelligence_case['DateAdded'] = [_parse_li_date(i) for i in df_li_cases['DateAdded']]

In [40]:
# Rows of `legal_intelligence_case` as tuples, ready to be inserted.
tuples = to_tuples(df=legal_intelligence_case)

In [41]:
# Insert the Legal Intelligence cases in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `legal_intelligence_case`"
             "(ecli, name, date, abstract, subject, link, DisplayTitle, OriginalUrl, Jurisidiction, DocumentType, CaseNumber, PublicationNumber, IssueNumber, lodge_date, DateAdded, Sources, UrlWithAutoLogOnToken, court, DisplaySubtitle)"
             "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('legal intelligence added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

legal intelligence added


---
### Subjects

In [42]:
# Each case lists its subjects in one string, separated by "; ".
subjects_as_list = [row.split("; ") for row in df_case.subject]
# Distinct subject names across all cases. A plain set comprehension replaces
# the private pandas helper pd.core.common.flatten.
unique_subjects = {name for names in subjects_as_list for name in names}

In [43]:
# One row per distinct subject, sorted by name; standard_name is filled with
# the 'NULL' placeholder.
subject = pd.DataFrame()
subject['name'] = sorted(unique_subjects)
subject['standard_name'] = 'NULL'

In [44]:
# Preview the first two subjects.
subject.head(2)

Unnamed: 0,name,standard_name
0,Aanbestedingsrecht,
1,Ambtenarenrecht,


In [45]:
#clean_table_sql('subject')

In [46]:
# Rows of `subject` as tuples, ready to be inserted.
tuples = to_tuples(df=subject)

In [47]:
# Insert the subjects in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `subject`"
             "(name, standard_name)"
             "VALUES (%s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('subject added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

subject added


---
### Case - Subject

In [48]:
# One row per (case, subject) pair: the subject column is replaced by the
# per-case subject lists and then exploded.
df_subjects_case = df_case[['subject','case_id']]
df_subjects_case.loc[:, 'subject'] = subjects_as_list
df_subjects_case = df_subjects_case.explode(column='subject')
len(df_subjects_case)

68130

In [49]:
# `subject` ids for every (case, subject) row, in row order.
parents_ids_subjects = get_parent_ids(table='subject', column_table='name',
                                      df=df_subjects_case, column_df='subject')
len(parents_ids_subjects)

68130

In [50]:
# `case` ids for every (case, subject) row, looked up by ECLI, in row order.
parents_ids_cases = get_parent_ids(table='case', column_table='ecli',
                                   df=df_subjects_case, column_df='case_id')
len(parents_ids_cases)

68130

In [51]:
# Pair the two id lists row by row.
case_subject = pd.DataFrame(dict(case_id=parents_ids_cases,
                                 subject_id=parents_ids_subjects))

In [52]:
#case_subject.sort_values(by='case_id').tail(50) #quality check
#[(type(i), type(j)) for i,j in zip(case_subject.subject_id,case_subject.case_id)]

In [53]:
# Preview the last three (case_id, subject_id) pairs.
case_subject.tail(3)

Unnamed: 0,case_id,subject_id
68127,49998,6
68128,49999,20
68129,50000,20


In [54]:
# Rows of `case_subject` as tuples of plain Python ints.
tuples = [(int(case_pk), int(subject_pk))
          for case_pk, subject_pk in to_tuples(case_subject)]

In [55]:
# Insert the case-subject links in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `case_subject`"
             "(case_id, subject_id)"
             "VALUES (%s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case_subject added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

case_subject added


---
### Countries

In [56]:
# Preview the countries as loaded from the CSV.
df_country.head()

Unnamed: 0,country_id,eu,name,flag
0,AT,1,Austria,https://www.countryflags.io/at/flat/64.png
1,BE,1,Belgium,https://www.countryflags.io/be/flat/64.png
2,BG,1,Bulgaria,https://www.countryflags.io/bg/flat/64.png
3,CH,0,Switzerland,https://www.countryflags.io/ch/flat/64.png
4,CZ,1,Czechia,https://www.countryflags.io/cz/flat/64.png


In [57]:
# Columns absent from the CSV: language gets the 'NULL' placeholder, eea is 0.
df_country['language'] = 'NULL'
df_country['eea'] = 0

In [58]:
# Column names used for the `country` frame, in the order the source columns
# are selected in the next cell.
country_columns = ['id', 'name', 'language', 'flag', 'eu', 'eea']
country = pd.DataFrame()

In [59]:
# Select the source columns and rename them to the `country` column names
# ('country_id' -> 'id').
temp = df_country[['country_id',
                   'name',
                   'language',
                   'flag',
                   'eu',
                   'eea']].copy()
temp.columns = country_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
country = pd.concat([country, temp], ignore_index=True)

In [60]:
# Preview the first three rows of the `country` frame.
country.head(3)

Unnamed: 0,id,name,language,flag,eu,eea
0,AT,Austria,,https://www.countryflags.io/at/flat/64.png,1,0
1,BE,Belgium,,https://www.countryflags.io/be/flat/64.png,1,0
2,BG,Bulgaria,,https://www.countryflags.io/bg/flat/64.png,1,0


In [61]:
# Rows of `country` as tuples, ready to be inserted.
tuples = to_tuples(df=country)

In [62]:
# Insert the countries in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `country`"
             "(id, name, language, flag, eu, eea)"
             "VALUES (%s, %s, %s, %s, %s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('country added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

country added


---
### Case - Country

In [63]:
# Preview the sampled source cases.
df_case.head(2)

Unnamed: 0,case_id,date,case_number,description,language,venue,abstract,procedure_type,lodge_date,country,...,abstract.1,procedure_type.1,lodge_date.1,country.1,subject.1,authority.1,legal_references.1,related_cases.1,alternative_sources.1,full_text
85,ECLI:NL:GHAMS:1913:11,1913-01-10,Onbekend,,NL,,,,,NL,...,,,,,,,,,,
93,ECLI:NL:HR:1913:36,1913-02-21,Onbekend,,NL,,,,,NL,...,,,,,,,,,,


In [64]:
#as_list = [list(row.split("; ")) for row in df_case.country] #in case there is more than one country in the row

In [65]:
# (case, country) pairs; explode() only has an effect on list-valued entries.
df_country_case = df_case[['case_id','country']].explode(column='country')
len(df_country_case)

50000

In [66]:
# `country` ids for every (case, country) row, in row order.
parents_ids_countries = get_parent_ids(table='country', column_table='id',
                                       df=df_country_case, column_df='country')
len(parents_ids_countries)

50000

In [67]:
# `case` ids for every (case, country) row, looked up by ECLI, in row order.
parents_ids_cases = get_parent_ids(table='case', column_table='ecli',
                                   df=df_country_case, column_df='case_id')
len(parents_ids_cases)

50000

In [68]:
# Pair the two id lists row by row.
case_country = pd.DataFrame(dict(case_id=parents_ids_cases,
                                 country_id=parents_ids_countries))

In [69]:
# Preview the last three (case_id, country_id) pairs.
case_country.tail(3)

Unnamed: 0,case_id,country_id
49997,49998,NL
49998,49999,NL
49999,50000,NL


In [70]:
# Rows of `case_country` as tuples, ready to be inserted.
tuples = to_tuples(df=case_country)

In [71]:
# Insert the case-country links in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `case_country`"
             "(case_id, country_id)"
             "VALUES (%s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    # This message used to read 'case_subject added', copied from the
    # case_subject cell.
    print('case_country added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

case_subject added


---
### Case law citation

In [72]:
# Preview the sampled case-law citations.
df_case_citation.head(4)

Unnamed: 0,source_ecli,source_paragraph,target_ecli,target_paragraph
39,ECLI:NL:GHAMS:1914:39,,ECLI:NL:HR:1913:129,
73,ECLI:NL:GHSGR:1914:10,,ECLI:NL:HR:1913:129,
95,ECLI:NL:GHSGR:1914:30,,ECLI:NL:HR:1913:129,
133,ECLI:NL:GHSHE:1914:27,,ECLI:NL:HR:1913:129,


In [73]:
# Attach the `case` id of the citing case, looked up by its ECLI.
parent_ids = get_parent_ids(table='case', column_table='ecli',
                            df=df_case_citation, column_df='source_ecli')
df_case_citation['case_id'] = parent_ids

In [74]:
# Column names used for the `case_citation` frame.
case_citation_columns = ['source_ecli', 'source_paragraph', 'target_ecli',
                         'target_paragraph', 'case_id']
case_citation = pd.DataFrame()

In [75]:
# Select the citation columns in `case_citation` column order.
temp = df_case_citation[['source_ecli',
                         'source_paragraph',
                         'target_ecli',
                         'target_paragraph',
                         'case_id']].copy()
temp.columns = case_citation_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
case_citation = pd.concat([case_citation, temp], ignore_index=True)

In [76]:
# Preview the first three rows of the `case_citation` frame.
case_citation.head(3)

Unnamed: 0,source_ecli,source_paragraph,target_ecli,target_paragraph,case_id
0,ECLI:NL:GHAMS:1914:39,,ECLI:NL:HR:1913:129,,24
1,ECLI:NL:GHSGR:1914:10,,ECLI:NL:HR:1913:129,,26
2,ECLI:NL:GHSGR:1914:30,,ECLI:NL:HR:1913:129,,16


In [77]:
# Rows of `case_citation` as tuples, ready to be inserted.
tuples = to_tuples(df=case_citation)

In [78]:
# Insert the case-law citations in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `case_citation`"
             "(source_ecli, source_paragraph, target_ecli, target_paragraph, case_id)"
             "VALUES (%s, %s, %s, %s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case_citation added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

case_citation added


---
### Legislation citation

In [79]:
# Preview the sampled legislation citations.
df_legislation_citation.head(4)

Unnamed: 0,source_ecli,source_paragraph,target_article,target_article_paragraph,target_article_webpage
21,ECLI:NL:GHAMS:1962:AX9852,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,http://wetten.overheid.nl/id/BWBR0005537/2016-...
22,ECLI:NL:GHAMS:1962:AX9852,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,http://wetten.overheid.nl/id/BWBR0005537/2009-...
23,ECLI:NL:GHAMS:1962:AX9852,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,http://wetten.overheid.nl/id/BWBR0005537/2018-...
153,ECLI:NL:GHARN:1962:24,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,http://wetten.overheid.nl/id/BWBR0005537/2016-...


In [80]:
# Attach the `case` id of the citing case, looked up by its ECLI.
parent_ids = get_parent_ids(table='case', column_table='ecli',
                            df=df_legislation_citation, column_df='source_ecli')
df_legislation_citation['case_id'] = parent_ids

In [81]:
# Target name columns are filled with the 'NULL' placeholder.
for placeholder_column in ('target_name', 'target_sourcename'):
    df_legislation_citation.loc[:, placeholder_column] = 'NULL'

In [82]:
# Column names used for the `legislation_citation` frame, in the order the
# source columns are selected in the next cell.
legislation_citation_columns = ['source_ecli', 'source_paragraph', 'target_id',
                                'target_paragraph', 'target_name',
                                'target_sourcename', 'target_link', 'case_id']
legislation_citation = pd.DataFrame()

In [83]:
# Select the source columns and rename them to the `legislation_citation`
# column names ('target_article' -> 'target_id',
# 'target_article_paragraph' -> 'target_paragraph',
# 'target_article_webpage' -> 'target_link').
temp = df_legislation_citation[['source_ecli',
                                'source_paragraph',
                                'target_article',
                                'target_article_paragraph',
                                'target_name',
                                'target_sourcename',
                                'target_article_webpage',
                                'case_id']].copy()
temp.columns = legislation_citation_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
legislation_citation = pd.concat([legislation_citation, temp], ignore_index=True)

In [84]:
# Preview the first three rows of the `legislation_citation` frame.
legislation_citation.head(3)

Unnamed: 0,source_ecli,source_paragraph,target_id,target_paragraph,target_name,target_sourcename,target_link,case_id
0,ECLI:NL:GHAMS:1962:AX9852,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,,,http://wetten.overheid.nl/id/BWBR0005537/2016-...,935
1,ECLI:NL:GHAMS:1962:AX9852,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,,,http://wetten.overheid.nl/id/BWBR0005537/2009-...,935
2,ECLI:NL:GHAMS:1962:AX9852,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,,,http://wetten.overheid.nl/id/BWBR0005537/2018-...,935


In [85]:
# Rows of `legislation_citation` as tuples, ready to be inserted.
tuples = to_tuples(df=legislation_citation)

In [86]:
# Insert the legislation citations in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `legislation_citation`"
             "(source_ecli, source_paragraph, target_id, target_paragraph, target_name, target_sourcename, target_link, case_id)"
             "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('legislation_citation added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

legislation_citation added


---
### Case related decision

In [87]:
# Source column -> `case_related_decision` column.
related_column_names = {'case_id': 'source_ecli',
                        'related_cases': 'referencing_case_ecli'}
df_case_related = df_case[list(related_column_names)].rename(columns=related_column_names)

In [88]:
# Attach the `case` id of each source case, looked up by its ECLI.
parent_ids = get_parent_ids(table='case', column_table='ecli',
                            df=df_case_related, column_df='source_ecli')
df_case_related['case_id'] = parent_ids

In [89]:
# referenced_case_ecli is filled with the 'NULL' placeholder.
df_case_related['referenced_case_ecli'] = 'NULL'

In [90]:
# Preview the related-decision source frame.
df_case_related.head(2)

Unnamed: 0,source_ecli,referencing_case_ecli,case_id,referenced_case_ecli
85,ECLI:NL:GHAMS:1913:11,,1,
93,ECLI:NL:HR:1913:36,,2,


In [91]:
# Column names used for the `case_related_decision` frame.
case_related_decision_columns = ['source_ecli', 'referencing_case_ecli',
                                 'referenced_case_ecli', 'case_id']
case_related_decision = pd.DataFrame()

In [92]:
# Select the columns in `case_related_decision` column order.
temp = df_case_related[['source_ecli',
                        'referencing_case_ecli',
                        'referenced_case_ecli',
                        'case_id']].copy()
temp.columns = case_related_decision_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
case_related_decision = pd.concat([case_related_decision, temp], ignore_index=True)

In [93]:
# Preview the first three rows of the `case_related_decision` frame.
case_related_decision.head(3)

Unnamed: 0,source_ecli,referencing_case_ecli,referenced_case_ecli,case_id
0,ECLI:NL:GHAMS:1913:11,,,1
1,ECLI:NL:HR:1913:36,,,2
2,ECLI:NL:HR:1913:204,,,3


In [94]:
# Rows of `case_related_decision` as tuples, ready to be inserted.
tuples = to_tuples(df=case_related_decision)

In [95]:
# Insert the related decisions in a single transaction.
cursor = None
try:
    cursor = conn.cursor()
    # The statement is the same for every row, so it is built once.
    query = ("INSERT INTO `case_related_decision`"
             "(source_ecli, referencing_case_ecli, referenced_case_ecli, case_id)"
             "VALUES (%s, %s, %s, %s)")
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case_related_decision added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor on the failure path too.
    if cursor is not None:
        cursor.close()

case_related_decision added
