## Data flow to MySQL

In [None]:
import os
import sys, traceback
from getpass import getpass

import mysql.connector as mdb
from mysql.connector import Error

In [None]:
def open_database(host, db_name, username, password):
    """Open a MySQL connection and print the server version and current database.

    Parameters
    ----------
    host : host name passed to ``mdb.connect``.
    db_name : name of the database to select.
    username : MySQL user name.
    password : password for ``username``.

    Returns
    -------
    The connection object created by ``mdb.connect``, or ``None`` when the
    connection attempt raised a MySQL ``Error`` (the error is printed, not
    re-raised).
    """
    # Bind the name up front so the final `return` cannot hit an unbound local
    # when mdb.connect() itself raises.
    connection = None
    try:
        connection = mdb.connect(host=host,
                                 database=db_name,
                                 user=username,
                                 password=password)
        if connection.is_connected():
            db_Info = connection.get_server_info()
            print("Connected to MySQL database... MySQL Server version on ",db_Info)
            cursor = connection.cursor()
            try:
                cursor.execute("select database();")
                record = cursor.fetchone()
                print ("Your connected to - ", record)
            finally:
                # Release the cursor even if the probe query fails.
                cursor.close()
    except Error as e :
        print ("Error while connecting to MySQL", e)
    return connection

In [None]:
def close_database(connection):
    """Close ``connection`` if it is open.

    Parameters
    ----------
    connection : object exposing ``is_connected()`` and ``close()``, or ``None``.
        ``None`` (no connection was established) is accepted and ignored.

    Returns
    -------
    None
    """
    if connection is not None and connection.is_connected():
        connection.close()
        print("MySQL connection is closed")

In [4]:
# Connection settings are read from the environment so that no credentials are
# stored in the notebook. When MYSQL_PASSWORD is unset the password is prompted
# for interactively.
conn = open_database(os.environ.get('MYSQL_HOST', 'localhost'),
                     os.environ.get('MYSQL_DATABASE', 'caselaw'),
                     os.environ.get('MYSQL_USER', 'root'),
                     os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password: '))

Connected to MySQL database... MySQL Server version on  8.0.15
Your connected to -  ('caselaw',)


In [5]:
# Smoke test: read a single row from the `case` table to confirm that the
# connection works and to see the column layout.
query = """SELECT * FROM caselaw.case LIMIT 1;"""
# buffered=True makes the connector fetch the whole result set right after execute().
cursor = conn.cursor(buffered=True)
cursor.execute(query)
records = cursor.fetchall()
for row in records:
    print(row)

(1, datetime.date(2000, 2, 3), None, None, 'NL', None, None, None, datetime.date(1900, 1, 1), '\n        \n          <rdf:list xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:ecli="https://e-justice.europa.eu/ecli" xmlns:tr="http://tuchtrecht.overheid.nl/" xmlns:eu="http://publications.europa.eu/celex/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:bwb="bwb-dl" xmlns:cvdr="http://decentrale.regelgeving.overheid.nl/cvdr/" xmlns:psi="http://psi.rechtspraak.nl/" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">\n          <rdf:li>AB 2000, 272 met annotatie van J.H. van der Veen</rdf:li>\n          <rdf:li>FED 2000/514</rdf:li>\n        </rdf:list>\n      \n      \n    ', 'ECLI:NL:CBB:2000:AN6374', 1)


---
### Datasets

In [6]:
import pandas as pd
import numpy as np
import warnings
import re
warnings.filterwarnings('ignore')

In [7]:
!ls ../data_dump

case.csv                               cases_metadata.csv
case_opinion_from_advocate_general.csv countries.csv
caselaw.zip                            legal_intelligence_cases.csv
caselawDB.sql                          legislation_citations.csv
caselaw_citations.csv                  [34msample[m[m
caselaw_db.sql                         test.csv


In [8]:
def _read_dump(csv_path):
    """Read one CSV export and substitute the string 'NULL' for every NaN."""
    return pd.read_csv(csv_path).replace(np.nan, 'NULL', regex=True)


df_case = _read_dump("../data_dump/case.csv")
df_country = _read_dump("../data_dump/countries.csv")
# Legal Intelligence rows are de-duplicated on their ECLI.
df_li_cases = _read_dump("../data_dump/legal_intelligence_cases.csv").drop_duplicates(subset ="ecli")
df_case_opinion = _read_dump("../data_dump/case_opinion_from_advocate_general.csv")
# The two citation tables are loaded from the sample extracts. The full dumps are
# ../data_dump/caselaw_citations.csv and ../data_dump/legislation_citations.csv.
df_case_citation = _read_dump("../data_dump/sample/caselaw_citations_ecli_sample_2.csv")
df_legislation_citation = _read_dump("../data_dump/sample/legislation_citations_ecli_sample_2.csv")

In [9]:
df_case_citation.head()

Unnamed: 0,source_ecli,source_paragraph,target_ecli,target_paragraph
0,ECLI:NL:CRVB:2000:ZB8616,,ECLI:NL:CRVB:2004:AR7893,
1,ECLI:NL:CRVB:2000:ZB8616,,ECLI:NL:CRVB:2005:AT2142,
2,ECLI:NL:GHARN:2000:160,,ECLI:NL:HR:2002:AD8776,
3,ECLI:NL:CRVB:2000:ZB8679,,ECLI:NL:PHR:2015:153,
4,ECLI:NL:CRVB:2000:ZB8679,,ECLI:NL:PHR:2015:151,


In [10]:
# Legal Intelligence ECLIs use underscores as separators; convert them to the
# colon-separated form used by the other tables. A vectorised literal
# (non-regex) replacement is used instead of a row-by-row iterrows() loop.
df_li_cases['ecli'] = df_li_cases['ecli'].str.replace('_', ':', regex=False)

---
### Sample to run the procedure

In [11]:
df_sample = pd.read_csv("../data_dump/sample/ecli_sample_2.csv", header=None).replace(np.nan, 'NULL', regex=True)

In [12]:
# ECLI identifiers that make up the sample (first column of the sample file).
sample = list(df_sample[0].values)

# 0: keep only the cases whose ECLI is in the sample.
df_case = df_case[df_case['case_id'].isin(sample)].reset_index(drop=True)

# 1: case opinions. A citation is flagged as an opinion when its *target* ECLI
#    contains the whole-word token "PHR".
df_case_citation['is_opinion'] = [1 if re.search(r'\bPHR\b', ecli) else 0 for ecli in df_case_citation.target_ecli]
# Left-join the flagged citations with the opinion metadata on the target ECLI,
# then rename so that `ecli` is the citing case and `ecli_opinion` the opinion.
# NOTE(review): this uses every flagged citation row; the restriction to `sample`
# is only applied to df_case_citation in step 2 below. Confirm this is intended.
df_case_opinion = df_case_citation[df_case_citation['is_opinion'] == 1]\
    .merge(df_case_opinion, how='left', left_on='target_ecli', right_on='case_id')\
    .drop(columns=['source_paragraph','target_ecli','target_paragraph','is_opinion'])\
    .rename(columns={'source_ecli':'ecli', 'case_id':'ecli_opinion'})

# 2: case citations. Drop the opinion rows, then keep citations whose source is in the sample.
df_case_citation = df_case_citation[df_case_citation['is_opinion'] == 0].drop(columns='is_opinion')
df_case_citation = df_case_citation[df_case_citation['source_ecli'].isin(sample)].reset_index(drop=True)

# 3: legislation citations whose source case is in the sample.
df_legislation_citation = df_legislation_citation[df_legislation_citation['source_ecli'].isin(sample)].reset_index(drop=True)

# 4: Legal Intelligence cases whose ECLI is in the sample.
df_li_cases = df_li_cases[df_li_cases['ecli'].isin(sample)].reset_index(drop=True)

# Row counts per table after sampling.
print(len(df_case), len(df_legislation_citation), len(df_case_citation), len(df_case_opinion), len(df_li_cases))

5000 6303 1305 1069 1


In [13]:
df_case_citation.head()

Unnamed: 0,source_ecli,source_paragraph,target_ecli,target_paragraph
0,ECLI:NL:CRVB:2000:ZB8616,,ECLI:NL:CRVB:2004:AR7893,
1,ECLI:NL:CRVB:2000:ZB8616,,ECLI:NL:CRVB:2005:AT2142,
2,ECLI:NL:GHARN:2000:160,,ECLI:NL:HR:2002:AD8776,
3,ECLI:NL:CRVB:2000:ZB8679,,ECLI:NL:CRVB:2007:BA7165,
4,ECLI:NL:CRVB:2000:ZB8679,,ECLI:NL:CRVB:2014:1535,


---
### Utils

In [14]:
def to_tuples(df):
    """Return the rows of ``df.values`` as a list of tuples, one tuple per row."""
    return list(map(tuple, df.values))

In [15]:
def null_for_nones(df, columname):
    """Replace the placeholder string 'NULL' with ``None`` in one column, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    columname : str
        Name of the column to clean.

    Returns
    -------
    None

    The rows are selected with a positional boolean mask, so the frame does not
    need a default 0..n-1 index.
    """
    is_null = [isinstance(value, str) and value == 'NULL' for value in df[columname]]
    # Skip the assignment when nothing matches so the column is left untouched.
    if any(is_null):
        df.loc[is_null, columname] = None

In [16]:
def clean_table_sql(table_name):
    """Delete every row of ``table_name`` and reset its AUTO_INCREMENT counter to 1.

    Uses the notebook-level connection ``conn``. When a MySQL ``Error`` occurs
    the transaction is rolled back and the error and traceback are printed; the
    exception is not re-raised.

    Parameters
    ----------
    table_name : str
        Table to empty. It is interpolated into the statement (identifiers
        cannot be bound as parameters), so only pass trusted names.

    NOTE(review): MySQL documents ALTER TABLE as causing an implicit commit, so
    the rollback presumably cannot undo a DELETE once the ALTER has run - confirm.
    """
    cursor = None
    try:
        cursor = conn.cursor(buffered=True)
        cursor.execute("""DELETE FROM `{}`;""".format(table_name))
        cursor.execute("""ALTER TABLE `{}` AUTO_INCREMENT = 1;""".format(table_name))
        conn.commit()
    except Error as error :
        conn.rollback()
        print("Failed to delete MySQL table {}".format(error))
        traceback.print_exc(file=sys.stdout)
    finally:
        # Release the cursor on both the success and the failure path.
        if cursor is not None:
            cursor.close()

In [17]:
def get_parent_ids(table, column_table, df, column_df):
    """Look up the ``id`` of the parent row for every value of ``df[column_df]``.

    Parameters
    ----------
    table : str
        Database table to search. Interpolated into the statement, trusted input only.
    column_table : str
        Column of ``table`` that is compared with the values. Interpolated into
        the statement, trusted input only.
    df : pandas.DataFrame
        Frame holding the values to look up.
    column_df : str
        Column of ``df`` with the lookup values.

    Returns
    -------
    list
        One id per row of ``df[column_df]``, in the same order.

    Raises
    ------
    IndexError
        When a value has no matching row in ``table``.
    """
    # The value is bound as a parameter, so quotes inside it need no special handling.
    query = "SELECT id FROM `{}` WHERE {} = %s".format(table, column_table)
    ids_by_value = {}
    pid = []
    cursor = conn.cursor()
    try:
        for data in df[column_df]:
            # Each distinct value is queried once; repeated values reuse the result.
            if data not in ids_by_value:
                cursor.execute(query, (data,))
                records = cursor.fetchall()
                if len(records) == 0:
                    print('select did not find match')
                    raise IndexError(
                        "no row in `{}` where {} = {!r}".format(table, column_table, data))
                ids_by_value[data] = records[0][0]
            pid.append(ids_by_value[data])
    finally:
        cursor.close()
    return pid

---
### Table hierarchy

1. Court
2. Case
3. Case Opinion
4. LI Case
5. Subject
6. Case subject
7. Country
8. Case country
9. Case citation
10. Legislation citation
11. Case related decision


In [18]:
# Empty the tables in the reverse of the hierarchy order listed above.
for table_name in ('case_related_decision',
                   'legislation_citation',
                   'case_citation',
                   'case_country',
                   'country',
                   'case_subject',
                   'subject',
                   'legal_intelligence_case',
                   'case_opinion',
                   'case',
                   'court'):
    clean_table_sql(table_name)

---
### Courts

In [19]:
court = pd.DataFrame()

In [20]:
# One court row per distinct `authority` value in the case data.
courts_list = df_case.authority.unique()
# Double quotes inside a court name are replaced by dashes.
# NOTE(review): the later lookup get_parent_ids('court', 'name', df_case, 'authority')
# searches with the unmodified authority values, so an authority containing '"'
# would not match the name stored here. Confirm whether this replacement is still needed.
courts_list = [i.replace('"','-') for i in courts_list]
court['name'] = courts_list

In [21]:
# Placeholder columns: these attributes are not present in the source data, so
# every court gets the string 'NULL' for each of them.
for dummy_column in ('type',
                     'level',
                     'country',
                     'language',
                     'jurisdiction',
                     'law_area',
                     'authority_level'):
    court.loc[:, dummy_column] = 'NULL'

In [22]:
#court.loc[810,'name']#.head(2)

In [23]:
for col in court.columns:
    null_for_nones(court, col)

In [24]:
tuples = to_tuples(court)

In [25]:
#(\"%s\", %s, %s, %s, %s, %s, %s, %s)")
# Insert all court rows in one transaction: commit when every row succeeds,
# roll back when any insert fails.
query = ("INSERT INTO `court`"
         "(name, type, level, country, language, jurisdiction, law_area, authority_level)"
         "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('court added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

court added


In [26]:
len(court)

62

---
### Case

In [27]:
case = pd.DataFrame()
case_columns = ['date',
        'description',
        'language',
        'venue',
        'abstract',
        'procedure_type',
        'lodge_date',
        'link',
        'ecli']

In [28]:
# Select the source columns in the order of `case_columns` and rename them to
# the column names of the `case` table.
temp = df_case[['date',
                'description',
                'language',
                'venue',
                'abstract',
                'procedure_type',
                'lodge_date',
                'alternative_sources',
                'case_id']]
temp.columns = case_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
case = pd.concat([case, temp], ignore_index=True)

In [29]:
parents_ids = get_parent_ids('court', 'name', df_case, 'authority')

In [30]:
def _parse_case_date(raw):
    """Parse ``raw`` with pandas; the 'NULL' placeholder maps to 1900-01-01."""
    if raw != 'NULL':
        # errors='coerce' turns unparseable values into NaT instead of raising.
        return pd.to_datetime(raw, errors='coerce')
    return pd.to_datetime('1900-01-01 00:00:00')


# No case name is available in the source data.
case['name'] = 'NULL'
# Foreign key to the `court` table, looked up from the case authority.
case['court_id'] = parents_ids
case['date'] = list(map(_parse_case_date, case['date']))
case['lodge_date'] = list(map(_parse_case_date, case['lodge_date']))

In [31]:
case.head()

Unnamed: 0,date,description,language,venue,abstract,procedure_type,lodge_date,link,ecli,name,court_id
0,2000-02-03,,NL,,,,1900-01-01,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:CBB:2000:AN6374,,1
1,2000-02-24,,NL,,,,1900-01-01,,ECLI:NL:CBB:2000:ZG1779,,1
2,2000-02-23,,NL,,,,1900-01-01,,ECLI:NL:CBB:2000:ZG1781,,1
3,2000-01-20,,NL,,,,1900-01-01,,ECLI:NL:CRVB:2000:AI5518,,2
4,2000-02-04,,NL,,,,1900-01-01,,ECLI:NL:CRVB:2000:AI5482,,2


In [32]:
for col in case.columns:
    null_for_nones(case, col)

In [33]:
tuples = to_tuples(case)

In [34]:
# Insert all case rows in one transaction: commit when every row succeeds,
# roll back when any insert fails.
query = ("INSERT INTO `case`"
         "(date, description, language, venue, abstract, procedure_type, lodge_date, link, ecli, name, court_id)"
         "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
cursor = None
try:
    cursor = conn.cursor(buffered=True)
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

case added


---
### Case opinion advocate general

In [35]:
case_opinion = pd.DataFrame()
case_opinion_columns = ['date',
                        'case_number',
                        'description',
                        'language',
                        'country',
                        'venue',
                        'abstract',
                        'procedure_type',
                        'authority',
                        'ecli',
                       'ecli_opinion']

In [36]:
# Select the opinion columns in the order of `case_opinion_columns`.
temp = df_case_opinion[['date',
                        'case_number',
                        'description',
                        'language',
                        'country',
                        'venue',
                        'abstract',
                        'procedure_type',
                        'authority',
                        'ecli',
                        'ecli_opinion']]
temp.columns = case_opinion_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
case_opinion = pd.concat([case_opinion, temp], ignore_index=True)

In [37]:
def _drop_leading_whitespace_char(text):
    """Remove a single leading tab, newline or space from ``text`` if there is one."""
    # text[:1] is '' for an empty string, whereas text[0] would raise IndexError.
    return text[1:] if text[:1] in ['\t', '\n', ' '] else text


case_opinion.description = [_drop_leading_whitespace_char(i) for i in case_opinion.description.apply(str)]
# Abstracts are trimmed the same way and then cut to their first 200 characters.
# NOTE(review): 200 presumably matches the width of the database column - confirm.
case_opinion.abstract = [_drop_leading_whitespace_char(i)[:200] for i in case_opinion.abstract.apply(str)]

In [38]:
# Keep only the rows that have an `ecli_opinion` value, keep the first row per
# citing `ecli`, and renumber the index from 0.
# TODO(review): the original author marked this step as "to check".
case_opinion = case_opinion[~case_opinion.ecli_opinion.isna()]\
    .drop_duplicates(subset='ecli', keep="first")\
    .reset_index(drop=True)

In [39]:
case_opinion.replace(np.nan,'NULL',regex = True, inplace=True)

In [40]:
case_opinion.date = [pd.to_datetime(i, errors='coerce') if i != 'NULL' else pd.to_datetime('1900-01-01 00:00:00') for i in case_opinion['date']]

In [41]:
for col in case_opinion.columns:
    null_for_nones(case_opinion, col)

In [42]:
tuples = to_tuples(case_opinion)

In [43]:
# Insert all case opinion rows in one transaction: commit when every row
# succeeds, roll back when any insert fails.
query = ("INSERT INTO `case_opinion`"
         "(date, case_number, description, language, country, venue, abstract, procedure_type, authority, ecli, ecli_opinion)"
         "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case_opinion added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

case_opinion added


---
### Legal Intelligence Cases

In [44]:
legal_intelligence_case = pd.DataFrame()
legal_intelligence_case_columns = ['ecli',
                                   'name',
                                   'date',
                                   'abstract',
                                   'subject',
                                   'link',
                                   'DisplayTitle',
                                   'OriginalUrl',
                                   'Jurisdiction',
                                   'DocumentType',
                                   'CaseNumber',
                                   'PublicationNumber',
                                   'IssueNumber',
                                   'lodge_date',
                                   'DateAdded',
                                   'Sources',
                                   'UrlWithAutoLogOnToken',
                                   'court',
                                   'DisplaySubtitle']

In [45]:
# Select the Legal Intelligence columns in the order of
# `legal_intelligence_case_columns` and rename them to the table's column names.
temp = df_li_cases[['ecli',
                    'Title',
                    'date',
                    'abstract',
                    'LawArea',
                    'Url',
                    'DisplayTitle',
                    'OriginalUrl',
                    'Jurisdiction',
                    'DocumentType',
                    'case_number',
                    'PublicationNumber',
                    'IssueNumber',
                    'lodge_date',
                    'DateAdded',
                    'Sources',
                    'UrlWithAutoLogOnToken',
                    'authority',
                    'DisplaySubtitle']]
temp.columns = legal_intelligence_case_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
legal_intelligence_case = pd.concat([legal_intelligence_case, temp], ignore_index=True)

In [46]:
legal_intelligence_case.head(2)

Unnamed: 0,ecli,name,date,abstract,subject,link,DisplayTitle,OriginalUrl,Jurisdiction,DocumentType,CaseNumber,PublicationNumber,IssueNumber,lodge_date,DateAdded,Sources,UrlWithAutoLogOnToken,court,DisplaySubtitle
0,ECLI:NL:RVS:2000:AA6821,"Module Ruimtelijke ordening 2000/4001: ABRvS, ...",20000101,Afdeling bestuursrechtspraak van de Raad van S...,['Ruimtelijk Bestuursrecht/Milieurecht/Energie...,http://www.legalintelligence.com/documents/497...,"Module Ruimtelijke ordening 2000/4001: ABRvS, ...",https://www.navigator.nl/#/document/dae45509e5...,Nederland,Rechtspraak,199902237/01,,,20000511,20110628,Module Ruimtelijke Ordening,http://www.legalintelligence.com/documents/497...,Raad van State,Module-serie - Module Ruimtelijke Ordening


In [47]:
#legal_intelligence_case['name'] = [i[0:250] for i in legal_intelligence_case['name']]
#legal_intelligence_case['DisplayTitle'] = [i[0:250] for i in legal_intelligence_case['DisplayTitle']]
def _parse_li_date(value):
    """Convert one Legal Intelligence date value to a pandas Timestamp.

    The 'NULL' placeholder maps to 1900-01-01. Eight-digit values (YYYYMMDD, as
    shown in the preview above) are parsed with an explicit format, because
    pd.to_datetime interprets a bare number as nanoseconds since the epoch.
    Anything else goes through the default parser, with unparseable values
    becoming NaT.
    """
    if isinstance(value, str) and value == 'NULL':
        return pd.to_datetime('1900-01-01 00:00:00')
    text = str(value)
    # A numeric column that contained missing values is read as float (20000101.0).
    if text.endswith('.0'):
        text = text[:-2]
    if len(text) == 8 and text.isdigit():
        return pd.to_datetime(text, format='%Y%m%d', errors='coerce')
    return pd.to_datetime(value, errors='coerce')


legal_intelligence_case['date'] = [_parse_li_date(i) for i in df_li_cases['date']]
legal_intelligence_case['lodge_date'] = [_parse_li_date(i) for i in df_li_cases['lodge_date']]
legal_intelligence_case['DateAdded'] = [_parse_li_date(i) for i in df_li_cases['DateAdded']]

In [48]:
for col in legal_intelligence_case.columns:
    null_for_nones(legal_intelligence_case, col)

In [49]:
tuples = to_tuples(legal_intelligence_case)

In [50]:
# Insert all Legal Intelligence rows in one transaction: commit when every row
# succeeds, roll back when any insert fails.
query = ("INSERT INTO `legal_intelligence_case`"
         "(ecli, name, date, abstract, subject, link, DisplayTitle, OriginalUrl, Jurisdiction, DocumentType, CaseNumber, PublicationNumber, IssueNumber, lodge_date, DateAdded, Sources, UrlWithAutoLogOnToken, court, DisplaySubtitle)"
         "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('legal intelligence added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

legal intelligence added


---
### Subjects

In [51]:
# Each case lists its subjects in one string separated by "; ".
subjects_as_list = [row.split("; ") for row in df_case.subject]
# Flatten one level with a set comprehension instead of the private helper
# pd.core.common.flatten, which is not part of the public pandas API.
unique_subjects = {
    subject_name
    for subject_names in subjects_as_list
    for subject_name in subject_names
}

In [52]:
# One row per distinct subject name, sorted alphabetically.
subject = pd.DataFrame()
subject['name'] = list(sorted(unique_subjects))
# Cases without a subject carry the placeholder 'NULL'; store it as
# 'Niet Gespecificeerd'. The assignment targets every column of the matching
# rows, and `name` is the only column at this point.
subject.loc[subject.name=='NULL'] = 'Niet Gespecificeerd'
# No standardised name is available; filled with the 'NULL' placeholder.
subject.loc[:,'standard_name'] = 'NULL'

In [53]:
subject.head(2)

Unnamed: 0,name,standard_name
0,Ambtenarenrecht,
1,Belastingrecht,


In [54]:
#clean_table_sql('subject')

In [55]:
for col in subject.columns:
    null_for_nones(subject, col)

In [56]:
tuples = to_tuples(subject)

In [57]:
# Insert all subject rows in one transaction: commit when every row succeeds,
# roll back when any insert fails.
query = ("INSERT INTO `subject`"
         "(name, standard_name)"
         "VALUES (%s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('subject added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

subject added


---
### Case - Subject

In [58]:
# Pair every case with each of its subjects: replace the "; "-separated string
# by the already split list and explode it into one row per (case, subject).
# .assign() builds a new frame, which avoids assigning into a slice of df_case.
df_subjects_case = (
    df_case[['subject', 'case_id']]
    .assign(subject=pd.Series(subjects_as_list, index=df_case.index))
    .explode('subject')
)
len(df_subjects_case)

6658

In [59]:
df_subjects_case.head(2)

Unnamed: 0,subject,case_id
0,,ECLI:NL:CBB:2000:AN6374
1,Bestuursrecht,ECLI:NL:CBB:2000:ZG1779


In [60]:
df_subjects_case.subject = ['Niet Gespecificeerd' if i == 'NULL' else i for i in df_subjects_case.subject]

In [61]:
parents_ids_subjects = get_parent_ids('subject', 'name', df_subjects_case, 'subject')
len(parents_ids_subjects)

6658

In [62]:
parents_ids_cases = get_parent_ids('case', 'ecli', df_subjects_case, 'case_id')
len(parents_ids_cases)

6658

In [63]:
case_subject = pd.DataFrame({'case_id':parents_ids_cases,
                             'subject_id':parents_ids_subjects})

In [64]:
#case_subject.sort_values(by='case_id').tail(50) #quality check
#[(type(i), type(j)) for i,j in zip(case_subject.subject_id,case_subject.case_id)]

In [65]:
case_subject.tail(3)

Unnamed: 0,case_id,subject_id
6655,4998,12
6656,4999,11
6657,5000,3


In [66]:
for col in case_subject.columns:
    null_for_nones(case_subject, col)

In [67]:
# One (case_id, subject_id) tuple per link row.
tuples = to_tuples(case_subject)
# Cast both ids to built-in int before they are passed to the MySQL connector.
tuples = [(int(i), int(j)) for i,j in tuples]

In [68]:
# Insert all case-subject links in one transaction: commit when every row
# succeeds, roll back when any insert fails.
query = ("INSERT INTO `case_subject`"
         "(case_id, subject_id)"
         "VALUES (%s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case_subject added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

case_subject added


---
### Countries

In [69]:
df_country.head()

Unnamed: 0,country_id,eu,name,flag
0,AT,1,Austria,https://www.countryflags.io/at/flat/64.png
1,BE,1,Belgium,https://www.countryflags.io/be/flat/64.png
2,BG,1,Bulgaria,https://www.countryflags.io/bg/flat/64.png
3,CH,0,Switzerland,https://www.countryflags.io/ch/flat/64.png
4,CZ,1,Czechia,https://www.countryflags.io/cz/flat/64.png


In [70]:
df_country.loc[:,'language'] = 'NULL'
df_country.loc[:,'eea'] = 0

In [71]:
country = pd.DataFrame()
country_columns = ['id',
                   'name',
                   'language',
                   'flag',
                   'eu',
                   'eea']

In [72]:
# Select the country columns in the order of `country_columns`; `country_id`
# becomes the table's `id`.
temp = df_country[['country_id',
                   'name',
                   'language',
                   'flag',
                   'eu',
                   'eea']]
temp.columns = country_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
country = pd.concat([country, temp], ignore_index=True)

In [73]:
country.head(3)

Unnamed: 0,id,name,language,flag,eu,eea
0,AT,Austria,,https://www.countryflags.io/at/flat/64.png,1,0
1,BE,Belgium,,https://www.countryflags.io/be/flat/64.png,1,0
2,BG,Bulgaria,,https://www.countryflags.io/bg/flat/64.png,1,0


In [74]:
for col in country.columns:
    null_for_nones(country, col)

In [75]:
tuples = to_tuples(country)

In [76]:
# Insert all country rows in one transaction: commit when every row succeeds,
# roll back when any insert fails.
query = ("INSERT INTO `country`"
         "(id, name, language, flag, eu, eea)"
         "VALUES (%s, %s, %s, %s, %s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('country added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

country added


---
### Case - Country

In [77]:
df_case.head(2)

Unnamed: 0,case_id,date,case_number,description,language,venue,abstract,procedure_type,lodge_date,country,...,abstract.1,procedure_type.1,lodge_date.1,country.1,subject.1,authority.1,legal_references.1,related_cases.1,alternative_sources.1,full_text
0,ECLI:NL:CBB:2000:AN6374,2000-02-03,AWB97/1593,,NL,,,,,NL,...,,,,,,,,,,
1,ECLI:NL:CBB:2000:ZG1779,2000-02-24,98/1344;5130,,NL,,,,,NL,...,,,,,,,,,,


In [78]:
#as_list = [list(row.split("; ")) for row in df_case.country] #in case there is more than one country in the row

In [79]:
# Link table source: one (case, country) pair per case.
df_country_case = df_case[['case_id','country']]
# Splitting multi-country values into lists is currently disabled:
#df_country_case.loc[:,'country'] = as_list
# explode() returns non-list values unchanged, so with the split disabled the
# frame keeps one row per case.
df_country_case = df_country_case.explode('country')
len(df_country_case)

5000

In [80]:
parents_ids_countries = get_parent_ids('country', 'id', df_country_case, 'country')
len(parents_ids_countries)

5000

In [81]:
parents_ids_cases = get_parent_ids('case', 'ecli', df_country_case, 'case_id')
len(parents_ids_cases)

5000

In [82]:
case_country = pd.DataFrame({'case_id':parents_ids_cases,
                             'country_id':parents_ids_countries})

In [83]:
case_country.tail(3)

Unnamed: 0,case_id,country_id
4997,4998,NL
4998,4999,NL
4999,5000,NL


In [84]:
for col in case_country.columns:
    null_for_nones(case_country, col)

In [85]:
tuples = to_tuples(case_country)
#tuples = [(int(i), int(j)) for i,j in tuples]

In [86]:
# Insert all case-country links in one transaction: commit when every row
# succeeds, roll back when any insert fails.
query = ("INSERT INTO `case_country`"
         "(case_id, country_id)"
         "VALUES (%s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    # The message names the table that was actually filled (it used to say 'case_subject').
    print('case_country added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

case_subject added


---
### Case law citation

In [87]:
df_case_citation.head(4)

Unnamed: 0,source_ecli,source_paragraph,target_ecli,target_paragraph
0,ECLI:NL:CRVB:2000:ZB8616,,ECLI:NL:CRVB:2004:AR7893,
1,ECLI:NL:CRVB:2000:ZB8616,,ECLI:NL:CRVB:2005:AT2142,
2,ECLI:NL:GHARN:2000:160,,ECLI:NL:HR:2002:AD8776,
3,ECLI:NL:CRVB:2000:ZB8679,,ECLI:NL:CRVB:2007:BA7165,


In [88]:
parent_ids = get_parent_ids('case', 'ecli', df_case_citation, 'source_ecli')
df_case_citation['case_id'] = parent_ids

In [89]:
case_citation = pd.DataFrame()
case_citation_columns = ['source_ecli',
                         'source_paragraph',
                         'target_ecli',
                         'target_paragraph',
                        'case_id']

In [90]:
# Select the citation columns in the order of `case_citation_columns`.
temp = df_case_citation[['source_ecli',
                         'source_paragraph',
                         'target_ecli',
                         'target_paragraph',
                         'case_id']]
temp.columns = case_citation_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
case_citation = pd.concat([case_citation, temp], ignore_index=True)

In [91]:
case_citation.head(3)

Unnamed: 0,source_ecli,source_paragraph,target_ecli,target_paragraph,case_id
0,ECLI:NL:CRVB:2000:ZB8616,,ECLI:NL:CRVB:2004:AR7893,,6
1,ECLI:NL:CRVB:2000:ZB8616,,ECLI:NL:CRVB:2005:AT2142,,6
2,ECLI:NL:GHARN:2000:160,,ECLI:NL:HR:2002:AD8776,,12


In [92]:
for col in case_citation.columns:
    null_for_nones(case_citation, col)

In [93]:
tuples = to_tuples(case_citation)

In [94]:
# Insert all case citation rows in one transaction: commit when every row
# succeeds, roll back when any insert fails.
query = ("INSERT INTO `case_citation`"
         "(source_ecli, source_paragraph, target_ecli, target_paragraph, case_id)"
         "VALUES (%s, %s, %s, %s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case_citation added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

case_citation added


---
### Legislation citation

In [95]:
df_legislation_citation.head(4)

Unnamed: 0,source_ecli,source_paragraph,target_article,target_article_paragraph,target_article_webpage,article_name
0,ECLI:NL:CRVB:2000:AI5482,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,http://wetten.overheid.nl/id/BWBR0006169/2006-...,"Wet voorzieningen gehandicapten, Artikel 2"
1,ECLI:NL:CRVB:2000:AI5482,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,http://wetten.overheid.nl/id/BWBR0006169/2006-...,Wet voorzieningen gehandicapten
2,ECLI:NL:CRVB:2000:ZB8679,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,http://wetten.overheid.nl/id/BWBR0002221/2010-...,"Algemene Ouderdomswet, Artikel 3"
3,ECLI:NL:CRVB:2000:ZB8679,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,http://wetten.overheid.nl/id/BWBR0002221/2013-...,"Algemene Ouderdomswet, Artikel 6"


In [96]:
parent_ids = get_parent_ids('case', 'ecli', df_legislation_citation, 'source_ecli')
df_legislation_citation['case_id'] = parent_ids

In [97]:
df_legislation_citation.loc[:,'target_name'] = 'NULL'
df_legislation_citation.loc[:,'target_sourcename'] = 'NULL'

In [98]:
legislation_citation = pd.DataFrame()
legislation_citation_columns = ['source_ecli',
                                'source_paragraph',
                                'target_id',
                                'target_paragraph',
                                'target_name',
                                'target_sourcename',
                                'target_link',
                                'article_name',
                                'case_id']

In [99]:
# Select the legislation citation columns in the order of
# `legislation_citation_columns` and rename them to the table's column names.
temp = df_legislation_citation[['source_ecli',
                                'source_paragraph',
                                'target_article',
                                'target_article_paragraph',
                                'target_name',
                                'target_sourcename',
                                'target_article_webpage',
                                'article_name',
                                'case_id']]
temp.columns = legislation_citation_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
legislation_citation = pd.concat([legislation_citation, temp], ignore_index=True)

In [100]:
legislation_citation.head(3)

Unnamed: 0,source_ecli,source_paragraph,target_id,target_paragraph,target_name,target_sourcename,target_link,article_name,case_id
0,ECLI:NL:CRVB:2000:AI5482,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,,,http://wetten.overheid.nl/id/BWBR0006169/2006-...,"Wet voorzieningen gehandicapten, Artikel 2",5
1,ECLI:NL:CRVB:2000:AI5482,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,,,http://wetten.overheid.nl/id/BWBR0006169/2006-...,Wet voorzieningen gehandicapten,5
2,ECLI:NL:CRVB:2000:ZB8679,,http://linkeddata.overheid.nl/terms/bwb/id/BWB...,,,,http://wetten.overheid.nl/id/BWBR0002221/2010-...,"Algemene Ouderdomswet, Artikel 3",13


In [101]:
for col in legislation_citation.columns:
    null_for_nones(legislation_citation, col)

In [102]:
tuples = to_tuples(legislation_citation)

In [103]:
# Insert all legislation citation rows in one transaction: commit when every
# row succeeds, roll back when any insert fails.
query = ("INSERT INTO `legislation_citation`"
         "(source_ecli, source_paragraph, target_id, target_paragraph, target_name, target_sourcename, target_link, article_name, case_id)"
         "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('legislation_citation added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

legislation_citation added


---
### Case related decision

In [104]:
df_case_related = df_case[['case_id','related_cases']]\
    .rename(columns = {'case_id':'source_ecli', 
                       'related_cases': 'referencing_case_ecli'})

In [105]:
parent_ids = get_parent_ids('case', 'ecli', df_case_related, 'source_ecli')
df_case_related['case_id'] = parent_ids

In [106]:
df_case_related.loc[:,'referenced_case_ecli'] = 'NULL'

In [107]:
df_case_related.head(2)

Unnamed: 0,source_ecli,referencing_case_ecli,case_id,referenced_case_ecli
0,ECLI:NL:CBB:2000:AN6374,,1,
1,ECLI:NL:CBB:2000:ZG1779,,2,


In [108]:
case_related_decision = pd.DataFrame()
case_related_decision_columns = ['source_ecli',
                                 'referencing_case_ecli',
                                'referenced_case_ecli',
                                'case_id']

In [109]:
# Select the related-decision columns in the order of
# `case_related_decision_columns`.
temp = df_case_related[['source_ecli',
                        'referencing_case_ecli',
                        'referenced_case_ecli',
                        'case_id']]
temp.columns = case_related_decision_columns
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
case_related_decision = pd.concat([case_related_decision, temp], ignore_index=True)

In [110]:
case_related_decision.head(3)

Unnamed: 0,source_ecli,referencing_case_ecli,referenced_case_ecli,case_id
0,ECLI:NL:CBB:2000:AN6374,,,1
1,ECLI:NL:CBB:2000:ZG1779,,,2
2,ECLI:NL:CBB:2000:ZG1781,,,3


In [111]:
for col in case_related_decision.columns:
    null_for_nones(case_related_decision, col)

In [112]:
tuples = to_tuples(case_related_decision)

In [113]:
# Insert all related-decision rows in one transaction: commit when every row
# succeeds, roll back when any insert fails.
query = ("INSERT INTO `case_related_decision`"
         "(source_ecli, referencing_case_ecli, referenced_case_ecli, case_id)"
         "VALUES (%s, %s, %s, %s)")
cursor = None
try:
    cursor = conn.cursor()
    for data in tuples:
        cursor.execute(query, data)
    conn.commit()
    print('case_related_decision added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)
finally:
    # Release the cursor even when an insert fails.
    if cursor is not None:
        cursor.close()

case_related_decision added
