## Data flow to mysql

In [26]:
import mysql.connector as mdb
from mysql.connector import Error
import sys, traceback

In [2]:
def open_database(host, db_name, username, password):
    try:
        connection = mdb.connect(host=host,
                                 database=db_name,
                                 user=username,
                                 password=password)
        if connection.is_connected():
           db_Info = connection.get_server_info()
           print("Connected to MySQL database... MySQL Server version on ",db_Info)
           cursor = connection.cursor()
           cursor.execute("select database();")
           record = cursor.fetchone()
           print ("Your connected to - ", record)
           cursor.close()
    except Error as e :
        print ("Error while connecting to MySQL", e)
    return connection

In [3]:
def close_database(connection):
    #closing database connection.
    if(connection.is_connected()):
        connection.close()
        print("MySQL connection is closed")

In [4]:
conn = open_database('localhost', 'caselaw', 'root', 'H3rnandez!')

Connected to MySQL database... MySQL Server version on  8.0.15
Your connected to -  ('caselaw',)


In [5]:
query = """SELECT * FROM caselaw.case LIMIT 10;"""
cursor = conn.cursor()
print(cursor.execute(query))

None


---
### Datasets

In [6]:
import pandas as pd
import numpy as np

In [7]:
!ls ../data_dump

case.csv                               cases_metadata.csv
case_opinion_from_advocate_general.csv countries.csv
caselawDB.sql                          legal_intelligence_cases.csv
caselaw_citations.csv                  legislation_citations.csv


In [8]:
df_case = pd.read_csv("../data_dump/case.csv").replace(np.nan, 'NULL', regex=True)
df_country = pd.read_csv("../data_dump/countries.csv").replace(np.nan, 'NULL', regex=True)
df_li_cases = pd.read_csv("../data_dump/legal_intelligence_cases.csv").replace(np.nan, 'NULL', regex=True)
df_case_opinion = pd.read_csv("../data_dump/case_opinion_from_advocate_general.csv").replace(np.nan, 'NULL', regex=True)
df_case_citation = pd.read_csv("../data_dump/caselaw_citations.csv").replace(np.nan, 'NULL', regex=True)
df_legislation_citation = pd.read_csv("../data_dump/legislation_citations.csv").replace(np.nan, 'NULL', regex=True)

  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)


---
### Tables Hierarchies

1. Court
2. Case
3. Case Opinion
4. LI Case
5. Subject
6. Case subject
7. Country
8. Case country
9. Case citation
10. Legislation citation
11. Case related decision


In [9]:
case_opinion = pd.DataFrame()
case_opinion_columns = ['id',
                'date',
                'case_number',
                'description',
                'language',
                'country',
                'venue',
                'abstract',
                'procedure_type',
                'authority',
                'ecli']

In [10]:
legal_intelligence_case = pd.DataFrame()
legal_intelligence_case_columns = ['id',
           'ecli',
           'name',
           'date',
           'abstract',
           'subject',
           'link',
           'DisplayTitle',
           'OriginalUrl',
           'Jurisidiction',
           'DocumentType',
           'CaseNumber',
           'PublicationNumber',
           'IssueNumber',
           'lodge_date',
           'DateAdded',
           'Sources',
           'UrlWithAutoLogOnToken',
           'court',
           'DisplaySubtitle']

In [11]:
subject = pd.DataFrame()
subject_columns = ['id',
           'name',
           'standard_name']

In [12]:
case_subject = pd.DataFrame()
case_subject_columns = ['case_id',
                'subject_id']

In [13]:
country = pd.DataFrame()
country_columns = ['id',
           'name',
           'language',
           'flag',
           'eu',
           'eea']

In [14]:
case_country = pd.DataFrame()
case_country_columns = ['case_id',
                'country_id']

In [15]:
case_citation = pd.DataFrame()
case_citation_columns = ['id',
                 'source_ecli',
                 'source_paragraph',
                 'target_ecli',
                 'target_paragraph']

In [16]:
legislation_citation = pd.DataFrame()
legislation_citation_columns = ['id',
                        'source_paragraph',
                        'target_id',
                        'target_paragraph',
                        'target_name',
                        'target_sourcename',
                        'target_link',
                        'case_id']

In [17]:
case_related_decision = pd.DataFrame()
case_related_decision_columns = ['id',
                'referencing_case_ecli',
                'referenced_case_ecli',
                'case_id']

In [18]:
def to_tuples(df):
    return [tuple(x) for x in df.values]

In [27]:
def clean_table_sql(table_name):
    try:
        cursor = conn.cursor()
        query_delete = """delete from `{}`;""".format(table_name)
        cursor.execute(query_delete)
        query = """ALTER TABLE `{}` AUTO_INCREMENT = 1;""".format(table_name)
        cursor.execute(query)
        conn.commit()
        cursor.close()
    except Error as error :
        conn.rollback()
        print("Failed to insert into MySQL table {}".format(error))
        traceback.print_exc(file=sys.stdout)

---
### Courts

In [20]:
court = pd.DataFrame()

In [21]:
courts_list = df_case.authority.unique()
#court['id'] = range(1, len(courts_list)+1)
court['name'] = courts_list

In [22]:
#dummies
court.loc[:,'type'] = 'NULL'
court.loc[:,'level'] = 'NULL'
#court.loc[:,'country'] = 'NULL' #VARCHAR(2)
#court.loc[:,'language'] = 'NULL' #VARCHAR(2)
court.loc[:,'jurisdiction'] = 'NULL' 
court.loc[:,'law_area'] = 'NULL'
court.loc[:,'authority_level'] = 'NULL'

In [23]:
court.head(2)

Unnamed: 0,name,type,level,jurisdiction,law_area,authority_level
0,Gerechtshof Amsterdam,,,,,
1,Gerechtshof Arnhem,,,,,


In [24]:
tuples = to_tuples(court)

In [28]:
clean_table_sql('court')

In [29]:
try:  
    cursor = conn.cursor()
    for idx, data in enumerate(tuples):
        if "'" in data[0]:
            query = """
            INSERT INTO `court` (name, type, level, jurisdiction, law_area, authority_level)
            VALUES (\"%s\", %s, %s, %s, %s, %s)
            """%data
        else:
            query = """
            INSERT INTO `court` (name, type, level, jurisdiction, law_area, authority_level)
            VALUES ('%s', %s, %s, %s, %s, %s)
            """%data
        #print(idx, query)
        result  = cursor.execute(query)
    conn.commit()
    cursor.close()
    print('Court table created')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

Court table created


In [30]:
len(court)

1223

---
### Case

In [31]:
case = pd.DataFrame()
case_columns = ['date',
        'description',
        'language',
        'venue',
        'abstract',
        'procedure_type',
        'lodge_date',
        'link',
        'ecli']

In [32]:
temp = df_case[['date',
                'description',
                'language',
                'venue',
                'abstract',
                'procedure_type',
                'lodge_date',
                'alternative_sources',
                'case_id']]
temp.columns = case_columns
case = case.append(temp, ignore_index=True)

In [33]:
case.head()

Unnamed: 0,date,description,language,venue,abstract,procedure_type,lodge_date,link,ecli
0,1913-03-31,,NL,,,,,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:GHAMS:1913:15
1,1913-02-28,,NL,,,,,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:GHAMS:1913:28
2,1913-03-17,,NL,,,,,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:GHAMS:1913:19
3,1913-02-28,,NL,,,,,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:GHAMS:1913:4
4,1913-03-10,,NL,,,,,"\n \n <rdf:list xmlns:rdf=""htt...",ECLI:NL:GHAMS:1913:20


In [38]:
def get_parent_ids(df, columnname):    
    pid = []
    cursor = conn .cursor()
    for idx, data in enumerate(df[columnname]):
        if '"' in data:
            sql_select_Query = "SELECT id FROM court WHERE name = '{}'".format(data)
            datum = data
        else:
            sql_select_Query = "SELECT id FROM court WHERE name = \"{}\"".format(data)
            datum = data
        cursor.execute(sql_select_Query)
        records = cursor.fetchall()
         if(len(records) == 0):
            print('no records')
        #print(idx, records)
        #print(sql_select_Query)
        pid.append(records[0][0])
    cursor.close()
    return pid

In [None]:
parents_ids = get_parent_ids(df_case, 'authority')

In [42]:
len(parents_ids)

1223

In [43]:
len(case)

2632420

In [None]:
case['court_id'] = parents_ids

In [48]:
df_case.procedure_type

0          NULL
1          NULL
2          NULL
3          NULL
4          NULL
           ... 
2632415    NULL
2632416    NULL
2632417    NULL
2632418    NULL
2632419    NULL
Name: procedure_type, Length: 2632420, dtype: object

In [43]:
df_case['name'] = 'NULL'

In [None]:
df_case[[]]

In [42]:
case = pd.DataFrame()

In [None]:
case['name'] = 'NULL'

In [91]:
#pivot table
cases = pd.DataFrame()
#cases['case_id'] #incremental databse id INT NOT NULL AUTO_INCREMENT,
cases['case_ecli'] = cases_metadata.id #NOT NULL,`fk_related_decision` fk_case_opinion`
cases['date'] = pd.to_datetime(cases_metadata['date'])
#cases['name'] = 
cases['description'] = cases_metadata.description
cases['language'] = cases_metadata.language
cases['venue'] = cases_metadata.venue
cases['abstract'] = cases_metadata.abstract
cases['procedure_type'] = cases_metadata.procedure_type
#cases['lodge_date'] = cases_metadata.lodge_date
#cases['link'] = cases_metadata.alternative_sources
#cases['country_id'] #`fk_country`
#cases['subject_id'] #`fk_subject`
#cases['court_id'] #fk_authority`

In [92]:
tuples = to_tuples(cases)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `cases` (case_ecli, date, description, language, venue, abstract, procedure_type) 
        VALUES (%s, %s, %s, %s, %s, %s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('cases added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

cases added


---
### Countries

In [11]:
#catalogue
#countries = pd.DataFrame()
#countries['country_id'] #NOT NULL COMMENT 'Country ISO Code',
#countries['name'] #'English name',
#countries['language'] #'ISO code for language spoken in country',
#countries['flag'] #'Link to image file of flag',
#countries['eu'] #'Part of EU or not?',
#countries['population'] #'Current / latest population number',
#countries['eea'] #'not sure what this stands for exactly...'

In [104]:
#countries.head()

In [14]:
tuples = to_tuples(countries)
import traceback, sys
try:  
    cursor = conn.cursor(buffered=True)
    for data in tuples:
        query = """INSERT INTO `countries` (country_id, eu, name, flag, population, eea) 
        VALUES (%s, %s, %s, %s, %s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('countries added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

countries added


---
### Subjects

In [18]:
cases_metadata.subject.unique()[1:] #omit the null case

array(['Bestuursrecht', 'Bestuursrecht; Ambtenarenrecht',
       'Bestuursrecht; Socialezekerheidsrecht',
       'Bestuursrecht; Vreemdelingenrecht'], dtype=object)

In [23]:
#catalogue
subjects = pd.DataFrame()
subjects['name'] = cases_metadata.subject.unique()[1:] #omit the null case
#subjects['standard_name'] = cases_metadata.subject.unique()[1:]

In [105]:
#subjects.head()

In [24]:
tuples = to_tuples(subjects)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `subjects` (name) 
        VALUES (%s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('subjects added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

subjects added


In [106]:
#authorities.head()

In [36]:
tuples = to_tuples(authorities)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `authorities` (name, country) 
        VALUES (%s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('authorities added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

authorities added


---
### Case related decision

In [63]:
#one to one
case_related_decision = pd.DataFrame()
case_related_decision['case_ecli'] = cases_metadata.id #NOT NULL,
case_related_decision['referencing_case_id'] = cases_metadata.related_cases#NOT NULL,
#case_related_decision['referenced_case_id']

In [107]:
#case_related_decision.head()

In [65]:
tuples = to_tuples(case_related_decision)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `case_related_decision` (case_ecli, referencing_case_id) 
        VALUES (%s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('case_related_decision added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

case_related_decision added


---
### Case opinion advocate general

In [67]:
#one to one
case_opinion_from_advocate_general = pd.DataFrame()
case_opinion_from_advocate_general['case_ecli'] = cases_metadata.id #NOT NULL,
#case_opinion_from_advocate_general['date'] #
case_opinion_from_advocate_general['case_number'] = cases_metadata.case_number #
#case_opinion_from_advocate_general['description'] #
#case_opinion_from_advocate_general['language'] #
#case_opinion_from_advocate_general['country'] #
#case_opinion_from_advocate_general['venue'] #
#case_opinion_from_advocate_general['abstract'] #
#case_opinion_from_advocate_general['procedure_type'] #
#case_opinion_from_advocate_general['authority'] #

In [108]:
#case_opinion_from_advocate_general.head()

In [69]:
tuples = to_tuples(case_opinion_from_advocate_general)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `case_opinion_from_advocate_general` (case_ecli, case_number) 
        VALUES (%s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('case_opinion_from_advocate_general added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

case_opinion_from_advocate_general added


---
### Legislation citation

In [94]:
#one to many 
#legislation_citation = pd.DataFrame()
#legislation_citation['citation_id'] #INT NOT NULL,
#legislation_citation['source_ecli'] #NOT NULL,`fk_source_case_legislation`
#legislation_citation['source_paragraph']
#legislation_citation['target_id'] #NOT NULL,
#legislation_citation['target_paragraph']
#legislation_citation['target_name']
#legislation_citation['target_sourcename']
#legislation_citation['target_link']

In [95]:
legislation_citation.rename(columns = {'target_article':'target_id',
                                       'target_article_paragraph':'target_paragraph',
                                       'target_article_webpage':'target_link'}, inplace=True)

In [109]:
#legislation_citation.head()

In [97]:
tuples = to_tuples(legislation_citation)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """
                INSERT INTO legislation_citation (source_ecli, source_paragraph, target_id, target_paragraph, target_link)
                VALUES (%s, %s, %s, %s, %s)
                """
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('legislation_citation added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

legislation_citation added


---
### Case law citation

In [99]:
#one to many
#case_citation = pd.DataFrame()
#case_citation['citation_id'] #INT NOT NULL,
#case_citation['source_ecli'] #NOT NULL,`fk_source_case_cases`
#case_citation['source_paragraph'] #
#case_citation['target_ecli'] #NOT NULL,`fk_target_case_cases`
#case_citation['target_paragraph'] #

In [110]:
#case_citation.head()

In [102]:
tuples = to_tuples(case_citation)
import traceback, sys
try:  le
    cursor = conn.cursor()
    for data in tuples:
        query = """
                INSERT INTO case_citation (source_ecli, source_paragraph, target_ecli, target_paragraph)
                VALUES (%s, %s, %s, %s)
                """
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('case_citation added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

case_citation added


### Legal Intelligence Cases

In [None]:
#li_cases['id'] = #do we create this one ourselves? 
#li_cases['ecli'] 
#li_cases['name'] 
#li_cases['date'] 
#li_cases['abstract']
#li_cases['subject']
#li_cases['link']
#li_cases['procedure_type']
#li_cases['court_id']
#li_cases['DisplayTitle']
#li_cases['OriginalUrl']
#li_cases['Jurisdiction']
#li_cases['DocumentType']
#li_cases['CaseNumber']
#li_cases['PublicationDate']
#li_cases['PublicationNumber']
#li_cases['IssueNumber']
#li_cases['DateAdded']
#li_cases['Sources']
#li_cases['UrlWithAutoLogOnToken']

In [None]:
tuples = to_tuples(li_cases)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """
                INSERT INTO legal_intelligence_case (id, ecli, name, date, abstract, subject, link, procedure_type, court_id, DisplayTitle, OriginalUrl, Jurisdiction, DocumentType, CaseNumber, PublicationDate, PublicationNumber, IssueNumber, DateAdded, Sources, UrlWithAutoLogOnToken)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('legal_intelligence_case added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)