## Data flow to mysql

In [2]:
import mysql.connector as mdb
from mysql.connector import Error

In [3]:
def open_database(host, db_name, username, password):
    try:
        connection = mdb.connect(host=host,
                                 database=db_name,
                                 user=username,
                                 password=password)
        if connection.is_connected():
           db_Info = connection.get_server_info()
           print("Connected to MySQL database... MySQL Server version on ",db_Info)
           cursor = connection.cursor()
           cursor.execute("select database();")
           record = cursor.fetchone()
           print ("Your connected to - ", record)
           cursor.close()
    except Error as e :
        print ("Error while connecting to MySQL", e)
    return connection

In [4]:
def close_database(connection):
    #closing database connection.
    if(connection.is_connected()):
        connection.close()
        print("MySQL connection is closed")

In [5]:
conn = open_database('localhost', 'caselaw', 'root', 'H3rnandez!')

Error while connecting to MySQL 2003: Can't connect to MySQL server on 'localhost:3306' (61 Connection refused)


UnboundLocalError: local variable 'connection' referenced before assignment

In [6]:
query = """SELECT * FROM caselaw.countries LIMIT 10;"""
cursor = conn.cursor()
print(cursor.execute(query))

NameError: name 'conn' is not defined

---
### Datasets

In [7]:
import pandas as pd
import numpy as np

In [8]:
cases_metadata = pd.read_csv("../data/cases_metadata.csv").replace(np.nan, 'NULL', regex=True)
case_citation = pd.read_csv("../data/caselaw_citations.csv").replace(np.nan, 'NULL', regex=True)
legislation_citation = pd.read_csv("../data/legislation_citations.csv").replace(np.nan, 'NULL', regex=True)
countries = pd.read_csv("../data/countries.csv").replace(np.nan, 'NULL', regex=True)

FileNotFoundError: [Errno 2] File ../data/cases_metadata.csv does not exist: '../data/cases_metadata.csv'

In [103]:
#read the Legal Intelligence case table 
#double check what name we gave the file
legal_intel_cases = pd.read_csv("../data/li_cases.csv").replace(np.nan, 'NULL', regex=True)

In [9]:
def to_tuples(df):
    return [tuple(x) for x in df.values]

---
### Countries

In [11]:
#catalogue
#countries = pd.DataFrame()
#countries['country_id'] #NOT NULL COMMENT 'Country ISO Code',
#countries['name'] #'English name',
#countries['language'] #'ISO code for language spoken in country',
#countries['flag'] #'Link to image file of flag',
#countries['eu'] #'Part of EU or not?',
#countries['population'] #'Current / latest population number',
#countries['eea'] #'not sure what this stands for exactly...'

In [104]:
#countries.head()

### Question from Marion
We are now reading a csv file called 'countries' where each row in the file corresponds to a country, and contains information like 'flag' and 'population'. These info cannot be found where reading cases or their metadata, we have to add them manually right? Do we then assume that before dumping this dataframe into the sql database, we are manually filling in the entries that were missing about the countries?

In [14]:
tuples = to_tuples(countries)
import traceback, sys
try:  
    cursor = conn.cursor(buffered=True)
    for data in tuples:
        query = """INSERT INTO `countries` (country_id, eu, name, flag, population, eea) 
        VALUES (%s, %s, %s, %s, %s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('countries added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

countries added


---
### Subjects

In [18]:
cases_metadata.subject.unique()[1:] #omit the null case

array(['Bestuursrecht', 'Bestuursrecht; Ambtenarenrecht',
       'Bestuursrecht; Socialezekerheidsrecht',
       'Bestuursrecht; Vreemdelingenrecht'], dtype=object)

In [23]:
#catalogue
subjects = pd.DataFrame()
subjects['name'] = cases_metadata.subject.unique()[1:] #omit the null case
#subjects['standard_name'] = cases_metadata.subject.unique()[1:]

In [105]:
#subjects.head()

In [24]:
tuples = to_tuples(subjects)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `subjects` (name) 
        VALUES (%s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('subjects added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

subjects added


---
### Courts

In [34]:
#catalogue
courts = pd.DataFrame()
courts['name'] = cases_metadata.authority.unique()
#courts['type'] #
#courts['level'] #
courts['country'] = 'NL'
#courts['language'] 
#courts['jurisdiction'] #
#courts['law_area']
#courts['authority_level']

In [106]:
#authorities.head()

In [36]:
tuples = to_tuples(authorities)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `authorities` (name, country) 
        VALUES (%s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('authorities added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

authorities added


---
### Case related decision

In [63]:
#one to one
case_related_decision = pd.DataFrame()
case_related_decision['case_ecli'] = cases_metadata.id #NOT NULL,
case_related_decision['referencing_case_id'] = cases_metadata.related_cases#NOT NULL,
#case_related_decision['referenced_case_id']

In [107]:
#case_related_decision.head()

In [65]:
tuples = to_tuples(case_related_decision)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `case_related_decision` (case_ecli, referencing_case_id) 
        VALUES (%s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('case_related_decision added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

case_related_decision added


---
### Case opinion advocate general

In [67]:
#one to one
case_opinion_from_advocate_general = pd.DataFrame()
case_opinion_from_advocate_general['case_ecli'] = cases_metadata.id #NOT NULL,
#case_opinion_from_advocate_general['date'] #
case_opinion_from_advocate_general['case_number'] = cases_metadata.case_number #
#case_opinion_from_advocate_general['description'] #
#case_opinion_from_advocate_general['language'] #
#case_opinion_from_advocate_general['country'] #
#case_opinion_from_advocate_general['venue'] #
#case_opinion_from_advocate_general['abstract'] #
#case_opinion_from_advocate_general['procedure_type'] #
#case_opinion_from_advocate_general['authority'] #

In [108]:
#case_opinion_from_advocate_general.head()

In [69]:
tuples = to_tuples(case_opinion_from_advocate_general)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `case_opinion_from_advocate_general` (case_ecli, case_number) 
        VALUES (%s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('case_opinion_from_advocate_general added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

case_opinion_from_advocate_general added


---
### Cases

In [91]:
#pivot table
cases = pd.DataFrame()
#cases['case_id'] #incremental databse id INT NOT NULL AUTO_INCREMENT,
cases['case_ecli'] = cases_metadata.id #NOT NULL,`fk_related_decision` fk_case_opinion`
cases['date'] = pd.to_datetime(cases_metadata['date'])
#cases['name'] = 
cases['description'] = cases_metadata.description
cases['language'] = cases_metadata.language
cases['venue'] = cases_metadata.venue
cases['abstract'] = cases_metadata.abstract
cases['procedure_type'] = cases_metadata.procedure_type
#cases['lodge_date'] = cases_metadata.lodge_date
#cases['link'] = cases_metadata.alternative_sources
#cases['country_id'] #`fk_country`
#cases['subject_id'] #`fk_subject`
#cases['court_id'] #fk_authority`

In [92]:
tuples = to_tuples(cases)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """INSERT INTO `cases` (case_ecli, date, description, language, venue, abstract, procedure_type) 
        VALUES (%s, %s, %s, %s, %s, %s, %s)"""
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('cases added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

cases added


---
### Legislation citation

In [94]:
#one to many 
#legislation_citation = pd.DataFrame()
#legislation_citation['citation_id'] #INT NOT NULL,
#legislation_citation['source_ecli'] #NOT NULL,`fk_source_case_legislation`
#legislation_citation['source_paragraph']
#legislation_citation['target_id'] #NOT NULL,
#legislation_citation['target_paragraph']
#legislation_citation['target_name']
#legislation_citation['target_sourcename']
#legislation_citation['target_link']

In [95]:
legislation_citation.rename(columns = {'target_article':'target_id',
                                       'target_article_paragraph':'target_paragraph',
                                       'target_article_webpage':'target_link'}, inplace=True)

In [109]:
#legislation_citation.head()

In [97]:
tuples = to_tuples(legislation_citation)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """
                INSERT INTO legislation_citation (source_ecli, source_paragraph, target_id, target_paragraph, target_link)
                VALUES (%s, %s, %s, %s, %s)
                """
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('legislation_citation added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

legislation_citation added


---
### Case law citation

In [99]:
#one to many
#case_citation = pd.DataFrame()
#case_citation['citation_id'] #INT NOT NULL,
#case_citation['source_ecli'] #NOT NULL,`fk_source_case_cases`
#case_citation['source_paragraph'] #
#case_citation['target_ecli'] #NOT NULL,`fk_target_case_cases`
#case_citation['target_paragraph'] #

In [110]:
#case_citation.head()

In [102]:
tuples = to_tuples(case_citation)
import traceback, sys
try:  le
    cursor = conn.cursor()
    for data in tuples:
        query = """
                INSERT INTO case_citation (source_ecli, source_paragraph, target_ecli, target_paragraph)
                VALUES (%s, %s, %s, %s)
                """
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('case_citation added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)

case_citation added


### Legal Intelligence Cases

In [None]:
#li_cases['id'] = #do we create this one ourselves? 
#li_cases['ecli'] 
#li_cases['name'] 
#li_cases['date'] 
#li_cases['abstract']
#li_cases['subject']
#li_cases['link']
#li_cases['procedure_type']
#li_cases['court_id']
#li_cases['DisplayTitle']
#li_cases['OriginalUrl']
#li_cases['Jurisdiction']
#li_cases['DocumentType']
#li_cases['CaseNumber']
#li_cases['PublicationDate']
#li_cases['PublicationNumber']
#li_cases['IssueNumber']
#li_cases['DateAdded']
#li_cases['Sources']
#li_cases['UrlWithAutoLogOnToken']

In [None]:
tuples = to_tuples(li_cases)
import traceback, sys
try:  
    cursor = conn.cursor()
    for data in tuples:
        query = """
                INSERT INTO legal_intelligence_case (id, ecli, name, date, abstract, subject, link, procedure_type, court_id, DisplayTitle, OriginalUrl, Jurisdiction, DocumentType, CaseNumber, PublicationDate, PublicationNumber, IssueNumber, DateAdded, Sources, UrlWithAutoLogOnToken)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """
        result  = cursor.execute(query, data)
    conn.commit()
    cursor.close()
    print('legal_intelligence_case added')
except Error as error :
    conn.rollback()
    print("Failed to insert into MySQL table {}".format(error))
    traceback.print_exc(file=sys.stdout)