In [None]:
import pandas as pd
import sqlite3


# Load the raw export CSV into a DataFrame and render it in the notebook.
df = pd.read_csv(filepath_or_buffer='2018-2010_export.csv')
display(df)

In [None]:
# Open (or create) the raw SQLite database and clear any previous copy of the
# `export` table before writing the DataFrame into it.
conn = sqlite3.connect('india_export.db')
cur = conn.cursor()
cur.execute('''DROP TABLE IF EXISTS export''')

# Store the DataFrame as table `export`; the DataFrame index is not written.
df.to_sql(name='export', con=conn, if_exists='replace', index=False)
conn.commit()

In [None]:
# Read the `export` table back from SQLite; as the cell's last expression the
# resulting DataFrame is displayed.
pd.read_sql('select * from export', conn)

In [None]:
# Close the connection to the raw export database.
conn.close()

# Steps to normalize the India export database

- Inspect the table and determine how many tables to split the database into
    - Four tables:
        - year 
        - country   
        - commodity 
        - a table that joins country, year, commodity, and value
        
- Steps to create the year table
    - Get the list of unique years
    - Write the CREATE TABLE statement
    - Write an insert function


In [None]:
import pandas as pd
import sqlite3
from sqlite3 import Error

def create_connection(db_file, delete_db=False):
    """Open a SQLite connection to ``db_file`` with foreign keys enabled.

    Args:
        db_file: Path of the database file to open.
        delete_db: When true and ``db_file`` already exists, the file is
            removed first so the connection starts from an empty database.

    Returns:
        The open ``sqlite3`` connection, or ``None`` when connecting raised a
        ``sqlite3.Error`` (the error is printed instead of propagated).
    """
    import os

    if delete_db:
        if os.path.exists(db_file):
            os.remove(db_file)

    connection = None
    try:
        connection = sqlite3.connect(db_file)
        # Foreign-key enforcement is a per-connection setting in SQLite.
        connection.execute("PRAGMA foreign_keys = 1")
    except Error as exc:
        print(exc)
    return connection


def create_table(conn, create_table_sql):
    """Execute a CREATE TABLE statement on ``conn``.

    Args:
        conn: Open sqlite3 connection.
        create_table_sql: The SQL text to execute.

    Any ``sqlite3.Error`` is printed rather than raised; nothing is returned.
    """
    try:
        conn.cursor().execute(create_table_sql)
    except Error as exc:
        print(exc)
        
def execute_sql_statement(sql_statement, conn):
    """Run ``sql_statement`` on ``conn`` and return every result row.

    Args:
        sql_statement: SQL text to execute (no parameters are bound).
        conn: Open sqlite3 connection.

    Returns:
        The list produced by ``fetchall()`` on the executing cursor.
    """
    cursor = conn.cursor()
    cursor.execute(sql_statement)
    return cursor.fetchall()


In [None]:
# Re-open the raw export database through the helper, which also switches on
# foreign-key enforcement for the connection.
conn = create_connection('india_export.db')

In [None]:
# Distinct years present in the raw table, as plain ints in ascending order.
sql_statement = "SELECT DISTINCT year from export ORDER BY year"

years = [int(row[0]) for row in execute_sql_statement(sql_statement, conn)]
print(years)

In [None]:
# Schema of the Year dimension table: the year value itself is the key.
create_table_sql = """CREATE TABLE [Year] (
    [YEAR] INTEGER NOT NULL PRIMARY KEY
);
"""

# If this cell is re-run, release the previous handle before the database file
# is deleted below. On a fresh kernel `conn_norm` does not exist yet, so an
# unconditional close() would raise NameError.
if 'conn_norm' in globals() and conn_norm is not None:
    conn_norm.close()

# delete_db=True: start the normalized database from an empty file.
conn_norm = create_connection('india_export_norm.db', True)
create_table(conn_norm, create_table_sql)

# Show the newly created table, then close the handle.
sql_statement = "SELECT * FROM Year"
df = pd.read_sql_query(sql_statement, conn_norm)
display(df)
conn_norm.close()

In [None]:
# Re-open the normalized database and display the contents of the Year table.
conn_norm = create_connection('india_export_norm.db')
sql_statement = "SELECT * FROM Year"
df = pd.read_sql_query(sql=sql_statement, con=conn_norm)
display(df)

In [None]:
def insert_year(conn, values):
    """Insert one row into the YEAR table.

    Args:
        conn: Open sqlite3 connection.
        values: Parameter sequence holding the single YEAR value.

    Returns:
        ``lastrowid`` of the cursor that performed the insert.
    """
    statement = ''' INSERT INTO YEAR(YEAR)
              VALUES(?) '''
    cursor = conn.cursor()
    cursor.execute(statement, values)
    return cursor.lastrowid

# Insert every year from `years`, one row per call, with the loop wrapped in
# the connection's context manager.
with conn_norm:
    for year in years:
        insert_year(conn_norm, (year, ))


In [None]:
# Display the Year table through the current normalized-database connection.
sql_statement = "SELECT * FROM Year"
df = pd.read_sql_query(sql=sql_statement, con=conn_norm)
display(df)

In [None]:
# Close the normalized-database connection.
conn_norm.close()


In [None]:
# Open a new connection to the normalized database and display the Year table.
conn_norm = create_connection('india_export_norm.db')
sql_statement = "SELECT * FROM Year"
df = pd.read_sql_query(sql=sql_statement, con=conn_norm)
display(df)

In [None]:
# Distinct country names from the raw table, in alphabetical order.
sql_statement = "SELECT DISTINCT country from export ORDER BY country"

countries = [row[0] for row in execute_sql_statement(sql_statement, conn)]
print(countries)

In [None]:
def insert_country(conn, values):
    """Insert one row into the COUNTRY table.

    Args:
        conn: Open sqlite3 connection.
        values: Parameter sequence holding the single COUNTRY value.

    Returns:
        ``lastrowid`` of the cursor that performed the insert.
    """
    statement = ''' INSERT INTO COUNTRY(COUNTRY)
              VALUES(?) '''
    cursor = conn.cursor()
    cursor.execute(statement, values)
    return cursor.lastrowid

# Country dimension: the country name itself is the primary key.
create_table_sql = """CREATE TABLE IF NOT EXISTS [Country] (
    [Country] TEXT NOT NULL PRIMARY KEY
);
"""
create_table(conn_norm, create_table_sql)

# Insert each distinct country as a one-element parameter tuple.
with conn_norm:
    for country in countries:
        insert_country(conn_norm, (country,))

# Display the resulting table.
sql_statement = "SELECT * FROM country"
df = pd.read_sql_query(sql=sql_statement, con=conn_norm)
display(df)


In [None]:
# Distinct commodity descriptions from the raw table, ordered by name.
# `commodities` keeps the rows exactly as fetchall() returned them (one
# selected column per row); they are not unpacked to bare strings here.
sql_statement = "SELECT DISTINCT COMMODITY FROM export order by COMMODITY"
commodities = execute_sql_statement(sql_statement, conn)
print(commodities)
print(len(commodities))

In [None]:
def insert_commodity(conn, values):
    """Insert one row into the COMMODITY table.

    Only the COMMODITY column is supplied by the statement; no HSCODE value is
    passed in.

    Args:
        conn: Open sqlite3 connection.
        values: Parameter sequence holding the single COMMODITY value.

    Returns:
        ``lastrowid`` of the cursor that performed the insert.
    """
    statement = ''' INSERT INTO COMMODITY (COMMODITY)
              VALUES(?) '''
    cursor = conn.cursor()
    cursor.execute(statement, values)
    return cursor.lastrowid

# Commodity dimension: integer HSCODE primary key plus the commodity text.
# NOTE(review): insert_commodity supplies only COMMODITY, so HSCODE values in
# this table are presumably assigned by SQLite rather than taken from the raw
# data's HSCODE column — confirm this is intended.
create_table_sql = """CREATE TABLE IF NOT EXISTS [COMMODITY] (
    [HSCODE] INTEGER NOT NULL PRIMARY KEY, 
    [COMMODITY] TEXT NOT NULL
);
"""
create_table(conn_norm, create_table_sql)



In [None]:
# Each element of `commodities` is a fetched one-column row, so it is passed
# straight through as the parameter sequence for the insert.
with conn_norm:
    for commodity in commodities:
        insert_commodity(conn_norm, commodity)

# Display the resulting table.
sql_statement = "SELECT * FROM Commodity"
df = pd.read_sql_query(sql=sql_statement, con=conn_norm)
display(df)

In [None]:
# Table 4: the table that links commodity, country and year to an export value.

def insert_value(conn, values):
    """Insert one row into the Export table.

    Args:
        conn: Open sqlite3 connection.
        values: Parameter sequence ``(HSCODE, COUNTRY, YEAR, VALUE)``.

    Returns:
        ``lastrowid`` of the cursor that performed the insert.
    """
    statement = ''' INSERT INTO Export (HSCODE, COUNTRY, YEAR, VALUE)
              VALUES(?, ?, ?, ?) '''
    cursor = conn.cursor()
    cursor.execute(statement, values)
    return cursor.lastrowid

# Export table: VALUEID is the integer primary key; HSCODE, COUNTRY and YEAR
# are declared as foreign keys into the Commodity, Country and Year tables.
# VALUE is the only column without NOT NULL.
create_table_sql = """CREATE TABLE IF NOT EXISTS [Export] (
    [VALUEID] INTEGER NOT NULL PRIMARY KEY,
    [HSCODE] INTEGER NOT NULL, 
    [COUNTRY] TEXT NOT NULL,
    [YEAR] INTEGER NOT NULL,
    [VALUE] REAL,
    FOREIGN KEY(COUNTRY) REFERENCES Country(COUNTRY),
    FOREIGN KEY(YEAR) REFERENCES YEAR(YEAR),
    FOREIGN KEY(HSCODE) REFERENCES Commodity(HSCODE)
);
"""
create_table(conn_norm, create_table_sql)

In [None]:
# Peek at the first ten raw rows (commodity text, country, year, value) read
# from the source database.
sql_statement = 'SELECT Commodity, country, year, value from export'
values = execute_sql_statement(sql_statement, conn)
print(values[:10])

In [None]:
# Fetch (HSCODE, COMMODITY) pairs from the normalized database. Note that this
# rebinds `values`, replacing the raw rows fetched in the previous cell.
sql_statement = 'SELECT HSCODE, COMMODITY from COMMODITY'
values = execute_sql_statement(sql_statement, conn_norm)
print(values[:10])

In [None]:
# Map each commodity description to its HSCODE in the normalized Commodity
# table; `values` holds (HSCODE, COMMODITY) pairs at this point.
commodity_lookup = {commodity: hscode for hscode, commodity in values}

print(commodity_lookup)

In [None]:
# Spot-check the lookup with one commodity name (raises KeyError if that exact
# text is not a key).
commodity_lookup['MEAT AND EDIBLE MEAT OFFAL.']

In [None]:
# Read every raw row and load it into the normalized Export table.
# The raw HSCODE column (value[0]) is not used: the key written to Export is
# looked up from the commodity text via `commodity_lookup`.
sql_statement = 'SELECT HSCODE, Commodity, country, year, value from export'
values = execute_sql_statement(sql_statement, conn)

# Row-by-row insert, with the loop wrapped in the connection's context manager.
with conn_norm:
    for value in values:
        text = value[1]
        hscode = commodity_lookup[text]
        insert_tuple = (hscode, value[2], value[3], value[4])
        insert_value(conn_norm, insert_tuple)


In [None]:
import time
# Scratch read of the monotonic clock; later cells use time.monotonic() to
# time the insert strategies.
time.monotonic() 



In [None]:

# Benchmark: time row-by-row inserts into Export.
#
# Start from an empty table. The previous cell already loaded these same rows,
# and CREATE TABLE IF NOT EXISTS leaves an existing table untouched, so without
# the DROP every row would end up stored twice.
conn_norm.execute('DROP TABLE IF EXISTS Export')
create_table_sql = """CREATE TABLE IF NOT EXISTS [Export] (
    [VALUEID] INTEGER NOT NULL PRIMARY KEY,
    [HSCODE] INTEGER NOT NULL, 
    [COUNTRY] TEXT NOT NULL,
    [YEAR] INTEGER NOT NULL,
    [VALUE] REAL,
    FOREIGN KEY(COUNTRY) REFERENCES Country(COUNTRY),
    FOREIGN KEY(YEAR) REFERENCES YEAR(YEAR),
    FOREIGN KEY(HSCODE) REFERENCES Commodity(HSCODE)
);
"""
create_table(conn_norm, create_table_sql)

# end_time is taken after the `with` block exits, so the measurement includes
# whatever the connection's context manager does on exit.
start_time = time.monotonic()
with conn_norm:
    for value in values:
        text = value[1]
        hscode = commodity_lookup[text]
        insert_tuple = (hscode, value[2], value[3], value[4])
        insert_value(conn_norm, insert_tuple)
end_time = time.monotonic()


print(end_time-start_time)

In [None]:
# Pre-build the (HSCODE, COUNTRY, YEAR, VALUE) parameter tuples for a bulk
# insert, translating each row's commodity text through `commodity_lookup`.
insert_tuples = [
    (commodity_lookup[value[1]], value[2], value[3], value[4])
    for value in values
]
print(insert_tuples[:100])

In [None]:

# Benchmark: time a single executemany() bulk insert into Export.

# Release the handle left open by earlier cells before opening a new one;
# rebinding `conn_norm` without closing would leak the old connection.
if 'conn_norm' in globals() and conn_norm is not None:
    conn_norm.close()
conn_norm = create_connection('india_export_norm.db')
cur = conn_norm.cursor()

# IF EXISTS keeps the cell runnable even when Export has not been created yet.
cur.execute('DROP TABLE IF EXISTS Export')
create_table_sql = """CREATE TABLE IF NOT EXISTS [Export] (
    [VALUEID] INTEGER NOT NULL PRIMARY KEY,
    [HSCODE] INTEGER NOT NULL, 
    [COUNTRY] TEXT NOT NULL,
    [YEAR] INTEGER NOT NULL,
    [VALUE] REAL,
    FOREIGN KEY(COUNTRY) REFERENCES Country(COUNTRY),
    FOREIGN KEY(YEAR) REFERENCES YEAR(YEAR),
    FOREIGN KEY(HSCODE) REFERENCES Commodity(HSCODE)
);
"""
create_table(conn_norm, create_table_sql)
sql = ''' INSERT INTO Export (HSCODE, COUNTRY, YEAR, VALUE)
              VALUES(?, ?, ?, ?) '''

# The timed span covers the bulk insert and the commit.
start_time = time.monotonic()
cur.executemany(sql, insert_tuples)
conn_norm.commit()
end_time = time.monotonic()

In [None]:
# Difference between the two time.monotonic() readings taken by the most
# recently run timing cell.
print(end_time-start_time)

In [None]:
x = 'abcd'
# NOTE(review): no parentheses, so this evaluates to the bound method object
# instead of calling it; use x.upper() if the uppercase string was intended.
x.upper

In [None]:
# NOTE(review): `commit` is only referenced here, not called, so this cell
# commits nothing; write conn_norm.commit() if a commit was intended.
conn_norm.commit