In [6]:
import sqlite3
from sqlite3 import Error


def create_connection(db_file):
    """Open a SQLite connection to ``db_file``.

    Args:
        db_file: Database path handed straight to ``sqlite3.connect``.

    Returns:
        The open ``sqlite3.Connection``, or ``None`` when connecting raises
        ``sqlite3.Error`` (the error is printed, not re-raised).
    """
    try:
        return sqlite3.connect(db_file)
    except Error as exc:
        # Best-effort: report the problem and hand the caller None.
        print(exc)
        return None

def create_table(conn, create_table_sql):
    """Execute one SQL statement (a ``CREATE TABLE`` in this notebook) on ``conn``.

    Args:
        conn: Open ``sqlite3.Connection``.
        create_table_sql: SQL text to run.

    Returns:
        None. A ``sqlite3.Error`` raised while running the statement is
        printed instead of being propagated.
    """
    try:
        conn.cursor().execute(create_table_sql)
    except Error as exc:
        print(exc)
        
# Show the first ten student first names from the in-class test database.
db_file = 'student_test_in_class.db'
# The connection must exist before a cursor is requested from it; asking for
# the cursor first only works when a `conn` is left over from an earlier run.
conn = create_connection(db_file)
cur = conn.cursor()

cur.execute("SELECT first_name  FROM students limit 10")

rows = cur.fetchall()
print(rows)
# Nothing below reuses this connection, so release it here.
conn.close()

[('Melissa',), ('Melissa',), ('Wendy',), ('Marcus',), ('Nicholas',), ('Brenda',), ('Charlene',), ('Bradley',), ('Eugene',), ('Mary',)]


In [7]:
import os
# Start from an empty database file: remove any copy left by a previous run.
db = 'depts_students.db'
if os.path.exists(db):
    os.remove(db)

# 1 -- create table sql statements
# 2 -- create insert functions
# 3 -- read files or data and use the insert function


# quiz question -- what is the purpose of []?
# Departments: one row per department, keyed by an integer DepartmentId.
create_table_departments_sql = """ CREATE TABLE [Departments] ( 
    [DepartmentId] INTEGER NOT NULL PRIMARY KEY,
    [DepartmentName] TEXT 
); """

# Students: each row may point at a department through DepartmentId
# (the column has no NOT NULL, so a student can have no department).
create_table_students_sql = """CREATE TABLE [Students] (
    [StudentId] INTEGER  PRIMARY KEY NOT NULL,
    [StudentName] TEXT NOT NULL,
    [DepartmentId] INTEGER,
    [DateOfBirth] DATE,
    FOREIGN KEY(DepartmentId) REFERENCES Departments(DepartmentId)
);"""

# The numbers mark each department's 1-based position in the tuple; the
# student rows below use those numbers as their DepartmentId.
#         1        2        3       4
depts = ('IT', 'Physics', 'Arts', 'Math')

# Each row is (StudentName, DepartmentId, DateOfBirth); None = no department.
students = (
    ('Michael', 1, '1998-10-12'),
    ('John', 1, '1998-10-12'),
    ('Jack', 1, '1998-10-12'),
    ('Sara', 2, '1998-10-12'),
    ('Sally', 2, '1998-10-12'),
    ('Jena', None, '1998-10-12'),
    ('Nancy', 2, '1998-10-12'),
    ('Adam', 3, '1998-10-12'),
    ('Stevens', 3, '1998-10-12'),
    ('George', None, '1998-10-12')
)

def insert_depts(conn, values):
    """Insert one row into ``Departments``.

    Args:
        conn: Open ``sqlite3.Connection``.
        values: One-element sequence holding the ``DepartmentName``.

    Returns:
        ``lastrowid`` of the cursor that ran the insert.
    """
    insert_sql = ''' INSERT INTO Departments(DepartmentName)
              VALUES(?) '''
    # Connection.execute creates a cursor, runs the statement and returns it.
    return conn.execute(insert_sql, values).lastrowid


def insert_student(conn, values):
    """Insert one row into ``Students``.

    Args:
        conn: Open ``sqlite3.Connection``.
        values: Sequence ``(StudentName, DepartmentId, DateOfBirth)`` bound
            to the three placeholders in that order.

    Returns:
        ``lastrowid`` of the cursor that ran the insert.
    """
    insert_sql = ''' INSERT INTO Students(StudentName, DepartmentId, DateOfBirth)
              VALUES(?,?,?) '''
    # Connection.execute creates a cursor, runs the statement and returns it.
    return conn.execute(insert_sql, values).lastrowid

# Build the database: create both tables, then load departments before
# students so the DepartmentId values the student rows use already exist.
conn = create_connection(db)

try:
    # `with conn` wraps the work in one transaction: committed when the block
    # finishes, rolled back if it raises. It does not close the connection.
    with conn:

        create_table(conn, create_table_departments_sql)
        create_table(conn, create_table_students_sql)

        for dept in depts:
            # insert_depts expects a sequence of parameters, hence the 1-tuple.
            insert_depts(conn, (dept, ))

        for student in students:
            insert_student(conn, student)
finally:
    # Close the connection even when an insert fails part-way through.
    conn.close()

In [12]:
import pandas as pd
# Join every student to their department and show the result as a DataFrame.
# Students whose DepartmentId is NULL have no matching department row, so the
# inner JOIN leaves them out of the result.
db = 'depts_students.db'
conn = create_connection(db)
sql_statement = """
    SELECT StudentID, StudentName, DepartmentName, DateofBirth
    FROM Students
    JOIN Departments ON Departments.DepartmentID = Students.DepartmentID;
"""
try:
    df = pd.read_sql_query(sql_statement, conn)
finally:
    # Close the connection even if the query raises.
    conn.close()
display(df)

Unnamed: 0,StudentId,StudentName,DepartmentName,DateOfBirth
0,1,Michael,IT,1998-10-12
1,2,John,IT,1998-10-12
2,3,Jack,IT,1998-10-12
3,4,Sara,Physics,1998-10-12
4,5,Sally,Physics,1998-10-12
5,7,Nancy,Physics,1998-10-12
6,8,Adam,Arts,1998-10-12
7,9,Stevens,Arts,1998-10-12


In [16]:
# Print every row of Departments.
# The connection is left open here: the following cell calls fetchall() on
# the same `cur` again and then closes `conn`.
conn = create_connection(db)
cur = conn.cursor()
cur.execute('SELECT * FROM Departments')
for row in cur.fetchall():  
    print(row)


(1, 'IT')
(2, 'Physics')
(3, 'Arts')
(4, 'Math')


In [17]:
# `cur` was already drained by the fetchall() in the previous cell, so this
# second fetchall() has no rows left to return and the loop prints nothing.
for row in cur.fetchall():  
    print(row)
# Closes the connection that the previous cell opened.
conn.close()

In [None]:
# Cost of a membership test (illustrative only: `x` is a placeholder and this
# cell has not been executed).
x in list #  O(N): a list is searched element by element
x in dict #  O(1) on average: a dict looks the key up by hash

In [18]:
# Build a DepartmentName -> DepartmentId dictionary so a department's key can
# be found from its name.
conn = create_connection(db)
cur = conn.cursor()
cur.execute('SELECT * FROM Departments')
# Each row is (DepartmentId, DepartmentName); flip it so the name is the key.
dept_fk_lookup = {text: key for key, text in cur.fetchall()}
print(dept_fk_lookup)

conn.close()

{'IT': 1, 'Physics': 2, 'Arts': 3, 'Math': 4}


In [20]:
# Rebuild the database from an empty file: remove any existing copy first.
db = 'depts_students.db'
if os.path.exists(db):
    os.remove(db)

# Departments: one row per department, keyed by an integer DepartmentId.
create_table_departments_sql = """ CREATE TABLE [Departments] (
    [DepartmentId] INTEGER  NOT NULL PRIMARY KEY,
    [DepartmentName] TEXT 
); """

# Students: DepartmentId references Departments and may be NULL.
create_table_students_sql = """CREATE TABLE [Students] (
    [StudentId] INTEGER  PRIMARY KEY NOT NULL,
    [StudentName] TEXT NOT NULL,
    [DepartmentId] INTEGER,
    [DateOfBirth] DATE,
    FOREIGN KEY(DepartmentId) REFERENCES Departments(DepartmentId)
);"""

depts = ('IT', 'Physics', 'Arts', 'Math')
# Each row is (StudentName, department name, DateOfBirth). The department is
# given by name here (or None), so it has to be translated to a DepartmentId
# before the row can be inserted.
students = (
    ('Michael', 'IT', '1998-10-12'),
    ('John', 'IT', '1998-10-12'),
    ('Jack', 'IT', '1998-10-12'),
    ('Sara', 'Physics', '1998-10-12'),
    ('Sally', 'Physics', '1998-10-12'),
    ('Jena', None, '1998-10-12'),
    ('Nancy', 'Physics', '1998-10-12'),
    ('Adam', 'Arts', '1998-10-12'),
    ('Stevens', 'Arts', '1998-10-12'),
    ('George', None, '1998-10-12')
)
# Load departments, then students, translating each student's department
# name into the DepartmentId that this database assigned to it.
conn = create_connection(db)

try:
    # One transaction for the whole load; `with conn` commits on success and
    # rolls back on error but does not close the connection.
    with conn:

        create_table(conn, create_table_departments_sql)
        create_table(conn, create_table_students_sql)

        # Build the name -> id lookup from the ids returned by these inserts.
        # A lookup read earlier from the database file that was just deleted
        # would only be right if the new ids happened to match the old ones.
        dept_fk_lookup = {}
        for dept_name in depts:
            dept_fk_lookup[dept_name] = insert_depts(conn, (dept_name, ))

        for values in students:
            values = list(values)  # tuples are immutable; copy before editing
            print('BEFORE', values)
            key = values[1]
            print('key', key)
            # None means "no department" and is stored as NULL unchanged.
            if key is not None:
                values[1] = dept_fk_lookup[key]
            print('AFTER',values)
            insert_student(conn, values)
finally:
    # Close the connection even when a lookup or insert fails.
    conn.close()

BEFORE ['Michael', 'IT', '1998-10-12']
key IT
AFTER ['Michael', 1, '1998-10-12']
BEFORE ['John', 'IT', '1998-10-12']
key IT
AFTER ['John', 1, '1998-10-12']
BEFORE ['Jack', 'IT', '1998-10-12']
key IT
AFTER ['Jack', 1, '1998-10-12']
BEFORE ['Sara', 'Physics', '1998-10-12']
key Physics
AFTER ['Sara', 2, '1998-10-12']
BEFORE ['Sally', 'Physics', '1998-10-12']
key Physics
AFTER ['Sally', 2, '1998-10-12']
BEFORE ['Jena', None, '1998-10-12']
key None
AFTER ['Jena', None, '1998-10-12']
BEFORE ['Nancy', 'Physics', '1998-10-12']
key Physics
AFTER ['Nancy', 2, '1998-10-12']
BEFORE ['Adam', 'Arts', '1998-10-12']
key Arts
AFTER ['Adam', 3, '1998-10-12']
BEFORE ['Stevens', 'Arts', '1998-10-12']
key Arts
AFTER ['Stevens', 3, '1998-10-12']
BEFORE ['George', None, '1998-10-12']
key None
AFTER ['George', None, '1998-10-12']


In [21]:
import pandas as pd
import sqlite3
from sqlite3 import Error

def create_connection(db_file, delete_db=False):
    """Open a SQLite connection to ``db_file`` with foreign keys switched on.

    Args:
        db_file: Database path handed to ``sqlite3.connect``.
        delete_db: When true, an existing file at ``db_file`` is removed
            before connecting.

    Returns:
        The ``sqlite3.Connection``, or ``None`` if connecting raised
        ``sqlite3.Error``. Errors are printed, not re-raised; if only the
        PRAGMA fails, the already-open connection is still returned.
    """
    import os

    if delete_db:
        if os.path.exists(db_file):
            os.remove(db_file)

    connection = None
    try:
        connection = sqlite3.connect(db_file)
        # Turn on foreign-key enforcement for this connection.
        connection.execute("PRAGMA foreign_keys = 1")
    except Error as exc:
        print(exc)
    return connection


def create_table(conn, create_table_sql):
    """Run a single SQL statement on ``conn``, printing any SQLite error.

    Args:
        conn: Open ``sqlite3.Connection``.
        create_table_sql: SQL text to execute.

    Returns:
        None. ``sqlite3.Error`` is caught and printed rather than raised.
    """
    try:
        conn.cursor().execute(create_table_sql)
    except Error as exc:
        print(exc)
        
def execute_sql_statement(sql_statement, conn):
    """Run ``sql_statement`` on ``conn`` and return all of its result rows.

    Args:
        sql_statement: SQL text to execute.
        conn: Open ``sqlite3.Connection``.

    Returns:
        The list produced by ``fetchall()`` (one tuple per row).
    """
    # Cursor.execute returns the cursor itself, so the calls can be chained.
    return conn.cursor().execute(sql_statement).fetchall()


In [23]:
# Show the distinct years present in the source export table.
conn = create_connection('india_export.db')
try:
    df = pd.read_sql_query("SELECT DISTINCT year from export ORDER BY year", conn)
finally:
    # Release the connection even if the query fails.
    conn.close()
display(df)

Unnamed: 0,year
0,2010
1,2011
2,2012
3,2013
4,2014
5,2015
6,2016
7,2017
8,2018


In [24]:
# Get list of unique years
conn = create_connection('india_export.db')

sql_statement = "SELECT DISTINCT year from export ORDER BY year"
try:
    years = execute_sql_statement(sql_statement, conn)
finally:
    # Release the connection even if the query fails.
    conn.close()

In [25]:
# Each row comes back as a one-element tuple such as (2010,), not a bare int.
years

[(2010,),
 (2011,),
 (2012,),
 (2013,),
 (2014,),
 (2015,),
 (2016,),
 (2017,),
 (2018,)]

In [26]:
# Create a fresh normalized database containing only an empty Year table,
# then display the table to confirm it exists and has no rows yet.
create_table_sql = """CREATE TABLE [Year] (
    [YEAR] INTEGER NOT NULL PRIMARY KEY
);
"""

# delete_db=True removes any existing india_export_norm.db before connecting.
conn_norm = create_connection('india_export_norm.db', True)
try:
    create_table(conn_norm, create_table_sql)
    sql_statement = "SELECT * FROM Year"
    df = pd.read_sql_query(sql_statement, conn_norm)
finally:
    # Always release the file handle so a later cell can delete and rebuild
    # the database file.
    conn_norm.close()
display(df)

Unnamed: 0,YEAR


In [28]:
def insert_year(conn, values):
    """Insert one row into the ``YEAR`` table.

    Args:
        conn: Open ``sqlite3.Connection``.
        values: One-element sequence holding the year.

    Returns:
        ``lastrowid`` of the cursor that ran the insert.
    """
    insert_sql = ''' INSERT INTO YEAR(YEAR)
              VALUES(?) '''
    # Connection.execute creates a cursor, runs the statement and returns it.
    return conn.execute(insert_sql, values).lastrowid
# Rebuild the normalized database from scratch and load the Year table.
# The Year DDL is restated here because `create_table_sql` is a shared name
# that other cells rebind to different tables; using whatever it last held
# would create the wrong table when this cell is re-run out of order.
create_table_sql = """CREATE TABLE [Year] (
    [YEAR] INTEGER NOT NULL PRIMARY KEY
);
"""
conn_norm = create_connection('india_export_norm.db', True)
try:
    create_table(conn_norm, create_table_sql)

    # One transaction for all inserts. Each element of `years` is already a
    # one-element tuple, which is the parameter shape insert_year expects.
    with conn_norm:
        for year in years:
            insert_year(conn_norm, year)
    sql_statement = "SELECT * FROM Year"
    df = pd.read_sql_query(sql_statement, conn_norm)
finally:
    # Close the connection even if an insert or the query fails.
    conn_norm.close()
display(df)

Unnamed: 0,YEAR
0,2010
1,2011
2,2012
3,2013
4,2014
5,2015
6,2016
7,2017
8,2018


In [29]:
# Get list of unique countries

sql_statement = "SELECT DISTINCT country from export ORDER BY country"
conn = create_connection('india_export.db')

try:
    countries = execute_sql_statement(sql_statement, conn)
finally:
    # Release the connection even if the query fails.
    conn.close()
print(countries)


[('AFGHANISTAN TIS',), ('ALBANIA',), ('ALGERIA',), ('AMERI SAMOA',), ('ANDORRA',), ('ANGOLA',), ('ANGUILLA',), ('ANTARTICA',), ('ANTIGUA',), ('ARGENTINA',), ('ARMENIA',), ('ARUBA',), ('AUSTRALIA',), ('AUSTRIA',), ('AZERBAIJAN',), ('BAHAMAS',), ('BAHARAIN IS',), ('BANGLADESH PR',), ('BARBADOS',), ('BELARUS',), ('BELGIUM',), ('BELIZE',), ('BENIN',), ('BERMUDA',), ('BHUTAN',), ('BOLIVIA',), ('BOSNIA-HRZGOVIN',), ('BOTSWANA',), ('BR VIRGN IS',), ('BRAZIL',), ('BRUNEI',), ('BULGARIA',), ('BURKINA FASO',), ('BURUNDI',), ('C AFRI REP',), ('CAMBODIA',), ('CAMEROON',), ('CANADA',), ('CANARY IS',), ('CAPE VERDE IS',), ('CAYMAN IS',), ('CHAD',), ('CHANNEL IS',), ('CHILE',), ('CHINA P RP',), ('CHRISTMAS IS.',), ('COCOS IS',), ('COLOMBIA',), ('COMOROS',), ('CONGO D. REP.',), ('CONGO P REP',), ('COOK IS',), ('COSTA RICA',), ("COTE D' IVOIRE",), ('CROATIA',), ('CUBA',), ('CURACAO',), ('CYPRUS',), ('CZECH REPUBLIC',), ('DENMARK',), ('DJIBOUTI',), ('DOMINIC REP',), ('DOMINICA',), ('ECUADOR',), ('EGYPT 

In [30]:
def insert_country(conn, values):
    """Insert one row into the ``COUNTRY`` table.

    Args:
        conn: Open ``sqlite3.Connection``.
        values: One-element sequence holding the country name.

    Returns:
        ``lastrowid`` of the cursor that ran the insert.
    """
    insert_sql = ''' INSERT INTO COUNTRY(COUNTRY)
              VALUES(?) '''
    # Connection.execute creates a cursor, runs the statement and returns it.
    return conn.execute(insert_sql, values).lastrowid

# Add the Country table to the normalized database and fill it with the
# distinct country names collected from the source export table.
create_table_sql = """CREATE TABLE IF NOT EXISTS [Country] (
    [Country] TEXT NOT NULL PRIMARY KEY
);
"""
conn_norm = create_connection('india_export_norm.db')

try:
    create_table(conn_norm, create_table_sql)

    # One transaction for all inserts. Each element of `countries` is already
    # a one-element tuple, the parameter shape insert_country expects.
    with conn_norm:
        for country in countries:
            insert_country(conn_norm, country)

    sql_statement = "SELECT * FROM country"
    df = pd.read_sql_query(sql_statement, conn_norm)
finally:
    # Close the connection even if an insert or the query fails.
    conn_norm.close()
display(df)

Unnamed: 0,Country
0,AFGHANISTAN TIS
1,ALBANIA
2,ALGERIA
3,AMERI SAMOA
4,ANDORRA
...,...
243,VIRGIN IS US
244,WALLIS F IS
245,YEMEN REPUBLC
246,ZAMBIA


In [31]:
# Collect the distinct commodity descriptions from the source export table.
conn = create_connection('india_export.db')

sql_statement = "SELECT DISTINCT COMMODITY FROM export order by COMMODITY"
try:
    commodities = execute_sql_statement(sql_statement, conn)
finally:
    # Release the connection even if the query fails.
    conn.close()
print(commodities)
print(len(commodities))

[('AIRCRAFT, SPACECRAFT, AND PARTS THEREOF.',), ('ALBUMINOIDAL SUBSTANCES; MODIFIED STARCHES; GLUES; ENZYMES.',), ('ALUMINIUM AND ARTICLES THEREOF.',), ('ANIMAL OR VEGETABLE FATS AND OILS AND THEIR CLEAVAGE PRODUCTS; PRE. EDIBLE FATS; ANIMAL OR VEGETABLE WAXEX.',), ('ARMS AND AMMUNITION; PARTS AND ACCESSORIES THEREOF.',), ('ARTICLES OF APPAREL AND CLOTHING ACCESSORIES, KNITTED OR CORCHETED.',), ('ARTICLES OF APPAREL AND CLOTHING ACCESSORIES, NOT KNITTED OR CROCHETED.',), ('ARTICLES OF IRON OR STEEL',), ('ARTICLES OF LEATHER,SADDLERY AND HARNESS;TRAVEL GOODS, HANDBAGS AND SIMILAR CONT.ARTICLES OF ANIMAL GUT(OTHR THN SILK-WRM)GUT.',), ('ARTICLES OF STONE, PLASTER, CEMENT, ASBESTOS, MICA OR SIMILAR MATERIALS.',), ('BEVERAGES, SPIRITS AND VINEGAR.',), ('CARPETS AND OTHER TEXTILE FLOOR COVERINGS.',), ('CERAMIC PRODUCTS.',), ('CEREALS.',), ('CLOCKS AND WATCHES AND PARTS THEREOF.',), ('COCOA AND COCOA PREPARATIONS.',), ('COFFEE, TEA, MATE AND SPICES.',), ('COPPER AND ARTICLES THEREOF.',), ('C

In [32]:
def insert_commodity(conn, values):
    """Insert one row into the ``COMMODITY`` table.

    Only the ``COMMODITY`` column is supplied by the statement.

    Args:
        conn: Open ``sqlite3.Connection``.
        values: One-element sequence holding the commodity description.

    Returns:
        ``lastrowid`` of the cursor that ran the insert.
    """
    insert_sql = ''' INSERT INTO COMMODITY (COMMODITY)
              VALUES(?) '''
    # Connection.execute creates a cursor, runs the statement and returns it.
    return conn.execute(insert_sql, values).lastrowid

# Add the Commodity table to the normalized database. Only the description
# is inserted; the HSCODE integer primary key is filled in by SQLite.
create_table_sql = """CREATE TABLE IF NOT EXISTS [COMMODITY] (
    [HSCODE] INTEGER NOT NULL PRIMARY KEY, 
    [COMMODITY] TEXT NOT NULL
);
"""
conn_norm = create_connection('india_export_norm.db')

try:
    create_table(conn_norm, create_table_sql)

    # One transaction for all inserts. Each element of `commodities` is
    # already a one-element tuple, the shape insert_commodity expects.
    with conn_norm:
        for commodity in commodities:
            insert_commodity(conn_norm, commodity)

    sql_statement = "SELECT * FROM Commodity"
    df = pd.read_sql_query(sql_statement, conn_norm)
finally:
    # Close the connection even if an insert or the query fails.
    conn_norm.close()
display(df)

Unnamed: 0,HSCODE,COMMODITY
0,1,"AIRCRAFT, SPACECRAFT, AND PARTS THEREOF."
1,2,ALBUMINOIDAL SUBSTANCES; MODIFIED STARCHES; GL...
2,3,ALUMINIUM AND ARTICLES THEREOF.
3,4,ANIMAL OR VEGETABLE FATS AND OILS AND THEIR CL...
4,5,ARMS AND AMMUNITION; PARTS AND ACCESSORIES THE...
...,...,...
93,94,"WADDING, FELT AND NONWOVENS; SPACIAL YARNS; TW..."
94,95,WOOD AND ARTICLES OF WOOD; WOOD CHARCOAL.
95,96,"WOOL, FINE OR COARSE ANIMAL HAIR, HORSEHAIR YA..."
96,97,WORKS OF ART COLLECTORS' PIECES AND ANTIQUES.


In [33]:
# Table 4: links the Commodity, Country and Year tables to the export values

def insert_value(conn, values):
    """Insert one row into the ``Export`` table.

    Args:
        conn: Open ``sqlite3.Connection``.
        values: Sequence ``(HSCODE, COUNTRY, YEAR, VALUE)`` bound to the four
            placeholders in that order.

    Returns:
        ``lastrowid`` of the cursor that ran the insert.
    """
    insert_sql = ''' INSERT INTO Export (HSCODE, COUNTRY, YEAR, VALUE)
              VALUES(?, ?, ?, ?) '''
    # Connection.execute creates a cursor, runs the statement and returns it.
    return conn.execute(insert_sql, values).lastrowid

# Export rows carry a VALUE plus foreign keys into the Country, Year and
# Commodity tables; VALUEID is the table's own integer primary key.
create_table_sql = """CREATE TABLE IF NOT EXISTS [Export] (
    [VALUEID] INTEGER NOT NULL PRIMARY KEY,
    [HSCODE] INTEGER NOT NULL, 
    [COUNTRY] TEXT NOT NULL,
    [YEAR] INTEGER NOT NULL,
    [VALUE] REAL,
    FOREIGN KEY(COUNTRY) REFERENCES Country(COUNTRY),
    FOREIGN KEY(YEAR) REFERENCES YEAR(YEAR),
    FOREIGN KEY(HSCODE) REFERENCES Commodity(HSCODE)
);
"""
# conn_norm is not closed in this cell: the following cells read the
# Commodity table and insert Export rows through this same connection.
conn_norm = create_connection('india_export_norm.db')

create_table(conn_norm, create_table_sql)

In [34]:
# Read back the (HSCODE, COMMODITY) pairs stored in the Commodity table.
# Uses the conn_norm connection left open by the previous cell.
sql_statement = 'SELECT HSCODE, COMMODITY from COMMODITY'
values = execute_sql_statement(sql_statement, conn_norm)
# Only preview the first ten pairs.
print(values[:10])

[(1, 'AIRCRAFT, SPACECRAFT, AND PARTS THEREOF.'), (2, 'ALBUMINOIDAL SUBSTANCES; MODIFIED STARCHES; GLUES; ENZYMES.'), (3, 'ALUMINIUM AND ARTICLES THEREOF.'), (4, 'ANIMAL OR VEGETABLE FATS AND OILS AND THEIR CLEAVAGE PRODUCTS; PRE. EDIBLE FATS; ANIMAL OR VEGETABLE WAXEX.'), (5, 'ARMS AND AMMUNITION; PARTS AND ACCESSORIES THEREOF.'), (6, 'ARTICLES OF APPAREL AND CLOTHING ACCESSORIES, KNITTED OR CORCHETED.'), (7, 'ARTICLES OF APPAREL AND CLOTHING ACCESSORIES, NOT KNITTED OR CROCHETED.'), (8, 'ARTICLES OF IRON OR STEEL'), (9, 'ARTICLES OF LEATHER,SADDLERY AND HARNESS;TRAVEL GOODS, HANDBAGS AND SIMILAR CONT.ARTICLES OF ANIMAL GUT(OTHR THN SILK-WRM)GUT.'), (10, 'ARTICLES OF STONE, PLASTER, CEMENT, ASBESTOS, MICA OR SIMILAR MATERIALS.')]


In [35]:
# Map each commodity description to its HSCODE. `values` holds
# (HSCODE, COMMODITY) rows, so each pair is flipped to key on the text.
commodity_lookup = {commodity: hscode for hscode, commodity in values}

print(commodity_lookup)

{'AIRCRAFT, SPACECRAFT, AND PARTS THEREOF.': 1, 'ALBUMINOIDAL SUBSTANCES; MODIFIED STARCHES; GLUES; ENZYMES.': 2, 'ALUMINIUM AND ARTICLES THEREOF.': 3, 'ANIMAL OR VEGETABLE FATS AND OILS AND THEIR CLEAVAGE PRODUCTS; PRE. EDIBLE FATS; ANIMAL OR VEGETABLE WAXEX.': 4, 'ARMS AND AMMUNITION; PARTS AND ACCESSORIES THEREOF.': 5, 'ARTICLES OF APPAREL AND CLOTHING ACCESSORIES, KNITTED OR CORCHETED.': 6, 'ARTICLES OF APPAREL AND CLOTHING ACCESSORIES, NOT KNITTED OR CROCHETED.': 7, 'ARTICLES OF IRON OR STEEL': 8, 'ARTICLES OF LEATHER,SADDLERY AND HARNESS;TRAVEL GOODS, HANDBAGS AND SIMILAR CONT.ARTICLES OF ANIMAL GUT(OTHR THN SILK-WRM)GUT.': 9, 'ARTICLES OF STONE, PLASTER, CEMENT, ASBESTOS, MICA OR SIMILAR MATERIALS.': 10, 'BEVERAGES, SPIRITS AND VINEGAR.': 11, 'CARPETS AND OTHER TEXTILE FLOOR COVERINGS.': 12, 'CERAMIC PRODUCTS.': 13, 'CEREALS.': 14, 'CLOCKS AND WATCHES AND PARTS THEREOF.': 15, 'COCOA AND COCOA PREPARATIONS.': 16, 'COFFEE, TEA, MATE AND SPICES.': 17, 'COPPER AND ARTICLES THEREOF.'

In [36]:
# Copy every row of the source export table into the normalized Export
# table, replacing the commodity text with its HSCODE from commodity_lookup.
# Relies on conn_norm (opened in an earlier cell) still being open.
conn = create_connection('india_export.db')

sql_statement = 'SELECT Commodity, country, year, value from export'
try:
    values = execute_sql_statement(sql_statement, conn)
    # One transaction for the whole load: committed when the block finishes,
    # rolled back if a lookup or insert raises.
    with conn_norm:
        for commodity_text, country_name, export_year, export_value in values:
            hscode = commodity_lookup[commodity_text]
            insert_value(conn_norm, (hscode, country_name, export_year, export_value))
finally:
    # Close both connections even when the load fails part-way through.
    conn.close()
    conn_norm.close()

In [37]:
# Display the normalized Export table to confirm the load.
conn_norm = create_connection('india_export_norm.db')
sql_statement = "SELECT * FROM Export"
try:
    df = pd.read_sql_query(sql_statement, conn_norm)
finally:
    # Release the connection even if the query fails.
    conn_norm.close()
display(df)

Unnamed: 0,VALUEID,HSCODE,COUNTRY,YEAR,VALUE
0,1,45,AFGHANISTAN TIS,2018,0.18
1,2,28,AFGHANISTAN TIS,2018,0.00
2,3,21,AFGHANISTAN TIS,2018,12.48
3,4,41,AFGHANISTAN TIS,2018,0.00
4,5,23,AFGHANISTAN TIS,2018,1.89
...,...,...,...,...,...
137018,137019,90,ZIMBABWE,2010,0.03
137019,137020,51,ZIMBABWE,2010,0.45
137020,137021,97,ZIMBABWE,2010,0.00
137021,137022,74,ZIMBABWE,2010,0.00


In [39]:
# Join Export back to Commodity so each row shows the commodity text again.
# SELECT * returns the columns of both tables, so HSCODE appears twice.
conn_norm = create_connection('india_export_norm.db')
sql_statement = "SELECT * FROM Export JOIN COMMODITY ON COMMODITY.HSCODE = EXPORT.HSCODE"
try:
    df = pd.read_sql_query(sql_statement, conn_norm)
finally:
    # Release the connection even if the query fails.
    conn_norm.close()
display(df)

Unnamed: 0,VALUEID,HSCODE,COUNTRY,YEAR,VALUE,HSCODE.1,COMMODITY
0,1,45,AFGHANISTAN TIS,2018,0.18,45,MEAT AND EDIBLE MEAT OFFAL.
1,2,28,AFGHANISTAN TIS,2018,0.00,28,"FISH AND CRUSTACEANS, MOLLUSCS AND OTHER AQUAT..."
2,3,21,AFGHANISTAN TIS,2018,12.48,21,DAIRY PRODUCE; BIRDS' EGGS; NATURAL HONEY; EDI...
3,4,41,AFGHANISTAN TIS,2018,0.00,41,LIVE TREES AND OTHER PLANTS; BULBS; ROOTS AND ...
4,5,23,AFGHANISTAN TIS,2018,1.89,23,EDIBLE VEGETABLES AND CERTAIN ROOTS AND TUBERS.
...,...,...,...,...,...,...,...
137018,137019,90,ZIMBABWE,2010,0.03,90,"TOYS, GAMES AND SPORTS REQUISITES; PARTS AND A..."
137019,137020,51,ZIMBABWE,2010,0.45,51,MISCELLANEOUS MANUFACTURED ARTICLES.
137020,137021,97,ZIMBABWE,2010,0.00,97,WORKS OF ART COLLECTORS' PIECES AND ANTIQUES.
137021,137022,74,ZIMBABWE,2010,0.00,74,PROJECT GOODS; SOME SPECIAL USES.
