# Data 225 Final Project - ETL for Relational Database (db2)

# 1 Table Customer

In [1]:
from mysql.connector import MySQLConnection, Error
from mydbutils import make_connection, do_query_return_all
from pandas.io import sql
import pandas as pd
import csv
import random
random.seed(1)
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format='retina'


import warnings
warnings.filterwarnings('ignore')

In [2]:
## Connect to 'daydayup_db' to extract data
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x163c6f6d0>

In [3]:
## Pull the whole Customer table out of the source database 'daydayup_db'
## and discard the "Fax" and "Company" columns, which the db2 Customer table omits.
df_customer = (
    pd.read_sql("SELECT * from Customer", conn)
      .drop(columns=['Fax', 'Company'])
)
df_customer.head()

Unnamed: 0,CustomerId,FirstName,LastName,Address,City,State,Country,PostalCode,Phone,Email,SupportRepId
0,1,Luís,Gonçalves,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,luisg@embraer.com.br,3
1,2,Leonie,Köhler,Theodor-Heuss-Straße 34,Stuttgart,,Germany,70174,+49 0711 2842222,leonekohler@surfeu.de,5
2,3,François,Tremblay,1498 rue Bélanger,Montréal,QC,Canada,H2G 1A7,+1 (514) 721-4711,ftremblay@gmail.com,3
3,4,Bjørn,Hansen,Ullevålsveien 14,Oslo,,Norway,0171,+47 22 44 22 22,bjorn.hansen@yahoo.no,4
4,5,František,Wichterlová,Klanova 9/506,Prague,,Czech Republic,14700,+420 2 4172 5555,frantisekw@jetbrains.com,4


In [4]:
## Substitute the literal "Unknown" for every missing value so that "Unknown"
## can serve as its own group later in data analysis
df_customer = df_customer.fillna("Unknown")
df_customer.head()

Unnamed: 0,CustomerId,FirstName,LastName,Address,City,State,Country,PostalCode,Phone,Email,SupportRepId
0,1,Luís,Gonçalves,"Av. Brigadeiro Faria Lima, 2170",São José dos Campos,SP,Brazil,12227-000,+55 (12) 3923-5555,luisg@embraer.com.br,3
1,2,Leonie,Köhler,Theodor-Heuss-Straße 34,Stuttgart,Unknown,Germany,70174,+49 0711 2842222,leonekohler@surfeu.de,5
2,3,François,Tremblay,1498 rue Bélanger,Montréal,QC,Canada,H2G 1A7,+1 (514) 721-4711,ftremblay@gmail.com,3
3,4,Bjørn,Hansen,Ullevålsveien 14,Oslo,Unknown,Norway,0171,+47 22 44 22 22,bjorn.hansen@yahoo.no,4
4,5,František,Wichterlová,Klanova 9/506,Prague,Unknown,Czech Republic,14700,+420 2 4172 5555,frantisekw@jetbrains.com,4


In [5]:
## Export the transformed data to csv
df_customer.to_csv('db2_customer.csv', index=False)

In [6]:
cursor.close()
conn.close()

In [7]:
## Connect to 'daydayup_db2' to drop all tables
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini') ## db2
cursor = conn.cursor()

## Every table this notebook rebuilds in db2
db2_tables = (
    "Album", "Artist", "Customer", "Employee", "Genre", "Invoice",
    "InvoiceLine", "MediaType", "Playlist", "PlaylistTrack", "Track",
)

## Foreign-key checks are switched off so the tables can be dropped in any
## order. The finally clause switches them back on even when a DROP fails,
## because the following cells keep using this same connection.
cursor.execute("SET FOREIGN_KEY_CHECKS = 0")
try:
    for table_name in db2_tables:
        cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
finally:
    cursor.execute("SET FOREIGN_KEY_CHECKS = 1")

In [8]:
## DDL for the db2 Customer table (no Fax / Company columns).
## Kept under its own name so the imported `sql` module is not shadowed.
create_customer_sql = """
CREATE TABLE `Customer`
(
    `CustomerId` INT NOT NULL DEFAULT 0,
    `FirstName` NVARCHAR(40) DEFAULT 'None',
    `LastName` NVARCHAR(20) DEFAULT 'None',
    `Address` NVARCHAR(70) DEFAULT 'None',
    `City` NVARCHAR(40) DEFAULT 'None',
    `State` NVARCHAR(40) DEFAULT 'None',
    `Country` NVARCHAR(40) DEFAULT 'None',
    `PostalCode` NVARCHAR(10) DEFAULT 'None',
    `Phone` NVARCHAR(24) DEFAULT 'None',
    `Email` NVARCHAR(60) DEFAULT 'None',
    `SupportRepId` INT DEFAULT 0,
    CONSTRAINT `PK_Customer` PRIMARY KEY  (`CustomerId`)
)"""
cursor.execute(create_customer_sql)

In [9]:
## Export the transformed data to csv
df_customer.to_csv('db2_customer.csv', index=False)

In [10]:
## Populate the table Customer from the exported CSV
sql_c = (
    "INSERT INTO Customer \n"
    "VALUES (%s, %s, %s, %s, %s,  %s, %s, %s, %s, %s,  %s)"
)

## db2_customer.csv was written by pandas, whose to_csv default encoding is
## UTF-8; read it back with that encoding rather than the platform default so
## accented names are not corrupted on non-UTF-8 locales.
with open('db2_customer.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(sql_c, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 2 Table Employee

In [11]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16b7e9f00>

In [12]:
df_employee = pd.read_sql("select * from Employee", conn)
df_employee

Unnamed: 0,EmployeeId,LastName,FirstName,Title,ReportsTo,BirthDate,HireDate,Address,City,State,Country,PostalCode,Phone,Fax,Email
0,1,ADAMS,Andrew,General Manager,,1962-02-18,2002-08-14,11120 Jasper Ave NW,Edmonton,AB,Canada,T5K 2N1,+1 (780) 428-9482,+1 (780) 428-3457,andrew@chinookcorp.com
1,2,Edwards,Nancy,Sales Manager,1.0,1958-12-08,2002-05-01,825 8 Ave SW,Calgary,AB,Canada,T2P 2T3,+1 (403) 262-3443,+1 (403) 262-3322,nancy@chinookcorp.com
2,3,Peacock,Jane,Sales Support Agent,2.0,1973-08-29,2002-04-01,1111 6 Ave SW,Calgary,AB,Canada,T2P 5M5,+1 (403) 262-3443,+1 (403) 262-6712,jane@chinookcorp.com
3,4,Park,Margaret,Sales Support Agent,2.0,1947-09-19,2003-05-03,683 10 Street SW,Calgary,AB,Canada,T2P 5G3,+1 (403) 263-4423,+1 (403) 263-4289,margaret@chinookcorp.com
4,5,Johnson,Steve,Sales Support Agent,2.0,1965-03-03,2003-10-17,7727B 41 Ave,Calgary,AB,Canada,T3B 1Y7,1 (780) 836-9987,1 (780) 836-9543,steve@chinookcorp.com
5,6,Mitchell,Michael,IT Manager,1.0,1973-07-01,2003-10-17,5827 Bowness Road NW,Calgary,AB,Canada,T3B 0C5,+1 (403) 246-9887,+1 (403) 246-9899,michael@chinookcorp.com
6,7,King,Robert,IT Staff,6.0,1970-05-29,2004-01-02,590 Columbia Boulevard West,Lethbridge,AB,Canada,T1K 5N8,+1 (403) 456-9986,+1 (403) 456-8485,robert@chinookcorp.com
7,8,Callahan,Laura,IT Staff,6.0,1968-01-09,2004-03-04,923 7 ST NW,Lethbridge,AB,Canada,T1H 1Y8,+1 (403) 467-3351,+1 (403) 467-8772,laura@chinookcorp.com


In [13]:
df_employee.isnull().sum()

EmployeeId    0
LastName      0
FirstName     0
Title         0
ReportsTo     1
BirthDate     0
HireDate      0
Address       0
City          0
State         0
Country       0
PostalCode    0
Phone         0
Fax           0
Email         0
dtype: int64

In [14]:
df_employee[df_employee.ReportsTo.isnull()] ## Andrew Adams (General Manager) does not report to anybody

Unnamed: 0,EmployeeId,LastName,FirstName,Title,ReportsTo,BirthDate,HireDate,Address,City,State,Country,PostalCode,Phone,Fax,Email
0,1,ADAMS,Andrew,General Manager,,1962-02-18,2002-08-14,11120 Jasper Ave NW,Edmonton,AB,Canada,T5K 2N1,+1 (780) 428-9482,+1 (780) 428-3457,andrew@chinookcorp.com


In [15]:
## The General Manager has no manager; store his own EmployeeId (1) in
## ReportsTo so the column has no nulls. The result is assigned back rather
## than calling fillna(inplace=True) on the column accessor, which can act on
## a temporary copy and leave df_employee unchanged.
df_employee['ReportsTo'] = df_employee['ReportsTo'].fillna(1)

In [16]:
df_employee[df_employee.ReportsTo.isnull()] ## Check: no row should have a null ReportsTo any more (expect an empty frame)

Unnamed: 0,EmployeeId,LastName,FirstName,Title,ReportsTo,BirthDate,HireDate,Address,City,State,Country,PostalCode,Phone,Fax,Email


In [17]:
df_employee.ReportsTo = df_employee.ReportsTo.astype('int32')
df_employee.ReportsTo.dtype

dtype('int32')

In [18]:
df_employee.to_csv("db2_employee.csv", index=False)

In [19]:
cursor.close()
conn.close()

In [20]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c20be20>

In [21]:
## DDL for the db2 Employee table.
## Kept under its own name so the imported `sql` module is not shadowed.
create_employee_sql = '''
CREATE TABLE `Employee`
(
    `EmployeeId` INT NOT NULL DEFAULT 0,
    `LastName` NVARCHAR(20) DEFAULT 'None',
    `FirstName` NVARCHAR(20) DEFAULT 'None',
    `Title` NVARCHAR(30) DEFAULT 'None',
    `ReportsTo` INT DEFAULT 0,
    `BirthDate` DATETIME DEFAULT '1900-01-01',
    `HireDate` DATETIME DEFAULT '1900-01-01',
    `Address` NVARCHAR(70) DEFAULT 'None',
    `City` NVARCHAR(40) DEFAULT 'None',
    `State` NVARCHAR(40) DEFAULT 'None',
    `Country` NVARCHAR(40) DEFAULT 'None',
    `PostalCode` NVARCHAR(10) DEFAULT 'None',
    `Phone` NVARCHAR(24) DEFAULT 'None',
    `Fax` NVARCHAR(24) DEFAULT 'None',
    `Email` NVARCHAR(60) DEFAULT 'None',
    CONSTRAINT `PK_Employee` PRIMARY KEY  (`EmployeeId`)
)
'''
cursor.execute(create_employee_sql)

In [22]:
## Populate the table Employee from the exported CSV
insert_sql = (
    "INSERT INTO Employee \n"
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,%s, %s, %s, %s, %s)"
)

## db2_employee.csv was written by pandas, whose to_csv default encoding is
## UTF-8; read it back with that encoding rather than the platform default.
with open('db2_employee.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 3 Table Album

In [23]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c209ed0>

In [24]:
df_album = pd.read_sql("select * from Album", conn)
df_album

Unnamed: 0,AlbumId,Title,ArtistId
0,1,For Those About To Rock We Salute You,1
1,2,Balls to the Wall,2
2,3,Restless and Wild,2
3,4,Let There Be Rock,1
4,5,Big Ones,3
...,...,...,...
342,343,Respighi:Pines of Rome,226
343,344,Schubert: The Late String Quartets & String Qu...,272
344,345,Monteverdi: L'Orfeo,273
345,346,Mozart: Chamber Music,274


In [25]:
df_album.isnull().sum()

AlbumId     0
Title       0
ArtistId    0
dtype: int64

In [26]:
df_album.to_csv("db2_album.csv", index=False)

In [27]:
cursor.close()
conn.close()

In [28]:
## Create and import data for table album: db2
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c20ad10>

In [29]:
## DDL for the db2 Album table.
## Kept under its own name so the imported `sql` module is not shadowed.
create_album_sql = '''
CREATE TABLE `Album`
                (
                    `AlbumId` INT NOT NULL DEFAULT 0,
                    `Title` NVARCHAR(160) DEFAULT 'None',
                    `ArtistId` INT DEFAULT 0,
                    CONSTRAINT `PK_Album` PRIMARY KEY  (`AlbumId`)
                ) 
'''
cursor.execute(create_album_sql)

In [30]:
## Populate the table Album from the exported CSV
insert_sql = (
    "INSERT INTO Album \n"
    "VALUES (%s, %s, %s)"
)

## db2_album.csv was written by pandas, whose to_csv default encoding is
## UTF-8; read it back with that encoding rather than the platform default.
with open('db2_album.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 4 Table Artist

In [31]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c2b2770>

In [32]:
df_artist = pd.read_sql("select * from Artist", conn)
df_artist

Unnamed: 0,ArtistId,Name
0,1,AC/DC
1,2,Accept
2,3,Aerosmith
3,4,Alanis Morissette
4,5,Alice In Chains
...,...,...
272,273,"C. Monteverdi, Nigel Rogers - Chiaroscuro; Lon..."
273,274,Nash Ensemble
274,275,Philip Glass Ensemble
275,276,


In [33]:
df_artist.isnull().sum()

ArtistId    0
Name        0
dtype: int64

In [34]:
df_artist.to_csv("db2_artist.csv", index=False)
cursor.close()
conn.close()

In [35]:
## Create and import data for table artist: db2
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c2b1570>

In [36]:
## DDL for the db2 Artist table.
## Kept under its own name so the imported `sql` module is not shadowed.
create_artist_sql = '''
CREATE TABLE `Artist`
(
    `ArtistId` INT NOT NULL DEFAULT 0,
    `Name` NVARCHAR(120) DEFAULT 'None',
    CONSTRAINT `PK_Artist` PRIMARY KEY  (`ArtistId`)
)
'''
cursor.execute(create_artist_sql)

In [37]:
## Populate the table Artist from the exported CSV
insert_sql = (
    "INSERT INTO Artist \n"
    "VALUES (%s, %s)"
)

## db2_artist.csv was written by pandas, whose to_csv default encoding is
## UTF-8; read it back with that encoding rather than the platform default.
with open('db2_artist.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 5 Table Genre

In [38]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c2b34c0>

In [39]:
df_genre = pd.read_sql("select * from Genre", conn)
df_genre

Unnamed: 0,GenreId,Name
0,1,Rock
1,2,Jazz
2,3,Metal
3,4,Alternative & Punk
4,5,Rock And Roll
5,6,Blues
6,7,Latin
7,8,Reggae
8,9,Pop
9,10,Soundtrack


In [40]:
df_genre.isnull().sum()

GenreId    0
Name       0
dtype: int64

In [41]:
df_genre.to_csv("db2_genre.csv", index=False)
cursor.close()
conn.close()

In [42]:
## Create and import data for table genre: db2
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16b751cc0>

In [43]:
## DDL for the db2 Genre table.
## Kept under its own name so the imported `sql` module is not shadowed.
create_genre_sql = '''
CREATE TABLE `Genre`
(
    `GenreId` INT NOT NULL DEFAULT 0,
    `Name` NVARCHAR(120) DEFAULT 'None',
    CONSTRAINT `PK_Genre` PRIMARY KEY  (`GenreId`)
)
'''
cursor.execute(create_genre_sql)

In [44]:
## Populate the table Genre from the exported CSV
insert_sql = (
    "INSERT INTO Genre \n"
    "VALUES (%s, %s)"
)

## db2_genre.csv was written by pandas, whose to_csv default encoding is
## UTF-8; read it back with that encoding rather than the platform default.
with open('db2_genre.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 6 Table Invoice

In [45]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c2b0550>

In [46]:
df_invoice = pd.read_sql("select * from Invoice", conn)
df_invoice

Unnamed: 0,InvoiceId,CustomerId,InvoiceDate,BillingAddress,BillingCity,BillingState,BillingCountry,BillingPostalCode,Total
0,1,2,2009-01-01,Theodor-Heuss-Straße 34,Stuttgart,,Germany,70174,1.98
1,2,4,2009-01-02,Ullevålsveien 14,Oslo,,Norway,0171,3.96
2,3,8,2009-01-03,Grétrystraat 63,Brussels,,Belgium,1000,5.94
3,4,14,2009-01-06,8210 111 ST NW,Edmonton,AB,Canada,T6G 2C7,8.91
4,5,23,2009-01-11,69 Salem Street,Boston,MA,USA,2113,13.86
...,...,...,...,...,...,...,...,...,...
407,408,25,2013-12-05,319 N. Frances Street,Madison,WI,USA,53703,3.96
408,409,29,2013-12-06,796 Dundas Street West,Toronto,ON,Canada,M6J 1V1,5.94
409,410,35,2013-12-09,"Rua dos Campeões Europeus de Viena, 4350",Porto,,Portugal,,8.91
410,411,44,2013-12-14,Porthaninkatu 9,Helsinki,,Finland,00530,13.86


In [47]:
df_invoice.isnull().sum()

InvoiceId              0
CustomerId             0
InvoiceDate            0
BillingAddress         0
BillingCity            0
BillingState         202
BillingCountry         0
BillingPostalCode     28
Total                  0
dtype: int64

In [48]:
## BillingState and BillingPostalCode are the columns reported with nulls;
## label those gaps "Unknown".
df_invoice = df_invoice.fillna('Unknown')

In [49]:
df_invoice.to_csv("db2_invoice.csv", index=False)
cursor.close()
conn.close()

In [50]:
## Connect to db2 to create and import data for table Invoice
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c316110>

In [51]:
## DDL for the db2 Invoice table.
## Kept under its own name so the imported `sql` module is not shadowed.
create_invoice_sql = '''
CREATE TABLE `Invoice`
(
    `InvoiceId` INT NOT NULL DEFAULT 0,
    `CustomerId` INT DEFAULT 0,
    `InvoiceDate` DATETIME DEFAULT '1900-01-01',
    `BillingAddress` NVARCHAR(70) DEFAULT 'None',
    `BillingCity` NVARCHAR(40) DEFAULT 'None',
    `BillingState` NVARCHAR(40) DEFAULT 'None',
    `BillingCountry` NVARCHAR(40) DEFAULT 'None',
    `BillingPostalCode` NVARCHAR(10) DEFAULT 'None',
    `Total` NUMERIC(10,2) NOT NULL DEFAULT 0.00,
    CONSTRAINT `PK_Invoice` PRIMARY KEY  (`InvoiceId`)
)
'''
cursor.execute(create_invoice_sql)

In [52]:
## Populate the table Invoice from the exported CSV
insert_sql = (
    "INSERT INTO Invoice \n"
    "VALUES (%s, %s, %s, %s, %s,  %s, %s, %s, %s)"
)

## db2_invoice.csv was written by pandas, whose to_csv default encoding is
## UTF-8; read it back with that encoding rather than the platform default so
## accented billing addresses are not corrupted on non-UTF-8 locales.
with open('db2_invoice.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 7 Table InvoiceLine

In [53]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c316fe0>

In [54]:
df_il = pd.read_sql("select * from InvoiceLine", conn)
df_il

Unnamed: 0,InvoiceLineId,InvoiceId,TrackId,UnitPrice,Quantity
0,1,1,2,0.99,1
1,2,1,4,0.99,1
2,3,2,6,0.99,1
3,4,2,8,0.99,1
4,5,2,10,0.99,1
...,...,...,...,...,...
753,754,138,1094,0.99,1
754,755,138,1103,0.99,1
755,756,138,1112,0.99,1
756,757,138,1121,0.99,1


In [55]:
df_il.isnull().sum()

InvoiceLineId    0
InvoiceId        0
TrackId          0
UnitPrice        0
Quantity         0
dtype: int64

In [56]:
df_il.columns

Index(['InvoiceLineId', 'InvoiceId', 'TrackId', 'UnitPrice', 'Quantity'], dtype='object')

In [57]:
## Keep only the four columns stored in the db2 InvoiceLine table (Quantity is left out)
df_il = df_il[['InvoiceLineId', 'InvoiceId', 'TrackId', 'UnitPrice']]

In [58]:
df_il.to_csv("db2_il.csv", index=False)
cursor.close()
conn.close()

In [59]:
## Connect to db2 to create and import data for table InvoiceLine
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c316920>

In [60]:
## DDL for the db2 InvoiceLine table (no Quantity column).
## Kept under its own name so the imported `sql` module is not shadowed.
create_invoiceline_sql = '''
CREATE TABLE `InvoiceLine`
(
    `InvoiceLineId` INT NOT NULL DEFAULT 0,
    `InvoiceId` INT  DEFAULT 0 ,
    `TrackId` INT DEFAULT 0,
    `UnitPrice` NUMERIC(10,2) DEFAULT 0.00,
    CONSTRAINT `PK_InvoiceLine` PRIMARY KEY  (`InvoiceLineId`)
)
'''
cursor.execute(create_invoiceline_sql)

In [61]:
## Populate the table InvoiceLine from the exported CSV
insert_sql = (
    "INSERT INTO InvoiceLine \n"
    "VALUES (%s, %s, %s, %s)"
)

## db2_il.csv was written by pandas, whose to_csv default encoding is UTF-8;
## read it back with that encoding rather than the platform default.
with open('db2_il.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 8 Table MediaType

In [62]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c317250>

In [63]:
df_mt = pd.read_sql("select * from MediaType", conn)
df_mt

Unnamed: 0,MediaTypeId,Name
0,1,MPEG audio file
1,2,Protected AAC audio file
2,3,Protected MPEG-4 video file
3,4,Purchased AAC audio file
4,5,AAC audio file


In [64]:
df_mt.isnull().sum()

MediaTypeId    0
Name           0
dtype: int64

In [65]:
df_mt.to_csv("db2_mt.csv", index=False)
cursor.close()
conn.close()

In [66]:
## Connect to db2 to create and import data for table MediaType
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c315360>

In [67]:
## DDL for the db2 MediaType table.
## Kept under its own name so the imported `sql` module is not shadowed.
create_mediatype_sql = '''
CREATE TABLE `MediaType`
(
    `MediaTypeId` INT NOT NULL DEFAULT 0,
    `Name` NVARCHAR(120) DEFAULT 'None',
    CONSTRAINT `PK_MediaType` PRIMARY KEY  (`MediaTypeId`)
);

'''
cursor.execute(create_mediatype_sql)

In [68]:
## Populate the table MediaType from the exported CSV
insert_sql = (
    "INSERT INTO MediaType \n"
    "VALUES (%s, %s)"
)

## db2_mt.csv was written by pandas, whose to_csv default encoding is UTF-8;
## read it back with that encoding rather than the platform default.
with open('db2_mt.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 9 Table Playlist

In [69]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c316cb0>

In [70]:
df_pl = pd.read_sql("select * from Playlist", conn)
df_pl

Unnamed: 0,PlaylistId,Name
0,1,Music
1,2,Movies
2,3,TV Shows
3,4,Audiobooks
4,5,90’s Music
5,6,Audiobooks
6,7,Movies
7,8,Music
8,9,Music Videos
9,10,TV Shows


In [71]:
df_pl.isnull().sum()

PlaylistId    0
Name          0
dtype: int64

In [72]:
## Adding Customer info: give every playlist a pseudo-random CustomerId
random.seed(1)  ## fixed seed keeps the generated ids reproducible
## One id per playlist row. Sizing the list from the frame avoids the
## length-mismatch ValueError that a hard-coded row count raises as soon as
## the Playlist table changes size.
## NOTE(review): 58 is a hard-coded upper bound for CustomerId -- confirm it
## matches the ids actually present in Customer.
df_pl['CustomerId'] = [random.randint(1, 58) for _ in range(len(df_pl))]
df_pl

Unnamed: 0,PlaylistId,Name,CustomerId
0,1,Music,9
1,2,Movies,37
2,3,TV Shows,55
3,4,Audiobooks,52
4,5,90’s Music,49
5,6,Audiobooks,5
6,7,Movies,17
7,8,Music,8
8,9,Music Videos,32
9,10,TV Shows,49


In [73]:
df_pl.to_csv("db2_pl.csv", index=False)
cursor.close()
conn.close()

In [74]:
## Create and import data for table Playlist: db2
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c39ce20>

In [75]:
## DDL for the db2 Playlist table. CustomerId carries an inline foreign key
## to Customer(CustomerId), so the Customer table has to exist already.
## Kept under its own name so the imported `sql` module is not shadowed.
create_playlist_sql = '''
CREATE TABLE `Playlist`
(
    `PlaylistId` INT NOT NULL DEFAULT 0,
    `Name` NVARCHAR(120) DEFAULT 'None',
    `CustomerId` INT DEFAULT 0,
    CONSTRAINT `PK_Playlist` PRIMARY KEY  (`PlaylistId`),
    FOREIGN KEY (CustomerId) REFERENCES Customer(CustomerId)
);

'''
cursor.execute(create_playlist_sql)

In [76]:
## Populate the table Playlist from the exported CSV
insert_sql = (
    "INSERT INTO Playlist \n"
    "VALUES (%s, %s, %s)"
)

## db2_pl.csv was written by pandas, whose to_csv default encoding is UTF-8;
## read it back with that encoding rather than the platform default so
## non-ASCII playlist names are not corrupted on non-UTF-8 locales.
with open('db2_pl.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 10 Table PlaylistTrack

In [77]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c39f5b0>

In [78]:
df_pt = pd.read_sql("select * from PlaylistTrack", conn)
df_pt

Unnamed: 0,PlaylistId,TrackId,AddingDate
0,1,1,2013-08-03
1,1,2,2011-05-19
2,1,3,2011-08-20
3,1,4,2011-07-07
4,1,5,2009-02-02
...,...,...,...
8710,17,2094,2010-01-26
8711,17,2095,2009-03-06
8712,17,2096,2010-02-10
8713,17,3290,2011-10-23


In [79]:
df_pt.isnull().sum()

PlaylistId    0
TrackId       0
AddingDate    0
dtype: int64

In [80]:
## Load the calendar rows whose PurchaseDate values are sampled in the next
## cell to fill the "AddingDate" field
import pandas as pd
df_addingday_source = pd.read_csv("AddingDate_source.csv")
df_addingday_source.head()

Unnamed: 0,TimeKey,PurchaseDate,DayOfWeek,Month,Quarter,Year
0,1,2009-01-01,4,1,1,2009
1,2,2009-01-02,5,1,1,2009
2,3,2009-01-03,6,1,1,2009
3,4,2009-01-06,2,1,1,2009
4,5,2009-01-11,7,1,1,2009


In [81]:
## Give every PlaylistTrack row a pseudo-random AddingDate drawn from the
## PurchaseDate values loaded above.
random.seed(1)  ## fixed seed keeps the generated dates reproducible
## Sample from a plain list, and size the result from the frame: a hard-coded
## row count raises a length-mismatch ValueError as soon as PlaylistTrack
## changes size.
purchase_dates = df_addingday_source.PurchaseDate.tolist()
df_pt['AddingDate'] = [random.choice(purchase_dates) for _ in range(len(df_pt))]

In [82]:
df_pt

Unnamed: 0,PlaylistId,TrackId,AddingDate
0,1,1,2009-12-10
1,1,2,2013-02-02
2,1,3,2009-06-07
3,1,4,2010-10-24
4,1,5,2009-11-07
...,...,...,...
8710,17,2094,2013-02-07
8711,17,2095,2012-12-15
8712,17,2096,2010-04-21
8713,17,3290,2009-02-02


In [83]:
df_pt.to_csv("db2_pt.csv", index=False)
cursor.close()
conn.close()

In [84]:
## Connect to db2
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c39e920>

In [85]:
## DDL for the db2 PlaylistTrack table; the primary key is the
## (PlaylistId, TrackId) pair.
## Kept under its own name so the imported `sql` module is not shadowed.
create_playlisttrack_sql = '''
CREATE TABLE `PlaylistTrack`
(
    `PlaylistId` INT NOT NULL DEFAULT 0,
    `TrackId` INT DEFAULT 0,
    `AddingDate` DATE DEFAULT '1900-01-01',
    CONSTRAINT `PK_PlaylistTrack` PRIMARY KEY  (`PlaylistId`, `TrackId`)
);

'''
cursor.execute(create_playlisttrack_sql)

In [86]:
## Populate the table PlaylistTrack from the exported CSV
insert_sql = (
    "INSERT INTO PlaylistTrack \n"
    "VALUES (%s, %s, %s)"
)

## db2_pt.csv was written by pandas, whose to_csv default encoding is UTF-8;
## read it back with that encoding rather than the platform default.
with open('db2_pt.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 11 Table Track

In [87]:
conn = make_connection(config_file = 'config_sjsu_daydayup_db.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c39f3d0>

In [88]:
df_tk = pd.read_sql("select * from Track", conn)
df_tk

Unnamed: 0,TrackId,Name,AlbumId,MediaTypeId,GenreId,Composer,Milliseconds,Bytes,UnitPrice
0,1,For Those About To Rock (We Salute You),1,1,1,"Angus Young, Malcolm Young, Brian Johnson",343719,11170334,0.99
1,2,Balls to the Wall,2,2,1,,342562,5510424,0.99
2,3,Fast As a Shark,3,2,1,"F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...",230619,3990994,0.99
3,4,Restless and Wild,3,2,1,"F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...",252051,4331779,0.99
4,5,Princess of the Dawn,3,2,1,Deaffy & R.A. Smith-Diesel,375418,6290521,0.99
...,...,...,...,...,...,...,...,...,...
3498,3499,Pini Di Roma (Pinien Von Rom) I Pini Della Vi...,343,2,24,,286741,4718950,0.99
3499,3500,"String Quartet No. 12 in C Minor, D. 703 ""Quar...",344,2,24,Franz Schubert,139200,2283131,0.99
3500,3501,"L'orfeo, Act 3, Sinfonia (Orchestra)",345,2,24,Claudio Monteverdi,66639,1189062,0.99
3501,3502,"Quintet for Horn, Violin, 2 Violas, and Cello ...",346,2,24,Wolfgang Amadeus Mozart,221331,3665114,0.99


In [89]:
df_tk.isnull().sum()

TrackId           0
Name              0
AlbumId           0
MediaTypeId       0
GenreId           0
Composer        978
Milliseconds      0
Bytes             0
UnitPrice         0
dtype: int64

In [90]:
## Composer is the only column reported with nulls; label those gaps.
## NOTE(review): the placeholder here is lowercase 'unknown', whereas the
## Customer and Invoice cells use 'Unknown' -- confirm this is intended.
df_tk = df_tk.fillna('unknown')

In [91]:
df_tk.to_csv("db2_tk.csv", index=False)
cursor.close()
conn.close()

In [92]:
## Connect to db2
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c39f0d0>

In [93]:
## DDL for the db2 Track table.
## Kept under its own name so the imported `sql` module is not shadowed.
create_track_sql = '''
CREATE TABLE `Track`
(
    `TrackId` INT NOT NULL DEFAULT 0,
    `Name` NVARCHAR(200) DEFAULT 'None',
    `AlbumId` INT DEFAULT 0,
    `MediaTypeId` INT DEFAULT 0,
    `GenreId` INT DEFAULT 0,
    `Composer` NVARCHAR(220) DEFAULT 'None',
    `Milliseconds` INT DEFAULT 0,
    `Bytes` INT DEFAULT 0,
    `UnitPrice` NUMERIC(10,2) DEFAULT 0.0,
     PRIMARY KEY  (`TrackId`)
);
'''
cursor.execute(create_track_sql)

In [94]:
## Populate the table Track from the exported CSV
insert_sql = (
    "INSERT INTO Track \n"
    "VALUES (%s, %s, %s, %s, %s,  %s, %s, %s, %s)"
)

## db2_tk.csv was written by pandas, whose to_csv default encoding is UTF-8;
## read it back with that encoding rather than the platform default so
## non-ASCII track and composer names are not corrupted on non-UTF-8 locales.
with open('db2_tk.csv', newline='', encoding='utf-8') as csv_file:
    data = csv.reader(csv_file, delimiter=',', quotechar='"')
    next(data, None)  ## Skip the header row

    for row in data:
        cursor.execute(insert_sql, row)

## Persist the inserts, then release the db2 cursor and connection
conn.commit()
cursor.close()
conn.close()

# 12 Set up foreign keys

In [95]:
## Connect to db2
conn = make_connection(config_file = 'config_sjsu_daydayup_db2.ini')
cursor = conn.cursor()
conn

<mysql.connector.connection.MySQLConnection at 0x16c662380>

In [96]:
## Album: ArtistId must reference an existing Artist row
cursor.execute(
    "ALTER TABLE `Album` "
    "ADD CONSTRAINT `FK_AlbumArtistId`FOREIGN KEY (`ArtistId`) "
    "REFERENCES `Artist` (`ArtistId`)"
)



In [97]:
## Customer: SupportRepId must reference an existing Employee row
cursor.execute(
    "ALTER TABLE `Customer` "
    "ADD CONSTRAINT `FK_CustomerSupportRepId`FOREIGN KEY (`SupportRepId`) "
    "REFERENCES `Employee` (`EmployeeId`)"
)



In [98]:
## Employee: ReportsTo is a self-reference to another Employee row
cursor.execute(
    "ALTER TABLE `Employee` "
    "ADD CONSTRAINT `FK_EmployeeReportsTo` FOREIGN KEY (`ReportsTo`) "
    "REFERENCES `Employee` (`EmployeeId`)"
)



In [99]:
## Invoice: CustomerId must reference an existing Customer row
cursor.execute(
    "ALTER TABLE `Invoice` "
    "ADD CONSTRAINT `FK_InvoiceCustomerId` FOREIGN KEY (`CustomerId`) "
    "REFERENCES `Customer` (`CustomerId`)"
)



In [100]:
## Invoice Line: link each line to its Invoice and to its Track
for fk_statement in (
    "ALTER TABLE `InvoiceLine` ADD CONSTRAINT `FK_InvoiceLineInvoiceId` FOREIGN KEY (`InvoiceId`) REFERENCES `Invoice` (`InvoiceId`)",
    "ALTER TABLE `InvoiceLine` ADD CONSTRAINT `FK_InvoiceLineTrackId` FOREIGN KEY (`TrackId`) REFERENCES `Track` (`TrackId`)",
):
    cursor.execute(fk_statement)

In [101]:
## Playlist Track: link each row to its Playlist and to its Track
for fk_statement in (
    "ALTER TABLE `PlaylistTrack` ADD CONSTRAINT `FK_PlaylistTrackPlaylistId` FOREIGN KEY (`PlaylistId`) REFERENCES `Playlist` (`PlaylistId`)",
    "ALTER TABLE `PlaylistTrack` ADD CONSTRAINT `FK_PlaylistTrackTrackId` FOREIGN KEY (`TrackId`) REFERENCES `Track` (`TrackId`)",
):
    cursor.execute(fk_statement)


In [102]:
## Track: link each track to its Album, Genre and MediaType
for fk_statement in (
    "ALTER TABLE `Track` ADD CONSTRAINT `FK_TrackAlbumId` FOREIGN KEY (`AlbumId`) REFERENCES `Album` (`AlbumId`)",
    "ALTER TABLE `Track` ADD CONSTRAINT `FK_TrackGenreId` FOREIGN KEY (`GenreId`) REFERENCES `Genre` (`GenreId`)",
    "ALTER TABLE `Track` ADD CONSTRAINT `FK_TrackMediaTypeId` FOREIGN KEY (`MediaTypeId`) REFERENCES `MediaType` (`MediaTypeId`)",
):
    cursor.execute(fk_statement)


In [103]:
cursor.close()
conn.close()

## (END)