------------
# Loading Data into MySQL

In [1]:
import pandas as pd
import mysql.connector as msql
from mysql.connector import Error
# This file contains our login information for the MySQL server
import credentials as C

Let's connect to the MySQL server and create a database called "creditcard_capstone".  If there is a connection error or if the table already exists then I will see an exception thrown.

In [2]:
try:
    conn=msql.connect(host = C.host_name, user = C.user_name, password = C.password)
    if conn.is_connected():
        cursor=conn.cursor()
        cursor.execute("CREATE DATABASE creditcard_capstone")
        print('Database is created')
except Error as e:
    print('Error while connecting to MySQL',e)

Error while connecting to MySQL 1007 (HY000): Can't create database 'creditcard_capstone'; database exists


Let's load the data that we will be loading onto the db.  We are using pandas instead of pyspark because of the structure of the json files.  When I created the json files I used a pandas function to save them.  Pandas saves the json in a columnar format but spark requires the json to be structured in a row based format.  I would have to add ```, orient='records', lines=True``` options to the pandas .to_json() write function in order to have json files that can be used by spark.  Since we won't be doing any more transformations on the data then I don't see a need to use pyspark anymore.

In [3]:
branch_df = pd.read_json("clean_data/branch.json")
cust_df = pd.read_json("clean_data/customer.json")
cc_df = pd.read_json("clean_data/credit_card.json")

Now lets establish the schema for the tables we will want to create

In [4]:
branch_table = (
"CREATE TABLE IF NOT EXISTS `cdw_sapp_branch` ("
"  `BRANCH_CODE` INT(3) NOT NULL,"
"  `BRANCH_NAME` VARCHAR(50) NULL,"
"  `BRANCH_STREET` VARCHAR(50) NULL,"
"  `BRANCH_CITY` VARCHAR(40) NULL,"
"  `BRANCH_STATE` VARCHAR(2) NULL,"
"  `BRANCH_ZIP` INT(5) ZEROFILL NULL,"  #had to use ZEROFILL because the leading 0s were being dropped
"  `BRANCH_PHONE` VARCHAR(13) NULL,"
"  `LAST_UPDATED` TIMESTAMP NULL,"
"  PRIMARY KEY (`BRANCH_CODE`))"
"ENGINE = InnoDB  ")

In [5]:
# I can't place the below comment because if I break up the string the query won't execute

cc_table = (
"CREATE TABLE IF NOT EXISTS `cdw_sapp_credit_card` ("
"  `TRANSACTION_ID` INT(5) NOT NULL,"
"  `CUST_CC_NO` VARCHAR(16) NULL,"
"  `TIMEID` VARCHAR(8) NULL,"
"  `CUST_SSN` INT(9) NULL,"
"  `BRANCH_CODE` INT(3) NULL,"
"  `TRANSACTION_TYPE` VARCHAR(40) NULL,"
"  `TRANSACTION_VALUE` DOUBLE(7,2) NULL," #had to limit the digits after the decimal because trans #46687 ended up being 5.56000000005
"  PRIMARY KEY (`TRANSACTION_ID`))"
"ENGINE = InnoDB  ")

In [6]:
cust_table = (
"CREATE TABLE IF NOT EXISTS `cdw_sapp_customer` ("
"  `SSN` INT(9) NOT NULL,"
"  `FIRST_NAME` VARCHAR(25) NULL,"
"  `MIDDLE_NAME` VARCHAR(25) NULL,"
"  `LAST_NAME` VARCHAR(25) NULL,"
"  `Credit_card_no` VARCHAR(16) NULL,"
"  `FULL_STREET_ADDRESS` VARCHAR(80) NULL,"
"  `CUST_CITY` VARCHAR(40) NULL,"
"  `CUST_STATE` VARCHAR(2) NULL,"
"  `CUST_COUNTRY` VARCHAR(50) NULL,"
"  `CUST_ZIP` INT(5) UNSIGNED ZEROFILL NULL,"  #had to use ZEROFILL because the leading 0s were being dropped
"  `CUST_PHONE` VARCHAR(15) NULL,"
"  `CUST_EMAIL` VARCHAR(60) NULL,"
"  `LAST_UPDATED` TIMESTAMP NULL,"
"  PRIMARY KEY (`SSN`))"
"ENGINE = InnoDB  ")

Let's load the branch data into the database.

In [7]:
try:
    conn = msql.connect(host = C.host_name, database = 'creditcard_capstone',
                        user = C.user_name, password = C.password)
    if conn.is_connected():
        cursor = conn.cursor()
        cursor.execute("select database();")
        record = cursor.fetchone()
        print("You're connected to database: ", record)

        #lets add the branch data into the db
        cursor.execute('DROP TABLE IF EXISTS cdw_sapp_branch;')
        print('Creating cdw_sapp_branch table....')
        cursor.execute(branch_table)
        print("cdw_sapp_branch table is created....")
        #loop through the data frame
        for i,row in branch_df.iterrows():
            #here %S represents each object in the row, one object per column
            sql = "INSERT INTO creditcard_capstone.cdw_sapp_branch\
                  VALUES (%s,%s,%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql, tuple(row))
            print(f"{i+1} Branch Records inserted")
            # the connection is not auto committed by default so we must commit to save our changes
            conn.commit()
        print("Branch data fulled loaded")

        #lets add the credit card transaction data to the db
        cursor.execute('DROP TABLE IF EXISTS cdw_sapp_credit_card;')
        print('Creating cdw_sapp_credit_card table....')
        cursor.execute(cc_table)
        print("cdw_sapp_credit_card table is created....")
        #loop through the data frame
        for i,row in cc_df.iterrows():
            sql = "INSERT INTO creditcard_capstone.cdw_sapp_credit_card\
                  VALUES (%s,%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql, tuple(row))
            print(f"{i+1} CC Transaction Records inserted")
            conn.commit()
        print("Credit Card transaction data fulled loaded")

        #lets add the customer data to the db
        cursor.execute('DROP TABLE IF EXISTS cdw_sapp_customer;')
        print('Creating cdw_sapp_customer table....')
        cursor.execute(cust_table)
        print("cdw_sapp_customer table is created....")
        #loop through the data frame
        for i,row in cust_df.iterrows(): 
            sql = "INSERT INTO creditcard_capstone.cdw_sapp_customer\
                  VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql, tuple(row))
            print(f"{i+1} Customer Records inserted")
            conn.commit()
        print("Customer data fulled loaded")
except Error as e:
            print("Error while connecting to MySQL", e)

You're connected to database:  ('creditcard_capstone',)
Creating cdw_sapp_branch table....
cdw_sapp_branch table is created....
1 Branch Records inserted
2 Branch Records inserted
3 Branch Records inserted
4 Branch Records inserted
5 Branch Records inserted
6 Branch Records inserted
7 Branch Records inserted
8 Branch Records inserted
9 Branch Records inserted
10 Branch Records inserted
11 Branch Records inserted
12 Branch Records inserted
13 Branch Records inserted
14 Branch Records inserted
15 Branch Records inserted
16 Branch Records inserted
17 Branch Records inserted
18 Branch Records inserted
19 Branch Records inserted
20 Branch Records inserted
21 Branch Records inserted
22 Branch Records inserted
23 Branch Records inserted
24 Branch Records inserted
25 Branch Records inserted
26 Branch Records inserted
27 Branch Records inserted
28 Branch Records inserted
29 Branch Records inserted
30 Branch Records inserted
31 Branch Records inserted
32 Branch Records inserted
33 Branch Records