# Extraction

In [21]:
import requests
import pandas as pd
#from pyspark.sql import SparkSession
import mysql.connector as msql
from mysql.connector import Error
# This file contains our login information for the MySQL server
import credentials as C

In [19]:
# store API url
url = 'https://raw.githubusercontent.com/platformps/LoanDataset/main/loan_data.json'

# assign the headers- not always necessary, but something we have to do with the GitHub API
headers = {'Accept': 'application/vnd.github.v3+json'}

# assign the requests method
r = requests.get(url, headers=headers)

# print a status update for the requests command
print(f"Status code: {r.status_code}")

# store API response to variable
api_results = r.json()
loan_data_df = pd.DataFrame(api_results)
print("Dataframe succefully created")

Status code: 200
Dataframe succefully created


In [25]:
loan_data_df.head()

Unnamed: 0,Application_ID,Gender,Married,Dependents,Education,Self_Employed,Credit_History,Property_Area,Income,Application_Status
0,LP001002,Male,No,0,Graduate,No,1,Urban,medium,Y
1,LP001003,Male,Yes,1,Graduate,No,1,Rural,medium,N
2,LP001005,Male,Yes,0,Graduate,Yes,1,Urban,low,Y
3,LP001006,Male,Yes,0,Not Graduate,No,1,Urban,low,Y
4,LP001008,Male,No,0,Graduate,No,1,Urban,medium,Y


Lets see what the unique values are in each of the dataframe columns

In [28]:
for column in loan_data_df.columns:
    # skip the first column since that column is all unique
    if column == 'Application_ID':
        continue
    unique_values = loan_data_df[column].unique()
    print(f'Column: {column}')
    print(unique_values)
    print()

Column: Gender
['Male' 'Female']

Column: Married
['No' 'Yes']

Column: Dependents
['0' '1' '2' '3+']

Column: Education
['Graduate' 'Not Graduate']

Column: Self_Employed
['No' 'Yes']

Column: Credit_History
[1 0]

Column: Property_Area
['Urban' 'Rural' 'Semiurban']

Column: Income
['medium' 'low' 'high']

Column: Application_Status
['Y' 'N']



Let's connect to the database.

In [23]:
try:
    conn = msql.connect(host = C.host_name, database = 'creditcard_capstone',
                        user = C.user_name, password = C.password)
    if conn.is_connected():
        cursor = conn.cursor()
        cursor.execute("select database();")
        record = cursor.fetchone()
        print("You're connected to database: ", record)
except Error as e:
    print('Error while connecting to MySQL',e)

You're connected to database:  ('creditcard_capstone',)


Let's create the sql query to create the table.

In [29]:
loan_table = (
"CREATE TABLE IF NOT EXISTS `cdw_sapp_loan_application` ("
"  `ID` CHAR(8) NOT NULL,"
"  `Gender` VARCHAR(6) NULL,"
"  `Married` VARCHAR(3) NULL,"
"  `Dependents` VARCHAR(2) NULL,"
"  `Education` VARCHAR(12) NULL,"
"  `Self_Employed` VARCHAR(3) NULL," 
"  `Credit_History` VARCHAR(1) NULL,"
"  `Property_Area` VARCHAR(10) NULL,"
"  `Income` VARCHAR(8) NULL,"
"  `Application_Status` VARCHAR(1) NULL,"
"  PRIMARY KEY (`ID`))"
"ENGINE = InnoDB  ")

In [30]:
try:
    if conn.is_connected():
        #lets add the loan into the db
        cursor.execute('DROP TABLE IF EXISTS cdw_sapp_loan_application;')
        print('Creating cdw_sapp_loan_application table....')
        cursor.execute(loan_table)
        print("cdw_sapp_branch table is created....")
        #loop through the data frame
        for i,row in loan_data_df.iterrows():
            #here %S represents each object in the row, one object per column
            sql = "INSERT INTO creditcard_capstone.cdw_sapp_loan_application\
                  VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            cursor.execute(sql, tuple(row))
            print(f"{i+1} Loan Application Records inserted")
            # the connection is not auto committed by default so we must commit to save our changes
            conn.commit()
        print("Loan Application data fulled loaded")
except Error as e:
    print('Error while connecting to MySQL',e)

Creating cdw_sapp_loan_application table....
cdw_sapp_branch table is created....
1 Loan Application Records inserted
2 Loan Application Records inserted
3 Loan Application Records inserted
4 Loan Application Records inserted
5 Loan Application Records inserted
6 Loan Application Records inserted
7 Loan Application Records inserted
8 Loan Application Records inserted
9 Loan Application Records inserted
10 Loan Application Records inserted
11 Loan Application Records inserted
12 Loan Application Records inserted
13 Loan Application Records inserted
14 Loan Application Records inserted
15 Loan Application Records inserted
16 Loan Application Records inserted
17 Loan Application Records inserted
18 Loan Application Records inserted
19 Loan Application Records inserted
20 Loan Application Records inserted
21 Loan Application Records inserted
22 Loan Application Records inserted
23 Loan Application Records inserted
24 Loan Application Records inserted
25 Loan Application Records inserted
26