# Table of Contents

* [Import all csv files into SQLite database](#Import-all-csv-files-into-SQLite-database)
* [Save the files permanently to the SQLite database](#Save-the-files-permanently-to-the-SQLite-database)

## Import all csv files into SQLite database

In [1]:
import pandas as pd
import numpy as np
from numpy.random import seed
import matplotlib.pyplot as plt
%matplotlib inline  
import statistics
from scipy import stats
from scipy.stats import t
from scipy.stats import norm
import seaborn as sns

In [2]:
# tutorial on creating database using sqlite3
# https://www.sqlitetutorial.net/sqlite-python/
# https://stackoverflow.com/questions/2887878/importing-a-csv-file-into-a-sqlite3-database-table-using-python
# to drop a table in database, example: cur.execute('DROP TABLE IF EXISTS app_sql')
import sqlite3
from sqlite3 import Error
import csv

In [3]:
def create_connection(db_file):
    """ create a database connection to a SQLite database """
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        print(sqlite3.version)
    except Error as e:
        print(e)
    finally:
        if conn:
            conn.close()
 
#if __name__ == '__main__':
create_connection(r"pythonsqlite.db")

2.6.0


In [4]:
# remember to close the connection at the end of the code
con = sqlite3.connect(r"pythonsqlite.db")
cur = con.cursor()

In [5]:
# define a function that takes in dataframe name df and output the string combining variable name and type to 
# feed in the create table statement in sql
def create_table(df, tablename, cur): 
    """ create a table with proper variable types in the sqlite database """
    keep_dtype = ''
    type_list=[]
    for cols in df.columns:
        if str(df[cols].dtype) == 'int64':
            keep_dtype = 'INTEGER'
        elif str(df[cols].dtype) == 'float64':
            keep_dtype = 'REAL'
        elif str(df[cols].dtype) == 'object':
            keep_dtype = 'TEXT'
        else:
            keep_dtype = str(df[cols].dtype)
            
        type_list.append(cols + ' ' + str(keep_dtype))

    createtbl_string = "CREATE TABLE {} ({});".format(tablename, ', '.join(type_list))
    
    print(createtbl_string)
    try:
        cur.execute(createtbl_string)
        return 'Table {} is created successfully!'.format(tablename)
    except:
        return 'Create Table {} Failed!'.format(tablename)


In [6]:
def insert_values(df, tablename, filename, cur):
    """ insert values into a table in sqlite database """
    insert_string = 'INSERT INTO {} VALUES ({});'.format(tablename, ','.join(list("?"*len(df.columns))))
    
    with open('{}.csv'.format(filename),'r', newline='') as fin: # `with` statement available in 2.5+
        filereader = csv.reader(fin) 
        next(filereader) # skip header
        for row in filereader:
            cur.execute(insert_string, row)

In [7]:
# read in all the data files 
app = pd.read_csv('/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/application_train.csv')
prev = pd.read_csv('/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/previous_application.csv')
bureau = pd.read_csv('/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/bureau.csv')
bureau_bal = pd.read_csv('/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/bureau_balance.csv')
card_bal = pd.read_csv('/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/credit_card_balance.csv')
inst_pay = pd.read_csv('/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/installments_payments.csv')
pos_bal = pd.read_csv('/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/POS_CASH_balance.csv')

In [8]:
create_table(app, 'app_sql', cur)
create_table(prev, 'prev_sql', cur)
create_table(bureau, 'bureau_sql', cur)
create_table(bureau_bal, 'bureau_bal_sql', cur)
create_table(card_bal, 'card_bal_sql', cur)
create_table(inst_pay, 'inst_pay_sql', cur)
create_table(pos_bal, 'pos_bal_sql', cur)

CREATE TABLE app_sql (SK_ID_CURR INTEGER, TARGET INTEGER, NAME_CONTRACT_TYPE TEXT, CODE_GENDER TEXT, FLAG_OWN_CAR TEXT, FLAG_OWN_REALTY TEXT, CNT_CHILDREN INTEGER, AMT_INCOME_TOTAL REAL, AMT_CREDIT REAL, AMT_ANNUITY REAL, AMT_GOODS_PRICE REAL, NAME_TYPE_SUITE TEXT, NAME_INCOME_TYPE TEXT, NAME_EDUCATION_TYPE TEXT, NAME_FAMILY_STATUS TEXT, NAME_HOUSING_TYPE TEXT, REGION_POPULATION_RELATIVE REAL, DAYS_BIRTH INTEGER, DAYS_EMPLOYED INTEGER, DAYS_REGISTRATION REAL, DAYS_ID_PUBLISH INTEGER, OWN_CAR_AGE REAL, FLAG_MOBIL INTEGER, FLAG_EMP_PHONE INTEGER, FLAG_WORK_PHONE INTEGER, FLAG_CONT_MOBILE INTEGER, FLAG_PHONE INTEGER, FLAG_EMAIL INTEGER, OCCUPATION_TYPE TEXT, CNT_FAM_MEMBERS REAL, REGION_RATING_CLIENT INTEGER, REGION_RATING_CLIENT_W_CITY INTEGER, WEEKDAY_APPR_PROCESS_START TEXT, HOUR_APPR_PROCESS_START INTEGER, REG_REGION_NOT_LIVE_REGION INTEGER, REG_REGION_NOT_WORK_REGION INTEGER, LIVE_REGION_NOT_WORK_REGION INTEGER, REG_CITY_NOT_LIVE_CITY INTEGER, REG_CITY_NOT_WORK_CITY INTEGER, LIVE_CIT

'Table pos_bal_sql is created successfully!'

In [9]:
# load data into a sqlite table one by one
insert_values(app, 'app_sql', '/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/application_train', cur)
insert_values(prev, 'prev_sql', '/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/previous_application', cur)
insert_values(bureau, 'bureau_sql', '/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/bureau', cur)
insert_values(bureau_bal, 'bureau_bal_sql', '/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/bureau_balance', cur)
insert_values(card_bal, 'card_bal_sql', '/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/credit_card_balance', cur)
insert_values(inst_pay, 'inst_pay_sql', '/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/installments_payments', cur)
insert_values(pos_bal, 'pos_bal_sql', '/Users/lisalb168/Desktop/Springboard materials/home-credit-default-risk/data/POS_CASH_balance', cur)

## Save the files permanently to the SQLite database

In [10]:
# save changes
con.commit()

In [11]:
# close connection
con.close()