In [1]:
# Standard library
import json

# Third-party
import pandas as pd
import pyodbc
import sqlalchemy as sa
# Import the display helpers from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML, Markdown

# CSS override that sets the notebook's `.container` element to 95% width
# (presumably so the wide table previews further down fit on screen).
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
# Display the installed pyodbc version string.
pyodbc.version

'4.0.0-unsupported'

In [3]:
# Ask pyodbc for the names of the ODBC drivers available on this machine.
drivers = pyodbc.drivers()

In [4]:
# Display the driver list so the chosen driver can be checked by eye.
drivers

['ODBC Driver 17 for SQL Server', 'ODBC Driver 13 for SQL Server']

In [5]:
# Choose the ODBC driver whose name is passed as the `driver` query parameter of
# the connection string below. Instead of blindly taking drivers[0] (which may be
# a driver for a different database when several ODBC drivers are installed),
# take the first driver whose name mentions SQL Server and fail loudly otherwise.
driver = next((name for name in drivers if "SQL Server" in name), None)
if driver is None:
    raise RuntimeError(f"No SQL Server ODBC driver found; installed drivers: {drivers}")

In [10]:
# Read all credential sets from the JSON file kept outside the notebook.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default text encoding.
with open("../creds/creds.json", "r", encoding="utf-8") as f:
    all_creds = json.load(f)

In [11]:
# Select the credential set stored under the "ms-sqlserver" key.
creds = all_creds["ms-sqlserver"]

In [12]:
# Expose every JSON entry for this database as a notebook-level variable; the
# connection-string cell reads them by name.
# `globals()` is used instead of `locals()`: writing to the dict returned by
# `locals()` is not a supported way to create variables and only worked here
# because, at the top level of a notebook, it is the same dict as `globals()`.
globals().update(creds)

In [13]:
from urllib.parse import quote  # local import: only needed to build the URL below

# Build the SQLAlchemy URL for the mssql+pyodbc dialect.
# The user name, password and driver name are percent-encoded so that characters
# with a meaning inside a URL ('@', ':', '/', spaces, ...) cannot break parsing.
# `safe=""` also encodes '/', and spaces become %20 rather than '+'.
# NOTE(review): this assumes the values in creds.json are stored in plain form,
# not already percent-encoded -- confirm, otherwise they would be encoded twice.
connxn_string = (
    f"mssql+pyodbc://{quote(str(username), safe='')}:{quote(str(password), safe='')}"
    f"@{host}:{port}/{db}?driver={quote(driver, safe='')}"
)

In [14]:
# Create the engine for the connection string built above. `connect_args` is
# handed to the DBAPI connect call, so every connection is opened with
# autocommit=True.
# NOTE(review): the original author found autocommit necessary without knowing
# why; presumably statements such as CREATE DATABASE refuse to run inside a
# transaction -- TODO confirm.
engine = sa.create_engine(connxn_string, connect_args=dict(autocommit=True))

In [15]:
# engine.url

In [16]:
# Create an inspector for the engine; it is used in the next cell to list schemas.
insp = sa.inspect(engine)

In [17]:
# Display the schema names reported by the inspector.
insp.get_schema_names()

['db_accessadmin',
 'db_backupoperator',
 'db_datareader',
 'db_datawriter',
 'db_ddladmin',
 'db_denydatareader',
 'db_denydatawriter',
 'db_owner',
 'db_securityadmin',
 'dbo',
 'guest',
 'INFORMATION_SCHEMA',
 'sys']

### Note:
The next cell is a bit confusing, but when you set up an AWS RDS instance of MS SQL Server, you don't have the option to specify a default database name. Instead, AWS automatically creates a database called `rdsadmin`. Unfortunately, you can't create a new schema or a new database while using this database.

You first need to change the database you're using to `master`, *then* create a new database, then change to that database, and then create a new schema.

In [18]:
# with engine.connect() as con:
#     con.execute("USE master;") 
#     con.execute("CREATE DATABASE my_db;")
#     con.execute("USE my_db;")
#     con.execute("CREATE SCHEMA mortgage;")

In [19]:
# If you've already run the setup code above, so the db and schema already exist,
# run this cell to simply switch to the correct db.
# The statement is wrapped in `sa.text(...)` because passing a raw SQL string to
# `Connection.execute()` is deprecated in SQLAlchemy 1.4 and removed in 2.0.
# NOTE(review): `USE` only affects the connection it runs on; later cells see
# `my_db` only if they are served that same pooled connection -- confirm.
with engine.connect() as con:
    con.execute(sa.text("USE my_db;"))

In [20]:
# List the tables in the `mortgage` schema. `Engine.table_names()` is deprecated
# in SQLAlchemy 1.4 and removed in 2.0; the inspector method returns the same
# list of names.
sa.inspect(engine).get_table_names(schema="mortgage")

['auto_insurance_claim',
 'auto_insurance_policy',
 'customer',
 'customer_activity',
 'customer_attrition',
 'customer_offer',
 'mortgage_applicant',
 'mortgage_customer',
 'mortgage_default',
 'mortgage_property']

## Load csvs

In [21]:
# Newline-separated names of the CSV files to load. The cells below split this
# string on newlines and derive one table name from each file name.
csvs = """
Auto Insurance Claim.csv
Auto Insurance Policy.csv
Customer Activity.csv
Customer Attrition.csv
Customer Offers.csv
Customer.csv
Mortgage Applicant.csv
Mortgage Customer.csv
Mortgage Default.csv
Mortgage Property.csv
"""

In [23]:
# Load each CSV into a DataFrame, preview it, and write it to the `mortgage`
# schema, replacing any existing table of the same name.
for file in csvs.strip("\n").split("\n"):
    # Derive the table name from the file name,
    # e.g. "Mortgage Default.csv" -> "mortgage_default".
    # NOTE: str.strip(".csv") strips the *characters* '.', 'c', 's', 'v' from both
    # ends, not the ".csv" suffix, which is why "Customer Offers.csv" becomes
    # "customer_offer". It is kept unchanged so table names stay the same as the
    # ones already created in the database.
    k = file.strip(".csv").lower().replace(" ", "_")
    print(k)
    # `infer_datetime_format=True` was dropped: it only applies to columns
    # selected via `parse_dates` (none are requested here), and the argument is
    # deprecated in pandas 2.x. The resulting frame is unchanged.
    df = pd.read_csv(f"../cp4d-mortgage-artifacts/{file}")
    display(df.head())
    df.to_sql(
        name=k,
        con=engine,
        if_exists="replace",
        schema="mortgage",
        index=False,
        # method="multi" is deliberately not passed: multi-row insert doesn't
        # work for SQL Server.
    )

auto_insurance_claim


Unnamed: 0,CUSTOMER,CLAIM_ID,POLICY_ID,FIRST_NOTICE_OF_LOSS_DATE,RESPONSE,CLAIM_REASON,INCIDENT_SUMMARY,CLAIM_AMOUNT,TOTAL_CLAIM_AMOUNT
0,CH85057,5373215598,PM4788594,2016-08-08,No,Other,,1146.399097,540.141566
1,CH85444,8989129787,MT0378990,2016-10-14,No,Collision,,414.571186,1067.333126
2,CH85444,9636701890,OF0128306,2017-01-02,No,Collision,,1067.725385,611.918994
3,CH85444,9177470640,SX8636429,2017-03-12,No,Scratch/Dent,,807.224559,282.458203
4,CH97539,16599104,AE6086634,2017-05-31,Yes,Hail,,387.364705,701.708239


auto_insurance_policy


Unnamed: 0,CUSTOMER,POLICY_ID,COVERAGE,EFFECTIVE_TO_DATE,POLICY_TYPE,POLICY,RENEW_OFFER,SALES_CHANNEL,AGENT_ID,FAMILY_MEMBERS_INCLUDED,...,DENIED_CLAIMS,CLAIMS_FILED,AVG_LENGTH_OF_HANDLING_PROCESS,ADD_DRIVER_INTO_POLICY,ADDRESS_CHANGE_COUNT,MAINTENANCE_COST,AUTO_MODEL_YEAR,AUTO_MAKE,AUTO_MODEL,AUTO_COLOR
0,CD48033,DG0591347,Basic,2019-02-12,Personal Auto,Personal L1,offer1,Branch,-,2,...,0,0,14,0,0,7.3,2017,HONDA,ACCORD,WHITE
1,CF15086,JF3642903,Basic,2019-01-24,Corporate Auto,Corporate L2,offer3,Web,-,2,...,0,7,3,0,0,6.7,2007,NISSAN,MURANO,SILVER
2,CU13711,KZ6551376,Basic,2019-01-16,Personal Auto,Personal L1,offer4,Web,-,3,...,0,8,26,1,1,6.9,2006,CHEVROLET,PICKUP,GRAY
3,CU13711,SB4987297,Basic,2019-01-15,Personal Auto,Personal L3,offer2,Branch,-,1,...,7,15,23,0,5,18.6,2012,TOYOTA,PRIUS,GRAY
4,CU19735,ZN6161626,Basic,2019-01-05,Corporate Auto,Corporate L3,offer2,Agent,Agent-80,1,...,5,16,29,0,3,7.3,2009,HONDA,CIVIC,BLUE


customer_activity


Unnamed: 0,ID,CUSTOMER,TOTAL_DOLLAR_VALUE_TRADED,TOTAL_UNITS_TRADED,LARGEST_SINGLE_TRANSACTION,SMALLEST_SINGLE_TRANSACTION,PERCENT_CHANGE_CALCULATION,DAYS_SINCE_LAST_LOGIN,DAYS_SINCE_LAST_TRADE,NET_REALIZED_GAINS_YTD,NET_REALIZED_LOSSES_YTD
0,100,BA66627,15251.53,70,7625.765,762.5765,17.5,3,6,762.5765,0.0
1,101,BA75404,4580.46,8,1145.115,114.5115,1.2,2,18,0.0,229.023
2,102,BA92673,6360.64,15,1590.16,159.016,2.25,1,12,0.0,159.016
3,103,BB11622,570.06,23,142.515,14.2515,3.45,4,17,0.0,28.503
4,104,BB82067,4910.49,6,1227.6225,122.76225,0.9,3,14,0.0,122.76225


customer_attrition


Unnamed: 0,ID,CHURNRISK
0,5,High
1,6,Medium
2,7,High
3,8,Medium
4,9,Medium


customer_offer


Unnamed: 0,ID,GENDER,STATUS,TOTAL_DOLLAR_VALUE_TRADED,TOTAL_UNITS_TRADED,OFFER_ACCEPTED,OFFER_COST
0,1,F,M,29782.98,45,2 Free Trades,10
1,3,M,S,26132.61,32,5 Free Trades and Consult,50
2,8,M,S,9380.94,38,5 Free Trades,20
3,10,F,M,17441.74,49,2 Free Trades,10
4,24,F,S,21372.14,293,2 Free Trades,10


customer


Unnamed: 0,CUSTOMER,NAME,COUNTRY,LATITUDE,LONGITUDE,STREET_ADDRESS,CITY,STATE,STATE_CODE,ZIP_CODE,...,EDUCATION,EMPLOYMENT_STATUS,INCOME,MARITAL_STATUS,CUSTOMER_LIFETIME_VALUE,NUMBER_OF_POLICIES,NUMBER_OF_CLOSED_COMPLAINTS,NUMBER_OF_COMMUNICATIONS,NUMBER_OF_COMPLAINTS,NUMBER_OF_OPEN_COMPLAINTS
0,AA10041,Rosa Pays,US,37.954863,-121.290404,222 North El Dorado Street,Stockton,California,CA,95202,...,Bachelor,Employed,24964,Married,9421.101961,3,3,8,4,0
1,AA71604,Janine Cockshot,US,33.599728,-111.98813,12602 N Paradise Village Pkwy,Phoenix,Arizona,AZ,85032,...,Master,Employed,87560,Married,2802.621642,1,0,8,1,0
2,AB13432,Tiphanie Paquet,US,26.1805,-97.7209,1002 Dixieland Rd,Harlingen,Texas,TX,78552,...,Bachelor,Unemployed,0,Single,10628.06415,3,0,3,8,4
3,AB21519,Myrvyn Morriss,US,42.010641,-87.829686,15 S. PROSPECT AVE.,Park Ridge,Illinois,IL,60068,...,College,Employed,93272,Married,2705.987629,1,0,2,2,1
4,AB31813,Gayler Haburne,US,38.954005,-77.366611,12197 Sunset Hills Rd,Reston,Virginia,VA,20190,...,High School or Below,Unemployed,0,Married,3801.282266,1,3,6,1,1


mortgage_applicant


Unnamed: 0,ID,NAME,STREET_ADDRESS,CITY,STATE,STATE_CODE,ZIP_CODE,EMAIL_ADDRESS,PHONE_NUMBER,GENDER,SOCIAL_SECURITY_NUMBER,EDUCATION,EMPLOYMENT_STATUS,MARITAL_STATUS
0,100679,Madelle Augie,1420 Beaumont Avenue,Beaumont,California,CA,92223,maugie7s@home.pl,603-982-4172,Female,886-82-6564,High School or Below,Unemployed,Single
1,100537,Rosa Pays,222 North El Dorado Street,Stockton,California,CA,95202,rpaysp8@homestead.com,865-749-5448,Female,419-84-5922,Bachelor,Employed,Married
2,100458,Janine Cockshot,12602 N Paradise Village Pkwy,Phoenix,Arizona,AZ,85032,jcockshotqc@wikimedia.org,808-976-1894,Female,788-59-0419,Master,Employed,Married
3,101432,Tiphanie Paquet,1002 Dixieland Rd,Harlingen,Texas,TX,78552,tpaquet54@gmpg.org,612-256-1393,Female,628-23-7014,Bachelor,Unemployed,Single
4,100599,Myrvyn Morriss,15 S. PROSPECT AVE.,Park Ridge,Illinois,IL,60068,mmorrissbm@wordpress.com,203-751-1286,Male,403-49-3264,College,Employed,Married


mortgage_customer


Unnamed: 0,ID,INCOME,APPLIEDONLINE,RESIDENCE,YRS_AT_CURRENT_ADDRESS,YRS_WITH_CURRENT_EMPLOYER,NUMBER_OF_CARDS,CREDITCARD_DEBT,LOANS,LOAN_AMOUNT
0,100537,45081,YES,Owner Occupier,14,15,2,713,1,8430
1,100458,46645,YES,Owner Occupier,19,4,1,884,0,6045
2,101432,44202,YES,Owner Occupier,1,23,2,2611,0,12915
3,100599,52495,YES,Owner Occupier,18,16,2,2527,1,10375
4,100782,43608,YES,Owner Occupier,2,20,1,452,0,7610


mortgage_default


Unnamed: 0,ID,MORTGAGE_DEFAULT
0,101600,YES
1,101731,NO
2,100548,YES
3,101472,NO
4,100562,YES


mortgage_property


Unnamed: 0,ID,SALEPRICE,LOCATION
0,101472,340000,L100
1,100624,190000,L100
2,100532,190000,L100
3,100403,161000,L100
4,100477,192500,L100


## Check to make sure it worked

In [24]:
# Read every table back from the `mortgage` schema and show its shape and first
# rows, to confirm that the load above succeeded.
csv_names = csvs.strip("\n").split("\n")
for file in csv_names:
    # Same table-name derivation as the load cell. str.strip(".csv") strips a
    # character set rather than a suffix, so it must stay in sync with that cell.
    stem = file.strip(".csv").lower()
    k = "_".join(stem.split(" "))
    display(Markdown(f"### {k}"))
    df = pd.read_sql_table(k, con=engine, schema="mortgage")
    print(df.shape)
    preview = df.head()
    display(preview)

### auto_insurance_claim

(1000, 9)


Unnamed: 0,CUSTOMER,CLAIM_ID,POLICY_ID,FIRST_NOTICE_OF_LOSS_DATE,RESPONSE,CLAIM_REASON,INCIDENT_SUMMARY,CLAIM_AMOUNT,TOTAL_CLAIM_AMOUNT
0,CH85057,5373215598,PM4788594,2016-08-08,No,Other,,1146.399097,540.141566
1,CH85444,8989129787,MT0378990,2016-10-14,No,Collision,,414.571186,1067.333126
2,CH85444,9636701890,OF0128306,2017-01-02,No,Collision,,1067.725385,611.918994
3,CH85444,9177470640,SX8636429,2017-03-12,No,Scratch/Dent,,807.224559,282.458203
4,CH97539,16599104,AE6086634,2017-05-31,Yes,Hail,,387.364705,701.708239


### auto_insurance_policy

(915, 24)


Unnamed: 0,CUSTOMER,POLICY_ID,COVERAGE,EFFECTIVE_TO_DATE,POLICY_TYPE,POLICY,RENEW_OFFER,SALES_CHANNEL,AGENT_ID,FAMILY_MEMBERS_INCLUDED,...,DENIED_CLAIMS,CLAIMS_FILED,AVG_LENGTH_OF_HANDLING_PROCESS,ADD_DRIVER_INTO_POLICY,ADDRESS_CHANGE_COUNT,MAINTENANCE_COST,AUTO_MODEL_YEAR,AUTO_MAKE,AUTO_MODEL,AUTO_COLOR
0,CD48033,DG0591347,Basic,2019-02-12,Personal Auto,Personal L1,offer1,Branch,-,2,...,0,0,14,0,0,7.3,2017,HONDA,ACCORD,WHITE
1,CF15086,JF3642903,Basic,2019-01-24,Corporate Auto,Corporate L2,offer3,Web,-,2,...,0,7,3,0,0,6.7,2007,NISSAN,MURANO,SILVER
2,CU13711,KZ6551376,Basic,2019-01-16,Personal Auto,Personal L1,offer4,Web,-,3,...,0,8,26,1,1,6.9,2006,CHEVROLET,PICKUP,GRAY
3,CU13711,SB4987297,Basic,2019-01-15,Personal Auto,Personal L3,offer2,Branch,-,1,...,7,15,23,0,5,18.6,2012,TOYOTA,PRIUS,GRAY
4,CU19735,ZN6161626,Basic,2019-01-05,Corporate Auto,Corporate L3,offer2,Agent,Agent-80,1,...,5,16,29,0,3,7.3,2009,HONDA,CIVIC,BLUE


### customer_activity

(419, 11)


Unnamed: 0,ID,CUSTOMER,TOTAL_DOLLAR_VALUE_TRADED,TOTAL_UNITS_TRADED,LARGEST_SINGLE_TRANSACTION,SMALLEST_SINGLE_TRANSACTION,PERCENT_CHANGE_CALCULATION,DAYS_SINCE_LAST_LOGIN,DAYS_SINCE_LAST_TRADE,NET_REALIZED_GAINS_YTD,NET_REALIZED_LOSSES_YTD
0,100,BA66627,15251.53,70,7625.765,762.5765,17.5,3,6,762.5765,0.0
1,101,BA75404,4580.46,8,1145.115,114.5115,1.2,2,18,0.0,229.023
2,102,BA92673,6360.64,15,1590.16,159.016,2.25,1,12,0.0,159.016
3,103,BB11622,570.06,23,142.515,14.2515,3.45,4,17,0.0,28.503
4,104,BB82067,4910.49,6,1227.6225,122.76225,0.9,3,14,0.0,122.76225


### customer_attrition

(419, 2)


Unnamed: 0,ID,CHURNRISK
0,5,High
1,6,Medium
2,7,High
3,8,Medium
4,9,Medium


### customer_offer

(419, 7)


Unnamed: 0,ID,GENDER,STATUS,TOTAL_DOLLAR_VALUE_TRADED,TOTAL_UNITS_TRADED,OFFER_ACCEPTED,OFFER_COST
0,1,F,M,29782.98,45,2 Free Trades,10
1,3,M,S,26132.61,32,5 Free Trades and Consult,50
2,8,M,S,9380.94,38,5 Free Trades,20
3,10,F,M,17441.74,49,2 Free Trades,10
4,24,F,S,21372.14,293,2 Free Trades,10


### customer

(419, 28)


Unnamed: 0,CUSTOMER,NAME,COUNTRY,LATITUDE,LONGITUDE,STREET_ADDRESS,CITY,STATE,STATE_CODE,ZIP_CODE,...,EDUCATION,EMPLOYMENT_STATUS,INCOME,MARITAL_STATUS,CUSTOMER_LIFETIME_VALUE,NUMBER_OF_POLICIES,NUMBER_OF_CLOSED_COMPLAINTS,NUMBER_OF_COMMUNICATIONS,NUMBER_OF_COMPLAINTS,NUMBER_OF_OPEN_COMPLAINTS
0,AA10041,Rosa Pays,US,37.954863,-121.290404,222 North El Dorado Street,Stockton,California,CA,95202,...,Bachelor,Employed,24964,Married,9421.101961,3,3,8,4,0
1,AA71604,Janine Cockshot,US,33.599728,-111.98813,12602 N Paradise Village Pkwy,Phoenix,Arizona,AZ,85032,...,Master,Employed,87560,Married,2802.621642,1,0,8,1,0
2,AB13432,Tiphanie Paquet,US,26.1805,-97.7209,1002 Dixieland Rd,Harlingen,Texas,TX,78552,...,Bachelor,Unemployed,0,Single,10628.06415,3,0,3,8,4
3,AB21519,Myrvyn Morriss,US,42.010641,-87.829686,15 S. PROSPECT AVE.,Park Ridge,Illinois,IL,60068,...,College,Employed,93272,Married,2705.987629,1,0,2,2,1
4,AB31813,Gayler Haburne,US,38.954005,-77.366611,12197 Sunset Hills Rd,Reston,Virginia,VA,20190,...,High School or Below,Unemployed,0,Married,3801.282266,1,3,6,1,1


### mortgage_applicant

(419, 14)


Unnamed: 0,ID,NAME,STREET_ADDRESS,CITY,STATE,STATE_CODE,ZIP_CODE,EMAIL_ADDRESS,PHONE_NUMBER,GENDER,SOCIAL_SECURITY_NUMBER,EDUCATION,EMPLOYMENT_STATUS,MARITAL_STATUS
0,100679,Madelle Augie,1420 Beaumont Avenue,Beaumont,California,CA,92223,maugie7s@home.pl,603-982-4172,Female,886-82-6564,High School or Below,Unemployed,Single
1,100537,Rosa Pays,222 North El Dorado Street,Stockton,California,CA,95202,rpaysp8@homestead.com,865-749-5448,Female,419-84-5922,Bachelor,Employed,Married
2,100458,Janine Cockshot,12602 N Paradise Village Pkwy,Phoenix,Arizona,AZ,85032,jcockshotqc@wikimedia.org,808-976-1894,Female,788-59-0419,Master,Employed,Married
3,101432,Tiphanie Paquet,1002 Dixieland Rd,Harlingen,Texas,TX,78552,tpaquet54@gmpg.org,612-256-1393,Female,628-23-7014,Bachelor,Unemployed,Single
4,100599,Myrvyn Morriss,15 S. PROSPECT AVE.,Park Ridge,Illinois,IL,60068,mmorrissbm@wordpress.com,203-751-1286,Male,403-49-3264,College,Employed,Married


### mortgage_customer

(419, 10)


Unnamed: 0,ID,INCOME,APPLIEDONLINE,RESIDENCE,YRS_AT_CURRENT_ADDRESS,YRS_WITH_CURRENT_EMPLOYER,NUMBER_OF_CARDS,CREDITCARD_DEBT,LOANS,LOAN_AMOUNT
0,100537,45081,YES,Owner Occupier,14,15,2,713,1,8430
1,100458,46645,YES,Owner Occupier,19,4,1,884,0,6045
2,101432,44202,YES,Owner Occupier,1,23,2,2611,0,12915
3,100599,52495,YES,Owner Occupier,18,16,2,2527,1,10375
4,100782,43608,YES,Owner Occupier,2,20,1,452,0,7610


### mortgage_default

(419, 2)


Unnamed: 0,ID,MORTGAGE_DEFAULT
0,101600,YES
1,101731,NO
2,100548,YES
3,101472,NO
4,100562,YES


### mortgage_property

(419, 3)


Unnamed: 0,ID,SALEPRICE,LOCATION
0,101472,340000,L100
1,100624,190000,L100
2,100532,190000,L100
3,100403,161000,L100
4,100477,192500,L100
