In [1]:
from snowflake.snowpark import Session

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# create_temp_table warning suppression
import warnings; warnings.simplefilter('ignore')
import configparser

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-s8d05z2d because the default path (/home/mosaic-ai/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


# Code to establish connection and read data from Snowflake

In [3]:
# Load the Snowflake connection settings from the local ini file.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns only the
# names it actually parsed, so an empty result means the settings were NOT
# loaded. Fail here instead of with a KeyError when the section is looked up.
loaded_config_files = config.read("snowflake_connection.ini")
if not loaded_config_files:
    raise FileNotFoundError(
        "snowflake_connection.ini could not be read from the working directory"
    )
# Keep the list of parsed files as the cell's displayed result.
loaded_config_files

['snowflake_connection.ini']

In [4]:
# Connection options for the Snowpark session, copied as-is from the
# [Snowflake] section of the parsed ini file. Key spelling/case is kept exactly
# as the session builder receives it today.
# Alternative noted by the original author: take the password / account from
# environment variables (os.getenv('snowflake_password'),
# os.getenv('snowflake_account')) instead of the ini file.
connection_parameters = {
    option: config["Snowflake"][option]
    for option in ("user", "password", "account", "WAREHOUSE", "DATABASE", "SCHEMA")
}

In [5]:
def snowflake_connector(conn):
    """Create a Snowpark session from a dict of connection options.

    Args:
        conn: Connection options, passed unchanged to
            ``Session.builder.configs``.

    Returns:
        The object returned by ``Session.builder.configs(conn).create()``.

    Raises:
        ValueError: If building the session raises any ``Exception``. The
            underlying error is attached as ``__cause__`` so the real reason
            (bad credentials, network, ...) stays visible in the traceback.
    """
    try:
        session = Session.builder.configs(conn).create()
    except Exception as exc:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so interrupting the kernel is not reported as
        # a connection failure.
        raise ValueError("error while connecting with db") from exc
    print("connection successful!")
    return session

# Open the session that the table lookups in the following cells go through.
session = snowflake_connector(connection_parameters)

connection successful!


In [7]:
# Snowpark handles for the eight CRA_* source tables, one per dataset.
# The `_sf` suffix marks the Snowflake-side object; the pandas copies created
# further down use the same name without the suffix.
application_train_sf = session.table("CRA_APPLICATION_TRAIN_DETAILS")
application_test_sf = session.table("CRA_APPLICATION_TEST_DETAILS")
bureau_sf = session.table("CRA_BUREAU_DETAILS")
bureau_balance_sf = session.table("CRA_BUREAU_BALANCE_DETAILS")
credit_card_balance_sf = session.table("CRA_CREDIT_CARD_BALANCE_DETAILS")
installments_payments_sf = session.table("CRA_INSTALLMENTS_PAYMENTS_DETAILS")
previous_application_sf = session.table("CRA_PREVIOUS_APPLICATION_DETAILS")
POS_CASH_balance_sf = session.table("CRA_POS_CASH_BALANCE_DETAILS")

# Convert Snowflake data into Pandas dataframes

In [8]:
# Pull the training applications into a local pandas DataFrame
# (307,511 rows x 124 columns per the shape summary further down).
application_train = application_train_sf.to_pandas()

In [9]:
# Pull the test applications into pandas; the shape summary shows one column
# fewer than the training frame (123 vs 124).
application_test = application_test_sf.to_pandas()

In [10]:
# Pull the bureau table into pandas (1,716,428 rows x 19 columns).
bureau = bureau_sf.to_pandas()

In [11]:
# Largest of the eight pulls: 27,299,925 rows x 5 columns per the shape summary.
# NOTE(review): presumably the slowest / most memory-hungry cell — confirm it
# fits the kernel before a full Restart & Run All.
bureau_balance = bureau_balance_sf.to_pandas()

In [12]:
# Pull the credit-card balance table into pandas (3,840,312 rows x 25 columns).
credit_card_balance = credit_card_balance_sf.to_pandas()

In [13]:
# Pull the installments-payments table into pandas
# (13,605,401 rows x 10 columns per the shape summary).
installments_payments = installments_payments_sf.to_pandas()

In [14]:
# Pull the previous-application table into pandas (1,670,214 rows x 39 columns).
previous_application = previous_application_sf.to_pandas()

In [15]:
# Pull the POS/cash balance table into pandas (10,001,358 rows x 10 columns).
POS_CASH_balance = POS_CASH_balance_sf.to_pandas()

# Glimpse of Rows and Columns of each dataset

In [16]:
# Report the row / column count of every loaded frame, one line per dataset,
# in the same order the frames were loaded.
for dataset_name, dataset_frame in (
    ("application_train", application_train),
    ("application_test", application_test),
    ("bureau", bureau),
    ("bureau_balance", bureau_balance),
    ("credit_card_balance", credit_card_balance),
    ("installments_payments", installments_payments),
    ("previous_application", previous_application),
    ("POS_CASH_balance", POS_CASH_balance),
):
    n_rows, n_cols = dataset_frame.shape[0], dataset_frame.shape[1]
    # Separate print arguments (not one f-string) keep the exact spacing of the
    # existing output: "<name> -  rows: <n>  columns: <m>".
    print(dataset_name + " -  rows:", n_rows, " columns:", n_cols)

application_train -  rows: 307511  columns: 124
application_test -  rows: 48744  columns: 123
bureau -  rows: 1716428  columns: 19
bureau_balance -  rows: 27299925  columns: 5
credit_card_balance -  rows: 3840312  columns: 25
installments_payments -  rows: 13605401  columns: 10
previous_application -  rows: 1670214  columns: 39
POS_CASH_balance -  rows: 10001358  columns: 10


# Sample Data of each dataset

In [17]:
# Preview the first five training rows.
# NOTE(review): the saved output of this cell shows an email address in the
# CREATED_BY column — consider clearing the output or dropping the
# CREATED_BY / CREATED_AT columns from the preview before sharing the notebook.
application_train.head()

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,CREATED_BY,CREATED_AT
0,307660,0,Cash loans,M,Y,Y,0,180000.0,1288350.0,41692.5,...,0,0,0.0,0.0,0.0,0.0,1.0,2.0,manishh.singh@fosfor.com,2024-05-03 06:30:22
1,307662,0,Cash loans,F,N,Y,1,90000.0,93829.5,9981.0,...,0,0,0.0,0.0,0.0,0.0,0.0,4.0,manishh.singh@fosfor.com,2024-05-03 06:30:22
2,307663,0,Cash loans,F,N,Y,0,360000.0,2517300.0,66402.0,...,0,0,,,,,,,manishh.singh@fosfor.com,2024-05-03 06:30:22
3,307664,0,Revolving loans,F,N,N,0,90000.0,270000.0,13500.0,...,0,0,0.0,0.0,0.0,0.0,0.0,1.0,manishh.singh@fosfor.com,2024-05-03 06:30:22
4,307665,0,Revolving loans,M,Y,Y,0,225000.0,540000.0,27000.0,...,0,0,0.0,0.0,0.0,0.0,2.0,7.0,manishh.singh@fosfor.com,2024-05-03 06:30:22


In [18]:
# Preview the first five test rows (no TARGET column in this frame's output).
# NOTE(review): the saved output of this cell shows an email address in the
# CREATED_BY column — consider clearing the output or dropping the
# CREATED_BY / CREATED_AT columns from the preview before sharing the notebook.
application_test.head()

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,...,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,CREATED_BY,CREATED_AT
0,100001,Cash loans,F,N,Y,0,135000.0,568800.0,20560.5,450000.0,...,0,0,0.0,0.0,0.0,0.0,0.0,0.0,manishh.singh@fosfor.com,2024-05-03 06:30:22
1,100005,Cash loans,M,N,Y,0,99000.0,222768.0,17370.0,180000.0,...,0,0,0.0,0.0,0.0,0.0,0.0,3.0,manishh.singh@fosfor.com,2024-05-03 06:30:22
2,100013,Cash loans,M,Y,Y,0,202500.0,663264.0,69777.0,630000.0,...,0,0,0.0,0.0,0.0,0.0,1.0,4.0,manishh.singh@fosfor.com,2024-05-03 06:30:22
3,100028,Cash loans,F,N,Y,2,315000.0,1575000.0,49018.5,1575000.0,...,0,0,0.0,0.0,0.0,0.0,0.0,3.0,manishh.singh@fosfor.com,2024-05-03 06:30:22
4,100038,Cash loans,M,Y,N,1,180000.0,625500.0,32067.0,625500.0,...,0,0,,,,,,,manishh.singh@fosfor.com,2024-05-03 06:30:22
