In [3]:
import db_utils
import yaml, pandas as pd, numpy as np, re

# Parse the connection settings from the local YAML credentials file.
with open('credentials.yaml') as credentials_file:
    credentials_dict = yaml.safe_load(credentials_file)

# Build the project's connector from the parsed settings and keep whatever
# initialise_database() returns as the loan payments data.
credentials = db_utils.RDSDatabaseConnector(credentials_dict)
loan_payments = credentials.initialise_database()

# Pass the data to the connector's save_file, then load 'new_file.csv' with
# the 'id' column as the index.
# NOTE(review): presumably save_file is what writes 'new_file.csv' - confirm
# in db_utils.
credentials.save_file(loan_payments)
database = pd.read_csv('new_file.csv', index_col='id')

# Project helper whose date_data / change_type methods are used below to
# convert column types.
cleaned_data = db_utils.DataTransform(database)

# Columns passed through cleaned_data.date_data.
date_data = [
    'issue_date',
    'earliest_credit_line',
    'last_payment_date',
    'next_payment_date',
    'last_credit_pull_date',
]

# Columns converted to the 'category' type.
categorical_data = [
    'member_id',
    'term',
    'grade',
    'sub_grade',
    'employment_length',
    'home_ownership',
    'verification_status',
    'loan_status',
    'payment_plan',
    'purpose',
    'policy_code',
    'application_type',
]

# Keep only the digits of each 'term' value. The pattern is a raw string so
# the backslash-D class is not read as an (invalid) string escape, and no
# separate space removal is needed because that class already matches spaces.
# NOTE(review): values go through str() first, so a missing value becomes
# the text 'nan' and ends up as an empty string instead of a null - confirm
# that this is intended.
database['term'] = [re.sub(r'\D', '', str(value)) for value in database['term']]

# Everything that is neither a date nor a categorical column is treated as
# numeric.
non_numeric_data = date_data + categorical_data
column_headings = database.columns.values.tolist()
numeric_data = [column for column in column_headings if column not in non_numeric_data]

# Both helpers return a pair; only the updated frame is needed here, so the
# second item goes to a throwaway name instead of overwriting the loop
# variable.
for date_column in date_data:
    database, _returned = cleaned_data.date_data(database, date_column)

for categories in categorical_data:
    database, _returned = cleaned_data.change_type(database, categories, 'category')

# Project helper used to query missing-value statistics per column.
df_info = db_utils.DataFrameInfo(database)

# Print one line (followed by a blank line) for every column that has a
# non-zero share of missing values.
# NOTE(review): the recorded output of this cell shows percentages far above
# 100 (e.g. 23130.77 for funded_amount), so the percentage computed by
# df_info.missing looks wrong - check its formula in db_utils.
for column in column_headings:
    null_vals, null_percentage = df_info.missing(database, column)
    if null_percentage != 0:
        # A single '%' is used: f-strings do not treat '%%' as an escape, so
        # the doubled form was printed literally.
        print(f'{column}: {null_vals} null values, {null_percentage}%\n')

funded_amount: 3007 null values, 23130.76923076923%%

int_rate: 5169 null values, 64612.5%%

employment_length: 2118 null values, 12458.823529411766%%

mths_since_last_delinq: 31002 null values, 140918.18181818182%%

mths_since_last_record: 48050 null values, 218409.0909090909%%

last_payment_date: 73 null values, 429.4117647058823%%

next_payment_date: 32608 null values, 191811.76470588235%%

last_credit_pull_date: 7 null values, 33.33333333333333%%

collections_12_mths_ex_med: 51 null values, 196.15384615384613%%

mths_since_last_major_derog: 46732 null values, 173081.4814814815%%

