# Lending Club Case Study

In [43]:
# Importing core libraries required for the case study
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot
import seaborn as sea

## Loading Data

In [44]:
# Read the full loan dataset into df. low_memory=False makes pandas parse the
# file in one pass instead of in chunks.
df = pd.read_csv('data/loan.csv', low_memory=False)

# Remember how many rows the untouched dataset has
rowcount = df.shape[0]

## Data Cleaning

In [45]:
# Drop every column in which all records are NaN/null
df1 = df.dropna(axis='columns', how="all")

# Drop every column in which all values are zero: a column is kept as soon as
# at least one of its entries compares unequal to 0
df1 = df1.loc[:, (df1 != 0).any(axis=0)]

# Drop columns that do not contribute to the overall analysis because they are
# either transactional or descriptive in nature
df1 = df1.drop(columns=['id', 'member_id', 'url', 'emp_title', 'desc', 'title'])

# Show the shape of the cleaned frame. Only the last expression of a cell is
# displayed, so the check lives here rather than in the middle of the cell.
df1.shape

In [46]:
# Helper that drops all columns holding a single constant value (NaN values are ignored)
def drop_constant_columns(df):
    """Return a copy of ``df`` without its constant columns.

    A column counts as constant when it holds exactly one distinct value
    after NaN entries are ignored. A column that is entirely NaN has zero
    distinct values and is therefore kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame containing the non-constant columns in their original order.
    """
    # Count the distinct non-NaN values of every column in a single pass
    # instead of dropping (and copying the frame) once per constant column.
    is_constant = (df.nunique(dropna=True) == 1).to_numpy()

    # A positional boolean mask selects the surviving columns in one step and
    # stays well-defined even if column labels are duplicated.
    return df.loc[:, ~is_constant]

# Drop all constant columns from df1. A column is constant when it holds the
# same value in every row once NaN values are ignored.
df1 = drop_constant_columns(df1)

In [47]:
# Show column info to analyse missing and empty values per column.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39717 entries, 0 to 39716
Data columns (total 42 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   loan_amnt                39717 non-null  int64  
 1   funded_amnt              39717 non-null  int64  
 2   funded_amnt_inv          39717 non-null  float64
 3   term                     39717 non-null  object 
 4   int_rate                 39717 non-null  object 
 5   installment              39717 non-null  float64
 6   grade                    39717 non-null  object 
 7   sub_grade                39717 non-null  object 
 8   emp_length               38642 non-null  object 
 9   home_ownership           39717 non-null  object 
 10  annual_inc               39717 non-null  float64
 11  verification_status      39717 non-null  object 
 12  issue_d                  39717 non-null  object 
 13  loan_status              39717 non-null  object 
 14  purpose               

## Saving Data Snapshot

In [48]:
# Save a snapshot of the cleaned frame to disk for manual analysis outside the notebook.
# NOTE(review): to_csv() defaults to index=True, so the row index is written as an
# unnamed first column — confirm that is wanted, otherwise pass index=False.
df1.to_csv('./data/loan_clensed.csv')