In [2]:
#importing all the required packages
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
np.random.seed(sum(map(ord, "aesthetics")))
%matplotlib inline
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import warnings
warnings.filterwarnings('ignore')

In [8]:
#defining the file-directory
fileDir = os.path.dirname(os.path.realpath('__file__'))

In [13]:
#defining the processed data-directory
dataDir = fileDir.strip('analysis') + 'data\\processed_data\\'

# Reading and consolidating the data

In [15]:
#defining data frame for the consolidated loan data
dfLoanData = pd.DataFrame()

In [17]:
#reading loan data stats
for directory, subdirectory, filenames in  os.walk(dataDir + 'loan_data\\'):
    for filename in filenames:
        df = pd.read_csv(os.path.join(directory, filename), encoding = 'ISO-8859-1')
        dfLoanData =  pd.concat([df, dfLoanData], ignore_index=True)

In [25]:
#defining data frame for the consolidated Declined loan data
dfDecLoanData = pd.DataFrame()

In [26]:
#reading Declined loan data stats
for directory, subdirectory, filenames in  os.walk(dataDir + 'declined_loan_data\\'):
    for filename in filenames:
        df = pd.read_csv(os.path.join(directory, filename), encoding = 'ISO-8859-1')
        dfDecLoanData =  pd.concat([df, dfLoanData], ignore_index=True)

# Summarizing data by State

In [27]:
#group the acceptd loans by state

#by year and counting the total amount of accepted loans and Interest Rate
seriesCount = dfLoanData['LoanAmt'].groupby(dfLoanData['State']).count()

#by year and Average of Interest Rate
seriesInterest = dfLoanData['IntRate'].groupby(dfLoanData['State']).mean()

#by year and Average of Loan Amount
seriesLA = dfLoanData['LoanAmt'].groupby(dfLoanData['State']).mean()

#by year and mode of term
seriesTerm = dfLoanData['Term'].groupby(dfLoanData['State']).agg(lambda x: x.value_counts().index[0])

#by year and mode of grade
seriesGrade = dfLoanData['Grade'].groupby(dfLoanData['State']).agg(lambda x: x.value_counts().index[0])

#by year and avg of emp_length
seriesEL = dfLoanData['EmpLength'].groupby(dfLoanData['State']).mean()

#combining seriesCount and seriesTotAmt into summary Metrix data frame
columns=['State', 'Accepted Loans', 'Avg Interest Charged', 'Avg Loan Amount', 'Most Likely Term', 'Most Likely Grade', 'Avg EMP Length']
dfSummary = pd.DataFrame({'State':seriesCount.index,'Accepted Loans': seriesCount,'Avg Interest Charged':seriesInterest, 
                          'Avg Loan Amount':seriesLA, 'Most Likely Term':seriesTerm, 'Most Likely Grade':seriesGrade, 'Avg EMP Length':seriesEL})


In [29]:
for col in dfSummary.columns:
    dfSummary[col] = dfSummary[col].astype(str)

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
        [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]    

dfSummary['text'] = dfSummary['State'] + '<br>' +\
    'Avg Loan Amount ($ USD): '+dfSummary['Avg Loan Amount']+'<br>'+\
    'Number of loans: '+dfSummary['Accepted Loans']+'<br>'+\
    'Most Likely Term: '+dfSummary['Most Likely Term']+'<br>'+\
    'Most Likely Grade: '+dfSummary['Most Likely Grade']+'<br>'+\
    'Avg EMP Length: '+dfSummary['Avg EMP Length']

data = [ dict(
    type='choropleth',
    colorscale = scl,
    autocolorscale = False,
    locations = dfSummary['State'],
    z = dfSummary['Accepted Loans'], 
    locationmode = 'USA-states',
    text = dfSummary['text'],
    marker = dict(
        line = dict (
            color = 'rgb(255,255,255)',
            width = 2
        ) ),
    colorbar = dict(
        title = "Number of loans")
    ) ]

layout = dict(
    title = 'Total number of accepted-loans by state <br> (Hover over state for other metrics)',
    geo = dict(
        scope='usa',
        projection=dict( type='albers usa' ),
        showlakes = True,
        lakecolor = 'rgb(255, 255, 255)'),
         )

fig = dict( data=data, layout=layout )
iplot( fig, filename='d3-cloropleth-map')

In [30]:
#group declined loans by State

#by Statewise and counting the total number of rejected loans
seriesCount = dfDecLoanData['LoanAmt'].groupby(dfDecLoanData['State']).count()

#by RiskCategories and counting the total of loan-amount
seriesTotAmt = dfDecLoanData['LoanAmt'].groupby(dfDecLoanData['State']).mean()

#combining seriesCount and seriesTotAmt into summary Metrix data frame
columns=['State', 'Rejected Loans', 'Avg Amount Requested']
dfSummary = pd.DataFrame({'State':seriesCount.index,'Rejected Loans': seriesCount,'Avg Amount Requested':seriesTotAmt})

In [31]:
#Summary Chart for State Wise Declined Data Analysis
for col in dfSummary.columns:
    dfSummary[col] = dfSummary[col].astype(str)

scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
        [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]    

dfSummary['text'] = dfSummary['State'] + '<br>' +\
    'Avg Loan Amount ($ USD): '+dfSummary['Avg Amount Requested']+'<br>'+\
    'Number of rejected loans: '+dfSummary['Rejected Loans']

data = [ dict(
    type='choropleth',
    colorscale = scl,
    autocolorscale = False,
    locations = dfSummary['State'],
    z = dfSummary['Rejected Loans'], 
    locationmode = 'USA-states',
    text = dfSummary['text'],
    marker = dict(
        line = dict (
            color = 'rgb(255,255,255)',
            width = 2
        ) ),
    colorbar = dict(
        title = "Number of loans")
    ) ]

layout = dict(
    title = 'Total number of rejected loans by state <br> (Hover over state for other metrics)',
    geo = dict(
        scope='usa',
        projection=dict( type='albers usa' ),
        showlakes = True,
        lakecolor = 'rgb(255, 255, 255)'),
         )

fig = dict( data=data, layout=layout )
iplot( fig, filename='d3-cloropleth-map')

In [None]:
# from the tutorial, leaving in just for now
# NOTE(review): leftover tutorial scaffold. `df_plot` is not defined in the
# visible part of this notebook, so this cell presumably raises NameError on a
# fresh kernel - confirm whether it can be deleted.

# Cast every column of df_plot to str, in place.
for col in df_plot.columns:
    df_plot[col] = df_plot[col].astype(str)

    # NOTE(review): scl is indented under the for-loop, so it is rebuilt on
    # every iteration and never assigned here if df_plot has no columns.
    scl = [[0.0, 'rgb(242,240,247)'],[0.2, 'rgb(218,218,235)'],[0.4, 'rgb(188,189,220)'],\
            [0.6, 'rgb(158,154,200)'],[0.8, 'rgb(117,107,177)'],[1.0, 'rgb(84,39,143)']]

# Builds hover text on dfSummary, while the trace below reads df_plot['text'].
# NOTE(review): looks like a copy/paste mismatch - confirm which frame is meant.
dfSummary['text'] = dfSummary['State'] + '<br>' +\
    'Avg Loan Amount ($ USD): '+dfSummary['Avg Amount Requested']+'<br>'+\
    'Number of rejected loans: '+dfSummary['Rejected Loans']

# Choropleth trace keyed on df_plot['code'] and coloured by df_plot['Default_Rate'].
# z receives the str-cast column from the loop above (the numeric cast on that
# line is commented out).
# NOTE(review): autocolorscale = True is set together with an explicit
# colorscale; presumably the custom scl is then not used - confirm against the
# plotly reference.
data = [ dict(
        type='choropleth',
        colorscale = scl,
        autocolorscale = True,
        locations = df_plot['code'],
        z = df_plot['Default_Rate'], #.astype(int),
        locationmode = 'USA-states',
        text = df_plot['text'],
        marker = dict(
            line = dict (
                color = 'rgb(255,255,255)',
                width = 2
            ) ),
        colorbar = dict(
            title = "%")
        ) ]

# Map layout: USA scope, Albers USA projection, lakes drawn in white.
layout = dict(
        title = 'Lending Club Portfolio<br> Default Rate By State <br> (Hover over state for other metrics)',
        geo = dict(
            scope='usa',
            projection=dict( type='albers usa' ),
            showlakes = True,
            lakecolor = 'rgb(255, 255, 255)'),
             )
    
# Assemble the figure dict and render it inline.
fig = dict( data=data, layout=layout )
iplot( fig, filename='d3-cloropleth-map' )