General analysis of the condensed version on loans under $150k which is the dataset including gender, race and military status.

 

In [268]:
#import dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import date, datetime
import os, sys 
import io
import ee


In [269]:
## set up pandas dataframe using set_options to warn when we are working on a copy instead of ooriginal dataframe
pd.set_option('mode.chained_assignment','warn')

In [270]:
# dataframe will display without being truncated
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_columns', 25)
pd.set_option('display.width', 768)

In [271]:
#mounting the google drive to access the files
from google.colab import files
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [272]:
#import csv that was cleaned up in cat_desc_normalize.ipynb
csv_path = "/content/drive/My Drive/p_cat_clean.csv"
path = "/content/drive/My Drive/all_loans_under_150k_max_info.csv"


In [273]:
#list of columns
cols=list(pd.read_csv(path, nrows=1))
cols


['Loan Amount',
 'City',
 'State',
 'Zip',
 'NAICS Code',
 'Business Type',
 'Race',
 'Gender',
 'Veteran',
 'Jobs Saved',
 'Date',
 'Bank',
 'NAICS Category',
 'Title',
 'Description',
 'description']

In [274]:
# backup of clean dataframe, types will not be fully preserved so on import reassign, but easier with unconformity removed
df = pd.read_csv(path, dtype={'Jobs Saved':int, 'NAICS Code': str, 'NAICS Category': str, 'Zip': str, }, parse_dates=[10] )
df.sample()

Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description
1688268,83087.0,PROVINCETOWN,MA,2657,51112,Corporation,Unanswered,Unanswered,Unanswered,7,2020-06-03,TD Bank,51,Periodical PublishersT,See industry description for 511120.,Information


In [275]:
df.shape

(4093088, 16)

In [276]:
#found some weird characters
df['Bank']=df['Bank'].str.lstrip('\t')

In [277]:
#types are correct
df.dtypes

Loan Amount              float64
City                      object
State                     object
Zip                       object
NAICS Code                object
Business Type             object
Race                      object
Gender                    object
Veteran                   object
Jobs Saved                 int64
Date              datetime64[ns]
Bank                      object
NAICS Category            object
Title                     object
Description               object
description               object
dtype: object

General nationwide loan analysis


In [278]:
#check general statistics

nat_loan_stats=df['Loan Amount'].describe()
nat_loan_stats

count    4.093088e+06
mean     3.375040e+04
std      3.352086e+04
min     -1.996590e+05
25%      9.882000e+03
50%      2.083200e+04
75%      4.650000e+04
max      1.500000e+05
Name: Loan Amount, dtype: float64

In [279]:
#count of loans
national_loan_count = df['Loan Amount'].count()
national_loan_count

4093088

In [280]:
#average loan amount
national_loan_avg=df['Loan Amount'].mean()
national_loan_avg

33750.39600392841

In [281]:
#median national loan
national_loan_med=df['Loan Amount'].median()
national_loan_med

20832.0

In [282]:
#sum of all loans
national_loan_sum=df['Loan Amount'].sum()
national_loan_sum

138143340878.9052

In [283]:
#number of jobs saved
nat_jobs_saved=df['Jobs Saved'].sum()
nat_jobs_saved

16447792

In [284]:
#sum of loans
national_loan_sum=df['Loan Amount'].sum()
national_loan_sum

138143340878.9052

In [285]:
#difference between national average and median 
nat_pct_dif=national_loan_avg/national_loan_med * 100
nat_pct_dif

162.01226960411103

In [286]:
# loan per job
nat_ln_per_job = national_loan_sum / nat_jobs_saved
print(nat_ln_per_job)

8398.898823556694


In [287]:
#summary table of stats
nat_summary = pd.DataFrame({
    'Loan Avg': national_loan_avg,
    'Loan Median': national_loan_med,
    'Loan Sum': national_loan_sum,
    'Loan Count': national_loan_count,
    'Jobs Saved': nat_jobs_saved,
    'Avg over Median': nat_pct_dif,
    'Money per Job' : nat_ln_per_job,
},index=[0])
nat_summary.head(3)

Unnamed: 0,Loan Avg,Loan Median,Loan Sum,Loan Count,Jobs Saved,Avg over Median,Money per Job
0,33750.396004,20832.0,138143300000.0,4093088,16447792,162.01227,8398.898824


# Summary Table
`- Loan Avg : $33,750 `
`- Median Loan : $20,832 `
`- Loan Sum: $138,143,340,878 `
`- Loan Count: 4,093,088`
`- Jobs Saved: 16,447,792`
`- Avg loan amount over Median: 162%`
`- Money per Job: $8,398`





In [288]:
nat_summary['Loan Avg'] = nat_summary.loc[:,'Loan Avg'].map("${:,.2f}".format)
nat_summary['Loan Median'] = nat_summary.loc[:,'Loan Median'].map("${:,.2f}".format)
nat_summary['Loan Sum'] = nat_summary.loc[:,'Loan Sum'].map("{:,.2f}".format)
nat_summary['Loan Count'] = nat_summary.loc[:,'Loan Count'].map("{:,.2f}".format)
nat_summary['Jobs Saved'] = nat_summary.loc[:,'Jobs Saved'].map("{:,}".format)
nat_summary['Avg over Median'] = nat_summary.loc[:,'Avg over Median'].map("{:,.2f}%".format)
nat_summary['Money per Job'] = nat_summary.loc[:,'Money per Job'].map("${:,.2f}".format)
nat_summary


Unnamed: 0,Loan Avg,Loan Median,Loan Sum,Loan Count,Jobs Saved,Avg over Median,Money per Job
0,"$33,750.40","$20,832.00",138143340878.91,4093088.0,16447792,162.01%,"$8,398.90"


Lender overview


In [289]:
#who are the top lenders by total sum of loans issued? Creating a summary table that can be exported to csv for analysis in SQL/ Tableau later
top_bank_sums=df.groupby('Bank').sum()['Loan Amount']
top_bank_sums

Bank
121 Financial CU                     14437340.00
1880 Bank                             9598125.73
1st Advantage Bank                    4975444.64
1st Advantage FCU                     1649515.50
1st Bank                               419584.00
                                        ...     
cPort CU                              2645147.26
iTHINK Financial CU                  36972350.00
immito                                6929575.00
mBank                                44163510.00
the Farmers State Bank of Bucklin      424202.84
Name: Loan Amount, Length: 4805, dtype: float64

In [290]:
#top banks by median loan amount

top_bank_median=df.groupby('Bank').median()['Loan Amount']
top_bank_median

Bank
121 Financial CU                     20750.0
1880 Bank                            31457.5
1st Advantage Bank                   36562.5
1st Advantage FCU                    14782.7
1st Bank                              8107.0
                                      ...   
cPort CU                              8755.0
iTHINK Financial CU                  16800.0
immito                               20800.0
mBank                                27100.0
the Farmers State Bank of Bucklin    11192.5
Name: Loan Amount, Length: 4805, dtype: float64

In [291]:
#banks highest average loan
top_bank_average=df.groupby('Bank').mean()['Loan Amount']
top_bank_average

Bank
121 Financial CU                     33113.165138
1880 Bank                            41371.231595
1st Advantage Bank                   50257.016566
1st Advantage FCU                    22290.750000
1st Bank                             18242.782609
                                         ...     
cPort CU                             15378.763140
iTHINK Financial CU                  29089.181747
immito                               32686.674528
mBank                                41980.522814
the Farmers State Bank of Bucklin    13256.338750
Name: Loan Amount, Length: 4805, dtype: float64

In [292]:
#bank summary table
banks_summary_df=pd.DataFrame({
    'Loan Sum' : top_bank_sums,
    'Average Loan' : top_bank_average,
    'Median Loan' : top_bank_median
  })
banks_summary_df.head()


Unnamed: 0_level_0,Loan Sum,Average Loan,Median Loan
Bank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
121 Financial CU,14437340.0,33113.165138,20750.0
1880 Bank,9598125.73,41371.231595,31457.5
1st Advantage Bank,4975444.64,50257.016566,36562.5
1st Advantage FCU,1649515.5,22290.75,14782.7
1st Bank,419584.0,18242.782609,8107.0


In [293]:
#formatting
banks_summary_df['Loan Sum'] = banks_summary_df.loc[:,'Loan Sum'].map("${:,.2f}".format)
banks_summary_df['Average Loan'] = banks_summary_df.loc[:,'Average Loan'].map("${:,.2f}".format)
banks_summary_df['Median Loan'] = banks_summary_df.loc[:,'Median Loan'].map("${:,.2f}".format)
banks_summary_df.sample(30)


Unnamed: 0_level_0,Loan Sum,Average Loan,Median Loan
Bank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Caribe FCU,"$379,208.67","$13,076.16","$8,571.87"
The Bank of Marion,"$10,279,667.65","$24,130.68","$14,825.00"
Merchants & Farmers Bank,"$66,559.99","$3,915.29","$3,819.80"
State Bank of Table Rock,"$1,996,518.00","$14,680.28","$7,009.68"
Texas Heritage Bank,"$9,949,895.99","$38,268.83","$20,800.00"
Vast Bank,"$13,109,374.77","$32,773.44","$20,833.00"
Clackamas County Bank,"$5,024,776.01","$43,317.03","$35,244.05"
SmartBank,"$88,526,396.23","$36,855.29","$21,400.00"
The Millyard Bank,"$6,857,200.00","$42,857.50","$29,250.00"
Belco Community CU,"$2,830,943.71","$37,249.26","$22,726.97"


In [294]:
#csv of bank summary
banks_summary_df.to_csv('banks_summary_df.csv', index=True)
!cp banks_summary_df.csv "drive/My Drive/"

In [295]:
#some of the top institutions by state that considering they are regional have a relatively large percentage of the loans
# for example OH - Huntington bank did 3.3% of the loan count for this dataset Mississippi
top_bks_state=df.groupby('State')['Bank'].value_counts().nlargest(15)
top_bks_state

State  Bank                        
CA     Bank of America                 77027
FL     Bank of America                 48095
CA     Wells Fargo Bank                47562
NY     JPMorgan Chase Bank             42576
CA     JPMorgan Chase Bank             39719
       Cross River Bank                26682
       Celtic Bank Corporation         24851
       Customers Bank                  24215
FL     Kabbage                         24201
       Wells Fargo Bank                23432
TX     Bank of America                 22991
       JPMorgan Chase Bank             22272
CA     U.S. Bank                       22009
PR     Banco Popular de Puerto Rico    20200
FL     Celtic Bank Corporation         18113
Name: Bank, dtype: int64

In [296]:
#send top bank summary to csv
top_bks_state.to_csv('top_bks_state.csv', index=True)
!cp top_bks_state.csv "drive/My Drive/"

Business Category overview

In [297]:
#sum of loans by category
loans_cat=df.groupby('description')['Loan Amount'].sum()
loans_cat.head()

description
Accommodation and Food Services             1.359828e+10
Agriculture Forestry Fishing and Hunting    3.255056e+09
Arts Entertainment and Recreation           3.011095e+09
Construction                                1.436251e+10
Educational Services                        2.015011e+09
Name: Loan Amount, dtype: float64

In [298]:
#sum of jobs saved by category
jobs_cat=df.groupby('description')['Jobs Saved'].sum()
jobs_cat.head()

description
Accommodation and Food Services             2754502
Agriculture Forestry Fishing and Hunting     393149
Arts Entertainment and Recreation            511966
Construction                                1407955
Educational Services                         314071
Name: Jobs Saved, dtype: int64

In [299]:
#median loan by category
median_cat=df.groupby('description')['Loan Amount'].median()
median_cat.head()

description
Accommodation and Food Services             32118.0
Agriculture Forestry Fishing and Hunting    15416.0
Arts Entertainment and Recreation           17050.0
Construction                                23750.0
Educational Services                        17435.0
Name: Loan Amount, dtype: float64

In [300]:
#average loan by category 
mean_cat=df.groupby('description')['Loan Amount'].mean()
mean_cat.head()

description
Accommodation and Food Services             44024.330917
Agriculture Forestry Fishing and Hunting    25161.800218
Arts Entertainment and Recreation           27790.187468
Construction                                37777.101309
Educational Services                        29875.476607
Name: Loan Amount, dtype: float64

In [301]:
#summary category table
category_summary_df=pd.DataFrame({
    'Jobs Saved' : jobs_cat,
    'Median Loan' : median_cat,
    'Average Loan' : mean_cat,
    'Total Loans': loans_cat,
  
})
category_summary_df.head()

Unnamed: 0_level_0,Jobs Saved,Median Loan,Average Loan,Total Loans
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accommodation and Food Services,2754502,32118.0,44024.330917,13598280000.0
Agriculture Forestry Fishing and Hunting,393149,15416.0,25161.800218,3255056000.0
Arts Entertainment and Recreation,511966,17050.0,27790.187468,3011095000.0
Construction,1407955,23750.0,37777.101309,14362510000.0
Educational Services,314071,17435.0,29875.476607,2015011000.0


No surprise - Hospitality industry seemed to take the biggest hit with Construction and Health Care loans the next highest number of jobs saved. Seemingly unexpected are Professional and Technical Services and Waste Management which may deserve a "look under the hood".

In [302]:
#formatting
category_summary_df['Jobs Saved'] = category_summary_df.loc[:,'Jobs Saved'].map("{:,}".format)
category_summary_df['Median Loan'] = category_summary_df.loc[:,'Median Loan'].map("${:,.2f}".format)
category_summary_df['Average Loan'] = category_summary_df.loc[:,'Average Loan'].map("${:,.2f}".format)
category_summary_df['Total Loans'] = category_summary_df.loc[:,'Total Loans'].map("${:,.2f}".format)
category_summary_df.head(30)

Unnamed: 0_level_0,Jobs Saved,Median Loan,Average Loan,Total Loans
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accommodation and Food Services,2754502,"$32,118.00","$44,024.33","$13,598,279,357.97"
Agriculture Forestry Fishing and Hunting,393149,"$15,416.00","$25,161.80","$3,255,056,285.20"
Arts Entertainment and Recreation,511966,"$17,050.00","$27,790.19","$3,011,094,602.34"
Construction,1407955,"$23,750.00","$37,777.10","$14,362,513,923.89"
Educational Services,314071,"$17,435.00","$29,875.48","$2,015,011,270.73"
Finance and Insurance,414021,"$20,832.50","$31,330.23","$4,851,674,289.75"
Health Care and Social Assistance,2022888,"$29,626.50","$42,291.92","$17,851,164,400.12"
Information,176588,"$20,800.00","$32,883.62","$1,892,287,772.44"
Management of Companies and Enterprises,28959,"$23,149.00","$37,915.48","$274,318,529.65"
Manufacturing food related,193273,"$24,400.00","$38,122.65","$1,356,289,607.66"


In [303]:
#save to csv and will convert to a table in SQL / Tableau for further analysis 
category_summary_df.to_csv('category_summary.csv', index=True)
!cp category_summary.csv "drive/My Drive/"

Adding columns for day of the week and percentiles



In [304]:
# df['Loan PCL']=pd.qcut(df['Loan Amount'], q=10)

bin_labels_10= ['10%', '20%', '30%', '40%', '50%', '60%', '70%', '80%', '90%' ]
df['Loan Percentile'] = pd.qcut(df['Loan Amount'],
                              q=[0,  .10, .20, .30, .40, .50, .60, .70, .80, 1],
                              labels=bin_labels_10)
df.sample()

Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description,Loan Percentile
2869096,23162.65,BELLAIRE,TX,77401,11111,Subchapter S Corporation,Unanswered,Unanswered,Unanswered,1,2020-04-27,Allegiance Bank,11,Soybean FarmingT,See industry description for 111110.,Agriculture Forestry Fishing and Hunting,60%


In [305]:
#binning quartiles
bin_labels_4 = ['25%', '45%', '50%', '75%', ]
df['Loan Quartile'] = pd.qcut(df['Loan Amount'],
                              q=[0, .25, .50, .75, 1],
                              labels=bin_labels_4)
df.sample()

Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description,Loan Percentile,Loan Quartile
3543880,21600.0,COQUILLE,OR,97423,333243,Corporation,Unanswered,Unanswered,Unanswered,1,2020-05-11,Banner Bank,33,"Sawmill, Woodworking, and Paper Machinery Manu...",This U.S. industry comprises establishments pr...,Manufacturing hard materials,60%,50%


In [306]:
# loan sum by state
sum_by_state=df.groupby('State',)['Loan Amount'].sum() 
sum_by_state.nlargest(8)

State
CA    1.705610e+10
TX    1.113183e+10
FL    1.049829e+10
NY    9.019767e+09
IL    5.358123e+09
PA    4.886378e+09
GA    4.331550e+09
NJ    4.168571e+09
Name: Loan Amount, dtype: float64

In [307]:
# day of the week
df['DayNUM']= pd.to_datetime(df['Date']).dt.dayofweek
df.sample()

Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description,Loan Percentile,Loan Quartile,DayNUM
1011077,11700.0,WALDOBORO,ME,4572,722511,Subchapter S Corporation,Unanswered,Unanswered,Unanswered,1,2020-05-19,First National Bank,72,Full-Service Restaurants,This U.S. industry comprises establishments pr...,Accommodation and Food Services,30%,45%,1


In [308]:
# monday most common day for loan issuance
df['DayNUM'].value_counts()

1    886508
2    760776
3    626077
0    607724
4    553053
6    468617
5    190333
Name: DayNUM, dtype: int64

The 90th percentile of loans account for 20% of total in terms of loan amount


In [309]:
df['Loan Quartile'].value_counts()

45%    1054786
25%    1023298
75%    1023086
50%     991918
Name: Loan Quartile, dtype: int64

In [310]:
# replace numerical representation with day name
day_labels= {'0': 'Mon', '1':'Tues', '2' : 'Wed', '3':'Thurs', '4': 'Fri', '5':'Sat', '6' :'Sun'}
df['Weekday'] = df['DayNUM'].astype(str).replace(day_labels)
df.sample()

Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description,Loan Percentile,Loan Quartile,DayNUM,Weekday
229396,10000.0,SPRINGFIELD,IL,62701,722515,Limited Liability Company(LLC),Unanswered,Unanswered,Unanswered,1,2020-04-14,INB,72,Snack and Nonalcoholic Beverage Bars,This U.S. industry comprises establishments pr...,Accommodation and Food Services,30%,45%,1,Tues


In [311]:
# shorten names of business types and combine some that are similar such as joint venture and partnership
df['Business Type'] = df['Business Type'].replace({'Limited  Liability Company(LLC)':'LLC', 
                                                   'Subchapter S Corporation': 'S Corp', 
                                                   'Self-Employed Individuals':'Self Employed', 
                                                   'Independent Contractors' : 'Self Employed',
                                                   'Non-Profit Organization': 'Non-Profit',
                                                   'Limited Liability Partnership': 'Partnership',
                                                   'Joint Venture':'Partnership'
                                                   })
df.sample()

Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description,Loan Percentile,Loan Quartile,DayNUM,Weekday
2755893,86300.0,AUSTIN,TX,78746,52392,LLC,Unanswered,Unanswered,Unanswered,5,2020-04-05,Horizon Bank,52,Portfolio Management,See industry description for 523920.,Finance and Insurance,90%,75%,6,Sun


In [312]:
# business types by count
df['Business Type'].value_counts()

LLC                                    1201328
Corporation                            1109556
Sole Proprietorship                     676932
S Corp                                  575375
Self Employed                           285943
Non-Profit                              135428
Partnership                              82008
Professional Association                 18374
Cooperative                               4442
Non-Profit Childcare Center               1949
Trust                                      910
Tenant in Common                           495
Employee Stock Ownership Plan(ESOP)        329
Rollover as Business Start-Ups (ROB         19
Name: Business Type, dtype: int64

Gender, race and military status explored

In [313]:
df['Gender'].value_counts()

Unanswered      3206594
Male Owned       671909
Female Owned     214585
Name: Gender, dtype: int64

In [314]:
# gender value shortened 
df['Gender'] = df['Gender'].replace({'Male Owned':'Male', 'Female Owned': 'Female', })
df.sample()

Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description,Loan Percentile,Loan Quartile,DayNUM,Weekday
1359932,41070.0,MILLSTONE TOWNSHIP,NJ,8535,722515,LLC,White,Male,Non-Veteran,15,2020-04-13,TD Bank,72,Snack and Nonalcoholic Beverage Bars,This U.S. industry comprises establishments pr...,Accommodation and Food Services,80%,50%,0,Mon


In [315]:
#value counts of race
df['Race'].value_counts()

Unanswered                          3666880
White                                326861
Asian                                 49795
Hispanic                              33288
Black or African American             13922
American Indian or Alaska Native       2208
Puerto Rican                            125
Multi Group                               6
Eskimo & Aleut                            3
Name: Race, dtype: int64

In [316]:
#shorten race values
df['Race'] = df['Race'].replace({'Black or African American':'Black', 'American Indian or Alaska Native': 'Native American', 'Eskimo & Aleut':'Eskimo'})
df.sample()

Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description,Loan Percentile,Loan Quartile,DayNUM,Weekday
3090417,89000.0,SUBLETTE,KS,67877,11115,Partnership,White,Male,Non-Veteran,11,2020-04-28,Centera Bank,11,Corn FarmingT,See industry description for 111150.,Agriculture Forestry Fishing and Hunting,90%,75%,1,Tues


In [317]:
#send dataframe for all values to csv (additional columns and null values cleaned)
df.to_csv('full_under_150k_data.csv', index=True)
!cp full_monty_peeps.csv "drive/My Drive/"

In [318]:
#loan quartiles
loan_qt = np.percentile(df['Loan Amount'], [25, 50, 75, 100])
loan_qt


array([  9882.  ,  20832.  ,  46500.  , 149999.98])

In [319]:
# top 10 percent of loans
high_pct = df[(df["Loan Percentile"] == '90%')]

high_pct.head(1)


Unnamed: 0,Loan Amount,City,State,Zip,NAICS Code,Business Type,Race,Gender,Veteran,Jobs Saved,Date,Bank,NAICS Category,Title,Description,description,Loan Percentile,Loan Quartile,DayNUM,Weekday
0,149961.0,KANSAS CITY,MO,64108,54199,LLC,Unanswered,Unanswered,Unanswered,13,2020-04-13,Country Club Bank,54,"All Other Professional, Scientific, and Techni...",See industry description for 541990.,Professional Scientific and Technical Services,90%,75%,0,Mon


In [320]:
#save to csv and will convert to a table in SQL / Tableau for further analysis 

high_pct.to_csv('top_percentile_banks.csv', index=True)
!cp top_percentile_banks.csv "drive/My Drive/"

Take a look at Veteran status, but will explore deeper in another notebook.

In [321]:
#veterans loan median
vt_md=df.groupby(['Veteran'])['Loan Amount'].median()
vt_md

Veteran
Non-Veteran    29410.5
Unanswered     20700.0
Veteran        33800.0
Name: Loan Amount, dtype: float64

In [322]:
#veterans loan mean
vt_avg=df.groupby(['Veteran'])['Loan Amount'].mean()
vt_avg

Veteran
Non-Veteran    42277.587238
Unanswered     32273.659917
Veteran        45646.476189
Name: Loan Amount, dtype: float64

In [323]:
#veterans loan count
vt_ct=df.groupby(['Veteran'])['Loan Amount'].count()
vt_ct

Veteran
Non-Veteran     569404
Unanswered     3497651
Veteran          26033
Name: Loan Amount, dtype: int64

In [324]:
#veterans loan sum
vt_sum=df.groupby(['Veteran'])['Loan Amount'].sum()
vt_sum
           

Veteran
Non-Veteran    2.407303e+10
Unanswered     1.128820e+11
Veteran        1.188315e+09
Name: Loan Amount, dtype: float64

In [325]:
vt_cat=df.groupby(['Veteran'])['Loan Amount']
vt_cat

<pandas.core.groupby.generic.SeriesGroupBy object at 0x7f60c8a5cbe0>

In [326]:
military_df={
    'Average Loan' : vt_avg,
    'Sum of Loans' : vt_sum,
    'Count of Loans' : vt_ct,
    'Median Loan' : vt_md,   
    
}
mil_df=pd.DataFrame(military_df)
mil_df

Unnamed: 0_level_0,Average Loan,Sum of Loans,Count of Loans,Median Loan
Veteran,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Non-Veteran,42277.587238,24073030000.0,569404,29410.5
Unanswered,32273.659917,112882000000.0,3497651,20700.0
Veteran,45646.476189,1188315000.0,26033,33800.0


In [327]:
#formatting
mil_df['Average Loan'] = mil_df['Average Loan'].map('${:,.2f}'.format)
mil_df['Sum of Loans'] = mil_df['Sum of Loans'].map('${:,.2f}'.format)
mil_df['Count of Loans'] = mil_df['Count of Loans'].map('{:,}'.format)
mil_df['Median Loan'] = mil_df['Median Loan'].map('${:,.2f}'.format)
mil_df

Unnamed: 0_level_0,Average Loan,Sum of Loans,Count of Loans,Median Loan
Veteran,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Non-Veteran,"$42,277.59","$24,073,027,283.45",569404,"$29,410.50"
Unanswered,"$32,273.66","$112,881,998,880.83",3497651,"$20,700.00"
Veteran,"$45,646.48","$1,188,314,714.63",26033,"$33,800.00"


In [328]:
i = mil_df.index.get_loc('Veteran')
vet=mil_df.iloc[i:i+2]
vet

Unnamed: 0_level_0,Average Loan,Sum of Loans,Count of Loans,Median Loan
Veteran,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Veteran,"$45,646.48","$1,188,314,714.63",26033,"$33,800.00"
