In [88]:
# Dependencies and Setup
import pandas as pd

import numpy as np
import requests
import json
import datetime
from census import Census


In [89]:
# Load the county-level census table from CSV and preview its first two rows.
census_df = pd.read_csv(filepath_or_buffer="county_census_data.csv")
census_df.head(n=2)

Unnamed: 0,Name,County,Population,MedianAge,HouseholdIncome,PerCapitaIncome,UnemploymentCount,PovertyCount,PovertyRate,State,abbr,UnemploymentRate
0,"Corozal Municipio, Puerto Rico",Corozal Municipio,34933,38.1,14752,7887,3296,19146,54.807775,Puerto Rico,PR,0.094352
1,"Maunabo Municipio, Puerto Rico",Maunabo Municipio,11297,43.5,17636,8830,735,5945,52.624591,Puerto Rico,PR,0.065062


In [90]:
# Load the county-level debt table from CSV and preview its first five rows.
# Note: the source file was originally ANSI-encoded; it was re-saved as UTF-8
# (Notepad -> Save As) so that it can be read here.
debt_df = pd.read_csv(filepath_or_buffer="debtdata.csv")
debt_df.head(n=5)

Unnamed: 0,County,State,MedicalPercent,StudentDebtPercent,AutoDebtPercent,OtherPercent,MedicalAmount,StudentDebtAmount,AutoDebtAmount,OtherAmount,TotalDebt
0,Walker County,Alabama,0.44,0.08,0.26,0.22,81229.5,14769.0,47999.3,40614.8,184613
1,Jackson County,Alabama,0.36,0.08,0.25,0.31,66460.5,14769.0,46153.1,57229.9,152888
2,Etowah County,Alabama,0.4,0.1,0.27,0.23,73845.0,18461.3,49845.4,42460.9,149380
3,St. Clair County,Alabama,0.36,0.13,0.31,0.2,66460.5,23999.6,57229.9,36922.5,147531
4,Tuscaloosa County,Alabama,0.44,0.14,0.28,0.14,81229.5,25845.8,51691.5,25845.8,146907


In [91]:
# Inspect the column dtypes of the debt table.
debt_df.dtypes

County                 object
State                  object
MedicalPercent        float64
StudentDebtPercent    float64
AutoDebtPercent       float64
OtherPercent          float64
MedicalAmount         float64
StudentDebtAmount     float64
AutoDebtAmount        float64
OtherAmount           float64
TotalDebt               int64
dtype: object

In [92]:
# Merge the census and debt tables on the (County, State) pair.
# how='left' keeps every census row; census rows with no matching debt row
# get NaN in the debt columns.
all_data = pd.merge(census_df, debt_df, how='left', on=['County', 'State'])
# Preview only a handful of rows rather than dumping 100 into the notebook.
all_data.head(10)

Unnamed: 0,Name,County,Population,MedianAge,HouseholdIncome,PerCapitaIncome,UnemploymentCount,PovertyCount,PovertyRate,State,...,UnemploymentRate,MedicalPercent,StudentDebtPercent,AutoDebtPercent,OtherPercent,MedicalAmount,StudentDebtAmount,AutoDebtAmount,OtherAmount,TotalDebt
0,"Corozal Municipio, Puerto Rico",Corozal Municipio,34933,38.1,14752,7887,3296,19146,54.807775,Puerto Rico,...,0.094352,,,,,,,,,
1,"Maunabo Municipio, Puerto Rico",Maunabo Municipio,11297,43.5,17636,8830,735,5945,52.624591,Puerto Rico,...,0.065062,,,,,,,,,
2,"Peñuelas Municipio, Puerto Rico",Peñuelas Municipio,21661,36.4,16868,7983,1601,12379,57.148793,Puerto Rico,...,0.073912,,,,,,,,,
3,"Ponce Municipio, Puerto Rico",Ponce Municipio,148863,39.5,16561,10775,7013,75187,50.507514,Puerto Rico,...,0.047110,,,,,,,,,
4,"San Sebastián Municipio, Puerto Rico",San Sebastián Municipio,38970,42.0,14275,8072,3307,21472,55.098794,Puerto Rico,...,0.084860,,,,,,,,,
5,"Toa Alta Municipio, Puerto Rico",Toa Alta Municipio,74169,37.1,29672,14162,4266,22070,29.756367,Puerto Rico,...,0.057517,,,,,,,,,
6,"Toa Baja Municipio, Puerto Rico",Toa Baja Municipio,81905,38.9,24583,12240,6565,29279,35.747512,Puerto Rico,...,0.080154,,,,,,,,,
7,"Vieques Municipio, Puerto Rico",Vieques Municipio,8931,41.4,16261,11136,428,3260,36.502071,Puerto Rico,...,0.047923,,,,,,,,,
8,"Yabucoa Municipio, Puerto Rico",Yabucoa Municipio,35025,41.6,15586,8672,2685,18297,52.239829,Puerto Rico,...,0.076660,,,,,,,,,
9,"Aguada Municipio, Puerto Rico",Aguada Municipio,39470,41.0,16199,9001,2538,20558,52.085128,Puerto Rico,...,0.064302,,,,,,,,,


In [93]:
# Inspect the column dtypes of the merged census + debt table.
all_data.dtypes

Name                   object
County                 object
Population              int64
MedianAge             float64
HouseholdIncome         int64
PerCapitaIncome         int64
UnemploymentCount       int64
PovertyCount            int64
PovertyRate           float64
State                  object
abbr                   object
UnemploymentRate      float64
MedicalPercent        float64
StudentDebtPercent    float64
AutoDebtPercent       float64
OtherPercent          float64
MedicalAmount         float64
StudentDebtAmount     float64
AutoDebtAmount        float64
OtherAmount           float64
TotalDebt             float64
dtype: object

In [94]:
# Collapse the merged rows into one row per (State, abbr) pair.
# Population is summed; every other metric is the mean over the rows in the group.
# NOTE(review): UnemploymentCount and PovertyCount are averaged here while
# Population is summed -- confirm that a per-row average is what is wanted.
mean_columns = [
    'MedianAge',
    'HouseholdIncome',
    'PerCapitaIncome',
    'UnemploymentCount',
    'PovertyCount',
    'PovertyRate',
    'UnemploymentRate',
    'MedicalPercent',
    'StudentDebtPercent',
    'AutoDebtPercent',
    'OtherPercent',
    'MedicalAmount',
    'StudentDebtAmount',
    'AutoDebtAmount',
    'OtherAmount',
    'TotalDebt',
]

# Population goes first so the output column order stays the same.
state_agg_spec = {'Population': 'sum'}
state_agg_spec.update(dict.fromkeys(mean_columns, 'mean'))

all_data_byState = (
    all_data
    .groupby(['State', 'abbr'])
    .agg(state_agg_spec)
    .reset_index()
)
all_data_byState.head()


Unnamed: 0,State,abbr,Population,MedianAge,HouseholdIncome,PerCapitaIncome,UnemploymentCount,PovertyCount,PovertyRate,UnemploymentRate,MedicalPercent,StudentDebtPercent,AutoDebtPercent,OtherPercent,MedicalAmount,StudentDebtAmount,AutoDebtAmount,OtherAmount,TotalDebt
0,Alabama,AL,4850771,40.449254,40271.402985,22180.492537,2468.522388,12682.074627,20.34308,0.037291,0.401351,0.134324,0.280541,0.183784,74094.489189,24797.959459,51791.297297,33928.797297,119683.945946
1,Alaska,AK,738565,36.944828,66425.448276,31670.551724,1018.896552,2530.344828,13.352178,0.047799,0.294,0.142,0.298,0.266,54276.1,26215.0,55014.56,49106.92,98649.6
2,Arizona,AZ,6809946,39.933333,45816.666667,22712.2,15060.733333,75203.066667,19.451091,0.03854,0.389167,0.125,0.301667,0.184167,71845.033333,23076.583333,55691.441667,33999.483333,131477.916667
3,Arkansas,AR,2977944,41.330667,39025.08,21449.306667,1135.04,6989.48,19.66899,0.030689,0.4096,0.1508,0.3208,0.1188,75617.304,27839.576,59223.712,22845.804167,97777.08
4,California,CA,38982847,39.474138,61046.758621,30416.482759,25709.413793,99541.517241,15.226059,0.037277,0.301333,0.108,0.273778,0.316889,55629.906667,19938.16,50542.811111,58501.662222,143733.911111


In [87]:
# Show summary statistics for the state-level table.
all_data_byState.describe()

Unnamed: 0,Population
count,52.0
mean,6239872.0
std,7124336.0
min,583200.0
25%,1791976.0
50%,4224752.0
75%,6899951.0
max,38982850.0


In [35]:
# Inspect the column dtypes of the state-level table.
all_data_byState.dtypes

State                  object
abbr                   object
Population            float64
MedianAge             float64
HouseholdIncome       float64
PerCapitaIncome       float64
UnemploymentCount     float64
PovertyCount          float64
PovertyRate           float64
UnemploymentRate      float64
MedicialPercent       float64
StudentDebtPercent    float64
AutoDebtPercent       float64
OtherPercent          float64
MedicialAmount        float64
StudentDebtAmount     float64
AutoDebtAmount        float64
TotalDebt             float64
dtype: object

In [36]:
# Write the state-level table to a CSV file (UTF-8, index column omitted).
all_data_byState.to_csv(
    "county_census_debt_data_byState.csv",
    index=False,
    encoding="utf-8",
)