In [28]:
# Imports 
%matplotlib inline

import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt

import patsy
import statsmodels.api as sm
import scipy.stats as stats
from scipy.stats import ttest_ind, chisquare, normaltest


In [29]:
# Load the poverty report CSV into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
df = pd.read_csv(filepath_or_buffer='PovertyReport.csv')

# Preview the first five rows of the raw table.
df.head()

Unnamed: 0,Textbox93,Textbox96,fips,county,ruc_code,total_est,total_Bmin,total_Bmax,under18,under18_Bmin,under18_Bmax
0,All people in poverty (2017),Children ages 0-17 in poverty (2017),6000.0,California,,5164169,5111205,5217133,1615913,1586526,1645300
1,All people in poverty (2017),Children ages 0-17 in poverty (2017),6001.0,Alameda,1.0,152087,142181,161993,35658,31083,40233
2,All people in poverty (2017),Children ages 0-17 in poverty (2017),6003.0,Alpine,8.0,202,155,249,67,50,84
3,All people in poverty (2017),Children ages 0-17 in poverty (2017),6005.0,Amador,6.0,3762,2896,4628,908,683,1133
4,All people in poverty (2017),Children ages 0-17 in poverty (2017),6007.0,Butte,3.0,40728,36647,44809,10208,8436,11980


In [30]:
# Give the report's machine-generated headers readable labels.
# The mapping is keyed on the source header, so each label follows its own
# column even if the CSV's column order changes; a positional
# `df.columns = [...]` assignment would silently mislabel the data in that case.
column_labels = {
    "Textbox93": "Year",
    "Textbox96": "Textbox106",
    "fips": "FIPS",
    "county": "County",
    "ruc_code": "RUC Code",
    "total_est": "Total People in Poverty",
    "total_Bmin": "Lower Bound of all people in poverty",
    "total_Bmax": "upperBound of all people in poverty",
    "under18": "Number of children in poverty",
    "under18_Bmin": "lower bound of children in poverty",
    "under18_Bmax": "upperbound of children in poverty",
}
df = df.rename(columns=column_labels)

# rename() skips source headers it cannot find, so confirm every label landed
# before later cells start dropping columns by these names.
assert set(column_labels.values()) <= set(df.columns), "PovertyReport.csv is missing an expected column"
df.head(5)

Unnamed: 0,Year,Textbox106,FIPS,County,RUC Code,Total People in Poverty,Lower Bound of all people in poverty,upperBound of all people in poverty,Number of children in poverty,lower bound of children in poverty,upperbound of children in poverty
0,All people in poverty (2017),Children ages 0-17 in poverty (2017),6000.0,California,,5164169,5111205,5217133,1615913,1586526,1645300
1,All people in poverty (2017),Children ages 0-17 in poverty (2017),6001.0,Alameda,1.0,152087,142181,161993,35658,31083,40233
2,All people in poverty (2017),Children ages 0-17 in poverty (2017),6003.0,Alpine,8.0,202,155,249,67,50,84
3,All people in poverty (2017),Children ages 0-17 in poverty (2017),6005.0,Amador,6.0,3762,2896,4628,908,683,1133
4,All people in poverty (2017),Children ages 0-17 in poverty (2017),6007.0,Butte,3.0,40728,36647,44809,10208,8436,11980


In [31]:
# Rebind df to a copy of the table without the FIPS column.
df = df.drop('FIPS', axis=1)
df.head()

Unnamed: 0,Year,Textbox106,County,RUC Code,Total People in Poverty,Lower Bound of all people in poverty,upperBound of all people in poverty,Number of children in poverty,lower bound of children in poverty,upperbound of children in poverty
0,All people in poverty (2017),Children ages 0-17 in poverty (2017),California,,5164169,5111205,5217133,1615913,1586526,1645300
1,All people in poverty (2017),Children ages 0-17 in poverty (2017),Alameda,1.0,152087,142181,161993,35658,31083,40233
2,All people in poverty (2017),Children ages 0-17 in poverty (2017),Alpine,8.0,202,155,249,67,50,84
3,All people in poverty (2017),Children ages 0-17 in poverty (2017),Amador,6.0,3762,2896,4628,908,683,1133
4,All people in poverty (2017),Children ages 0-17 in poverty (2017),Butte,3.0,40728,36647,44809,10208,8436,11980


In [32]:
# Swap the descriptive label stored in the report for the numeric year 2017.
# No column is specified, so the substitution is applied across the whole frame.
df = df.replace(to_replace='All people in poverty (2017)', value=2017)
df.head()

Unnamed: 0,Year,Textbox106,County,RUC Code,Total People in Poverty,Lower Bound of all people in poverty,upperBound of all people in poverty,Number of children in poverty,lower bound of children in poverty,upperbound of children in poverty
0,2017.0,Children ages 0-17 in poverty (2017),California,,5164169,5111205,5217133,1615913,1586526,1645300
1,2017.0,Children ages 0-17 in poverty (2017),Alameda,1.0,152087,142181,161993,35658,31083,40233
2,2017.0,Children ages 0-17 in poverty (2017),Alpine,8.0,202,155,249,67,50,84
3,2017.0,Children ages 0-17 in poverty (2017),Amador,6.0,3762,2896,4628,908,683,1133
4,2017.0,Children ages 0-17 in poverty (2017),Butte,3.0,40728,36647,44809,10208,8436,11980


In [33]:
# Rebind df to a copy without the lower-bound column for all people in poverty.
df = df.drop(columns='Lower Bound of all people in poverty')
df.head()

Unnamed: 0,Year,Textbox106,County,RUC Code,Total People in Poverty,upperBound of all people in poverty,Number of children in poverty,lower bound of children in poverty,upperbound of children in poverty
0,2017.0,Children ages 0-17 in poverty (2017),California,,5164169,5217133,1615913,1586526,1645300
1,2017.0,Children ages 0-17 in poverty (2017),Alameda,1.0,152087,161993,35658,31083,40233
2,2017.0,Children ages 0-17 in poverty (2017),Alpine,8.0,202,249,67,50,84
3,2017.0,Children ages 0-17 in poverty (2017),Amador,6.0,3762,4628,908,683,1133
4,2017.0,Children ages 0-17 in poverty (2017),Butte,3.0,40728,44809,10208,8436,11980


In [34]:
# Rebind df to a copy without the upper-bound column for all people in poverty.
df = df.drop(labels=['upperBound of all people in poverty'], axis='columns')
df.head()

Unnamed: 0,Year,Textbox106,County,RUC Code,Total People in Poverty,Number of children in poverty,lower bound of children in poverty,upperbound of children in poverty
0,2017.0,Children ages 0-17 in poverty (2017),California,,5164169,1615913,1586526,1645300
1,2017.0,Children ages 0-17 in poverty (2017),Alameda,1.0,152087,35658,31083,40233
2,2017.0,Children ages 0-17 in poverty (2017),Alpine,8.0,202,67,50,84
3,2017.0,Children ages 0-17 in poverty (2017),Amador,6.0,3762,908,683,1133
4,2017.0,Children ages 0-17 in poverty (2017),Butte,3.0,40728,10208,8436,11980


In [35]:
# Rebind df to a copy without the lower-bound column for children in poverty.
df = df.drop(['lower bound of children in poverty'], axis='columns')
df.head()

Unnamed: 0,Year,Textbox106,County,RUC Code,Total People in Poverty,Number of children in poverty,upperbound of children in poverty
0,2017.0,Children ages 0-17 in poverty (2017),California,,5164169,1615913,1645300
1,2017.0,Children ages 0-17 in poverty (2017),Alameda,1.0,152087,35658,40233
2,2017.0,Children ages 0-17 in poverty (2017),Alpine,8.0,202,67,84
3,2017.0,Children ages 0-17 in poverty (2017),Amador,6.0,3762,908,1133
4,2017.0,Children ages 0-17 in poverty (2017),Butte,3.0,40728,10208,11980


In [36]:
# Rebind df to a copy without the upper-bound column for children in poverty.
df = df.drop('upperbound of children in poverty', axis=1)
df.head()

Unnamed: 0,Year,Textbox106,County,RUC Code,Total People in Poverty,Number of children in poverty
0,2017.0,Children ages 0-17 in poverty (2017),California,,5164169,1615913
1,2017.0,Children ages 0-17 in poverty (2017),Alameda,1.0,152087,35658
2,2017.0,Children ages 0-17 in poverty (2017),Alpine,8.0,202,67
3,2017.0,Children ages 0-17 in poverty (2017),Amador,6.0,3762,908
4,2017.0,Children ages 0-17 in poverty (2017),Butte,3.0,40728,10208


In [37]:
# Rebind df to a copy without the Textbox106 column.
df = df.drop('Textbox106', axis='columns')
df.head()

Unnamed: 0,Year,County,RUC Code,Total People in Poverty,Number of children in poverty
0,2017.0,California,,5164169,1615913
1,2017.0,Alameda,1.0,152087,35658
2,2017.0,Alpine,8.0,202,67
3,2017.0,Amador,6.0,3762,908
4,2017.0,Butte,3.0,40728,10208


In [38]:
# Show a bounded ten-row preview of the cleaned table instead of requesting
# up to 120 rows, which can print the entire frame into the notebook.
df.head(10)

Unnamed: 0,Year,County,RUC Code,Total People in Poverty,Number of children in poverty
0,2017.0,California,,5164169,1615913
1,2017.0,Alameda,1.0,152087,35658
2,2017.0,Alpine,8.0,202,67
3,2017.0,Amador,6.0,3762,908
4,2017.0,Butte,3.0,40728,10208
5,2017.0,Calaveras,6.0,5904,1590
6,2017.0,Colusa,6.0,2633,912
7,2017.0,Contra Costa,1.0,103883,29067
8,2017.0,Del Norte,7.0,6113,1949
9,2017.0,El Dorado,1.0,15620,3602


In [39]:
# Display the first five rows of the cleaned table.
df.head(n=5)

Unnamed: 0,Year,County,RUC Code,Total People in Poverty,Number of children in poverty
0,2017.0,California,,5164169,1615913
1,2017.0,Alameda,1.0,152087,35658
2,2017.0,Alpine,8.0,202,67
3,2017.0,Amador,6.0,3762,908
4,2017.0,Butte,3.0,40728,10208
