In [10]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time
from census import Census
from us import states
import gmaps
from config import gkey
import scipy.stats as stats
from pprint import pprint

In [11]:
# Load the causes-of-death CSV into the base frame and preview its first rows
death_csv_path = "Causes_of_Death.csv"
death_pd_base = pd.read_csv(death_csv_path)
death_pd_base.head()

Unnamed: 0,Year,113 Cause Name,Cause Name,State,Deaths,Age-adjusted Death Rate
0,2012,"Nephritis, nephrotic syndrome and nephrosis (N...",Kidney disease,Vermont,21,2.6
1,2016,"Nephritis, nephrotic syndrome and nephrosis (N...",Kidney disease,Vermont,30,3.7
2,2013,"Nephritis, nephrotic syndrome and nephrosis (N...",Kidney disease,Vermont,30,3.8
3,2000,"Intentional self-harm (suicide) (*U03,X60-X84,...",Suicide,District of Columbia,23,3.8
4,2014,"Nephritis, nephrotic syndrome and nephrosis (N...",Kidney disease,Arizona,325,4.1


In [3]:
#Create table with mean of Age-adjusted death rates per cause from 1999-2016
# Step 1: mean rate per (Year, Cause Name), pivoted so each cause is a column.
#         The rate column is selected BEFORE aggregating: calling mean() on the whole
#         frame would also try to average the text columns, which raises TypeError
#         on pandas >= 2.0.
ppcauses = (
    death_pd_base
    .groupby(["Year", "Cause Name"])["Age-adjusted Death Rate"]
    .mean()
    .unstack(level=-1)
)

#Drop "All causes" to focus on 10 leading causes, then average the yearly means per cause
means = ppcauses.drop(columns="All causes").mean()

# Build the two-column summary and sort by ascending rate (no inplace mutation, so the
# cell gives the same result every time it is re-run).
leadcause = pd.DataFrame(
    {"Cause Name": means.index, "Rate": means.values}
).sort_values("Rate")
leadcause

Unnamed: 0,Cause Name,Rate
8,Suicide,13.23547
6,Kidney disease,14.139423
5,Influenza and pneumonia,18.389103
3,Diabetes,23.471902
0,Alzheimer's disease,24.618697
9,Unintentional injuries,42.827671
1,CLRD,44.644658
7,Stroke,46.358333
2,Cancer,179.898825
4,Heart disease,199.782372


In [None]:
# Horizontal bar chart of the mean age-adjusted death rate for each leading cause.
# The bars follow the row order of leadcause.
x, y = leadcause["Cause Name"], leadcause["Rate"]

bar_style = {"color": "blue", "alpha": 0.65, "align": "center"}
plt.barh(x, y, **bar_style)
plt.xlabel("Age-Adjusted Death Rate per 100,000 habitants")
plt.title("Top 10 Leading Causes of Death in USA (1999-2016)")

# Write the figure to disk before showing it
plt.savefig("Leading Causes of Death USA.png")
plt.show()

In [None]:
#Group the years into three six-year ranges and total the deaths per cause in each range
# pd.cut builds right-closed intervals by default, so these edges produce
# (1998, 2004], (2004, 2010], (2010, 2016] -- exactly the ranges named by the labels.
bins = [1998, 2004, 2010, 2016]
group_names = ["1999-2004", "2005-2010", "2011-2016"]

# Work on a copy so the frame loaded from the CSV keeps its original columns.
death_pd = death_pd_base.copy()
death_pd["Year Range"] = pd.cut(death_pd["Year"], bins=bins, labels=group_names)

# Select "Deaths" before summing so only that column is aggregated.
death_by_year = (
    death_pd
    .groupby(["Year Range", "Cause Name"], observed=True)["Deaths"]
    .sum()
    .unstack(level=-1)
)
death_by_year

In [None]:
# Mean age-adjusted death rate per cause within each year range.
# The rate column is selected before aggregating so mean() is never applied to the
# text columns of death_pd (which raises TypeError on pandas >= 2.0).
deaths_year = (
    death_pd
    .groupby(["Year Range", "Cause Name"], observed=True)["Age-adjusted Death Rate"]
    .mean()
    .unstack(level=-1)
)
deaths_year

In [None]:
# Remove the "All causes" aggregate so only the individual causes remain.
# deaths_year is reassigned to itself, so on a second run of this cell the column is
# already gone; errors="ignore" keeps the cell re-runnable instead of raising KeyError.
deaths_year = deaths_year.drop(columns="All causes", errors="ignore")
deaths_year

In [None]:
#Plot the trend of the diseases in each group of years
ax = deaths_year.plot(kind="line")

# Label the figure so it can be read on its own
ax.set_title("Mean Age-Adjusted Death Rate by Cause and Year Range")
ax.set_xlabel("Year Range")
ax.set_ylabel("Age-Adjusted Death Rate per 100,000")

# One line per cause: keep the legend outside the axes so it does not cover the lines
ax.legend(title="Cause Name", bbox_to_anchor=(1.02, 1), loc="upper left")
plt.show()

# Null Hypothesis #1

- The trend observed for Heart disease and Cancer (that is, the decrease in the age-adjusted death rate per 100,000 inhabitants) does not represent a statistically significant change.

In [None]:
#Test the null hypothesis with T-test



In [None]:
# Total deaths per State and cause over the whole dataset, excluding the "All causes" rows
is_single_cause = death_pd_base['Cause Name'] != 'All causes'
death_pd = death_pd_base[is_single_cause]

state_cause_columns = ['State', 'Cause Name', 'Deaths']
by_state = death_pd[state_cause_columns]

# Group once, then display the summed deaths for every (State, Cause Name) pair
grouped = by_state.groupby(['State', 'Cause Name'])
grouped.sum()


In [None]:
#Create a table with mean of age-adjusted rate per State and disease during 1999-2016 period

# Exclude the "All causes" aggregate rows
death_pd = death_pd_base[death_pd_base['Cause Name'] != 'All causes']

# Keep only the columns this table needs
by_state_adr = death_pd[['State', 'Cause Name', 'Age-adjusted Death Rate']]

# Group the rate frame itself (not the Deaths frame built in another cell)
grouped = by_state_adr.groupby(['State', 'Cause Name'])

# Mean rate per (State, Cause Name), pivoted so each cause becomes a column.
# The rate column is selected before mean() so no text column is aggregated.
death_by_state_adr = grouped['Age-adjusted Death Rate'].mean().unstack(level=-1)

death_by_state_adr



In [12]:
#Total heart disease deaths per State (used as the weights of the heat map)
death_pd = death_pd_base[death_pd_base['Cause Name'] == 'Heart disease']
by_heartdisease = death_pd[['State', 'Cause Name', 'Deaths']]
grouped = by_heartdisease.groupby(['State', 'Cause Name'])

by_heartdisease_grouped = by_heartdisease.groupby(["State"])

# Sum only the Deaths column: summing the whole group would also "sum" the text column
# 'Cause Name', which pandas >= 2.0 does by concatenating the strings into an extra column.
by_heartdisease_df = by_heartdisease_grouped[["Deaths"]].sum().reset_index()
by_heartdisease_df.head()


Unnamed: 0,State,Deaths
0,Alabama,227433
1,Alaska,12107
2,Arizona,193063
3,Arkansas,138372
4,California,1141776


In [13]:
# Lookup of State name -> [latitude, longitude] used to place each State on the map
coord_states = {
    "Alabama": [32.806671, -86.791130],
    "Alaska": [61.370716, -152.404419],
    "Arizona": [33.729759, -111.431221],
    "Arkansas": [34.969704, -92.373123],
    "California": [36.116203, -119.681564],
    "Colorado": [39.059811, -105.311104],
    "Connecticut": [41.597782, -72.755371],
    "Delaware": [39.318523, -75.507141],
    "District of Columbia": [38.897438, -77.026817],
    "Florida": [27.766279, -81.686783],
    "Georgia": [33.040619, -83.643074],
    "Hawaii": [21.094318, -157.498337],
    "Idaho": [44.240459, -114.478828],
    "Illinois": [40.349457, -88.986137],
    "Indiana": [39.849426, -86.258278],
    "Iowa": [42.011539, -93.210526],
    "Kansas": [38.526600, -96.726486],
    "Kentucky": [37.668140, -84.670067],
    "Louisiana": [31.169546, -91.867805],
    "Maine": [44.693947, -69.381927],
    "Maryland": [39.063946, -76.802101],
    "Massachusetts": [42.230171, -71.530106],
    "Michigan": [43.326618, -84.536095],
    "Minnesota": [45.694454, -93.900192],
    "Mississippi": [32.741646, -89.678696],
    "Missouri": [38.456085, -92.288368],
    "Montana": [46.921925, -110.454353],
    "Nebraska": [41.125370, -98.268082],
    "Nevada": [38.313515, -117.055374],
    "New Hampshire": [43.452492, -71.563896],
    "New Jersey": [40.298904, -74.521011],
    "New Mexico": [34.840515, -106.248482],
    "New York": [42.165726, -74.948051],
    "North Carolina": [35.630066, -79.806419],
    "North Dakota": [47.528912, -99.784012],
    "Ohio": [40.388783, -82.764915],
    "Oklahoma": [35.565342, -96.928917],
    "Oregon": [44.572021, -122.070938],
    "Pennsylvania": [40.590752, -77.209755],
    "Rhode Island": [41.680893, -71.511780],
    "South Carolina": [33.856892, -80.945007],
    "South Dakota": [44.299782, -99.438828],
    "Tennessee": [35.747845, -86.692345],
    "Texas": [31.054487, -97.563461],
    "Utah": [40.150032, -111.862434],
    "Vermont": [44.045876, -72.710686],
    "Virginia": [37.769337, -78.169968],
    "Washington": [47.400902, -121.490494],
    "West Virginia": [38.491226, -80.954453],
    "Wisconsin": [44.268543, -89.616508],
    "Wyoming": [42.755966, -107.302490],
}

# One row per State with its coordinates split into Lat / Lon columns
coord_rows = [[name, lat, lon] for name, (lat, lon) in coord_states.items()]
coord_states_df = pd.DataFrame(coord_rows, columns=["State", "Lat", "Lon"])
# Attach coordinates to the per-State heart disease totals by joining on the State name
heartdisease_df = by_heartdisease_df.merge(coord_states_df, on='State')

# Inputs for the map layers: coordinates and death totals, both cast to float
coordinate_columns = ["Lat", "Lon"]
locations = heartdisease_df[coordinate_columns].astype(float)
heartdisease = heartdisease_df["Deaths"].astype(float)

heartdisease_df.head()



Unnamed: 0,State,Deaths,Lat,Lon
0,Alabama,227433,32.806671,-86.79113
1,Alaska,12107,61.370716,-152.404419
2,Arizona,193063,33.729759,-111.431221
3,Arkansas,138372,34.969704,-92.373123
4,California,1141776,36.116203,-119.681564


In [14]:
# [lat, lon] pairs in the same order as the keys of coord_states
coord_states1 = list(coord_states.values())

# Heart disease death totals as a plain list, then as Python ints
list_heartdisease = list(heartdisease)
int_heartdisease = [int(deaths) for deaths in list_heartdisease]

In [28]:

    
# Heat map weighted by the total heart disease deaths of each State.

# gmaps needs the Google Maps API key before any figure is built; gkey is imported from
# config at the top of the notebook.
gmaps.configure(api_key=gkey)

fig = gmaps.figure()

heartdisease_layer = gmaps.heatmap_layer(
    locations,
    weights=heartdisease,
    dissipating=False,
    # max_intensity is the value mapped to the hottest colour. The weights are death
    # totals, so a small fixed cap would paint every State with the hottest colour;
    # tie the cap to the largest weight instead.
    max_intensity=float(heartdisease.max()),
    point_radius=3,
    gradient=['white', 'yellow', 'red'],
)

fig.add_layer(heartdisease_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [29]:
#Total cancer deaths per State (used as the weights of the heat map)
death_pd = death_pd_base[death_pd_base['Cause Name'] == 'Cancer']
by_cancer = death_pd[['State', 'Cause Name', 'Deaths']]

by_cancer_grouped = by_cancer.groupby(["State"])

# Sum only the Deaths column: summing the whole group would also "sum" the text column
# 'Cause Name', which pandas >= 2.0 does by concatenating the strings into an extra column.
by_cancer_df = by_cancer_grouped[["Deaths"]].sum().reset_index()
by_cancer_df.head()


Unnamed: 0,State,Deaths
0,Alabama,180780
1,Alaska,15032
2,Arizona,184958
3,Arkansas,115074
4,California,1002719


In [30]:
# Attach State coordinates to the per-State cancer totals.
# The State -> [lat, lon] lookup (coord_states) and its DataFrame form (coord_states_df)
# are already built in the heart disease section earlier in this notebook. They are
# reused here instead of being redefined, so there is a single copy of the 51-entry
# table to maintain.
cancer_df = pd.merge(by_cancer_df, coord_states_df, on='State')

# Inputs for the map layers: coordinates and death totals, both cast to float
locations = cancer_df[["Lat", "Lon"]].astype(float)
cancer = cancer_df["Deaths"].astype(float)

cancer_df.head()



Unnamed: 0,State,Deaths,Lat,Lon
0,Alabama,180780,32.806671,-86.79113
1,Alaska,15032,61.370716,-152.404419
2,Arizona,184958,33.729759,-111.431221
3,Arkansas,115074,34.969704,-92.373123
4,California,1002719,36.116203,-119.681564


In [31]:
# Heat map plus scaled markers for the total cancer deaths of each State.

# gmaps needs the Google Maps API key before any figure is built; gkey is imported from
# config at the top of the notebook.
gmaps.configure(api_key=gkey)

# [lat, lon] pairs in the same order as the keys of coord_states
coord_states1 = list(coord_states.values())

# Cancer death totals as a plain list, then as Python ints for the symbol layer
list_cancer = list(cancer)
int_cancer = [int(deaths) for deaths in list_cancer]

fig = gmaps.figure()

cancer_layer = gmaps.heatmap_layer(
    locations,
    # Weight by the cancer totals; the heartdisease series from the earlier section
    # must not be used here or the map shows the wrong cause.
    weights=cancer,
    dissipating=False,
    # max_intensity is the value mapped to the hottest colour. The weights are death
    # totals, so a small fixed cap would paint every State with the hottest colour;
    # tie the cap to the largest weight instead.
    max_intensity=float(cancer.max()),
    point_radius=3,
    gradient=['white', 'yellow', 'red'],
)

# NOTE(review): scale receives the raw death counts, so marker sizes are in the
# thousands and above -- confirm this is intended or rescale to a small range.
# NOTE(review): coord_states1 and int_cancer are only aligned when every State in
# coord_states is present in the data, in the same order -- verify.
cancer1_layer = gmaps.symbol_layer(
    coord_states1,
    fill_color='red',
    stroke_color='red',
    scale=int_cancer,
)

fig.add_layer(cancer_layer)
fig.add_layer(cancer1_layer)

fig

Figure(layout=FigureLayout(height='420px'))

# Null Hypothesis #2

- The suicide rate is not significantly related to lower temperatures throughout the year.

In [None]:
#Create a table with mean of age-adjusted rate per State and suicide



In [None]:
#Create a heat map showing death rates for suicide



In [None]:
#Test the null hypothesis with ANOVA test?



In [None]:
#Do a map with the most common cause of death (by age-adjusted rate) in each state during 1999-2016

