In [1]:
import chart_studio.plotly as py
from plotly.graph_objs import *
import plotly.graph_objects as go
import sqlite3
import pandas as pd

# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
#from pandas_profiling import ProfileReport
import plotly.express as px




In [2]:
# Path to the raw mortality CSV; it is relative, so it is resolved against the
# kernel's current working directory.
mortality_src = 'documents/github/project_2/assets/data/mort.csv'

# Load the whole file into a DataFrame with pandas' default parsing options.
mortality = pd.read_csv(filepath_or_buffer=mortality_src)

In [3]:
# ETL steps to clean the data for export to SQLite.
# Step 1: drop the (min) and (max) columns for each year by keeping only the
# columns whose name contains no opening parenthesis.
mortality = mortality.filter(
    items=[name for name in mortality.columns if '(' not in name]
)

In [4]:
# Clean the column names: each "Mortality Rate, <year>*" header becomes the bare
# year string, and the percent-change header becomes an identifier-style name.
mortality = mortality.rename(
    columns={
        "Mortality Rate, 1980*": "1980",
        "Mortality Rate, 1985*": "1985",
        "Mortality Rate, 1990*": "1990",
        "Mortality Rate, 1995*": "1995",
        "Mortality Rate, 2000*": "2000",
        "Mortality Rate, 2005*": "2005",
        "Mortality Rate, 2010*": "2010",
        "Mortality Rate, 2014*": "2014",
        "% Change in Mortality Rate, 1980-2014": "Change_1980_2014",
    }
)

In [5]:
# Reshape wide -> long: every column that is not listed in id_vars is folded
# into a (Date, Value) pair, one row per original column.
# NOTE: this overwrites `mortality`, so the cell is not safe to run twice
# without re-running the cells above it first.
mortality = pd.melt(
    mortality,
    id_vars=['Location', 'FIPS', 'Category', 'Change_1980_2014'],
    var_name="Date",
    value_name="Value",
)

In [6]:
# Lookup of original category label -> shortened label. Categories that are
# already short map to themselves so the full set of categories is listed.
new_vals = {
    "Neonatal disorders": "Neonatal disorders",
    "HIV/AIDS and tuberculosis": "HIV/AIDS and TB",
    "Musculoskeletal disorders": "Musculoskeletal disorders",
    "Diabetes, urogenital, blood, and endocrine diseases": "Diabetes",
    "Digestive diseases": "Digestive diseases",
    "Chronic respiratory diseases": "Chronic resp",
    "Neurological disorders": "Neurological disorders",
    "Cirrhosis and other chronic liver diseases": "Chronic liver",
    "Mental and substance use disorders": "Mental disorders",
    "Forces of nature, war, and legal intervention": "Non Natural",
    "Unintentional injuries": "Unintentional injuries",
    "Nutritional deficiencies": "Nutritional deficiencies",
    "Other communicable, maternal, neonatal, and nutritional diseases": "Other communicable",
    "Cardiovascular diseases": "Cardiovascular",
    "Diarrhea, lower respiratory, and other common infectious diseases": "Diarrhea",
    "Maternal disorders": "Maternal disorders",
    "Other non-communicable diseases": "Other non-communicable diseases",
    "Self-harm and interpersonal violence": "Violence",
    "Neoplasms": "Neoplasms",
    "Transport injuries": "Transport injuries",
    "Neglected tropical diseases and malaria": "Tropical diseases",
}

# The replacement is applied frame-wide: any cell equal to one of the keys
# above is swapped for its short form, whichever column it sits in.
mortality = mortality.replace(to_replace=new_vals)

In [7]:
# Registry of the derived DataFrames, keyed by the name of the table each holds.
df_dict = {}

# County-level slice: rows whose FIPS is greater than 1000. Rows with a missing
# FIPS never satisfy the comparison, so they are excluded here.
df_dict['mortality_county'] = mortality.query('FIPS > 1000').copy().reset_index(drop=True)

# Split Location into County and State on the LAST comma only. Without n=1,
# rsplit splits on every comma, so a Location containing more than one comma
# would produce three or more columns and the two-column assignment would fail.
# Each part is stripped so that any space following the comma does not end up
# as leading whitespace in State.
# NOTE(review): assumes Location looks like "<county>, <state>" - confirm against the CSV.
location_parts = df_dict['mortality_county']['Location'].str.rsplit(',', n=1, expand=True)
df_dict['mortality_county']['County'] = location_parts[0].str.strip()
df_dict['mortality_county']['State'] = location_parts[1].str.strip()

# Store FIPS as text, left-padded with zeros to five characters.
df_dict['mortality_county']['FIPS'] = (
    df_dict['mortality_county']['FIPS'].astype('int').astype('str').str.zfill(5)
)

# Location is now redundant with County + State.
df_dict['mortality_county'] = df_dict['mortality_county'].drop(columns='Location')

In [8]:
# State-level slice: rows whose FIPS is below 1000. FIPS is converted to text
# left-padded with zeros to two characters, and Location is relabelled State.
df_dict['mortality_state'] = (
    mortality
    .query('FIPS < 1000')
    .reset_index(drop=True)
    .assign(
        FIPS=lambda frame: frame['FIPS'].astype('int').map('{0:0>2}'.format).astype('str')
    )
    .rename(columns={"Location": "State"})
)

In [9]:
# National slice: the rows that carry no FIPS code. The FIPS column is empty
# for every one of these rows, so it is dropped.
df_dict['mortality_us'] = (
    mortality
    .loc[mortality['FIPS'].isna()]
    .reset_index(drop=True)
    .drop(columns='FIPS')
)

In [39]:
# Preview up to the first 25 rows of the national ('mortality_us') frame.
df_dict['mortality_us'].head(25)

Unnamed: 0,Location,Category,Change_1980_2014,Date,Value
0,United States,Neonatal disorders,-63.85,1980,9.18
1,United States,HIV/AIDS and TB,74.35,1980,1.52
2,United States,Musculoskeletal disorders,3.23,1980,2.77
3,United States,Diabetes,20.98,1980,46.2
4,United States,Digestive diseases,-28.88,1980,19.93
5,United States,Chronic resp,29.73,1980,40.79
6,United States,Neurological disorders,18.73,1980,80.32
7,United States,Chronic liver,-15.64,1980,19.91
8,United States,Mental disorders,188.39,1980,4.64
9,United States,Non Natural,-79.54,1980,0.26


In [34]:
# Group the national frame by its 'Date' column. No aggregation is applied, so
# despite the name this holds a GroupBy object (not a DataFrame) and the cell
# displays only that object's repr.
grouped_mortality_df = df_dict['mortality_us'].groupby(['Date'])
grouped_mortality_df

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x125e2c510>

In [12]:
# Distinct category labels present in the county frame, in order of first appearance.
df_dict['mortality_county']['Category'].unique()

array(['Neonatal disorders', 'HIV/AIDS and TB',
       'Musculoskeletal disorders', 'Diabetes', 'Digestive diseases',
       'Chronic resp', 'Neurological disorders', 'Chronic liver',
       'Mental disorders', 'Non Natural', 'Unintentional injuries',
       'Nutritional deficiencies', 'Other communicable', 'Cardiovascular',
       'Diarrhea', 'Maternal disorders',
       'Other non-communicable diseases', 'Violence', 'Neoplasms',
       'Transport injuries', 'Tropical diseases'], dtype=object)

In [13]:
from urllib.request import urlopen
import json

# Download the county-boundary GeoJSON from the plotly datasets repository and
# parse the response body into Python objects.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.loads(response.read())

# Show the first feature to inspect its structure (properties, geometry, id).
counties["features"][0]

{'type': 'Feature',
 'properties': {'GEO_ID': '0500000US01001',
  'STATE': '01',
  'COUNTY': '001',
  'NAME': 'Autauga',
  'LSAD': 'County',
  'CENSUSAREA': 594.436},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-86.496774, 32.344437],
    [-86.717897, 32.402814],
    [-86.814912, 32.340803],
    [-86.890581, 32.502974],
    [-86.917595, 32.664169],
    [-86.71339, 32.661732],
    [-86.714219, 32.705694],
    [-86.413116, 32.707386],
    [-86.411172, 32.409937],
    [-86.496774, 32.344437]]]},
 'id': '01001'}

In [41]:
import os

# Slice labels for the chart: the shortened category names, in the same order
# as the `values` list below.
labels = ['Neonatal disorders', 'HIV/AIDS and TB',
       'Musculoskeletal disorders', 'Diabetes', 'Digestive diseases',
       'Chronic resp', 'Neurological disorders', 'Chronic liver',
       'Mental disorders', 'Non Natural', 'Unintentional injuries',
       'Nutritional deficiencies', 'Other communicable', 'Cardiovascular',
       'Diarrhea', 'Maternal disorders',
       'Other non-communicable diseases', 'Violence', 'Neoplasms',
       'Transport injuries', 'Tropical diseases']
# NOTE(review): the first ten numbers match the 1980 "United States" rows shown
# by the head() preview of df_dict['mortality_us']; the rest presumably continue
# that series - TODO confirm, and consider deriving both lists from the frame.
values = [9.18,1.52,2.77,46.2,19.93,40.79,80.32,19.91,4.64,0.26,23.85,1.73,
          1.85,507.37,38.54,0.27,9.74,25.16,240.24,25.24,0.04 ]

# Use `hole` to create a donut-like pie chart
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
# Render locally first so the chart is visible even when the upload below
# cannot run.
fig.show()

# Chart Studio credentials are read from the environment instead of being
# written into the notebook. The API key that was previously committed here
# must be treated as compromised and regenerated in the Chart Studio settings.
plotly_username = os.environ.get('PLOTLY_USERNAME')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if not plotly_username or not plotly_api_key:
    raise RuntimeError(
        'Set the PLOTLY_USERNAME and PLOTLY_API_KEY environment variables '
        'before uploading the figure to Chart Studio.'
    )
py.sign_in(plotly_username, plotly_api_key)

# Upload the figure to Chart Studio and keep the URL that py.plot returns.
plot_url = py.plot(fig)