In [1]:
import plotly.plotly as py
from state_converter import state_to_abbrev as state_to_abbrev
import pandas as pd

%matplotlib inline

In [2]:
# Read the state-level STD ranking table from the CSV next to the notebook.
std_df = pd.read_csv(filepath_or_buffer='STDs_Nationally_Ranked_By_State.csv')

In [3]:
# Preview the first five rows to sanity-check the load.
std_df.head(n=5)

Unnamed: 0,Sort,Year,State,Chlamydia_Count,Chlamydia_Rate,Chlamydia_RankByCount,Chlamydia_RankByRate,Gonorrhea_Count,Gonorrhea_Rate,Gonorrhea_RankByCount,Gonorrhea_RankByRate,Primary_Secondary_Syphilis_Count,Primary_Secondary_Syphilis_Rate,Primary_Secondary_Syphilis_RankByCount,Primary_Secondary_Syphilis_RankByRate
0,1,2000,Alabama,15323,344.2,14,6,12063,271.0,12,3,123,2.8,17,13.0
1,2,2000,Alaska,2569,409.3,40,2,361,57.5,41,35,0,0.0,39,
2,3,2000,Arizona,12591,243.7,21,22,4130,79.9,23,28,189,3.7,13,10.0
3,4,2000,Arkansas,6219,232.2,31,25,3642,136.0,24,17,104,3.9,19,9.0
4,5,2000,California,95392,280.5,1,12,21619,63.6,4,32,325,1.0,9,19.0


In [4]:
# Print the frame summary: index range, per-column non-null counts and dtypes.
std_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 15 columns):
Sort                                      800 non-null int64
Year                                      800 non-null int64
State                                     800 non-null object
Chlamydia_Count                           800 non-null int64
Chlamydia_Rate                            800 non-null float64
Chlamydia_RankByCount                     800 non-null int64
Chlamydia_RankByRate                      800 non-null int64
Gonorrhea_Count                           800 non-null int64
Gonorrhea_Rate                            800 non-null float64
Gonorrhea_RankByCount                     800 non-null int64
Gonorrhea_RankByRate                      800 non-null int64
Primary_Secondary_Syphilis_Count          800 non-null int64
Primary_Secondary_Syphilis_Rate           800 non-null float64
Primary_Secondary_Syphilis_RankByCount    800 non-null int64
Primary_Secondary_Syphilis_RankByR

In [5]:
# Normalise column labels to lower case with surrounding whitespace removed,
# so later cells can refer to them as e.g. 'year' and 'chlamydia_rate'.
# Reassigning the result (instead of inplace=True) keeps the cell free of
# in-place mutation and safe to re-run.
std_df = std_df.rename(columns=lambda label: label.lower().strip())

In [6]:
# Colour scale as [position, colour] stops, with positions running from
# 0.0 to 1.0 in steps of 0.2.
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]


In [7]:
# Keep only the 2015 rows; copy so that columns added later do not write
# into a view of std_df.
std15_df = std_df.loc[std_df['year'].eq(2015)].copy()

In [8]:
# Combined case count per state: chlamydia + gonorrhea + primary/secondary
# syphilis, each cast to float before summing.
std15_df['total_count_stds'] = (
    std15_df['chlamydia_count'].astype(float)
    .add(std15_df['gonorrhea_count'].astype(float))
    .add(std15_df['primary_secondary_syphilis_count'].astype(float))
)

In [9]:
# Convert each value in 'state' with state_to_abbrev; the result is used as
# the choropleth location code.
std15_df['state_abbreviations'] = std15_df['state'].map(state_to_abbrev)

In [10]:
# Hover label per state: the state name, an HTML line break, then the
# chlamydia rate rendered as a string.
std15_df['text'] = std15_df['state'] + '<br>' + (
    'Chlamydia Rate: ' + std15_df['chlamydia_rate'].astype(str)
)
                  

In [11]:
# One choropleth trace: locations come from the state abbreviations, the
# shading value (z) from the chlamydia rate, and the hover text from 'text'.
data = [
    {
        'type': 'choropleth',
        'colorscale': scl,
        'autocolorscale': False,  # use the explicit scl stops above
        'locations': std15_df['state_abbreviations'],
        'z': std15_df['chlamydia_rate'],
        'locationmode': 'USA-states',
        'text': std15_df['text'],
        # White, 2-wide outline around each state.
        'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
        'colorbar': {'title': "STD Rate"},
    }
]

In [12]:
# Figure layout: title plus a USA-scoped geo axis using the 'albers usa'
# projection, with lakes drawn in white.
layout = {
    'title': '2015 Chlamydia Rate (per 100,000) by State<br>(Hover for breakdown)',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
}

In [13]:
# Bundle the trace list and layout into a figure and hand it to plotly's
# iplot under the given filename.
fig = {'data': data, 'layout': layout}
py.iplot(fig, filename='d3-cloropleth-map')