In [13]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt 
import plotly.offline as py
import plotly.express as px
import nltk

In [2]:
# Load the raw complaint records; the first CSV row supplies the column names.
df = pd.read_csv(
    "consumer_complaints.csv",
    header=0,
    low_memory=False,
)

In [3]:
# Tally complaints per state code and reshape the result into a two-column
# frame (state, complaints), ordered from most to fewest complaints.
state_data = (
    df['state']
    .value_counts()
    .rename_axis('state')
    .reset_index(name='complaints')
)
print(state_data)

   state  complaints
0     CA       81700
1     FL       53673
2     TX       41352
3     NY       38266
4     GA       24548
..   ...         ...
57    MH          27
58    MP          19
59    AS          17
60    PW           9
61    AA           9

[62 rows x 2 columns]


In [4]:
# Raw counts favour populous states, so scale each state's count by the
# `pop_2014` figure for its code and express it as complaints per 1000 people.
populations = pd.read_csv("population.csv", header=0)
population_by_code = populations.set_index('code')['pop_2014']
state_population = state_data['state'].map(population_by_code)
# Codes with no matching row in population.csv map to NaN and stay NaN in `new`.
state_data['new'] = (state_data['complaints'] / state_population) * 1000
print(state_data)

   state  complaints       new
0     CA       81700  2.105534
1     FL       53673  2.698044
2     TX       41352  1.534001
3     NY       38266  1.937889
4     GA       24548  2.431135
..   ...         ...       ...
57    MH          27       NaN
58    MP          19       NaN
59    AS          17       NaN
60    PW           9       NaN
61    AA           9       NaN

[62 rows x 3 columns]


In [40]:
# Adapted from https://datascience.stackexchange.com/questions/9616/how-to-create-us-state-choropleth-map

# Three-stop colour scale running from a pale lilac to a deep purple.
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.5, 'rgb(84,39,143)'],
    [1.0, 'rgb(42, 10, 84)'],
]

# Single choropleth trace: one region per state code, shaded by the
# per-1000 complaint rate stored in the `new` column.
data = [
    {
        'type': 'choropleth',
        'colorscale': scl,
        'autocolorscale': False,
        'locations': state_data['state'],
        'z': state_data['new'],
        'locationmode': 'USA-states',
        # White outlines between neighbouring states.
        'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
        'colorbar': {'title': "Complaints per 1000 people"},
    }
]

# Restrict the map to the United States using the Albers USA projection.
layout = {
    'title': 'Complaints by state (per 1000)',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
    },
}

fig = {'data': data, 'layout': layout}

# Render the figure inline; the call's return value is kept under `url`,
# the name used by the template this cell was adapted from.
url = py.iplot(fig)

From this map, we can see that the coastal states tend to have a much higher rate of complaints per capita.