In [None]:
!pip install plotly==4.13.0
!pip install chart_studio
import datetime as dt
import os

import chart_studio
import chart_studio.plotly as py
import chart_studio.tools as tls
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from scipy.spatial.distance import cdist
from sklearn import metrics
from sklearn.cluster import KMeans

# Load the raw inputs: the Washington Post fatal-police-shootings table and the
# Census Bureau 2017 national population projections.
data = pd.read_csv('https://raw.githubusercontent.com/washingtonpost/data-police-shootings/master/fatal-police-shootings-data.csv')
pop = pd.read_csv('https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1_mid.csv')

# Only the origin code, race code and total population are used downstream.
pop_total = pop[['ORIGIN', 'RACE', 'TOTAL_POP']]

# Per-incident counter and a running count per race. The running count is
# computed before rows with a missing race are dropped.
data['count'] = 1
data['race_csum'] = data.groupby(['race'])['count'].cumsum()
# .copy() so the column assignments below write to an independent frame, not to
# a slice of the raw table.
data = data.dropna(subset=['race']).copy()
incident_dates = pd.DatetimeIndex(data['date'])
data['year'] = incident_dates.year
data['month'] = incident_dates.month

# Washington Post race letter -> Census RACE code.
# NOTE(review): the code meanings are assumed from the Census file layout listed
# in Works Cited (1 White alone, 2 Black alone, 3 AIAN alone, 4 Asian alone) --
# confirm against that document.
# The earlier version also listed ['N', 5]. That entry only duplicated the 'N'
# incidents, and one of the two copies was then thrown away by
# drop_duplicates('id'), so 'N' is now mapped to a single code.
race_key = [['W', 1], ['B', 2], ['A', 4], ['N', 3]]
race_df = pd.DataFrame(data=race_key, columns=['race', 'key'])
# Washington Post 'H' -> Census ORIGIN code (assumed 2 = Hispanic; confirm).
origin_key = [['H', 2]]
origin_df = pd.DataFrame(data=origin_key, columns=['origin', 'key'])

race_data_df = race_df.merge(data, on='race')
origin_data_df = origin_df.merge(data, left_on='origin', right_on='race')

# Population lookups: exactly one row per code, so the joins below are
# many-to-one instead of relying on drop_duplicates to pick a row afterwards.
# NOTE(review): assumes ORIGIN == 0 / RACE == 0 are the "all origins" /
# "all races" totals, and that the first matching row in file order is the
# intended one (presumably both sexes, earliest projection year) -- TODO confirm.
ALL_ORIGINS = 0
ALL_RACES = 0
race_pop = pop_total[pop_total['ORIGIN'] == ALL_ORIGINS].drop_duplicates(subset=['RACE'])
origin_pop = pop_total[pop_total['RACE'] == ALL_RACES].drop_duplicates(subset=['ORIGIN'])
assert set(race_df['key']) <= set(race_pop['RACE']), 'race code missing from population table'
assert set(origin_df['key']) <= set(origin_pop['ORIGIN']), 'origin code missing from population table'

pop_race_df = race_pop.merge(race_data_df, left_on='RACE', right_on='key')
pop_race_df = pop_race_df.drop_duplicates(subset=['id']).reset_index(drop=True)

# Bug fix: the Hispanic key is an ORIGIN code, so it must be joined on ORIGIN.
# Joining it on RACE (as before) attached the RACE == 2 population to every
# Hispanic incident and therefore used the wrong denominator for that group.
pop_origin_df = origin_pop.merge(origin_data_df, left_on='ORIGIN', right_on='key')
pop_origin_df = pop_origin_df.drop_duplicates(subset=['id']).reset_index(drop=True)
pop_origin_df = pop_origin_df.drop(columns=['origin'])

# pd.concat replaces DataFrame.append, which newer pandas releases no longer provide.
final = pd.concat([pop_race_df, pop_origin_df]).reset_index(drop=True)
final['race_sum_per_year'] = final.groupby(['race', 'year'])['count'].transform('sum')

# Human-readable group labels used by every chart.
race_emp = [['W', 'White alone'], ['B', 'Black alone'], ['A', 'Asian alone'], ['H', 'Hispanic origin'], ['N', 'Native alone']]
race_emp_df = pd.DataFrame(race_emp, columns=['race', 'Persons'])
final = final.merge(race_emp_df, on='race')

# One row per (group, year): incidents per one million people.
# Note that the same TOTAL_POP value is used for every year of a group.
final_race_year = (
    final[['race_sum_per_year', 'year', 'Persons', 'TOTAL_POP']]
    .drop_duplicates()
    .copy()
)
final_race_year['race_perc_per_year'] = (
    final_race_year['race_sum_per_year'] / final_race_year['TOTAL_POP'] * 1000000
).round(2)

# 2015 is left out of the yearly chart (the reason is not recorded here).
# Sorting by year keeps each line drawn left to right regardless of input order.
final_race_year = (
    final_race_year[final_race_year.year != 2015]
    .sort_values(['Persons', 'year'])
)

Collecting plotly==4.13.0
[?25l  Downloading https://files.pythonhosted.org/packages/4c/f3/93bc71d449828098efc7dda0a682937762d0c17f6140dcbc6fc6fa2a467d/plotly-4.13.0-py2.py3-none-any.whl (13.1MB)
[K     |████████████████████████████████| 13.1MB 305kB/s 
Installing collected packages: plotly
  Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-4.13.0
Collecting chart_studio
[?25l  Downloading https://files.pythonhosted.org/packages/ca/ce/330794a6b6ca4b9182c38fc69dd2a9cbff60fd49421cb8648ee5fee352dc/chart_studio-1.1.0-py3-none-any.whl (64kB)
[K     |████████████████████████████████| 71kB 3.8MB/s 
Installing collected packages: chart-studio
Successfully installed chart-studio-1.1.0


In [None]:
# Line chart of fatal police shootings per one million people, one trace per
# `Persons` group. category_orders fixes the trace order, and the colours in
# color_discrete_sequence are assigned in that order.
fig = px.line(final_race_year, x='year', y='race_perc_per_year', color='Persons',
              color_discrete_sequence=["#44D2EF", "#5C0A3F", "#ECDA69", "#E87B3F", "#338186"],
              category_orders={'Persons': ["Black alone", "Hispanic origin", "White alone", "Asian alone", "Native alone"]})
# hovertemplate=None drops the per-trace template generated by plotly express.
fig.update_traces(mode="markers+lines", hovertemplate=None)

fig.update_layout(
    hoverlabel=dict(
        font_size=12,
    ),
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=16,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showline=False,
        showticklabels=False,
    ),
    autosize=False,
    margin=dict(
        autoexpand=False,
        l=100,
        r=50,
        t=110,
    ),
    showlegend=False,
    plot_bgcolor='white'
)

# The figure carries no annotations: the title / subtitle / source annotations
# that used to be sketched here as commented-out code were never enabled, so an
# empty list is passed explicitly.
annotations = []

fig.update_layout(hovermode='x unified', annotations=annotations,
                  xaxis_title="", yaxis_title="Deaths/Population * 1M")
fig.show()

# Publish to Chart Studio. The API key is read from the environment so that it
# is never committed with the notebook; without it the upload is skipped.
username = os.environ.get('CHART_STUDIO_USERNAME', 'sebasp41')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if api_key:
    chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
    py.plot(fig, filename='police_fatalities', auto_open=True)
else:
    print('CHART_STUDIO_API_KEY is not set; skipping Chart Studio upload.')

# Last expression of the cell: the <iframe> embed snippet for the published chart.
tls.get_embed('https://plotly.com/~sebasp41/1/')

'<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~sebasp41/1.embed" height="525" width="100%"></iframe>'

In [None]:
# Optional Mapbox token, read from the environment instead of being committed
# with the notebook.
# NOTE(review): the 'white-bg' style used below presumably needs no token --
# confirm against the plotly mapbox documentation.
mapbox_key = os.environ.get('MAPBOX_ACCESS_TOKEN')

# 'YYYY-MM' label for every incident.
final['month_year'] = pd.to_datetime(final['date']).dt.strftime('%Y-%m')

# Scatter map of incidents, one trace per `Persons` group.
# The figure is named `fatalities_map` (it used to be `map`) so that the
# builtin map() is no longer shadowed for the rest of the notebook.
fatalities_map = px.scatter_mapbox(
    data_frame=final, lat="latitude", lon="longitude", color="Persons", zoom=2.5,
    color_discrete_sequence=["#44D2EF", "#5C0A3F", "#ECDA69", "#E87B3F", "#338186"],
    category_orders={'Persons': ["Black alone", "Hispanic origin", "White alone", "Asian alone", "Native alone"]},
    opacity=1, width=630, height=450,
    hover_name='name',
    hover_data={'latitude': False, 'longitude': False, 'age': True, 'city': True, 'state': True, 'year': True})


def _visibility_button(label, visible):
    """Build one dropdown entry that shows only the traces flagged in `visible`."""
    return dict(label=label,
                method='update',
                args=[{'visible': visible},
                      {'title': False,
                       'showlegend': False}])


# Dropdown label -> `Persons` value of the trace it isolates.
group_buttons = [
    ('Black persons only', 'Black alone'),
    ('Of Hispanic origin', 'Hispanic origin'),
    ('White persons only', 'White alone'),
    ('Asian persons only', 'Asian alone'),
    ('Native persons only', 'Native alone'),
]

# Visibility flags are derived from the traces actually present in the figure,
# so each button stays aligned with its trace even if a group has no rows.
trace_names = [trace.name for trace in fatalities_map.data]
buttons = [_visibility_button('All persons', [True] * len(trace_names))]
buttons += [
    _visibility_button(label, [name == persons for name in trace_names])
    for label, persons in group_buttons
    if persons in trace_names
]

layout_options = dict(
    mapbox_style="white-bg", showlegend=False, autosize=False,
    hoverlabel=dict(bgcolor="white", font_size=12),
    updatemenus=[go.layout.Updatemenu(
        active=0,
        x=.25,
        xanchor="center",
        y=1.25,
        yanchor="top",
        buttons=buttons,
    )],
)
if mapbox_key:
    layout_options['mapbox_accesstoken'] = mapbox_key
fatalities_map.update_layout(**layout_options)

fatalities_map.show()

# Publish to Chart Studio. The API key is read from the environment so that it
# is never committed with the notebook; without it the upload is skipped.
username = os.environ.get('CHART_STUDIO_USERNAME', 'sebasp41')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if api_key:
    chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
    print(py.plot(fatalities_map, filename='police_fatalities_map', auto_open=True))
else:
    print('CHART_STUDIO_API_KEY is not set; skipping Chart Studio upload.')


'<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~sebasp41/4.embed" height="525" width="100%"></iframe>'

In [None]:
# Elbow analysis: fit k-means on incident coordinates for a range of k and
# record how tightly the points sit around their nearest centroid.
final1 = final.dropna(subset=['latitude', 'longitude'])

x = list(final1.longitude)
y = list(final1.latitude)

# (n, 2) array of (longitude, latitude) pairs.
# Distances below are plain Euclidean distances in degrees.
X = np.array(list(zip(x, y))).reshape(len(x), 2)

distortions = []
inertias = []
mapping1 = {}   # k -> distortion (mean distance to the nearest centroid)
mapping2 = {}   # k -> inertia reported by the fitted model
K = range(1, 40)

# Fixed seed so the curve is identical on every Restart & Run All.
ELBOW_RANDOM_STATE = 42

for k in K:
    # Fit once per k; the model used to be fitted twice in a row, which only
    # doubled the runtime.
    kmeanModel = KMeans(n_clusters=k, random_state=ELBOW_RANDOM_STATE).fit(X)

    # Distance from every point to its closest centroid, averaged over points.
    nearest = np.min(cdist(X, kmeanModel.cluster_centers_, 'euclidean'), axis=1)
    distortion = nearest.sum() / X.shape[0]

    distortions.append(distortion)
    inertias.append(kmeanModel.inertia_)
    mapping1[k] = distortion
    mapping2[k] = kmeanModel.inertia_

for key, val in mapping1.items():
    print(str(key)+' : '+str(val))

# Materialise the items view so the constructor receives a plain list of pairs.
dist_df = pd.DataFrame(list(mapping1.items()), columns=['K', 'Distortion'])

In [None]:
# Elbow chart: distortion against the number of clusters K.
elb = px.line(dist_df, x='K', y='Distortion', color_discrete_sequence=["#44D2EF"], width=630, height=450)
# hovertemplate=None drops the per-trace template generated by plotly express.
elb.update_traces(mode="markers+lines", hovertemplate=None)

elb.update_layout(
    hoverlabel=dict(
        font_size=12,
    ),
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Arial',
            size=16,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showline=False,
        showticklabels=False,
    ),
    autosize=False,
    showlegend=False,
    plot_bgcolor='white'
)

elb.update_layout(hovermode='x unified', xaxis_title="Values of K", yaxis_title="Distortion")
elb.show()

# Publish to Chart Studio. The API key is read from the environment so that it
# is never committed with the notebook; without it the upload is skipped.
username = os.environ.get('CHART_STUDIO_USERNAME', 'sebasp41')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if api_key:
    chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
    # Print the URL returned by the upload so it still appears in the cell output.
    print(py.plot(elb, filename='elb', auto_open=True))
else:
    print('CHART_STUDIO_API_KEY is not set; skipping Chart Studio upload.')

'https://plotly.com/~sebasp41/94/'

In [None]:
# Cluster the incident coordinates and plot them coloured by cluster label.
# NOTE(review): 8 clusters is presumably the value read off the elbow chart -- confirm.
CLUSTER_COUNT = 8
# Fixed seed so the cluster labels are identical on every Restart & Run All.
CLUSTER_RANDOM_STATE = 42

# Always cluster on the two coordinate columns only. Taking the first two
# columns keeps this cell re-runnable: after a first run `X` already carries a
# 'k_means' column, which would otherwise be fed back in as a third feature.
coords = np.asarray(X)[:, :2]
X = pd.DataFrame(coords)   # column 0 = longitude, column 1 = latitude
kmeanModel = KMeans(n_clusters=CLUSTER_COUNT, random_state=CLUSTER_RANDOM_STATE)
kmeanModel.fit(coords)
X['k_means'] = kmeanModel.predict(coords)

ksc = px.scatter(X, x=0, y=1, color='k_means', template='simple_white', width=630, height=450)
# Hide the colour bar; the cluster label is only used to colour the points.
ksc.update_layout(coloraxis_showscale=False)

ksc.update_layout(
    hoverlabel=dict(
        font_size=12,
    ),
    xaxis=dict(
        showline=False,
        showgrid=False,
        showticklabels=False,
        linecolor='rgb(204, 204, 204)',
        linewidth=2,
        ticks='',
        tickfont=dict(
            family='Arial',
            size=16,
            color='rgb(82, 82, 82)',
        ),
    ),
    yaxis=dict(
        ticks="",
        showgrid=False,
        zeroline=False,
        showline=False,
        showticklabels=False,
    ),
    autosize=False,
    showlegend=False,
    plot_bgcolor='white'
)

ksc.update_layout(hovermode='x unified', xaxis_title="Longitude", yaxis_title="Latitude")

ksc.show()

# Publish to Chart Studio. The API key is read from the environment so that it
# is never committed with the notebook; without it the upload is skipped.
username = os.environ.get('CHART_STUDIO_USERNAME', 'sebasp41')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if api_key:
    chart_studio.tools.set_credentials_file(username=username, api_key=api_key)
    # Print the URL returned by the upload so it still appears in the cell output.
    print(py.plot(ksc, filename='ksc', auto_open=True))
else:
    print('CHART_STUDIO_API_KEY is not set; skipping Chart Studio upload.')

'https://plotly.com/~sebasp41/96/'

Works Cited

Tate, Julie, et al. “Fatal Force.” data-police-shootings, https://raw.githubusercontent.com/washingtonpost/data-police-shootings/master/fatal-police-shootings-data.csv. Accessed daily.

U.S. Census Bureau, Population Division. (2020). 2017 National Population Projections Datasets. https://www2.census.gov/programs-surveys/popproj/datasets/2017/2017-popproj/np2017_d1_mid.csv. Retrieved from https://www.census.gov/data/datasets/2017/demo/popproj/2017-popproj.html

U.S. Census Bureau, Population Division. (2020). 2017 National Population Projections Datasets: file layout for np2017_d1. https://www2.census.gov/programs-surveys/popproj/technical-documentation/file-layouts/2017/np2017_d1.pdf. Retrieved from https://www.census.gov/data/datasets/2017/demo/popproj/2017-popproj.html

In [None]:


# Embed snippets for the four published charts. Each one is printed, because a
# cell only displays its last bare expression and the first three snippets
# would otherwise be discarded.
embed_urls = [
    'https://plotly.com/~sebasp41/1/',
    'https://plotly.com/~sebasp41/4/',
    'https://plotly.com/~sebasp41/94/',
    'https://plotly.com/~sebasp41/96/',
]
for embed_url in embed_urls:
    print(tls.get_embed(embed_url))

'<iframe id="igraph" scrolling="no" style="border:none;" seamless="seamless" src="https://plotly.com/~sebasp41/96.embed" height="525" width="100%"></iframe>'