In [40]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# plotly
# import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
init_notebook_mode(connected=True)
import plotly.graph_objs as go

from sklearn.preprocessing import MinMaxScaler

In [4]:
# Lookup table for the five NYC boroughs, one row per borough: display name,
# numeric borough code, two alternative single-letter codes, and a
# latitude/longitude pair for the borough.
borough_mapping = pd.DataFrame.from_records(
    [
        ("Manhattan", 1, "M", "M", 40.776676, -73.971321),
        ("Bronx", 2, "X", "B", 40.837048, -73.865433),
        ("Brooklyn", 3, "B", "K", 40.650002, -73.949997),
        ("Queens", 4, "Q", "Q", 40.742054, -73.769417),
        ("Staten Island", 5, "R", "S", 40.579021, -74.151535),
    ],
    columns=["borough_name", "boro_code", "boro_short1", "boro_short2", "lat", "lon"],
)
borough_mapping

Unnamed: 0,borough_name,boro_code,boro_short1,boro_short2,lat,lon
0,Manhattan,1,M,M,40.776676,-73.971321
1,Bronx,2,X,B,40.837048,-73.865433
2,Brooklyn,3,B,K,40.650002,-73.949997
3,Queens,4,Q,Q,40.742054,-73.769417
4,Staten Island,5,R,S,40.579021,-74.151535


In [26]:
# Load the borough population table, keep the identifying columns plus one
# column per decade, and reshape to long form: one row per
# (Age Group, Borough, Year) with the figure stored in 'Population'.
population_columns = ['Age Group', 'Borough', '1950', '1960', '1970', '1980', '1990', '2000', '2010', '2020', '2030', '2040']
nycPopulation = (
    pd.read_csv("../data/New_York_City_Population_by_Borough__1950_-_2040.csv")
    # reindex (rather than [...]) keeps the original semantics: a column that
    # is absent from the CSV becomes all-NaN instead of raising.
    .reindex(columns=population_columns)
    .melt(id_vars=['Age Group', 'Borough'], var_name='Year', value_name='Population')
)

# Keep only the 2020 figures for the individual boroughs (drop the
# 'NYC Total' rows). 'Year' holds the former column labels, so it is text.
is_2020 = nycPopulation['Year'] == '2020'
is_single_borough = nycPopulation['Borough'] != 'NYC Total'
nycPopulation = nycPopulation[is_2020 & is_single_borough]
nycPopulation.head(10)

Unnamed: 0,Age Group,Borough,Year,Population
43,Total Population,Bronx,2020,1446788
44,Total Population,Brooklyn,2020,2648452
45,Total Population,Manhattan,2020,1638281
46,Total Population,Queens,2020,2330295
47,Total Population,Staten Island,2020,487155


In [52]:
# Load the NYC air-quality indicators and keep only the boiler PM2.5
# emissions series.
nyc_air = pd.read_csv("../data/social/NYCgov_Air_Quality.csv")
nyc_air = nyc_air[nyc_air['Name'] == 'Boiler Emissions- Total PM2.5 Emissions']

# Borough-level rows for the '2015' time period, restricted to the columns
# used downstream. Selecting rows and columns in a single .loc call and taking
# an explicit .copy() gives an independent frame, so adding 'air_scaled' below
# does not raise SettingWithCopyWarning (which the chained
# nyc_air[mask][columns] form does on pandas without copy-on-write).
is_borough_2015 = (nyc_air['Geo Type Name'] == 'Borough') & (nyc_air['Time Period'] == '2015')
pm_by_borough = nyc_air.loc[
    is_borough_2015,
    ['Geo Place Name', 'Data Value', 'Measure', 'Name'],
].copy()

# Min-max scale 'Data Value' across the selected rows so the lowest value maps
# to 0 and the highest to 1. The scaler expects a 2-D (n_samples, 1) array;
# ravel() flattens its output back to one value per row.
scaler = MinMaxScaler()
pm_by_borough['air_scaled'] = scaler.fit_transform(
    pm_by_borough['Data Value'].values.reshape(-1, 1)
).ravel()

pm_by_borough.head()

Unnamed: 0,Geo Place Name,Data Value,Measure,Name,air_scaled
6610,Manhattan,3.7,Number per km2,Boiler Emissions- Total PM2.5 Emissions,1.0
6611,Brooklyn,0.1,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.027027
6612,Bronx,0.9,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.243243
6613,Queens,0.2,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.054054
6614,Staten Island,0.0,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.0


In [34]:
# Read the 2022 arrest records and split the ARREST_DATE text on "/" into its
# components: position 0 is stored as month, 1 as day, 2 as year.
nypd_arrests_2022 = pd.read_csv('../data/crime/arrests_2022.csv')
date = nypd_arrests_2022['ARREST_DATE'].str.split("/", n=3, expand=True)
for position, part_name in ((2, 'year'), (1, 'day'), (0, 'month')):
    nypd_arrests_2022[part_name] = date[position].astype('int32')

# Attach borough details by matching ARREST_BORO against the 'boro_short2'
# codes (default inner join: arrests with a code missing from the mapping
# are dropped).
nypd_arrests_2022 = nypd_arrests_2022.merge(
    borough_mapping, left_on='ARREST_BORO', right_on='boro_short2'
)

# Count arrest records per borough, then bring in that borough's population
# and discard the population table's bookkeeping columns.
nypd_arrests_2022_grouped = (
    nypd_arrests_2022
    .groupby(['borough_name'], as_index=False)
    .agg(crimes_committed=('ARREST_BORO', 'count'))
    .merge(nycPopulation, left_on='borough_name', right_on='Borough')
    .drop(columns=['Age Group', 'Borough', 'Year'])
)
nypd_arrests_2022_grouped.head()

Unnamed: 0,borough_name,crimes_committed,Population
0,Bronx,44084,1446788
1,Brooklyn,50961,2648452
2,Manhattan,46807,1638281
3,Queens,39547,2330295
4,Staten Island,8375,487155


In [50]:
# Rate per 100,000 residents: the per-borough count in 'crimes_committed'
# divided by 'Population', multiplied by 100,000.
per_resident = nypd_arrests_2022_grouped['crimes_committed'].div(nypd_arrests_2022_grouped['Population'])
nypd_arrests_2022_grouped['crime_rate'] = per_resident.mul(100000)

# Min-max scale the rate across boroughs (lowest -> 0, highest -> 1); the
# scaler needs a 2-D (n_samples, 1) array, hence the reshape.
scaler = MinMaxScaler()
crime_rate_2d = nypd_arrests_2022_grouped['crime_rate'].values.reshape(-1, 1)
nypd_arrests_2022_grouped['crime_rate_scaled'] = scaler.fit_transform(crime_rate_2d)
nypd_arrests_2022_grouped.head()

Unnamed: 0,borough_name,crimes_committed,Population,crime_rate,crime_rate_scaled
0,Bronx,44084,1446788,3047.025549,1.0
1,Brooklyn,50961,2648452,1924.180616,0.168229
2,Manhattan,46807,1638281,2857.080074,0.859294
3,Queens,39547,2330295,1697.081271,0.0
4,Staten Island,8375,487155,1719.165358,0.016359


In [55]:
# One row per borough combining the crime-rate columns with the PM2.5 columns,
# matched on borough name (default inner join).
radar_data = nypd_arrests_2022_grouped.merge(
    pm_by_borough, left_on='borough_name', right_on='Geo Place Name'
)

# The Bronx row alone, reduced to the borough name and its two scaled indicators.
radar_data_bronx = radar_data.loc[
    radar_data['borough_name'] == 'Bronx',
    ['borough_name', 'crime_rate_scaled', 'air_scaled'],
]
radar_data.head()

Unnamed: 0,borough_name,crimes_committed,Population,crime_rate,crime_rate_scaled,Geo Place Name,Data Value,Measure,Name,air_scaled
0,Bronx,44084,1446788,3047.025549,1.0,Bronx,0.9,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.243243
1,Brooklyn,50961,2648452,1924.180616,0.168229,Brooklyn,0.1,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.027027
2,Manhattan,46807,1638281,2857.080074,0.859294,Manhattan,3.7,Number per km2,Boiler Emissions- Total PM2.5 Emissions,1.0
3,Queens,39547,2330295,1697.081271,0.0,Queens,0.2,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.054054
4,Staten Island,8375,487155,1719.165358,0.016359,Staten Island,0.0,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.0


In [56]:
# Radar chart comparing boroughs on the min-max scaled indicators held in
# `radar_data`.
#
# Each axis label is tied to the radar_data column that supplies its value, so
# a label can never be paired with another indicator's number (previously the
# Bronx air_scaled value 0.243243 was plotted under 'Crime Rate', and the
# remaining points were hard-coded placeholders).
# TODO: add entries here (e.g. poverty, education, traffic, housing) once their
# scaled columns have been merged into radar_data. With only two axes the
# "radar" degenerates to a line per borough.
radar_metrics = {
    'Crime Rate': 'crime_rate_scaled',
    'Air Pollution (PM2.5)': 'air_scaled',
}
categories = list(radar_metrics)
metric_columns = list(radar_metrics.values())

# Boroughs to draw, one trace each.
boroughs_to_plot = ['Bronx', 'Brooklyn']
radar_by_borough = radar_data.set_index('borough_name')

fig = go.Figure()

for borough in boroughs_to_plot:
    fig.add_trace(go.Scatterpolar(
        r=radar_by_borough.loc[borough, metric_columns].tolist(),
        theta=categories,
        fill='toself',
        name=borough,
    ))

fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            # Every plotted indicator is min-max scaled, so values lie in [0, 1].
            range=[0, 1],
        ),
    ),
    showlegend=True,
)

fig.show()