In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# plotly
# import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
# Initialise Plotly's notebook integration before any figure is displayed.
init_notebook_mode(connected=True)
import plotly.graph_objs as go

# MinMaxScaler is used below to rescale the per-borough metrics (air quality,
# crime rate) before they are combined.
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Lookup table for the five boroughs, one row per borough.
# `boro_short2` is the code matched against ARREST_BORO in the arrests data;
# `lat`/`lon` are presumably a representative point per borough (TODO confirm).
borough_mapping = pd.DataFrame(
    [
        ("Manhattan", 1, "M", "M", 40.776676, -73.971321),
        ("Bronx", 2, "X", "B", 40.837048, -73.865433),
        ("Brooklyn", 3, "B", "K", 40.650002, -73.949997),
        ("Queens", 4, "Q", "Q", 40.742054, -73.769417),
        ("Staten Island", 5, "R", "S", 40.579021, -74.151535),
    ],
    columns=["borough_name", "boro_code", "boro_short1", "boro_short2", "lat", "lon"],
)
borough_mapping

Unnamed: 0,borough_name,boro_code,boro_short1,boro_short2,lat,lon
0,Manhattan,1,M,M,40.776676,-73.971321
1,Bronx,2,X,B,40.837048,-73.865433
2,Brooklyn,3,B,K,40.650002,-73.949997
3,Queens,4,Q,Q,40.742054,-73.769417
4,Staten Island,5,R,S,40.579021,-74.151535


In [3]:
# Load the borough population table, keep the label columns and the decade
# columns, unpivot the decades into (Year, Population) rows, then keep only
# the 2020 rows for the individual boroughs (the 'NYC Total' row is dropped).
nycPopulation = (
    pd.read_csv("../data/New_York_City_Population_by_Borough__1950_-_2040.csv")
    .reindex(columns=[
        'Age Group', 'Borough',
        '1950', '1960', '1970', '1980', '1990',
        '2000', '2010', '2020', '2030', '2040',
    ])
    .melt(id_vars=['Age Group', 'Borough'], var_name='Year', value_name='Population')
    .loc[lambda long_form: long_form['Year'].eq('2020') & long_form['Borough'].ne('NYC Total')]
)
nycPopulation.head(10)

Unnamed: 0,Age Group,Borough,Year,Population
43,Total Population,Bronx,2020,1446788
44,Total Population,Brooklyn,2020,2648452
45,Total Population,Manhattan,2020,1638281
46,Total Population,Queens,2020,2330295
47,Total Population,Staten Island,2020,487155


In [4]:
# Boiler PM2.5 emissions per borough for the 2015 period, plus a min-max
# scaled version of the value in `air_scaled`.
nyc_air = pd.read_csv("../data/social/NYCgov_Air_Quality.csv")
nyc_air = nyc_air[nyc_air['Name'] == 'Boiler Emissions- Total PM2.5 Emissions']

# Select rows and columns in a single .loc call and take an explicit copy:
# adding a column to a frame obtained by chained selection from `nyc_air`
# raises pandas' SettingWithCopyWarning.
pm_by_borough = nyc_air.loc[
    (nyc_air['Geo Type Name'] == 'Borough') & (nyc_air['Time Period'] == '2015'),
    ['Geo Place Name', 'Data Value', 'Measure', 'Name'],
].copy()

scaler = MinMaxScaler()
# The scaler expects a 2-D (n_samples, 1) array; flatten the result back to
# 1-D before storing it as a column.
pm_by_borough['air_scaled'] = scaler.fit_transform(
    pm_by_borough['Data Value'].to_numpy().reshape(-1, 1)
).ravel()

pm_by_borough.head()

Unnamed: 0,Geo Place Name,Data Value,Measure,Name,air_scaled
6610,Manhattan,3.7,Number per km2,Boiler Emissions- Total PM2.5 Emissions,1.0
6611,Brooklyn,0.1,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.027027
6612,Bronx,0.9,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.243243
6613,Queens,0.2,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.054054
6614,Staten Island,0.0,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.0


In [5]:
# Load the 2022 arrest records and split ARREST_DATE ("month/day/year") into
# integer year / day / month columns.
nypd_arrests_2022 = pd.read_csv('../data/crime/arrests_2022.csv')
date = nypd_arrests_2022['ARREST_DATE'].str.split("/", n = 3, expand = True)
nypd_arrests_2022 = nypd_arrests_2022.assign(
    year=date[2].astype('int32'),
    day=date[1].astype('int32'),
    month=date[0].astype('int32'),
)

# Attach the borough lookup via the one-letter ARREST_BORO code.
nypd_arrests_2022 = nypd_arrests_2022.merge(
    borough_mapping, left_on='ARREST_BORO', right_on='boro_short2'
)

# Count arrest rows per borough (stored as `crimes_committed`), join the
# borough population, and drop the population table's bookkeeping columns.
nypd_arrests_2022_grouped = (
    nypd_arrests_2022
    .groupby(['borough_name'], as_index=False)
    .agg(crimes_committed=('ARREST_BORO', 'count'))
    .merge(nycPopulation, left_on='borough_name', right_on='Borough')
    .drop(columns=['Age Group', 'Borough', 'Year'])
)
nypd_arrests_2022_grouped.head()

Unnamed: 0,borough_name,crimes_committed,Population
0,Bronx,44084,1446788
1,Brooklyn,50961,2648452
2,Manhattan,46807,1638281
3,Queens,39547,2330295
4,Staten Island,8375,487155


In [6]:
# Rate per 100,000 residents: recorded count / population * 100,000.
nypd_arrests_2022_grouped['crime_rate'] = (
    nypd_arrests_2022_grouped['crimes_committed']
    .div(nypd_arrests_2022_grouped['Population'])
    .mul(100000)
)

# Min-max scale the rate; the scaler takes a single-column 2-D array.
scaler = MinMaxScaler()
nypd_arrests_2022_grouped['crime_rate_scaled'] = scaler.fit_transform(
    nypd_arrests_2022_grouped[['crime_rate']].to_numpy()
)
nypd_arrests_2022_grouped.head()

Unnamed: 0,borough_name,crimes_committed,Population,crime_rate,crime_rate_scaled
0,Bronx,44084,1446788,3047.025549,1.0
1,Brooklyn,50961,2648452,1924.180616,0.168229
2,Manhattan,46807,1638281,2857.080074,0.859294
3,Queens,39547,2330295,1697.081271,0.0
4,Staten Island,8375,487155,1719.165358,0.016359


In [7]:
# Combine the per-borough crime figures with the per-borough air-quality
# figures, matching on the borough name.
radar_data = nypd_arrests_2022_grouped.merge(
    pm_by_borough, left_on='borough_name', right_on='Geo Place Name'
)

# Scaled metrics for the Bronx only.
radar_data_bronx = radar_data.loc[
    radar_data['borough_name'] == 'Bronx',
    ['borough_name', 'crime_rate_scaled', 'air_scaled'],
]
radar_data.head()

Unnamed: 0,borough_name,crimes_committed,Population,crime_rate,crime_rate_scaled,Geo Place Name,Data Value,Measure,Name,air_scaled
0,Bronx,44084,1446788,3047.025549,1.0,Bronx,0.9,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.243243
1,Brooklyn,50961,2648452,1924.180616,0.168229,Brooklyn,0.1,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.027027
2,Manhattan,46807,1638281,2857.080074,0.859294,Manhattan,3.7,Number per km2,Boiler Emissions- Total PM2.5 Emissions,1.0
3,Queens,39547,2330295,1697.081271,0.0,Queens,0.2,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.054054
4,Staten Island,8375,487155,1719.165358,0.016359,Staten Island,0.0,Number per km2,Boiler Emissions- Total PM2.5 Emissions,0.0


In [8]:
# Borough-level and city-wide indicator tables. The city-wide table gets a
# 'borough' label of 'New York City' so both can be stacked into one frame
# with a fresh 0..n-1 index.
boro_indicators = pd.read_csv('../data/social/boro_cd_attributes.csv')
nyc_indicators = pd.read_csv('../data/social/city_cd_attributes.csv').assign(
    borough='New York City'
)
indicators = pd.concat([boro_indicators, nyc_indicators], ignore_index=True)
indicators.head(6)

Unnamed: 0,borough,under18_rate,moe_under18_rate,over65_rate,moe_over65_rate,lep_rate,moe_lep_rate,pct_hh_rent_burd,moe_hh_rent_burd,poverty_rate,pct_bach_deg,moe_bach_deg,unemployment,moe_unemployment,mean_commute,moe_mean_commute,pct_clean_strts,crime_count,crime_per_1000,moe_over65
0,Bronx,25.1,0.1,12.1,0.1,26.0,0.3,50.9,0.6,27.9,19.8,0.3,6.3,0.2,44.8,0.3,88.3,18859,13.1,
1,Brooklyn,23.1,,13.2,0.1,22.3,0.3,45.4,0.4,20.4,36.5,0.3,4.4,0.1,42.6,0.2,93.2,26514,10.2,
2,Queens,20.3,,14.8,,29.1,0.3,46.2,0.5,17.3,31.5,0.3,4.0,0.1,44.1,0.2,98.3,18526,8.1,
3,Manhattan,14.4,0.1,15.8,0.1,15.3,0.3,36.4,0.5,14.1,60.8,0.4,3.8,0.1,32.1,0.2,97.0,26271,16.1,
4,Staten Island,22.0,0.1,15.5,0.1,10.9,0.4,46.5,1.7,15.3,33.0,0.7,2.8,0.2,44.5,0.6,99.7,2308,4.9,
5,New York City,20.9,0.1,14.1,,22.8,0.1,44.2,0.3,19.7,37.4,0.2,4.4,0.1,41.2,0.1,95.0,92480,11.0,0.1


In [9]:
# Raw indicator column name -> human-readable label used on the charts.
legend = dict(
    under18_rate='Age under 18',
    over65_rate='Age 65 & Over',
    lep_rate='Limited English Proficiency',
    pct_hh_rent_burd='Rent Burdened',
    poverty_rate='Poverty Rate',
    unemployment='Unemployment Rate',
    crime_per_1000='Crime Rate',
)
# Relabel the indicator columns; columns not listed in `legend` keep their names.
indicators = indicators.rename(columns=legend)

In [10]:
# Indicators shown on the radar chart; each one becomes an angular axis.
categories = [
    'Age under 18',
    'Age 65 & Over',
    'Limited English Proficiency',
    'Rent Burdened',
    'Poverty Rate',
    'Unemployment Rate',
    'Crime Rate',
]

fig = go.Figure()

# One trace per row of `indicators`. The label and the values are both taken
# positionally (row by row) so they stay paired even if the frame's index is
# not 0..n-1; the previous mix of label lookup (`[i]`) and positional lookup
# (`.iloc[i]`) only agreed on a freshly reset index.
radar_values = indicators[categories].to_numpy()
for row_label, row_values in zip(indicators['borough'], radar_values):
    fig.add_trace(go.Scatterpolar(
        r=row_values,
        theta=categories,
        fill='toself',
        name=row_label,
        mode='markers',
    ))

fig.update_layout(
    showlegend=True,
    hovermode="x unified",
)

fig.show()

In [28]:
# Kept in sync with the radar chart's category list.
categories = [
    'Age under 18',
    'Age 65 & Over',
    'Limited English Proficiency',
    'Rent Burdened',
    'Poverty Rate',
    'Unemployment Rate',
    'Crime Rate',
]

# Indicator shown in this bar chart; also used as the chart title so the
# figure is labelled with what it actually plots.
bar_indicator = 'Age under 18'

fig = go.Figure()

# One bar per row of `indicators`, with the value printed on the bar.
fig.add_trace(go.Bar(
    y=indicators[bar_indicator],
    x=indicators['borough'],
    text=indicators[bar_indicator],
))

fig.update_layout(
    title=bar_indicator,
    xaxis_tickfont_size=14,
    yaxis=dict(
        # Axis title font is set through title.font; the `titlefont`
        # shorthand is deprecated and rejected by newer Plotly releases.
        title=dict(text='Percentage %', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    barmode='group',
    bargap=0.0, # gap between bars of adjacent location coordinates.
    bargroupgap=0.0, # gap between bars of the same location coordinate.
    showlegend=False,
)

fig.show()

In [29]:
# Sample click payload in the shape produced by clicking a point on the radar
# chart; `theta` carries the name of the clicked indicator.
clickData = {'points': [{'curveNumber': 5, 'pointNumber': 0, 'pointIndex': 0, 'r': 20.9, 'theta': 'Age under 18', 'bbox': {'x0': 569.56, 'x1': 575.56, 'y0': 1034, 'y1': 1040}}]}

# Start from an empty figure so the indicator bars are the only trace
# (previously an empty placeholder go.Bar() trace was added first).
fig_radar_bar = go.Figure()

selected_indicator = clickData['points'][0]['theta']
print("selected_indicator: ", selected_indicator)

# One bar per row of `indicators` for the clicked indicator.
fig_radar_bar.add_trace(go.Bar(
    y=indicators[selected_indicator],
    x=indicators['borough'],
    text=indicators[selected_indicator],
))

# NOTE(review): the y-axis label is fixed to 'Percentage %' for every
# indicator; 'Crime Rate' is presumably not a percentage -- confirm and
# adjust the label per indicator if needed.
fig_radar_bar.update_layout(
    title=selected_indicator,
    xaxis_tickfont_size=14,
    yaxis=dict(
        # Axis title font is set through title.font; the `titlefont`
        # shorthand is deprecated and rejected by newer Plotly releases.
        title=dict(text='Percentage %', font=dict(size=16)),
        tickfont=dict(size=14),
    ),
    barmode='group',
    bargap=0.0, # gap between bars of adjacent location coordinates.
    bargroupgap=0.0, # gap between bars of the same location coordinate.
    showlegend=False,
)
fig_radar_bar.show()

selected_indicator:  Age under 18
