In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import altair as alt
import squarify
import geopandas as gpd
import json
from collections import defaultdict
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
data_2022 = pd.read_csv("Data/2022.csv", usecols=['INCIDENT_NUMBER', 'DISTRICT', 'CATEGORY'], dtype={'INCIDENT_NUMBER':str, 'DISTRICT':str, 'CATEGORY':str})
rox_2022 = data_2022.loc[data_2022['DISTRICT'] == 'B2']
fen_2022 = data_2022.loc[data_2022['DISTRICT'] == 'D4']
downtown_2022 = data_2022.loc[data_2022['DISTRICT'] == 'A1']
df_rox_2022 = rox_2022.groupby(['CATEGORY'])['INCIDENT_NUMBER'].count()
df_rox_2022 = df_rox_2022.sort_values(ascending=False)
df_rox_2022 = df_rox_2022.head(10)
rox_values_2022_sorted = squarify.normalize_sizes(df_rox_2022, 1000, 1000)
df_fen_2022 = fen_2022.groupby(['CATEGORY'])['INCIDENT_NUMBER'].count()
df_fen_2022_sorted = df_fen_2022.sort_values(ascending=False)
df_fen_2022 = df_fen_2022.head(10)
fenway_values_2022_sorted = squarify.normalize_sizes(df_fen_2022, 1000, 1000)
df_downtown_2022 = downtown_2022.groupby(['CATEGORY'])['INCIDENT_NUMBER'].count()
df_downtown_2022_sorted = df_downtown_2022.sort_values(ascending=False)
df_downtown_2022 = df_downtown_2022.head(10)
downtown_values_2022_sorted = squarify.normalize_sizes(df_downtown_2022, 1000, 1000)

In [4]:
rox_output_json = pd.DataFrame(squarify.padded_squarify(sizes=rox_values_2022_sorted, x=0, y=0, dx=1000, dy=1000))
rox_output_json.insert(0, "Incident Count", df_rox_2022.array)
rox_output_json.insert(0, "Category", df_rox_2022.keys())
rox_output_json["x2"] = rox_output_json["x"] + rox_output_json["dx"]
rox_output_json["y2"] = rox_output_json["y"] + rox_output_json["dy"]

fen_output_json = pd.DataFrame(squarify.padded_squarify(sizes=fenway_values_2022_sorted, x=0, y=0, dx=1000, dy=1000))
fen_output_json.insert(0, "Incident Count", df_fen_2022.array)
fen_output_json.insert(0, "Category", df_fen_2022.keys())
fen_output_json["x2"] = fen_output_json["x"] + fen_output_json["dx"]
fen_output_json["y2"] = fen_output_json["y"] + fen_output_json["dy"]

downtown_output_json = pd.DataFrame(squarify.padded_squarify(sizes=downtown_values_2022_sorted, x=0, y=0, dx=1000, dy=1000))
downtown_output_json.insert(0, "Incident Count", df_downtown_2022.array)
downtown_output_json.insert(0, "Category", df_downtown_2022.keys())
downtown_output_json["x2"] = downtown_output_json["x"] + downtown_output_json["dx"]
downtown_output_json["y2"] = downtown_output_json["y"] + downtown_output_json["dy"]


# Treemap

### Roxbury

In [10]:
roxbury_treemap = alt.Chart(rox_output_json).mark_rect().encode(
    x=alt.X('x:Q', axis=None),
    x2='x2:Q',
    y=alt.Y('y:Q', axis=None),
    y2='y2:Q',
    color=alt.Color('Incident Count:Q', scale=alt.Scale(scheme="blues")),
    tooltip=['Category:N', 'Incident Count:Q']
).properties(
    title='Ten Most Numerous Criminal Incidents, Roxbury 2023',
    width=300,
    height=300
)
roxbury_treemap.save('treemap_roxbury.html');

### Fenway

In [11]:
fenway_treemap = alt.Chart(fen_output_json).mark_rect().encode(
    x=alt.X('x:Q', axis=None),
    x2='x2:Q',
    y=alt.Y('y:Q', axis=None),
    y2='y2:Q',
    color=alt.Color('Incident Count:Q', scale=alt.Scale(scheme="reds")),
    tooltip=['Category:N', 'Incident Count:Q']
).properties(
    title='Ten Most Numerous Criminal Incidents, Fenway 2023',
    width=300,
    height=300
)

fenway_treemap.save('treemap_fenway.html')
fenway_treemap

### Downtown

In [12]:
downtown_treemap = alt.Chart(downtown_output_json).mark_rect().encode(
    x=alt.X('x:Q', axis=None),
    x2='x2:Q',
    y=alt.Y('y:Q', axis=None),
    y2='y2:Q',
    color=alt.Color('Incident Count:Q', scale=alt.Scale(scheme="greens")),
    tooltip=['Category:N', 'Incident Count:Q']
).properties(
    title='Ten Most Numerous Criminal Incidents, Downtown 2023',
    width=300,
    height=300
)
downtown_treemap.save('treemap_downtown.html')
downtown_treemap

# Choropleth Map + Bar Chart

In [16]:
capital_plan = pd.read_excel('Data/capital_data.xlsx')
boston_map = gpd.read_file("Data/city-council-districts.geojson")

single_selection = alt.selection_multi()

## bugfix taken from https://github.com/altair-viz/altair/issues/1612
boston_map.crs = {'init' :'epsg:27700'}
boston_map = boston_map.to_crs({'init': 'epsg:4326'})
choro_json = json.loads(boston_map.to_json())
boston_chart = alt.Chart(boston_map).mark_geoshape(
    strokeWidth=1,
    fill='none',
    fillOpacity=0,
    stroke='black'
).encode().properties(
    width=800,
    height=800
)

neighborhoods_map = gpd.read_file("Data/boston-neighborhoods.geojson");
neighborhoods_map["Neighborhood"] = neighborhoods_map["Name"]
all_data = neighborhoods_map.merge(capital_plan, how = "left", on="Neighborhood")

all_data.crs = {'init' :'epsg:27700'}
all_data = all_data.to_crs({'init': 'epsg:4326'})
choro_json = json.loads(all_data.to_json())
neighborhoods_chart = alt.Chart(all_data
).mark_geoshape(
    stroke='black',
    strokeWidth=.2,
).encode(
    color=alt.condition(single_selection, 'Funding:Q', alt.value('lightgray'), scale=alt.Scale(scheme = 'plasma')),
    tooltip = ['Name:N', alt.Tooltip('Funding:Q', format="$,.2f")]
).properties(
    width=500,
    height=500
).add_selection(
    single_selection
);

base = alt.Chart(all_data).mark_bar().encode(
    x='Neighborhood',
    y='Funding:Q',
).encode(
    color=alt.condition(single_selection, 'Funding:Q', alt.value('lightgray'), scale=alt.Scale(scheme = 'plasma')),
    tooltip=alt.Tooltip("Funding", format="$,.2f"),
).properties(
    width=300,
    height=300
).add_selection(single_selection);

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [17]:
alt.hconcat(neighborhoods_chart, base).properties(title="Boston Districts by Total Budget").save('vis2.html')

# Line Chart + Grouped Bar Chart

In [12]:
crime = pd.read_excel("Data/crime_changes.xlsx")
crime = crime.head(63)
crime = crime.dropna(1, how = "any")

selector = alt.selection_interval(encodings=['x'])

base = alt.Chart(crime).mark_line().encode(
    x = "Year:O",
    y = alt.Y("Count", axis = alt.Axis(title = "Number of calls")),
    color = alt.Color("Offense"),
).properties(
    title = "Number of calls made to Boston Police in Roxbury by offense, 2015-2021",
    width = 400
            ).transform_filter(selector)

scatter = alt.Chart(crime).mark_point().encode(
    x = "Year:O",
    y = "Count",
    color = alt.Color("Offense", legend=alt.Legend(
        orient='left',
        direction='vertical',
        titleAnchor='middle'))
).properties(
    width = 400).add_selection(selector)

bar = alt.Chart(crime, title = "Percentage change in offense type year-to-year").mark_bar().encode(
    x = "Year:O",
    y = alt.Y("Year-to-year percentage change", axis = alt.Axis(title = "Year-to-year percentage change (%)"), scale = alt.Scale(domain=(-100, 500))),
    column = "Offense",
    color = "Offense",
    tooltip = ["Year", "Year-to-year percentage change"]
).transform_filter(selector).properties(
    width = 68)


alt.vconcat(base + scatter, bar).save('vis3.html')

  crime = crime.dropna(1, how = "any")
