# Assignment 2

## Formalia:

Please read the [assignment overview page](https://github.com/suneman/socialdata2025/wiki/Assignments) carefully before proceeding. This page contains information about formatting (including formats etc.), group sizes, and many other aspects of handing in the assignment. 

_If you fail to follow these simple instructions, it will negatively impact your grade!_

**Due date and time**: 
 - The assignment is due on Monday March 31st, 2025 at 23:55. 
 - Hand in via DTU Learn. 
 - You should simply hand in the link to the github page with your short data story.

## A2: A short data story

This assignment is to create a short data-story based on the work we've done in class so far. See **Exercises Week 8, Part 2** for full details on how the story should be constructed.

In [147]:
# packages
import utils
from bokeh.plotting import show, output_file
from bokeh.models import ColumnDataSource, FactorRange
import matplotlib.pyplot as plt
from urllib.request import urlopen
import json
import plotly.express as px
import plotly.io as pio

In [148]:
# Load the focus-crime records, then keep only burglary rows outside 2025
# (2025 is presumably a partial year - confirm against the data source).
df = utils.get_df_focused_crimes()
is_burglary = df["Category"] == "BURGLARY"
not_2025 = df["Year"] != 2025
df_burglary = df[is_burglary & not_2025]

In [149]:
# Number of burglary incidents per year, ordered chronologically by year.
df_yrs = df_burglary.value_counts("Year").sort_index()

# Shared look for the static figure.
plt.style.use('ggplot')
plt.rcParams["font.family"] = "monospace"
plt.figure(figsize=(12, 6))  # Set figure size

# Series.plot draws onto the current figure and returns its Axes, which is
# then labelled explicitly instead of going through the pyplot state machine.
ax = df_yrs.plot(kind="bar", title="Burglary incidents reported in San Francisco\n2003 - 2024")
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("Number of Incidents", fontsize=12)
plt.show()





In [150]:
# Display the per-year burglary counts as a table.
df_yrs



In [151]:
# Fetch the SFPD GeoJSON from the course repository and parse it into `data`,
# which the choropleth map uses as its geometry.
with urlopen('https://raw.githubusercontent.com/suneman/socialdata2025/main/files/sfpd.geojson') as response:
    raw_geojson = response.read()
    data = json.loads(raw_geojson)

In [175]:
import os
from uuid import uuid4  # To generated a new random directory for each notebook

import plotly.io as pio
from plotly.io._base_renderers import IFrameRenderer

# Register an iframe-based plotly renderer that writes figure HTML into a
# freshly named directory (random UUID suffix) and loads plotly.js from the CDN,
# then make it the default renderer for subsequent `fig.show()` calls.
renderer_html_dir = os.path.join("../assets/notebooks", "html_" + str(uuid4()))
custom_renderer = IFrameRenderer(
    include_plotlyjs="cdn",
    html_directory=renderer_html_dir,
)
pio.renderers["custom"] = custom_renderer
pio.renderers.default = "custom"

In [152]:
# Order burglaries chronologically and keep 2019 onwards.
df_burglary_sorted = df_burglary.sort_values("FullDate")
from_2019 = df_burglary_sorted["Year"] >= 2019
df_aggregated = df_burglary_sorted[from_2019]

# Distinct years present in the filtered data, ascending (one slider step each).
years = sorted(df_aggregated["Year"].unique())

# 2019 is the reference year: count its burglaries per police district ...
baseline_rows = df_aggregated[df_aggregated["Year"] == 2019]
df_initial = baseline_rows.groupby('PdDistrict').size().reset_index(name='Count')
# ... and keep a district -> count lookup for the percentage-change calculation.
district_baseline = {
    district: count
    for district, count in zip(df_initial['PdDistrict'], df_initial['Count'])
}
# Function to calculate percentage change
def calculate_percentage_change(df, baseline):
    """Add a ``Percentage_Change`` column relative to per-district baseline counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``PdDistrict`` and ``Count``. The frame is
        modified in place (the new column is added to it).
    baseline : dict
        Maps district name -> baseline count. Districts missing from the
        mapping fall back to a baseline of 1.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``Percentage_Change`` computed as
        ``(Count - baseline) / baseline * 100``.

    Notes
    -----
    The calculation is vectorized, so an empty frame simply gets an empty
    column. A baseline of exactly 0 yields ``inf``/``NaN`` rather than raising.
    """
    # Look up each row's baseline; cast to float before filling so that
    # unknown districts (NaN after the lookup) take the fallback value 1.
    base = df["PdDistrict"].map(baseline).astype(float).fillna(1.0)
    df["Percentage_Change"] = (df["Count"] - base) / base * 100
    return df

# Initial view is the 2019 baseline year, which is 0% change by definition.
df_initial["Percentage_Change"] = 0
# Fixed color range so every year shares one scale; changes beyond +/-100%
# are drawn with the end colors of the scale.
range_vals = (-100, 100)

fig = px.choropleth_map(df_initial, geojson=data, locations='PdDistrict',
                           color='Percentage_Change', color_continuous_scale=px.colors.diverging.RdYlGn_r,
                           range_color=range_vals, map_style="carto-positron",
                           zoom=10.5, center={"lat": 37.7749, "lon": -122.4194},
                           opacity=0.5, labels={'Percentage_Change': "Percentage change in crime", 'PdDistrict': 'District'},
                           height=800, width=800)

# Add slider for year selection: one step per year, each swapping in that
# year's per-district percentage change relative to the 2019 counts.
steps = []
for year in years:
    df_year = df_aggregated[df_aggregated["Year"] == year].groupby('PdDistrict').size().reset_index(name='Count')
    df_year = calculate_percentage_change(df_year, district_baseline)

    steps.append(
        dict(
            method="update",
            # Plain lists (not pandas Series) so the step payload serializes
            # to JSON without relying on encoder special-casing.
            args=[{
                "z": [df_year['Percentage_Change'].tolist()],
                "locations": [df_year['PdDistrict'].tolist()],
            }],
            label=str(year)
        )
    )

sliders = [dict(
    active=0,  # first step is the first year, matching the initial figure
    currentvalue={"prefix": "Year: "},
    pad={"t": 50},
    steps=steps
)]

fig.update_layout(
    font_family="monospace",
    sliders=sliders,
    margin={"r": 10, "t": 100, "l": 10, "b": 10},
    title={
        # The baseline computed for this map is 2019, so the title names 2019.
        'text': "Burglaries in San Francisco's districts <br> Calculated as percentage change from 2019",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    }
)

# Label only the ends and the midpoint of the color bar with their meaning.
fig.update_coloraxes(
    colorbar=dict(
        title="% Change in Crime",
        tickvals=[-100, 0, 100],
        ticktext=["Decrease", "No Change", "Increase"]
    )
)

fig.show()
# Save the figure as a standalone HTML file
pio.write_html(fig, "sf-districts.html")




In [153]:
# Preview the first rows of the burglary subset.
df_burglary.head()



In [154]:
# Keep burglaries from 2019 through 2024 (both ends inclusive).
in_window = df_burglary['Year'].between(2019, 2024)
df_year = df_burglary[in_window]

# Count incidents per (year, district), then pivot districts into columns
# so that each row is one year.
counts_per_year_district = df_year.groupby(['Year', 'PdDistrict']).size()
df_grouped = counts_per_year_district.unstack()
df_grouped.head()



In [155]:
# Divide every district column by that district's total over all rows (years),
# turning raw counts into each year's share of the district's total.
district_totals = df_grouped.sum()
df_normalized = df_grouped.div(district_totals, axis="columns")
df_normalized.head()



In [156]:
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Legend
from bokeh.io import output_notebook
from bokeh.transform import dodge
from bokeh.palettes import Spectral10

In [157]:
# Copy the index (the year) into a regular trailing column named "year"
# so it can be referenced by name when plotting.
df_normalized = df_normalized.assign(year=df_normalized.index)
df_normalized




In [158]:
# Wrap the normalized table for Bokeh.
source = ColumnDataSource(df_normalized)

# Every column except the last one ("year") is a district series.
districts = df_normalized.columns[:-1].tolist()
# Year labels as strings, used as categorical x-axis factors.
years = [str(value) for value in df_normalized["year"]]
print(districts)
print(*years)

# Plot plan:
#   x-axis  -> the years, 2019 to 2024
#   y-axis  -> the normalized value for each district
#   legend  -> one toggle entry per district



In [173]:
from itertools import cycle

# Ensure the 'year' column is a string so it can serve as a categorical factor
df_normalized["year"] = df_normalized["year"].astype(str)

# Create the ColumnDataSource
source = ColumnDataSource(df_normalized)

# Take the x-axis factors from the same frame that feeds the source instead of
# the notebook-level `years`, which another cell rebinds to integer years.
year_factors = df_normalized["year"].tolist()

# Create the plot
p = figure(height=350, x_range=FactorRange(*year_factors),
           title="Burglaries in SF post corona - divided by districts", toolbar_location=None, tools="")

# Add legend (placed outside the plot area, on the right)
p.add_layout(Legend(), 'right')
p.legend.label_text_font = 'monospace'
p.legend.title_text_font = 'monospace'
p.legend.click_policy = 'mute'
p.legend.title = 'District'

# Add vbars for each district. The palette is cycled so that no district is
# silently dropped if there are more districts than the 10 palette colors.
# Every series starts muted with muted_alpha=0 (invisible); clicking a legend
# entry toggles that district's bars.
for district, color in zip(districts, cycle(Spectral10)):
    p.vbar(x='year', top=district, width=0.8, source=source,
            color=color,
            legend_label=district,
            alpha=0.6,
            muted=True,
            muted_alpha=0)

p.y_range.start = 0
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Year"
p.xaxis.major_label_text_font = 'monospace'
p.xaxis.axis_label_text_font = 'monospace'

p.outline_line_color = None
p.yaxis.axis_label = "(Normalized) crime count"
p.yaxis.major_label_text_font = 'monospace'
p.yaxis.axis_label_text_font = 'monospace'

p.title.text_font = 'monospace'

# Write the plot to a standalone HTML file and open it
output_file("bokeh_plot.html")
show(p)



