# Mass shooting frequency

### Import Python tools and Jupyter configuration

In [1]:
# Load the lab_black IPython extension for this kernel session
# (presumably used to auto-format code cells with Black — confirm it is installed).
%load_ext lab_black

In [6]:
import datetime as dt

import altair as alt
import altair_stiles as altstiles
import geopandas as gpd
import numpy as np
import pandas as pd
import us
from vega_datasets import data

In [60]:
# NOTE(review): this cell plots the vega_datasets "la_riots" sample, which looks
# unrelated to the mass-shooting analysis below — confirm whether it should stay.
# `alt` and `data` come from the notebook's import cell rather than being
# re-imported here.

# Point data; the encodings below read its longitude, latitude and gender columns.
df = data.la_riots()

# TopoJSON fetched over the network; the "county" feature is used as the basemap.
n = alt.topo_feature(
    "https://gist.githubusercontent.com/irisslee/70039051188dac8f64e14182b5a459a9/raw/2412c45551cff577f7b10604ca523bd3f4dd31d3/countytopo.json",
    "county",
)

# Basemap layer: carries its own data (the topo feature), drawn in light gray.
LAbasemap = (
    alt.Chart(n)
    .mark_geoshape(fill="lightgray", stroke="white")
    .properties(width=400, height=400)
    .project("mercator")
)

# Point layer: created without data on purpose — alt.layer(..., data=df) below
# supplies it.
points = (
    alt.Chart()
    .mark_circle()
    .encode(
        longitude="longitude:Q",
        latitude="latitude:Q",
        size=alt.value(15),
        color="gender:N",
    )
)

# Stack the two layers and draw one small-multiple map per gender value.
alt.layer(LAbasemap, points, data=df).facet("gender:N")

In [3]:
# Register the altair_stiles theme under the name "stiles" and make it the
# active Altair theme for every chart rendered after this cell.
# NOTE(review): the saved output below reads ThemeRegistry.enable('grid'), which
# does not match the "stiles" name enabled here — the output looks stale; re-run to confirm.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('grid')

In [4]:
# Notebook-wide pandas display settings: show up to 100 columns and 1000 rows,
# and never truncate the text inside a cell (max_colwidth=None).
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [5]:
# Today's date as a YYYY-MM-DD string. `dt` is the standard-library datetime
# module (imported as `dt` in the notebook's import cell); date.isoformat()
# produces the same text as strftime("%Y-%m-%d").
today = dt.date.today().isoformat()

---

In [9]:
# Load the processed incident file. incident_id is read as text and
# incident_date is parsed into datetimes; the address and state columns are
# discarded, and when an incident_id appears more than once only its last row
# is kept.
incident_src = pd.read_csv(
    "data/processed/mass_shootings_involving_deaths.csv",
    dtype={"incident_id": str},
    parse_dates=["incident_date"],
)
incident_src = incident_src.drop(columns=["address", "state"]).drop_duplicates(
    subset=["incident_id"], keep="last"
)

In [37]:
# Keep incidents with at least three people killed that are dated after
# 2013-01-01, as an independent copy so later cells can add columns safely.
# NOTE(review): the date comparison is strict, so rows dated exactly
# 2013-01-01 are excluded — confirm that is intended.
mass_killings = incident_src.loc[
    incident_src["no_killed"].ge(3) & incident_src["incident_date"].gt("2013-01-01")
].copy()

In [38]:
# Derive calendar parts from incident_date: month name, numeric year and
# weekday name, added as the month, year and day columns in that order.
mass_killings = mass_killings.assign(
    month=lambda frame: frame["incident_date"].dt.month_name(),
    year=lambda frame: frame["incident_date"].dt.year,
    day=lambda frame: frame["incident_date"].dt.day_name(),
)

In [41]:
# Yearly summary: number of incidents (count) plus summed killed, injured and
# total figures, newest year first.
#
# Aggregations are given as strings through named aggregation rather than the
# builtin `sum`: passing the builtin to .agg() is deprecated in recent pandas
# and emits a FutureWarning. Naming the outputs here also makes the trailing
# rename of incident_id -> count unnecessary.
mass_killings_grouped = (
    mass_killings.groupby("year")
    .agg(
        count=("incident_id", "size"),
        no_killed=("no_killed", "sum"),
        no_injured=("no_injured", "sum"),
        total=("total", "sum"),
    )
    .round()
    .reset_index()
    # year is unique after grouping, so it alone fixes the row order.
    .sort_values("year", ascending=False)
)

In [49]:
# Average number of people killed per incident in each year, to two decimals.
# "count" is read with brackets because DataFrame.count is also a method name.
mass_killings_grouped = mass_killings_grouped.assign(
    deaths_per_incident=lambda yearly: (yearly["no_killed"] / yearly["count"]).round(2)
)

In [50]:
# Display the yearly summary table (one row per year, so showing it whole is fine).
mass_killings_grouped

Unnamed: 0,year,count,no_killed,no_injured,total,deaths_per_incident
9,2022,18,99,47,146,5.5
8,2021,77,306,173,479,3.97
7,2020,47,185,63,248,3.94
6,2019,54,255,172,427,4.72
5,2018,48,217,110,327,4.52
4,2017,45,255,517,772,5.67
3,2016,54,260,142,402,4.81
2,2015,46,212,112,324,4.61
1,2014,40,155,65,220,3.88
0,2013,38,161,54,215,4.24


In [58]:
# Bar chart of total deaths per year, with explicit axis labels and a title so
# the figure can be read on its own instead of showing raw column names.
# NOTE(review): the most recent year in the table may be a partial year — confirm
# before comparing it with earlier bars.
alt.Chart(mass_killings_grouped).mark_bar().encode(
    x=alt.X("year:O", title="Year"),
    y=alt.Y("no_killed:Q", title="People killed"),
).properties(
    width=650,
    title="People killed in incidents with three or more deaths, by year",
)

In [59]:
# Bar chart of the average number of deaths per incident in each year, with
# explicit axis labels and a title so the figure can be read on its own instead
# of showing raw column names.
alt.Chart(mass_killings_grouped).mark_bar().encode(
    x=alt.X("year:O", title="Year"),
    y=alt.Y("deaths_per_incident:Q", title="Deaths per incident"),
).properties(
    width=650,
    title="Average deaths per incident, by year",
)