# Shootings

#### Load Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_stiles as altstiles
import glob
from pathlib import Path
import numpy as np

In [3]:
# Register the theme object exposed by altair_stiles under the name "stiles",
# then enable that theme by name.
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("stiles")

ThemeRegistry.enable('grid')

In [4]:
# Let pandas display up to 1,000 columns and 1,000 rows, and lift Altair's
# maximum-rows limit on chart data.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

In [5]:
# Load the raw incident export. Incident IDs are kept as strings and the
# incident date is parsed into datetimes at read time.
src = pd.read_csv(
    filepath_or_buffer="data/raw/gun-violence-archive-children.csv",
    parse_dates=["Incident Date"],
    dtype={"Incident ID": str},
)

In [6]:
# Preview the first rows of the raw data with its original column names.
src.head()

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,# Killed,# Injured,Operations
0,2310278,2022-05-24,Texas,Uvalde,715 Old Carrizo Rd,13,2,
1,2308129,2022-05-21,Maine,Wells,97 Crediford Rd,1,2,
2,2307608,2022-05-20,North Carolina,Roanoke Rapids,100 block of Delta Dr,1,0,
3,2309465,2022-05-20,Minnesota,Mound,Shoreline Dr and Bartlett Blvd,1,0,
4,2306421,2022-05-19,Texas,Houston,9721 Cypresswood Dr,4,0,


In [7]:
# Normalise column names to snake_case: spaces become underscores, "#" becomes
# "no", and everything is lower-cased (e.g. "# Killed" -> "no_killed").
src.columns = [
    label.replace(" ", "_").replace("#", "no").lower() for label in src.columns
]

In [8]:
# Preview again to confirm the renamed columns.
src.head()

Unnamed: 0,incident_id,incident_date,state,city_or_county,address,no_killed,no_injured,operations
0,2310278,2022-05-24,Texas,Uvalde,715 Old Carrizo Rd,13,2,
1,2308129,2022-05-21,Maine,Wells,97 Crediford Rd,1,2,
2,2307608,2022-05-20,North Carolina,Roanoke Rapids,100 block of Delta Dr,1,0,
3,2309465,2022-05-20,Minnesota,Mound,Shoreline Dr and Bartlett Blvd,1,0,
4,2306421,2022-05-19,Texas,Houston,9721 Cypresswood Dr,4,0,


---

In [9]:
# Number of incident rows loaded.
len(src)

1824

In [10]:
# Sum of the no_killed column across every incident row.
src["no_killed"].sum()

3003

In [11]:
# Show any incident rows whose state is Rhode Island.
src.loc[src["state"].eq("Rhode Island")]

Unnamed: 0,incident_id,incident_date,state,city_or_county,address,no_killed,no_injured,operations


In [12]:
# Sum of the no_injured column across every incident row.
src["no_injured"].sum()

479

In [13]:
# Derive string labels used for grouping: a four-digit year ("YYYY") and a
# month label ("MM-YYYY"). The date column is converted once and reused.
incident_dates = pd.to_datetime(src["incident_date"])
src["year"] = incident_dates.dt.strftime("%Y")
src["month_year"] = incident_dates.dt.strftime("%m-%Y")

---

In [14]:
# Fetch the child-population page and keep the first HTML table found on it.
# NOTE(review): this depends on a live URL and on table order — confirm the
# first table is still the state population table when re-running.
pop = pd.read_html(
    io="https://www.childrensdefense.org/policy/resources/soac-2020-child-population-tables/"
)[0]

In [15]:
# Keep the first three columns and drop rows 0, 1 and 53, then renumber rows.
# NOTE(review): the dropped rows are presumably header and footer/total rows of
# the scraped table — verify against the source page.
# This rebinds `pop`, so run it only once per fresh load of the table.
pop = (
    pop.loc[:, [0, 1, 2]]
    .drop(index=[0, 1, 53])
    .reset_index(drop=True)
)

In [16]:
# Give the three retained columns descriptive names; "state" is the key used
# later when merging with the per-state incident counts.
pop.columns = ["state", "under_five", "under_18"]

In [17]:
# Preview the first ten rows of the cleaned population table.
pop.head(10)

Unnamed: 0,state,under_five,under_18
0,Alabama,293203,1089840
1,Alaska,53115,183816
2,Arizona,435936,1642657
3,Arkansas,190343,703180
4,California,2441300,8989955
5,Colorado,336854,1265235
6,Connecticut,183134,735193
7,Delaware,54811,203616
8,District of Columbia,45617,127494
9,Florida,1143183,4229081


In [18]:
# Number of rows in the population table.
len(pop)

51

---

In [19]:
# Count incidents per state (one row per state), largest count first.
state = (
    src.groupby("state")["incident_id"]
    .count()
    .reset_index(name="count")
    .sort_values("count", ascending=False)
)

In [20]:
# States with the most incidents.
state.head()

Unnamed: 0,state,count
42,Texas,176
9,Florida,105
35,Ohio,96
25,Missouri,93
10,Georgia,91


In [21]:
# Number of distinct state values that appear in the incident data.
len(state)

50

In [22]:
# Attach population figures to the per-state counts. This is an inner join on
# "state", so only states present in both tables are kept.
states = pd.merge(state, pop, how="inner", on="state")

#### AP-style state abbreviations

In [23]:
# Lookup from full state name to its Associated Press (AP) style abbreviation.
# Eight names are never abbreviated in AP style: Alaska, Hawaii, Idaho, Iowa,
# Maine, Ohio, Texas and Utah. District of Columbia is included so that every
# row of the population table can be labelled.
ap_states = {
    "Alabama": "Ala.",
    "Alaska": "Alaska",
    "Arizona": "Ariz.",
    "Arkansas": "Ark.",
    "California": "Calif.",
    "Colorado": "Colo.",
    "Connecticut": "Conn.",
    "Delaware": "Del.",
    "District of Columbia": "D.C.",
    "Florida": "Fla.",
    "Georgia": "Ga.",
    "Hawaii": "Hawaii",
    "Idaho": "Idaho",
    "Illinois": "Ill.",
    "Indiana": "Ind.",
    "Iowa": "Iowa",
    "Kansas": "Kan.",
    "Kentucky": "Ky.",
    "Louisiana": "La.",
    "Maine": "Maine",
    "Maryland": "Md.",
    "Massachusetts": "Mass.",
    "Michigan": "Mich.",
    "Minnesota": "Minn.",
    "Mississippi": "Miss.",
    "Missouri": "Mo.",
    "Montana": "Mont.",
    "Nebraska": "Neb.",
    "Nevada": "Nev.",
    "New Hampshire": "N.H.",
    "New Jersey": "N.J.",
    "New Mexico": "N.M.",
    "New York": "N.Y.",
    "North Carolina": "N.C.",
    "North Dakota": "N.D.",
    "Ohio": "Ohio",
    "Oklahoma": "Okla.",
    "Oregon": "Ore.",
    "Pennsylvania": "Pa.",
    "Rhode Island": "R.I.",
    "South Carolina": "S.C.",
    "South Dakota": "S.D.",
    "Tennessee": "Tenn.",
    "Texas": "Texas",
    "Utah": "Utah",
    "Vermont": "Vt.",
    "Virginia": "Va.",
    "Washington": "Wash.",
    "West Virginia": "W.Va.",
    "Wisconsin": "Wis.",
    "Wyoming": "Wyo.",
}

In [24]:
# Look up each state's abbreviation in ap_states; any state name that is not a
# key of the dictionary ends up as NaN in the new column.
states["ap_state"] = states["state"].map(ap_states)

In [25]:
# Preview the merged table with the new ap_state column.
states.head()

Unnamed: 0,state,count,under_five,under_18,ap_state
0,Texas,176,2024126,7398099,Texas
1,Florida,105,1143183,4229081,Fla.
2,Ohio,96,694789,2593325,Ohio
3,Missouri,93,372713,1376830,Mo.
4,Georgia,91,657414,2505751,Ga.


In [26]:
# Incidents per 100,000 residents under 18, rounded to two decimals.
# `count` is the number of incident rows per state, not a victim total.
under_18_population = states["under_18"].astype(int)
states["rate_per_100k_kids"] = (
    states["count"].div(under_18_population).mul(100000).round(2)
)

In [27]:
# Write the per-state table to the processed-data folder without the index.
states.to_csv(
    path_or_buf="data/processed/children_killed_states_2013-2022.csv",
    index=False,
)

In [28]:
# The five states with the highest incident rate per 100,000 children.
states.sort_values("rate_per_100k_kids", ascending=False).head()

Unnamed: 0,state,count,under_five,under_18,ap_state,rate_per_100k_kids
33,Alaska,14,53115,183816,Alaska,7.62
7,Louisiana,76,307019,1095916,La.,6.93
16,Mississippi,48,185477,706141,Miss.,6.8
3,Missouri,93,372713,1376830,Mo.,6.75
9,South Carolina,72,292391,1105945,S.C.,6.51


---

In [29]:
# Count incidents per calendar year, ordered from earliest to latest year.
years = (
    src.groupby("year")["incident_id"]
    .count()
    .reset_index(name="count")
    .sort_values("year", ascending=True)
)

In [30]:
# Keep only years after 2013. "year" holds four-digit strings, so the string
# comparison orders the same way as a numeric one.
# Take an explicit copy: a column is assigned to `years` afterwards, and
# assigning to a filtered slice raises pandas' SettingWithCopyWarning.
years = years[years["year"] > "2013"].copy()

In [31]:
# Trailing three-year mean of the yearly counts, rounded to a whole number.
# The first two rows have fewer than three years of data and are NaN.
three_year_mean = years["count"].rolling(window=3).mean()
years["rolling_three_year"] = three_year_mean.round()

In [32]:
# Show the yearly counts alongside the three-year rolling mean.
years

Unnamed: 0,year,count,rolling_three_year
2,2014,170,
3,2015,177,
4,2016,205,184.0
5,2017,206,196.0
6,2018,179,197.0
7,2019,191,192.0
8,2020,281,217.0
9,2021,291,254.0
10,2022,116,229.0


In [33]:
# Write the yearly table to the processed-data folder without the index.
years.to_csv(path_or_buf="data/processed/children_killed_year.csv", index=False)

---

In [34]:
# Count incidents per calendar month, in chronological order.
# The "month_year" labels are "MM-YYYY" strings, so a plain string sort would
# order by month number first (01-2014, 01-2015, ...). Sorting on the parsed
# date keeps the label format but puts the rows in true time order, which any
# rolling window over this table relies on.
month_year = (
    src.groupby(["month_year"])
    .agg({"incident_id": "count"})
    .reset_index()
    .rename(columns={"incident_id": "count"})
    .sort_values(
        "month_year",
        key=lambda labels: pd.to_datetime(labels, format="%m-%Y"),
        ascending=True,
    )
)

In [35]:
# Mean of the current and previous eleven rows of monthly counts, rounded to a
# whole number. The window counts rows, not calendar months: a month with no
# incidents has no row, so the window can span more than twelve months.
twelve_row_mean = month_year["count"].rolling(window=12).mean()
month_year["rolling_12_month"] = twelve_row_mean.round()

In [36]:
# Write the monthly table to the processed-data folder without the index.
month_year.to_csv(
    path_or_buf="data/processed/children_killed_month_year.csv",
    index=False,
)

---