# CDC WONDER: Gun deaths

#### Import Python tools

In [1]:
%load_ext lab_black

In [256]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np
import us

In [3]:
alt.themes.register("stiles", altstiles.theme)
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [5]:
today = pd.to_datetime("today").strftime("%Y-%m-%d")

---

## Read data

#### Years

In [6]:
years_df = pd.read_csv(
    "data/raw/gun_deaths_usa_wonder_2018-2021.txt",
    sep="\t",
    skipfooter=39,
    engine="python",
)[["Year Code", "Deaths", "Population", "Crude Rate"]]

In [7]:
years_df.columns = (
    years_df.columns.str.lower()
    .str.replace(" ", "_")
    .str.replace("crude_", "")
    .str.replace("_code", "")
)

In [8]:
years_df

Unnamed: 0,year,deaths,population,rate
0,2018,39740,327167434,12.1
1,2019,39707,328239523,12.1
2,2020,45222,329484123,13.7
3,2021,48832,329484123,14.8


#### Make the dataframe wide and export

In [18]:
years_df.pivot_table(columns="year", values="rate").to_csv(
    "data/processed/gun_deaths_years_dw_wide.csv", index=False
)

In [19]:
years_df[["year", "rate"]].to_csv("data/processed/gun_deaths_years_dw.csv", index=False)

---

In [10]:
intent_df = pd.read_csv(
    "data/raw/gun_deaths_usa_intent_wonder_2018-2021.txt",
    sep="\t",
    skipfooter=41,
    engine="python",
)[["Year Code", "UCD - Injury Intent", "Deaths", "Population", "Crude Rate"]]

In [11]:
intent_df.columns = (
    intent_df.columns.str.lower()
    .str.replace(" ", "_")
    .str.replace("crude_", "")
    .str.replace("_code", "")
    .str.replace("ucd_-_injury_intent", "intent")
)

In [12]:
intent_df["nu_intent"] = intent_df["intent"]

In [13]:
intent_df.loc[
    intent_df["intent"].isin(
        ["Unintentional", "Undetermined ", "Legal Intervention / Operations of War"]
    ),
    "nu_intent",
] = "Other"

In [14]:
intent_grouped = intent_df.groupby(["year", "nu_intent"])["deaths"].sum().reset_index()

In [15]:
intent_grouped.pivot_table(
    columns="nu_intent", values="deaths", index="year"
).reset_index()[["year", "Suicide", "Homicide", "Other"]].to_csv(
    "data/processed/gun_deaths_intent_dw.csv", index=False
)

In [16]:
intent_grouped.pivot_table(
    columns="nu_intent", values="deaths", index="year"
).reset_index()[["year", "Suicide", "Homicide", "Other"]]

nu_intent,year,Suicide,Homicide,Other
0,2018,24432,13958,1350
1,2019,23941,14414,1352
2,2020,24292,19384,1546
3,2021,26322,20966,1544


---

#### Race

In [172]:
race_df = pd.read_csv(
    "data/raw/gun_deaths_race_wonder_2018-2021.txt",
    sep="\t",
    engine="python",
)[
    [
        "Year Code",
        "Single Race 6",
        "Deaths",
        "Population",
        "Crude Rate",
    ]
]

In [173]:
race_df.head()

Unnamed: 0,Year Code,Single Race 6,Deaths,Population,Crude Rate
0,2021.0,American Indian or Alaska Native,504.0,4292990.0,11.7
1,2021.0,Asian,588.0,20012278.0,2.9
2,2021.0,Black or African American,15262.0,44531112.0,34.3
3,2021.0,Native Hawaiian or Other Pacific Islander,79.0,843693.0,9.4
4,2021.0,White,30750.0,250309724.0,12.3


In [174]:
race_df.columns = (
    race_df.columns.str.lower()
    .str.replace(" ", "_")
    .str.replace("crude_", "")
    .str.replace("_code", "")
    .str.replace("single_race_6", "race")
)

In [185]:
race_df_pivot = (
    race_df.pivot_table(index="race", values=["deaths", "population", "rate"])
    .reset_index()
    .round(2)
)

In [186]:
race_df_pivot["Population %"] = (
    (race_df_pivot["population"] / race_df_pivot["population"].sum()) * 100
).round(2)

In [187]:
race_df_pivot["Deaths %"] = (
    (race_df_pivot["deaths"] / race_df_pivot["deaths"].sum()) * 100
).round(2)

In [188]:
race_df_pivot.sort_values("Population %", ascending=False).head(3)

Unnamed: 0,race,deaths,population,rate,Population %,Deaths %
5,White,30750.0,250309724.0,12.3,75.97,64.3
2,Black or African American,15262.0,44531112.0,34.3,13.52,31.91
1,Asian,588.0,20012278.0,2.9,6.07,1.23


In [189]:
race_df_pivot["race"] = race_df_pivot["race"].str.replace(
    "Black or African American", "African American"
)

In [196]:
race_df_pivot

Unnamed: 0,race,deaths,population,rate,Population %,Deaths %
0,American Indian or Alaska Native,504.0,4292990.0,11.7,1.3,1.05
1,Asian,588.0,20012278.0,2.9,6.07,1.23
2,African American,15262.0,44531112.0,34.3,13.52,31.91
3,More than one race,640.0,9494326.0,6.7,2.88,1.34
4,Native Hawaiian or Other Pacific Islander,79.0,843693.0,9.4,0.26,0.17
5,White,30750.0,250309724.0,12.3,75.97,64.3


In [195]:
race_df_pivot.sort_values("Population %", ascending=False).head(4).to_csv(
    "data/processed/cdc_gun_deaths_rate_race_2021.csv", index=False
)

---

#### Long-term trend

In [213]:
trend_df = pd.read_csv("data/raw/wisqars_gun_deaths-1981-2021.csv", dtype={"year": str})

In [214]:
suicides = trend_df[(trend_df["type"] == "Suicides")]

In [215]:
suicides[suicides["rate"] == suicides["rate"].max()]

Unnamed: 0,year,deaths,population,rate,type
162,2021,26322,329484123,8.0,Suicides


In [216]:
homicides = trend_df[(trend_df["type"] == "Homicides")]

In [217]:
homicides[homicides["rate"] == homicides["rate"].max()]

Unnamed: 0,year,deaths,population,rate,type
50,1993,18253,259918595,7.02,Homicides


In [218]:
all_violence = trend_df[(trend_df["type"] == "All violence")]
all_violence[all_violence["rate"] == all_violence["rate"].max()]

Unnamed: 0,year,deaths,population,rate,type
160,2021,47823,329484123,14.5,All violence


In [219]:
trend_df_pivot = trend_df.pivot_table(
    index="year", columns="type", values="rate"
).reset_index()

In [220]:
trend_df_pivot.head()

type,year,All intents,All violence,Homicides,Suicides
0,1981,14.84,13.73,6.58,7.03
1,1982,14.23,13.24,5.97,7.15
2,1983,13.3,12.36,5.15,7.1
3,1984,13.29,12.37,5.01,7.26
4,1985,13.27,12.37,4.97,7.3


In [221]:
trend_df.pivot_table(index="year", columns="type", values="rate").reset_index().to_csv(
    "data/processed/cdc_suicide_homicide_rate_1981-2021.csv", index=False
)

---

In [223]:
trend_df_all_pivot = trend_df.pivot_table(
    index="year", columns="type", values="deaths"
).reset_index()

In [225]:
trend_df_all_pivot["Other"] = (
    trend_df_all_pivot["All intents"] - trend_df_all_pivot["All violence"]
)

In [233]:
trend_df_all_pivot[
    ["year", "All intents", "All violence", "Suicides", "Homicides", "Other"]
].tail(10).to_csv(
    "data/processed/cdc_suicide_homicide_rate_1981-2021_types.csv", index=False
)

In [315]:
trend_df_all_pivot[
    ["year", "All intents", "All violence", "Suicides", "Homicides", "Other"]
].tail()

type,year,All intents,All violence,Suicides,Homicides,Other
36,2017,39773,38949,23854,14542,824
37,2018,39740,38929,24432,13958,811
38,2019,39707,38850,23941,14414,857
39,2020,45222,44286,24292,19384,936
40,2021,48832,47823,26322,20966,1009


---

#### States

In [282]:
states_src = pd.read_csv(
    "data/raw/wonder_gun_deaths_states_2021.txt",
    sep="\t",
    dtype={"Occurrence State Code": str, "Year Code": str},
)[
    [
        "Year Code",
        "Occurrence State",
        "Occurrence State Code",
        "Deaths",
    ]
].dropna()

In [283]:
states_src.rename(
    columns={
        "Year Code": "year",
        "Occurrence State": "state",
        "Occurrence State Code": "fips",
        "Deaths": "deaths",
    },
    inplace=True,
)

#### AP names

In [259]:
fips_to_ap = us.states.mapping("fips", "ap_abbr")

In [284]:
states_src["apname"] = states_src["fips"].map(fips_to_ap)

In [285]:
states_src.head()

Unnamed: 0,year,state,fips,deaths,apname
0,2021,Alabama,1,1313.0,Ala.
1,2021,Alaska,2,181.0,Alaska
2,2021,Arizona,4,1365.0,Ariz.
3,2021,Arkansas,5,698.0,Ark.
4,2021,California,6,3580.0,Calif.


#### Population

In [286]:
states_pop = pd.read_csv(
    "../usa/data/processed/states_population.csv", names=["state", "pop"], header=0
)

#### Merge

In [310]:
states_df = pd.merge(states_src, states_pop, on="state")

In [311]:
states_df["rate"] = ((states_df["deaths"] / states_df["pop"]) * 100000).round(2)

In [312]:
states_df = states_df[states_df["state"] != "District of Columbia"].sort_values(
    "rate", ascending=False
)

In [313]:
states_df["rank"] = states_df["rate"].rank(method="max")

In [314]:
states_df

Unnamed: 0,year,state,fips,deaths,apname,pop,rate,rank
24,2021,Mississippi,28,916.0,Miss.,2981835,30.72,50.0
18,2021,Louisiana,22,1311.0,La.,4664616,28.11,49.0
31,2021,New Mexico,35,586.0,N.M.,2097021,27.94,48.0
0,2021,Alabama,1,1313.0,Ala.,4893186,26.83,47.0
26,2021,Montana,30,279.0,Mont.,1061705,26.28,46.0
50,2021,Wyoming,56,150.0,Wyo.,581348,25.8,45.0
1,2021,Alaska,2,181.0,Alaska,736990,24.56,44.0
42,2021,Tennessee,47,1653.0,Tenn.,6772268,24.41,43.0
25,2021,Missouri,29,1446.0,Mo.,6124160,23.61,42.0
3,2021,Arkansas,5,698.0,Ark.,3011873,23.17,41.0


In [292]:
states_df.to_csv("data/processed/states_gun_deaths_wonder.csv", index=False)