In [29]:
# Import all the modules we'll be using for this tutorial

import numpy as np
import matplotlib.pylab as plt

# For manipulating and plotting the data
import pandas as pd

# For plotting the data a little prettier
import seaborn as sns


In [30]:
filename = 'gun_violence_data.csv'

# Bellis
#filename = 'gun_violence_school_shootings.csv'

df = pd.read_csv(filename)

df

Unnamed: 0,Incident ID,Incident Date,State,City Or County,Address,Business/Location Name,Victims Killed,Victims Injured,Suspects Killed,Suspects Injured,Suspects Arrested,Operations
0,2713018,"September 22, 2023",North Carolina,Rocky Mount,991 S Winstead Ave,Winstead Avenue Elementary,0,0,0,0,0,
1,2712953,"September 22, 2023",Tennessee,Nashville,3150 McGavock Pike,McGavock High School,0,0,0,0,1,
2,2712398,"September 21, 2023",Alabama,Anniston,4401 Saks Rd,Saks High School,0,0,0,0,1,
3,2712502,"September 21, 2023",Florida,Milton,5445 King Arthurs Way,Avalon Middle School,0,0,0,0,2,
4,2711513,"September 20, 2023",Maryland,Hyattsville (Landover),6501 Columbia Park Rd,Fairmont Heights High School,0,0,0,0,1,
...,...,...,...,...,...,...,...,...,...,...,...,...
1995,2180889,"December 2, 2021",Wisconsin,Madison,702 Pflaum Rd,La Follette High School,0,0,0,0,1,
1996,2180722,"December 2, 2021",Texas,Katy,23440 Cinco Ranch Blvd,Cinco Ranch HS,0,0,0,0,0,
1997,2182816,"December 2, 2021",North Carolina,Charlotte,1100 Eastway Dr,Garinger High School,0,0,0,0,0,
1998,2182146,"December 2, 2021",Texas,Houston,4400 Aldine Mail Rte Rd,MacArthur Senior High School,0,0,0,0,1,


In [31]:
incidents_by_state = df.groupby('State')['Incident ID'].count()
injuries_by_state = df.groupby('State')['Victims Injured'].sum()
fatalities_by_state = df.groupby('State')['Victims Killed'].sum()
print("Number of incidents in each state:")
print(incidents_by_state)

print("Number of injuries in each state:")
print(injuries_by_state)

print("Number of fatalities in each state:")
print(fatalities_by_state)

Number of incidents in each state:
State
Alabama                  58
Alaska                    5
Arizona                  39
Arkansas                 28
California               94
Colorado                 28
Connecticut               9
Delaware                  9
District of Columbia      9
Florida                 154
Georgia                 102
Hawaii                    1
Idaho                     2
Illinois                 58
Indiana                  66
Iowa                     13
Kansas                   17
Kentucky                 57
Louisiana                41
Maine                     4
Maryland                 65
Massachusetts            18
Michigan                 49
Minnesota                33
Mississippi              26
Missouri                 32
Montana                   2
Nebraska                 13
Nevada                   33
New Hampshire             2
New Jersey               10
New Mexico               23
New York                 43
North Carolina          132
North D

In [32]:
incidents_by_state.sort_values(ascending=False)

State
Texas                   175
Florida                 154
North Carolina          132
Ohio                    102
Georgia                 102
South Carolina           96
California               94
Virginia                 91
Tennessee                86
Indiana                  66
Maryland                 65
Illinois                 58
Alabama                  58
Kentucky                 57
Pennsylvania             55
Michigan                 49
New York                 43
Louisiana                41
Arizona                  39
Wisconsin                33
Minnesota                33
Nevada                   33
Missouri                 32
Colorado                 28
Arkansas                 28
Mississippi              26
Oklahoma                 24
New Mexico               23
Washington               23
Massachusetts            18
Utah                     17
Kansas                   17
Nebraska                 13
Iowa                     13
New Jersey               10
District of Co

# Population file

In [33]:
filename = 'population_dataset.csv'
dfpop = pd.read_csv(filename)

dfpop.sample(3)

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,NAME,ESTIMATESBASE2020,POPESTIMATE2020,POPESTIMATE2021,POPESTIMATE2022,POPESTIMATE2023,...,RNATURALCHG2023,RINTERNATIONALMIG2021,RINTERNATIONALMIG2022,RINTERNATIONALMIG2023,RDOMESTICMIG2021,RDOMESTICMIG2022,RDOMESTICMIG2023,RNETMIG2021,RNETMIG2022,RNETMIG2023
53,40,1,1,44,Rhode Island,1097371,1096444,1097092,1093842,1095962,...,-0.390903,0.977417,2.430927,5.276271,0.332796,-4.4319,-2.944556,1.310213,-2.000973,2.331716
36,40,2,3,26,Michigan,10077674,10070627,10038117,10033281,10037261,...,-0.401584,0.700591,1.869925,2.273681,-2.971941,-0.855745,-1.49981,-2.27135,1.014179,0.77387
58,40,4,8,49,Utah,3271614,3283982,3339284,3381236,3417734,...,7.294046,0.620238,1.625469,3.025458,8.957514,4.107718,0.393589,9.577752,5.733187,3.419047


In [34]:
grouped = dfpop.groupby('REGION')

grouped.groups.keys()

grouped.get_group('1')['NAME']

1     Northeast Region
2          New England
3      Middle Atlantic
20         Connecticut
33               Maine
35       Massachusetts
43       New Hampshire
44          New Jersey
46            New York
52        Pennsylvania
53        Rhode Island
59             Vermont
Name: NAME, dtype: object

*Italics!*

In [35]:
mask = dfpop['STATE'] > 0

sns.relplot(data=dfpop[mask], x='NAME', y='POPESTIMATE2023', height=4, aspect=2)

plt.xticks(rotation=90);


In [36]:
state = 'Alabama'
x = incidents_by_state[state]
y = dfpop[dfpop['NAME']==state]['POPESTIMATE2023'].values[0]
print(x)
print(y)

58
5108468


In [50]:
total_victims = df["Victims Killed"].sum() 
total_victims

98

In [53]:
population_sum = dfpop["POPESTIMATE2021"].sum()
population_sum

1331458601

In [55]:
dfpop['STATE'] = dfpop['STATE'].astype(str)

merged_df = pd.merge(df, dfpop, left_on="State", right_on="STATE")

percentage_involved = (total_victims / population_sum) * 100
percentage_involved

7.360349013209761e-06

# Gun ownership laws

In [37]:
filename = 'TL-354-State-Level Estimates of Household Firearm Ownership.xlsx'

dfgo = pd.read_excel(filename, sheet_name='State-Level Data & Factor Score')

dfgo

Unnamed: 0,FIP,Year,STATE,HFR,2,universl,permit,Fem_FS_S,Male_FS_S,BRFSS,GALLUP,GSS,PEW,HuntLic,GunsAmmo,BackChk,PewQChng,BS1,BS2,BS3
0,1,1980,Alabama,0.608,0.031,0,0,0.824324,0.833795,-9.0,0.55395,0.583632,-9.000000,0.291102,-0.509164,-9.000000,0,0.0,0.0,0.0
1,1,1981,Alabama,0.597,0.047,0,0,0.692308,0.831126,-9.0,-9.00000,-9.000000,-9.000000,0.294962,-0.618954,-9.000000,0,1.0,0.0,0.0
2,1,1982,Alabama,0.661,0.036,0,0,0.771739,0.821429,-9.0,-9.00000,0.655196,-9.000000,0.290545,-0.526692,-9.000000,0,2.0,0.0,0.0
3,1,1983,Alabama,0.586,0.038,0,0,0.688172,0.819277,-9.0,0.61144,-9.000000,-9.000000,0.284983,-0.713227,-9.000000,0,3.0,0.0,0.0
4,1,1984,Alabama,0.624,0.036,0,0,0.710000,0.775956,-9.0,-9.00000,0.626933,-9.000000,0.281622,-0.733305,-9.000000,0,4.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1845,56,2012,Wyoming,0.597,0.029,0,0,0.375000,0.647482,-9.0,0.66206,-9.000000,0.622224,0.545627,2.973924,1.562481,0,12.0,12.0,8.0
1846,56,2013,Wyoming,0.613,0.032,0,0,0.529412,0.714286,-9.0,-9.00000,-9.000000,0.685266,0.542697,3.066252,1.638737,1,12.0,12.0,9.0
1847,56,2014,Wyoming,0.608,0.044,0,0,0.583333,0.666667,-9.0,-9.00000,-9.000000,-9.000000,0.538761,3.092854,1.603822,1,12.0,12.0,10.0
1848,56,2015,Wyoming,0.549,0.032,0,0,0.393939,0.661290,-9.0,-9.00000,-9.000000,0.611531,0.544356,2.910414,1.348323,1,12.0,12.0,11.0


In [38]:
dfgo['Year'].unique()

array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015, 2016])