In [1]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import re

### Gun Ownership & Licenses

source: https://www.cbsnews.com/pictures/gun-ownership-rates-by-state/

In [2]:
# Download the CBS News picture article that lists gun ownership by state
url = "https://www.cbsnews.com/pictures/gun-ownership-rates-by-state/"
# A timeout keeps the cell from hanging indefinitely if the server never answers
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page in the next cells
response.raise_for_status()

In [3]:
# Parse the downloaded page body with Python's built-in HTML parser
soup = bs(response.text, features='html.parser')

In [4]:
# Pull every element tagged with the article-section class string; these are the
# sections of the article that hold the needed data
country= soup.find_all(class_="content__body content__section")

In [5]:
# Drop the introduction section of the article so only the per-state sections remain.
# Named state_sections (not `states`) because `states` is later bound to the
# state-abbreviation DataFrame; sharing the name would let a re-run of this cell
# silently replace that DataFrame with a list of HTML sections.
state_sections = country[1:]

In [6]:
# Create empty lists to hold the scraped values, one entry per state section
name = []
rank = []
count = []
percent = []

# Matches a percentage such as "14.8%", "9.5%", "45%" or "100%".
# Raw string with an escaped dot: the previous pattern used invalid "\d" escapes in a
# normal string, let "." match any character, and truncated values that did not have
# exactly two digits before the decimal point (e.g. "9.5%" was captured as ".5%").
pct_pattern = re.compile(r'\d+(?:\.\d+)?%')

for x in range(2, 53):
    # find each section that holds state data - easiest to locate by the numbered html id
    state = soup.find(class_="content__body content__section", id="content__section-" + str(x))

    # headline text is "<ranking>. <state name>"; split it into its two parts
    namerank = state.find('h2', class_="content__section-headline").text
    nr = namerank.split('. ')
    nr1 = nr[1]
    nr0 = nr[0]

    # first bolded text is "<label>: <number of gun licenses>"; keep what follows the label
    lisc = state.find('strong').text
    lisc2 = re.split(': ', lisc, 1)
    lisc3 = lisc2[1]

    # second paragraph includes the estimated % of households with guns in the state
    pct = state.find_all('p')[1].text
    # only take the percentages from this paragraph (list of every match found)
    pct2 = pct_pattern.findall(pct)

    # append to the lists above
    name.append(nr1)
    rank.append(nr0)
    count.append(lisc3)
    percent.append(pct2)

In [7]:
# Assemble the scraped lists into a single frame, one row per state section
gundata = pd.DataFrame(
    {
        'State': name,
        'gun_license_ct': count,
        'gun_house_pct': percent,
    }
)
# Each gun_house_pct cell is the list returned by re.findall (expected to hold a
# single item), so explode it to unwrap the value
gundata['gun_house_pct'] = gundata['gun_house_pct'].explode()
gundata.head()

Unnamed: 0,State,gun_license_ct,gun_house_pct
0,Rhode Island,4887,14.8%
1,Delaware,6092,34.4%
2,Hawaii,9280,14.9%
3,Vermont,9451,50.5%
4,Maine,21396,46.8%


In [8]:
# Remove the thousands separators, then store the license counts as integers
license_text = gundata['gun_license_ct'].str.replace(',', '')
gundata['gun_license_ct'] = license_text.astype(int)
# Remove the percent signs, then store the household percentages as floats
pct_text = gundata['gun_house_pct'].str.replace('%', '')
gundata['gun_house_pct'] = pct_text.astype(float)


In [9]:
# Rank each measure in ascending order: the lowest value receives rank 1, ties share
# the lowest rank (method='min') and missing values are ranked last
for col in ('gun_license_ct', 'gun_house_pct'):
    gundata[col + '_rank'] = gundata[col].rank(method='min', na_option='bottom').astype(int)

In [10]:
# Preview the first rows to check the numeric columns and their new rank columns
gundata.head()

Unnamed: 0,State,gun_license_ct,gun_house_pct,gun_license_ct_rank,gun_house_pct_rank
0,Rhode Island,4887,14.8,1,3
1,Delaware,6092,34.4,2,10
2,Hawaii,9280,14.9,3,4
3,Vermont,9451,50.5,4,35
4,Maine,21396,46.8,5,28


### Gun Mortality 2020

source: https://www.cdc.gov/nchs/pressroom/sosmap/firearm_mortality/firearm.htm

In [11]:
# Load the firearm-mortality export; the preview shows YEAR, STATE, RATE, DEATHS and URL columns
# NOTE(review): the file name is spelled "mortaility" - presumably it matches the file on disk; confirm before renaming
gun_mort=pd.read_csv('csv_s/gun_mortaility.csv')
gun_mort.head()

Unnamed: 0,YEAR,STATE,RATE,DEATHS,URL
0,2020,AL,23.6,1141,/nchs/pressroom/states/alabama/al.htm
1,2020,AK,23.5,175,/nchs/pressroom/states/alaska/ak.htm
2,2020,AZ,16.7,1265,/nchs/pressroom/states/arizona/az.htm
3,2020,AR,22.6,673,/nchs/pressroom/states/arkansas/ar.htm
4,2020,CA,8.5,3449,/nchs/pressroom/states/california/ca.htm


In [12]:
# Keep only the 2020 rows, discard the columns that are not needed and give the
# remaining measures descriptive names
is_2020 = gun_mort['YEAR'] == 2020
gun_mort20 = (
    gun_mort.loc[is_2020]
    .drop(columns=['YEAR', 'URL'])
    .rename(columns={'RATE': 'gun_mort_rate', 'DEATHS': 'gun_deaths'})
)
# Rank both measures in ascending order (lowest value = rank 1, ties share the lowest rank)
for rank_col, value_col in (('gun_mort_rank', 'gun_mort_rate'), ('gun_deaths_rank', 'gun_deaths')):
    gun_mort20[rank_col] = gun_mort20[value_col].rank(method='min', na_option='bottom').astype(int)

In [13]:
# Preview the 2020 mortality rows together with their rank columns
gun_mort20.head()

Unnamed: 0,STATE,gun_mort_rate,gun_deaths,gun_mort_rank,gun_deaths_rank
0,AL,23.6,1141,46,35
1,AK,23.5,175,45,10
2,AZ,16.7,1265,31,39
3,AR,22.6,673,43,25
4,CA,8.5,3449,7,49


In [14]:
# Load the state lookup table (State, Abbv and Region columns) so the abbreviations
# used in the mortality data can be exchanged for full state names
states=pd.read_csv('csv_s/state_abbv.csv')
states.head()

Unnamed: 0,State,Abbv,Region
0,Alabama,AL,South
1,Alaska,AK,West
2,Arizona,AZ,West
3,Arkansas,AR,South
4,California,CA,West


In [15]:
# Attach the full state name and region to each mortality row by matching the
# abbreviation, keeping unmatched rows from either side (outer join)
mort = pd.merge(gun_mort20, states, how='outer', left_on='STATE', right_on='Abbv')
# Both abbreviation columns are redundant once the full name is present
mort = mort.drop(columns=['STATE', 'Abbv'])
mort.head()

Unnamed: 0,gun_mort_rate,gun_deaths,gun_mort_rank,gun_deaths_rank,State,Region
0,23.6,1141.0,46.0,35.0,Alabama,South
1,23.5,175.0,45.0,10.0,Alaska,West
2,16.7,1265.0,31.0,39.0,Arizona,West
3,22.6,673.0,43.0,25.0,Arkansas,South
4,8.5,3449.0,7.0,49.0,California,West


### Merge License/Ownership with Mortality

In [16]:
# Combine license/ownership data with mortality data on the shared State column,
# keeping states that appear in only one of the two tables (outer join)
gun2 = pd.merge(gundata, mort, how='outer', on='State')
gun2.head()

Unnamed: 0,State,gun_license_ct,gun_house_pct,gun_license_ct_rank,gun_house_pct_rank,gun_mort_rate,gun_deaths,gun_mort_rank,gun_deaths_rank,Region
0,Rhode Island,4887,14.8,1,3,5.1,54.0,4.0,2.0,Northeast
1,Delaware,6092,34.4,2,10,14.4,135.0,26.0,7.0,Northeast
2,Hawaii,9280,14.9,3,4,3.4,50.0,1.0,1.0,West
3,Vermont,9451,50.5,4,35,11.6,76.0,14.0,3.0,Northeast
4,Maine,21396,46.8,5,28,10.4,153.0,11.0,8.0,Northeast


### Mass Shootings 2020

source: https://www.gunviolencearchive.org/reports/number-of-gun-deaths?year=2021

In [17]:
# Load the Gun Violence Archive export, reading only the date, state and casualty columns
mass=pd.read_csv('csv_s/gva_export.csv', usecols=['Incident Date', 'State', '# Killed', '# Injured'])
mass.head()

Unnamed: 0,Incident Date,State,# Killed,# Injured
0,12-Jun-22,California,3,4
1,12-Jun-22,Michigan,0,4
2,12-Jun-22,Indiana,2,4
3,12-Jun-22,Indiana,0,5
4,12-Jun-22,Texas,1,3


In [18]:
# Split the 'day-month-year' incident dates into three columns so rows can be
# filtered by year
date_parts = mass['Incident Date'].str.split('-', expand=True)
mass[['Day', 'Month', 'Year']] = date_parts
# Cast the casualty counts (stored under new names) and the two-digit year to
# integers so computations can be done on them
for new_col, raw_col in (('mass_injured', '# Injured'), ('mass_killed', '# Killed')):
    mass[new_col] = mass[raw_col].astype(int)
mass['Year'] = mass['Year'].astype(int)
mass.head()

Unnamed: 0,Incident Date,State,# Killed,# Injured,Day,Month,Year,mass_injured,mass_killed
0,12-Jun-22,California,3,4,12,Jun,22,4,3
1,12-Jun-22,Michigan,0,4,12,Jun,22,4,0
2,12-Jun-22,Indiana,2,4,12,Jun,22,4,2
3,12-Jun-22,Indiana,0,5,12,Jun,22,5,0
4,12-Jun-22,Texas,1,3,12,Jun,22,3,1


In [19]:
# Keep only the incidents whose two-digit year is 20 (2020), then discard the raw
# and helper columns that are no longer needed
in_2020 = mass['Year'] == 20
mass = mass.loc[in_2020].drop(
    columns=['Incident Date', 'Day', 'Month', 'Year', '# Injured', '# Killed']
)
mass.head()

Unnamed: 0,State,mass_injured,mass_killed
959,Indiana,3,1
960,Oregon,4,0
961,Illinois,3,3
962,Florida,2,2
963,Massachusetts,5,1


In [20]:
# Count the incidents per state (number of rows in each State group)
mass2 = mass.groupby('State').size()
# Turn the resulting Series into a two-column frame that can be merged back in
mass2_df = mass2.reset_index(name='mass_incidents')
mass2_df.head()

Unnamed: 0,State,mass_incidents
0,Alabama,11
1,Alaska,3
2,Arizona,4
3,Arkansas,13
4,California,39


In [21]:
# Total the injured and killed counts for every state
state_totals = mass.groupby('State').sum()
# Move State back from the index into an ordinary column
mass = state_totals.reset_index()
mass.head()

Unnamed: 0,State,mass_injured,mass_killed
0,Alabama,37,16
1,Alaska,8,5
2,Arizona,16,4
3,Arkansas,48,10
4,California,153,37


In [22]:
# Join the per-state incident counts onto the per-state injured/killed totals
mass_df = pd.merge(mass, mass2_df, on='State')
mass_df.head()

Unnamed: 0,State,mass_injured,mass_killed,mass_incidents
0,Alabama,37,16,11
1,Alaska,8,5,3
2,Arizona,16,4,4
3,Arkansas,48,10,13
4,California,153,37,39


### Merge License/Ownership/Mortality with Mass Incidents

In [23]:
# Add the mass-shooting figures to the combined table; the outer join keeps states
# that have no recorded incidents (their mass_* columns are NaN at this point)
allgun = pd.merge(gun2, mass_df, how='outer', on='State')
allgun.head()

Unnamed: 0,State,gun_license_ct,gun_house_pct,gun_license_ct_rank,gun_house_pct_rank,gun_mort_rate,gun_deaths,gun_mort_rank,gun_deaths_rank,Region,mass_injured,mass_killed,mass_incidents
0,Rhode Island,4887,14.8,1,3,5.1,54.0,4.0,2.0,Northeast,,,
1,Delaware,6092,34.4,2,10,14.4,135.0,26.0,7.0,Northeast,11.0,3.0,3.0
2,Hawaii,9280,14.9,3,4,3.4,50.0,1.0,1.0,West,,,
3,Vermont,9451,50.5,4,35,11.6,76.0,14.0,3.0,Northeast,,,
4,Maine,21396,46.8,5,28,10.4,153.0,11.0,8.0,Northeast,1.0,3.0,1.0


### Final Rankings

In [24]:
# Mass-shooting rankings are computed after the merge so that states without any
# 2020 mass incident are included. For those states a missing value means zero,
# so the gaps are filled with 0 before ranking.
mass_cols = ['mass_incidents', 'mass_injured', 'mass_killed']
for col in mass_cols:
    allgun[col] = allgun[col].fillna(0)
# Rank ascending: fewest incidents/injured/killed receives rank 1, ties share the lowest rank
for col in mass_cols:
    allgun[col + '_rank'] = allgun[col].rank(method='min', na_option='top').astype(int)

allgun

Unnamed: 0,State,gun_license_ct,gun_house_pct,gun_license_ct_rank,gun_house_pct_rank,gun_mort_rate,gun_deaths,gun_mort_rank,gun_deaths_rank,Region,mass_injured,mass_killed,mass_incidents,mass_incidents_rank,mass_injured_rank,mass_killed_rank
0,Rhode Island,4887,14.8,1,3,5.1,54.0,4.0,2.0,Northeast,0.0,0.0,0.0,1,1,1
1,Delaware,6092,34.4,2,10,14.4,135.0,26.0,7.0,Northeast,11.0,3.0,3.0,16,18,17
2,Hawaii,9280,14.9,3,4,3.4,50.0,1.0,1.0,West,0.0,0.0,0.0,1,1,1
3,Vermont,9451,50.5,4,35,11.6,76.0,14.0,3.0,Northeast,0.0,0.0,0.0,1,1,1
4,Maine,21396,46.8,5,28,10.4,153.0,11.0,8.0,Northeast,1.0,3.0,1.0,8,8,17
5,Alaska,28237,64.5,6,48,23.5,175.0,45.0,10.0,West,8.0,5.0,3.0,16,15,22
6,North Dakota,30975,55.1,7,41,13.8,100.0,23.0,4.0,Midwest,0.0,0.0,0.0,1,1,1
7,Montana,36678,66.3,8,50,20.9,238.0,40.0,13.0,West,0.0,0.0,0.0,1,1,1
8,Nebraska,43261,45.2,9,22,10.1,197.0,10.0,11.0,Midwest,10.0,2.0,2.0,12,17,14
9,Massachusetts,45138,14.7,10,1,3.7,268.0,2.0,14.0,Northeast,33.0,5.0,8.0,27,26,22


In [25]:
# Add up the individual gun rankings into one score per state, then rank the scores
# (lowest score = rank 1), providing an overall gun ranking per state
ranks=['mass_killed_rank', 'mass_injured_rank', 'mass_incidents_rank', 'gun_license_ct_rank', 'gun_house_pct_rank', 'gun_mort_rank', 'gun_deaths_rank']
# skipna=False: the outer merges can leave a state without one of these ranks. The
# default sum skips the missing rank (counting it as 0), which makes that state's
# score look better than it is; with skipna=False the score is NaN instead.
allgun['gun_score']= allgun[ranks].sum(axis=1, skipna=False)
# na_option='bottom' places states with a NaN score after every fully scored state,
# so the integer cast still succeeds
allgun['total_gun_rank']=allgun['gun_score'].rank(method='min', na_option='bottom').astype(int)
allgun

Unnamed: 0,State,gun_license_ct,gun_house_pct,gun_license_ct_rank,gun_house_pct_rank,gun_mort_rate,gun_deaths,gun_mort_rank,gun_deaths_rank,Region,mass_injured,mass_killed,mass_incidents,mass_incidents_rank,mass_injured_rank,mass_killed_rank,gun_score,total_gun_rank
0,Rhode Island,4887,14.8,1,3,5.1,54.0,4.0,2.0,Northeast,0.0,0.0,0.0,1,1,1,13.0,2
1,Delaware,6092,34.4,2,10,14.4,135.0,26.0,7.0,Northeast,11.0,3.0,3.0,16,18,17,96.0,8
2,Hawaii,9280,14.9,3,4,3.4,50.0,1.0,1.0,West,0.0,0.0,0.0,1,1,1,12.0,1
3,Vermont,9451,50.5,4,35,11.6,76.0,14.0,3.0,Northeast,0.0,0.0,0.0,1,1,1,59.0,4
4,Maine,21396,46.8,5,28,10.4,153.0,11.0,8.0,Northeast,1.0,3.0,1.0,8,8,17,85.0,6
5,Alaska,28237,64.5,6,48,23.5,175.0,45.0,10.0,West,8.0,5.0,3.0,16,15,22,162.0,23
6,North Dakota,30975,55.1,7,41,13.8,100.0,23.0,4.0,Midwest,0.0,0.0,0.0,1,1,1,78.0,5
7,Montana,36678,66.3,8,50,20.9,238.0,40.0,13.0,West,0.0,0.0,0.0,1,1,1,114.0,11
8,Nebraska,43261,45.2,9,22,10.1,197.0,10.0,11.0,Midwest,10.0,2.0,2.0,12,17,14,95.0,7
9,Massachusetts,45138,14.7,10,1,3.7,268.0,2.0,14.0,Northeast,33.0,5.0,8.0,27,26,22,102.0,9


In [26]:
# Write the combined table to disk as JSON with one object per row (orient="records")
# NOTE(review): assumes the json_out/ directory already exists - confirm
allgun.to_json('json_out/guns.json', orient="records")