In [1]:
!pip install psycopg2-binary



In [2]:
# Import dependencies
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

In [3]:
# Relative path to the mass shootings CSV file
data_file = "Resources/US Mass Shootings May 24 2022.csv"

In [4]:
# Read only the "location" and "year" columns, derive "state" from "location",
# then drop "location".
# The state is taken as the text after the LAST comma (whitespace stripped), so a
# location with extra commas still yields its final component, and a location with
# no comma (or a missing value) no longer raises.
# NOTE(review): assumes "location" looks like "<city>, <state>" - confirm against the CSV.
mass_shooting_df = (
    pd.read_csv(data_file, usecols=["location", "year"])
    .assign(
        state=lambda df: df["location"].str.rsplit(",", n=1).str[-1].str.strip()
    )
    .drop(columns="location")
)

In [5]:
# Display mass_shooting_df as the cell output (year and state columns)
mass_shooting_df

Unnamed: 0,year,state
0,2022,Texas
1,2022,New York
2,2022,California
3,2021,Michigan
4,2021,California
...,...,...
123,1987,Florida
124,1986,Oklahoma
125,1984,California
126,1984,Texas


In [6]:
# Restrict the shootings to 1991-2017 (inclusive) to match the gun laws timeframe,
# then count the rows per (state, year) pair.
summary_df = (
    mass_shooting_df
    .loc[lambda df: (df["year"] >= 1991) & (df["year"] <= 2017)]
    .groupby(["state", "year"], as_index=False)
    .size()
    .rename(columns={"size": "No. of Mass Shootings"})
)
summary_df

Unnamed: 0,state,year,No. of Mass Shootings
0,Arizona,2011,1
1,Arkansas,1998,1
2,California,1992,1
3,California,1993,1
4,California,1997,1
...,...,...,...
76,Washington,2016,1
77,Wisconsin,2005,1
78,Wisconsin,2007,1
79,Wisconsin,2012,1


In [7]:
# Relative path to the gun regulations CSV file
gun_data_file = "Resources/raw_data.csv"

In [8]:
# Load the gun regulations CSV, keeping only the state, year and lawtotal columns
gun_df = pd.read_csv(
    gun_data_file,
    usecols=["state", "year", "lawtotal"],
)

In [9]:
# Relabel the lawtotal column as "No. of Gun Regulations" and display the result
gun_df = gun_df.rename({"lawtotal": "No. of Gun Regulations"}, axis="columns")
gun_df

Unnamed: 0,state,year,No. of Gun Regulations
0,Alabama,1991,15
1,Alaska,1991,10
2,Arizona,1991,12
3,Arkansas,1991,15
4,California,1991,58
...,...,...,...
1345,Virginia,2017,13
1346,Washington,2017,43
1347,West Virginia,2017,21
1348,Wisconsin,2017,23


In [10]:
# Inner-join the shooting counts with the regulation counts on (state, year);
# only pairs present in both DataFrames are kept.
merged_df = summary_df.merge(gun_df, on=["state", "year"], how="inner")
merged_df

Unnamed: 0,state,year,No. of Mass Shootings,No. of Gun Regulations
0,Arizona,2011,1,11
1,Arkansas,1998,1,11
2,California,1992,1,57
3,California,1993,1,57
4,California,1997,1,70
...,...,...,...,...
74,Washington,2016,1,43
75,Wisconsin,2005,1,24
76,Wisconsin,2007,1,24
77,Wisconsin,2012,1,24


In [11]:
# Allow up to 500 rows to be rendered before pandas truncates the output,
# then display the merged table.
pd.options.display.max_rows = 500
merged_df

Unnamed: 0,state,year,No. of Mass Shootings,No. of Gun Regulations
0,Arizona,2011,1,11
1,Arkansas,1998,1,11
2,California,1992,1,57
3,California,1993,1,57
4,California,1997,1,70
5,California,2006,1,93
6,California,2011,1,94
7,California,2012,1,95
8,California,2013,1,99
9,California,2014,2,100


In [12]:
# Preview merged_df indexed by (state, year). The result is only displayed:
# it is not assigned, so merged_df itself keeps its original index.
merged_df.set_index(["state", "year"])

Unnamed: 0_level_0,Unnamed: 1_level_0,No. of Mass Shootings,No. of Gun Regulations
state,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arizona,2011,1,11
Arkansas,1998,1,11
California,1992,1,57
California,1993,1,57
California,1997,1,70
California,2006,1,93
California,2011,1,94
California,2012,1,95
California,2013,1,99
California,2014,2,100


In [17]:
# Connection settings for the PostgreSQL database.
# Every value can be overridden with a MASS_SHOOTING_DB_* environment variable so
# real credentials do not have to be stored in the notebook; the fallbacks are the
# original local-development values.
protocol = 'postgresql'
username = os.environ.get('MASS_SHOOTING_DB_USER', 'postgres')
password = os.environ.get('MASS_SHOOTING_DB_PASSWORD', 'admin')
host = os.environ.get('MASS_SHOOTING_DB_HOST', 'localhost')
port = int(os.environ.get('MASS_SHOOTING_DB_PORT', 5432))
database_name = os.environ.get('MASS_SHOOTING_DB_NAME', 'mass_shooting_db')

# Build the connection string. The username and password are URL-escaped so that
# characters such as "@", ":" or "/" in them cannot corrupt the URL.
rds_connection_string = (
    f'{protocol}://{quote_plus(username)}:{quote_plus(password)}'
    f'@{host}:{port}/{database_name}'
)
engine = create_engine(rds_connection_string)

In [18]:
# List the tables currently in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API is the supported replacement.
inspect(engine).get_table_names()

  engine.table_names()


['mass_shooting', 'regulations']

In [19]:
# Write summary_df to the "mass_shooting" table, replacing the table if it
# already exists; the DataFrame index is not written.
summary_df.to_sql(
    name="mass_shooting",
    con=engine,
    if_exists="replace",
    index=False,
)

In [20]:
# Write gun_df to the "regulations" table, replacing the table if it
# already exists; the DataFrame index is not written.
gun_df.to_sql(
    name="regulations",
    con=engine,
    if_exists="replace",
    index=False,
)

In [23]:
# Read the "mass_shooting" table back to confirm the rows were written
pd.read_sql_query(
    sql="select * from mass_shooting",
    con=engine,
)

Unnamed: 0,state,year,No. of Mass Shootings
0,Arizona,2011,1
1,Arkansas,1998,1
2,California,1992,1
3,California,1993,1
4,California,1997,1
5,California,2006,1
6,California,2011,1
7,California,2012,1
8,California,2013,1
9,California,2014,2


In [24]:
# Read the "regulations" table back to confirm the rows were written
pd.read_sql_query(
    sql="select * from regulations",
    con=engine,
)

Unnamed: 0,state,year,No. of Gun Regulations
0,Alabama,1991,15
1,Alaska,1991,10
2,Arizona,1991,12
3,Arkansas,1991,15
4,California,1991,58
...,...,...,...
1345,Virginia,2017,13
1346,Washington,2017,43
1347,West Virginia,2017,21
1348,Wisconsin,2017,23
