In [1]:
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime
from sqlalchemy import create_engine
import psycopg2
import io
from config import password

Imports and Upload data

In [2]:
filepath = "Resources/GunViolenceArchiveDataJan2014toJun2022.csv"
gunviolence_df = pd.read_csv(filepath, encoding="UTF-8")
gunviolence_df.head()

Unnamed: 0,Incident ID,Incident Date,state,City Or County,Address,n_killed,n_injured,Operations
0,92540.0,1/1/2014,Florida,Orlando,831 Sky Lake Cir,1,1,
1,92168.0,1/1/2014,Ohio,Cincinnati,1600 block of Potter Pl,1,0,
2,95232.0,1/1/2014,California,Glendale,2600 block of Sleepy Hollow Dr,0,1,
3,93431.0,1/1/2014,Texas,Hebbronville,,0,1,
4,92432.0,1/1/2014,New York,Rochester,55 Lois Street,1,1,


Drop Operations Column

In [3]:
clean_df=gunviolence_df[['Incident ID', 'Incident Date', 'state', 'City Or County', 'Address', 'n_killed', 'n_injured']]
clean_df.head()

Unnamed: 0,Incident ID,Incident Date,state,City Or County,Address,n_killed,n_injured
0,92540.0,1/1/2014,Florida,Orlando,831 Sky Lake Cir,1,1
1,92168.0,1/1/2014,Ohio,Cincinnati,1600 block of Potter Pl,1,0
2,95232.0,1/1/2014,California,Glendale,2600 block of Sleepy Hollow Dr,0,1
3,93431.0,1/1/2014,Texas,Hebbronville,,0,1
4,92432.0,1/1/2014,New York,Rochester,55 Lois Street,1,1


Adjust headers to remove empty spaces

In [4]:
clean_df= clean_df.rename(columns={'Incident ID':'Incident_ID', 'Incident Date':'Incident_Date', 'state':'State', 'City Or County':'City_or_County'})


Change column to Datetime

In [5]:
clean_df['Incident_Date']= pd.to_datetime(clean_df['Incident_Date'])


Filter Dataset to just the incidents in 2022

In [6]:
clean_df_filter=clean_df[(clean_df['Incident_Date']> '12/31/2021')]
clean_df_filter.head()

Unnamed: 0,Incident_ID,Incident_Date,State,City_or_County,Address,n_killed,n_injured
427510,2202397.0,2022-01-01,Indiana,Indianapolis,3800 block of N Temple Ave,0,1
427511,2202390.0,2022-01-01,Michigan,Detroit,Joy Rd and Prest St,0,1
427512,2205160.0,2022-01-01,North Carolina,Summerfield,Spotswood Rd and Ashview Ct,0,1
427513,2211626.0,2022-01-01,Kansas,Lawrence,1837 N 1500 Rd,0,1
427514,2206646.0,2022-01-01,Oklahoma,Oklahoma City,S McKinley Ave and SW 27th St,1,1


Check to make sure filtering dates worked correctly

In [7]:
clean_df_filter.count()

Incident_ID       20892
Incident_Date     20892
State             20892
City_or_County    20892
Address           20387
n_killed          20892
n_injured         20892
dtype: int64

Check Datatypes

In [8]:
clean_df_filter.dtypes

Incident_ID              float64
Incident_Date     datetime64[ns]
State                     object
City_or_County            object
Address                   object
n_killed                   int64
n_injured                  int64
dtype: object

In [9]:
import numpy as np
import matplotlib

Queries


In [10]:
clean_2022=clean_df_filter

Top 20 most deadly gun violence incidents

In [11]:
Top20_deaths_per_incident=clean_2022.sort_values(by=['n_killed'], ascending=False)
Top20_deaths_per_incident.head(20)

Unnamed: 0,Incident_ID,Incident_Date,State,City_or_County,Address,n_killed,n_injured
445409,2310278.0,2022-05-24,Texas,Uvalde,715 Old Carrizo Rd,22,17
443911,2301751.0,2022-05-14,New York,Buffalo,1275 Jefferson Ave,10,3
431715,2227058.0,2022-02-05,Texas,Corsicana,2919 W 2nd Ave,6,2
438630,2269082.0,2022-04-03,California,Sacramento,J St and 10th St,6,12
446668,2319567.0,2022-06-02,Texas,Centerville,1696 TX-7 W,6,0
430249,2218159.0,2022-01-23,Wisconsin,Milwaukee,2505 N 21st St,6,0
440770,2283535.0,2022-04-20,Minnesota,Duluth,715 E 12th St,5,0
434417,2243861.0,2022-02-28,California,Sacramento,2041 Wyda Way,5,0
446467,2318310.0,2022-06-01,Oklahoma,Tulsa,6161 S Yale Ave,5,0
430207,2217882.0,2022-01-23,California,Inglewood,1300 block of North Park Ave,4,1


Total number of incidents per state

In [12]:
clean_df_filter['State'].value_counts()

Texas                   1774
Illinois                1511
California              1469
Pennsylvania            1326
Florida                 1013
New York                 921
Ohio                     902
Georgia                  855
Louisiana                818
North Carolina           683
Maryland                 674
Michigan                 651
Virginia                 636
Tennessee                592
Alabama                  500
Wisconsin                496
Missouri                 492
South Carolina           487
Indiana                  447
Kentucky                 362
Colorado                 351
Washington               351
Arizona                  322
Minnesota                320
Mississippi              315
District of Columbia     305
New Jersey               267
Oklahoma                 218
Arkansas                 213
Connecticut              194
Oregon                   178
Nevada                   152
New Mexico               152
Massachusetts            137
Kansas        

Incident count by City

In [13]:
clean_df_filter['City_or_County'].value_counts()

Chicago               1156
Philadelphia           892
Houston                523
Milwaukee              397
Baltimore              385
                      ... 
Springtown               1
Winthrop                 1
Mchenry (Lakemoor)       1
West Springfield         1
Alexander                1
Name: City_or_County, Length: 3042, dtype: int64

Total number of deaths from gun violence by state, and number of people injured by state

In [19]:
incidents_by_state_in_2022= clean_2022.groupby('State').sum()
incidents_by_state_in_2022

Unnamed: 0_level_0,Incident_ID,n_killed,n_injured
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama,1131296000.0,277,368
Alaska,70124320.0,23,15
Arizona,730027300.0,217,218
Arkansas,483444400.0,119,174
California,3326674000.0,761,1103
Colorado,795429800.0,139,292
Connecticut,439298000.0,58,172
Delaware,184111700.0,19,75
District of Columbia,690649500.0,86,277
Florida,2294727000.0,504,747


In [20]:
most_killed=incidents_by_state_in_2022.sort_values(by=['n_killed'], ascending=False)
most_killed.head(10)

Unnamed: 0_level_0,Incident_ID,n_killed,n_injured
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Texas,4016499000.0,944,1351
California,3326674000.0,761,1103
Florida,2294727000.0,504,747
Pennsylvania,3007308000.0,436,1188
Georgia,1938783000.0,412,655
Illinois,3429173000.0,392,1440
Louisiana,1851408000.0,364,713
Ohio,2047362000.0,347,719
North Carolina,1548361000.0,314,532
Alabama,1131296000.0,277,368


The 10 lowest number of people killed in gun violence by state

In [21]:
least_killed= incidents_by_state_in_2022.sort_values(by=['n_killed'], ascending=True)
least_killed.head(10)

Unnamed: 0_level_0,Incident_ID,n_killed,n_injured
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Wyoming,22772589.0,7,4
Vermont,45477328.0,7,18
North Dakota,24983635.0,9,7
Maine,56918666.0,9,22
Rhode Island,58541465.0,10,23
New Hampshire,49953664.0,10,15
South Dakota,47619190.0,13,10
Hawaii,42966299.0,13,15
Delaware,184111663.0,19,75
Nebraska,165735313.0,22,73


In [23]:
cleaned_df=clean_df_filter.dropna(how='all')
cleaned_df.head()

Unnamed: 0,Incident_ID,Incident_Date,State,City_or_County,Address,n_killed,n_injured
427510,2202397.0,2022-01-01,Indiana,Indianapolis,3800 block of N Temple Ave,0,1
427511,2202390.0,2022-01-01,Michigan,Detroit,Joy Rd and Prest St,0,1
427512,2205160.0,2022-01-01,North Carolina,Summerfield,Spotswood Rd and Ashview Ct,0,1
427513,2211626.0,2022-01-01,Kansas,Lawrence,1837 N 1500 Rd,0,1
427514,2206646.0,2022-01-01,Oklahoma,Oklahoma City,S McKinley Ave and SW 27th St,1,1


Create connection

In [27]:
connection_string = "postgres:" + password + "@localhost:5432/gun-violence_db"
engine = create_engine(f'postgresql://{connection_string}')

In [28]:
engine.table_names()

  engine.table_names()


['gunviolence_db']

In [29]:
clean_df_filter.to_sql(name='gunviolence_db', con=engine, if_exists='append', index=False)


ProgrammingError: (psycopg2.errors.UndefinedColumn) column "n_killed" of relation "gunviolence_db" does not exist
LINE 1: ...dent_Date", "State", "City_or_County", "Address", n_killed, ...
                                                             ^

[SQL: INSERT INTO gunviolence_db ("Incident_ID", "Incident_Date", "State", "City_or_County", "Address", n_killed, n_injured) VALUES (%(Incident_ID)s, %(Incident_Date)s, %(State)s, %(City_or_County)s, %(Address)s, %(n_killed)s, %(n_injured)s)]
[parameters: ({'Incident_ID': 2202397.0, 'Incident_Date': datetime.datetime(2022, 1, 1, 0, 0), 'State': 'Indiana', 'City_or_County': 'Indianapolis', 'Address': '3800 block of N Temple Ave', 'n_killed': 0, 'n_injured': 1}, {'Incident_ID': 2202390.0, 'Incident_Date': datetime.datetime(2022, 1, 1, 0, 0), 'State': 'Michigan', 'City_or_County': 'Detroit', 'Address': 'Joy Rd and Prest St', 'n_killed': 0, 'n_injured': 1}, {'Incident_ID': 2205160.0, 'Incident_Date': datetime.datetime(2022, 1, 1, 0, 0), 'State': 'North Carolina', 'City_or_County': 'Summerfield', 'Address': 'Spotswood Rd and Ashview Ct', 'n_killed': 0, 'n_injured': 1}, {'Incident_ID': 2211626.0, 'Incident_Date': datetime.datetime(2022, 1, 1, 0, 0), 'State': 'Kansas', 'City_or_County': 'Lawrence', 'Address': '1837 N 1500 Rd', 'n_killed': 0, 'n_injured': 1}, {'Incident_ID': 2206646.0, 'Incident_Date': datetime.datetime(2022, 1, 1, 0, 0), 'State': 'Oklahoma', 'City_or_County': 'Oklahoma City', 'Address': 'S McKinley Ave and SW 27th St', 'n_killed': 1, 'n_injured': 1}, {'Incident_ID': 2202384.0, 'Incident_Date': datetime.datetime(2022, 1, 1, 0, 0), 'State': 'Illinois', 'City_or_County': 'Chicago', 'Address': '3100 block of N Central Ave', 'n_killed': 0, 'n_injured': 1}, {'Incident_ID': 2202206.0, 'Incident_Date': datetime.datetime(2022, 1, 1, 0, 0), 'State': 'Virginia', 'City_or_County': 'Portsmouth', 'Address': '4020 Victory Blvd', 'n_killed': 0, 'n_injured': 1}, {'Incident_ID': 2202519.0, 'Incident_Date': datetime.datetime(2022, 1, 1, 0, 0), 'State': 'Indiana', 'City_or_County': 'Indianapolis', 'Address': '2200 block of W Walnut St', 'n_killed': 1, 'n_injured': 0}  ... displaying 10 of 20892 total bound parameter sets ...  {'Incident_ID': 2332069.0, 'Incident_Date': datetime.datetime(2022, 6, 16, 0, 0), 'State': 'Kentucky', 'City_or_County': 'Hopkinsville', 'Address': 'Glass Ave and Evergreen Park Dr', 'n_killed': 0, 'n_injured': 0}, {'Incident_ID': 2332033.0, 'Incident_Date': datetime.datetime(2022, 6, 16, 0, 0), 'State': 'Nebraska', 'City_or_County': 'Lincoln', 'Address': 'F St and S 8th St', 'n_killed': 0, 'n_injured': 1})]
(Background on this error at: https://sqlalche.me/e/14/f405)