In [14]:
# Dependencies
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from psycopg2 import sql
from sqlalchemy import create_engine, inspect

### Extract CSVs into DataFrames

In [15]:
# Read the hate-crime CSV (path is relative to the working directory).
Hatecrime_file = "Hatecrime.csv"
Hatecrime_df = pd.read_csv(filepath_or_buffer=Hatecrime_file)

# Preview the first five rows to confirm the file parsed as expected.
Hatecrime_df.head(5)

Unnamed: 0,S/N,Precinct,Number,Gender,Race,Age,Year,Quarter
0,0,6,1,Male,Hispanic,48,2017,1
1,1,7,2,Male,White,29,2017,1
2,2,7,3,Male,White,29,2017,1
3,3,14,4,Male,Hispanic,64,2017,1
4,4,14,5,Male,Hispanic,64,2017,1


In [16]:
# Read the precinct address/zip-code CSV (path is relative to the working directory).
Precincts_file = "precincts_zips.csv"
Precincts_df = pd.read_csv(filepath_or_buffer=Precincts_file)

# Preview the first five rows to confirm the file parsed as expected.
Precincts_df.head(5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,precinct_number,Phone,st_address,city,state,full_address,lat,lng,zipcode
0,0,0,1,212-334-0611,16 Ericsson Place,New York,NY,"16 Ericsson Place, New York, NY",40.72027,-74.007198,10013
1,1,1,5,212-334-0711,19 Elizabeth Street,New York,NY,"19 Elizabeth Street, New York, NY",40.716194,-73.99747,10013
2,2,2,6,212-741-4811,233 West 10 Street,New York,NY,"233 West 10 Street, New York, NY",40.734233,-74.005453,10014
3,3,3,7,212-477-7311,19 1/2 Pitt Street,New York,NY,"19 1/2 Pitt Street, New York, NY",40.716362,-73.983934,10002
4,4,4,9,212-477-7811,321 East 5 Street,New York,NY,"321 East 5 Street, New York, NY",40.644583,-73.9755,10003


In [17]:
# List the column labels of the raw precinct frame; the subset used for the
# transform step is chosen from these names.
Precincts_df.columns

Index(['Unnamed: 0', 'Unnamed: 0.1', 'precinct_number', 'Phone', 'st_address',
       'city', 'state', 'full_address', 'lat', 'lng', 'zipcode'],
      dtype='object')

In [18]:
# Read the felony-offense CSV: one row per offense, one column per year.
Felony_file = "felony_offenses.csv"
Felony_df = pd.read_csv(filepath_or_buffer=Felony_file)

# Show the frame as the cell output.
Felony_df

Unnamed: 0,OFFENSE,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,MURDER & NON-NEGL. MANSLAUGHTER,673,649,587,597,570,539,596,496,523,471,536,515,419,335,333,352,335,292,295
1,RAPE,2068,1981,2144,2070,1905,1858,1525,1351,1299,1205,1373,1420,1445,1378,1352,1438,1438,1449,1794
2,ROBBERY,32562,28202,27229,25989,24373,24722,23739,21809,22401,18601,19486,19717,20144,19128,16539,16931,15500,13956,12913
3,FELONY ASSAULT,25924,23453,21147,19139,18622,17750,17309,17493,16284,16773,16956,18482,19381,20297,20207,20270,20847,20052,20208
4,BURGLARY,38352,32763,31275,29110,26976,24117,23143,21762,20725,19430,18600,18720,19168,17429,16765,15125,12990,12083,11687
5,GRAND LARCENY,49631,46329,45771,46751,48763,48243,46625,44924,44242,39580,37835,38501,42497,45368,43862,44005,44279,43150,43558
6,GRAND LARCENY OF MOTOR VEHICLE,35442,29531,26656,23413,20884,18246,15745,13174,12482,10670,10329,9314,8093,7400,7664,7332,6327,5676,5428
7,TOTAL SEVEN MAJOR FELONY OFFENSES,184652,162908,154809,147069,142093,135475,128682,121009,117956,106730,105115,106669,111147,111335,106722,105453,101716,96658,95883


### Transform Hatecrime DataFrame

In [19]:
# Select the subset of hate-crime columns used by the rest of the notebook.
Hatecrime_columns = ["Precinct", "Gender", "Race", "Age", "Year"]
# .copy() yields an independent frame, so later edits never touch Hatecrime_df.
Hatecrime_transformed = Hatecrime_df.loc[:, Hatecrime_columns].copy()
Hatecrime_transformed



Unnamed: 0,Precinct,Gender,Race,Age,Year
0,6,Male,Hispanic,48,2017
1,7,Male,White,29,2017
2,7,Male,White,29,2017
3,14,Male,Hispanic,64,2017
4,14,Male,Hispanic,64,2017
5,14,Male,Hispanic,64,2017
6,14,Male,Hispanic,64,2017
7,14,Male,Hispanic,64,2017
8,14,Male,Hispanic,64,2017
9,14,Male,Hispanic,64,2017


In [20]:
# Only "Precinct" gets a new header ("id"); Gender, Race, Age and Year keep
# their existing names, so they need no entry in the mapping.
Hatecrime_transformed = Hatecrime_transformed.rename(columns={"Precinct": "id"})

In [21]:
# Keep the first row seen for each "id" value, then promote "id" to the index.
# NOTE(review): "id" is the renamed Precinct column, so this leaves a single
# hate-crime row per precinct -- confirm that discarding the remaining records
# for the same precinct is intended.
Hatecrime_transformed = (
    Hatecrime_transformed
    .drop_duplicates(subset="id")
    .set_index("id")
)

In [22]:
# Preview the cleaned hate-crime frame, now indexed by "id".
Hatecrime_transformed.head()

Unnamed: 0_level_0,Gender,Race,Age,Year
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6,Male,Hispanic,48,2017
7,Male,White,29,2017
14,Male,Hispanic,64,2017
19,Male,White,53,2017
20,Male,Hispanic,22,2017


### Transform precinct DataFrame

In [23]:
# Select the subset of precinct columns used by the rest of the notebook.
Precincts_columns = ["precinct_number", "Phone","city", "full_address", "lat", "lng", "zipcode"]
# .copy() yields an independent frame, so later edits never touch Precincts_df.
Precincts_transformed = Precincts_df.loc[:, Precincts_columns].copy()
Precincts_transformed

Unnamed: 0,precinct_number,Phone,city,full_address,lat,lng,zipcode
0,1,212-334-0611,New York,"16 Ericsson Place, New York, NY",40.720270,-74.007198,10013
1,5,212-334-0711,New York,"19 Elizabeth Street, New York, NY",40.716194,-73.997470,10013
2,6,212-741-4811,New York,"233 West 10 Street, New York, NY",40.734233,-74.005453,10014
3,7,212-477-7311,New York,"19 1/2 Pitt Street, New York, NY",40.716362,-73.983934,10002
4,9,212-477-7811,New York,"321 East 5 Street, New York, NY",40.644583,-73.975500,10003
5,10,212-741-8211,New York,"230 West 20th Street, New York, NY",40.742712,-73.998643,10011
6,13,212-477-7411,New York,"230 East 21st Street, New York, NY",40.736788,-73.982908,10010
7,14,212-239-9811,New York,"357 West 35th Street, New York, NY",40.753890,-73.994894,10001
8,17,212-826-3211,New York,"167 East 51st Street, New York, NY",40.756762,-73.970786,10022
9,18,212-767-8400,New York,"306 West 54th Street, New York, NY",40.764952,-73.985138,10019


In [24]:
# Headers that change; "city" and "full_address" keep their names and are
# therefore left out of the mapping.
precinct_header_map = {
    "precinct_number": "id",
    "Phone": "phone_number",
    "lat": "Latitude",
    "lng": "Longitude",
    "zipcode": "Zipcode",
}

# Rename, keep the first row for each "id", and use "id" as the index.
Precincts_transformed = (
    Precincts_transformed
    .rename(columns=precinct_header_map)
    .drop_duplicates(subset="id")
    .set_index("id")
)

Precincts_transformed.head()

Unnamed: 0_level_0,phone_number,city,full_address,Latitude,Longitude,Zipcode
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,212-334-0611,New York,"16 Ericsson Place, New York, NY",40.72027,-74.007198,10013
5,212-334-0711,New York,"19 Elizabeth Street, New York, NY",40.716194,-73.99747,10013
6,212-741-4811,New York,"233 West 10 Street, New York, NY",40.734233,-74.005453,10014
7,212-477-7311,New York,"19 1/2 Pitt Street, New York, NY",40.716362,-73.983934,10002
9,212-477-7811,New York,"321 East 5 Street, New York, NY",40.644583,-73.9755,10003


In [25]:
##### Transform felony DataFrame

In [26]:
# List the column labels of the raw felony frame; the year columns are
# labelled with strings such as '2017', not integers.
Felony_df.columns

Index(['OFFENSE', '2000', '2001', '2002', '2003', '2004', '2005', '2006',
       '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015',
       '2016', '2017', '2018'],
      dtype='object')

In [27]:
# Keep the offense name plus the 2017 and 2018 yearly counts.
Felony_columns = ["OFFENSE", "2017", "2018"]
# .copy() yields an independent frame, so later edits never touch Felony_df.
Felony_transformed = Felony_df.loc[:, Felony_columns].copy()
Felony_transformed.head()

Unnamed: 0,OFFENSE,2017,2018
0,MURDER & NON-NEGL. MANSLAUGHTER,292,295
1,RAPE,1449,1794
2,ROBBERY,13956,12913
3,FELONY ASSAULT,20052,20208
4,BURGLARY,12083,11687


In [28]:
# New headers: the offense name becomes "id"; the year columns get a
# "Count_" prefix.
felony_header_map = {
    "OFFENSE": "id",
    "2017": "Count_2017",
    "2018": "Count_2018",
}

# Rename, keep the first row for each "id", and use "id" as the index.
Felony_transformed = (
    Felony_transformed
    .rename(columns=felony_header_map)
    .drop_duplicates(subset="id")
    .set_index("id")
)

Felony_transformed.head()

Unnamed: 0_level_0,Count_2017,Count_2018
id,Unnamed: 1_level_1,Unnamed: 2_level_1
MURDER & NON-NEGL. MANSLAUGHTER,292,295
RAPE,1449,1794
ROBBERY,13956,12913
FELONY ASSAULT,20052,20208
BURGLARY,12083,11687


### Create database connection

In [34]:
# Build the PostgreSQL URL from environment variables so real credentials never
# have to be committed with the notebook. Each setting falls back to the local
# test value this notebook originally hardcoded, so an unconfigured environment
# produces exactly the same URL as before.
db_user = os.environ.get("NYC_ARREST_DB_USER", "testuser")
db_password = os.environ.get("NYC_ARREST_DB_PASSWORD", "testpassword")
db_host = os.environ.get("NYC_ARREST_DB_HOST", "localhost")
db_port = os.environ.get("NYC_ARREST_DB_PORT", "5432")
db_name = os.environ.get("NYC_ARREST_DB_NAME", "nyc_arrest_db")

# Percent-encode the user and password: characters such as "@", ":" or "/"
# would otherwise be parsed as URL delimiters.
connection_string = (
    f"{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(f'postgresql://{connection_string}')

In [35]:
# List the tables that already exist in the database before loading.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of names on old and new versions.
inspect(engine).get_table_names()


['arrest_1718', 'Hatecrime_year', 'Hatecrime_mm', 'Precincts', 'Felony']

### Load DataFrames into database

In [36]:
# Append the hate-crime rows to the "Hatecrime" table; index=True writes the
# frame's index as a column as well.
Hatecrime_transformed.to_sql(
    name="Hatecrime",
    con=engine,
    if_exists="append",
    index=True,
)

In [37]:
# Append the precinct rows to the "Precincts" table; index=True writes the
# frame's index as a column as well.
Precincts_transformed.to_sql(
    name="Precincts",
    con=engine,
    if_exists="append",
    index=True,
)

In [38]:
# Append the felony counts to the "Felony" table; index=True writes the
# frame's index as a column as well.
Felony_transformed.to_sql(
    name="Felony",
    con=engine,
    if_exists="append",
    index=True,
)