# Dependencies and Setup

In [35]:
import pandas as pd
import csv
from geopy.geocoders import Nominatim
import folium 

# EXTRACT

In [36]:
# File to Load
crime_to_load = "Resources/Baltimore_2019_Crime_Only.csv"
cctv_to_load = "Resources/CCTV_hoods.csv"

from sqlalchemy import create_engine

In [37]:
# Read the CSV files
crime_df = pd.read_csv(crime_to_load)
cctv_df = pd.read_csv(cctv_to_load)

In [38]:
crime_df.head(10)

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Weapon,District,Neighborhood,Total Incidents
0,07/06/2019,01:30:00,5B,0 HILLSIDE RD,BURGLARY,,NORTHERN,ROLAND PARK,1
1,07/06/2019,09:00:00,7A,1600 APPLETON ST,AUTO THEFT,,WESTERN,EASTERWOOD,1
2,07/06/2019,07:03:00,6E,2500 EDGECOMBE CIR N,LARCENY,,NORTHERN,PARKLANE,1
3,07/06/2019,02:30:00,3AK,PL & N HIGHLAND AV,ROBBERY - STREET,KNIFE,SOUTHEAST,,1
4,07/06/2019,10:05:00,6C,700 WASHINGTON BLVD,LARCENY,,SOUTHERN,WASHINGTON VILLAGE,1
5,07/06/2019,19:00:00,4B,2100 32ND ST,AGG. ASSAULT,KNIFE,NORTHEAST,,1
6,07/06/2019,17:31:00,4C,3000 GLENMORE AVE,AGG. ASSAULT,OTHER,NORTHEAST,,1
7,07/06/2019,16:38:00,4C,3700 2ND ST,AGG. ASSAULT,OTHER,SOUTHERN,BROOKLYN,1
8,07/06/2019,15:21:00,4E,1700 RUSSELL ST,COMMON ASSAULT,,SOUTHERN,CARROLL - CAMDEN INDUSTRIAL AREA,1
9,07/06/2019,15:40:00,5A,3800 5TH ST,BURGLARY,,SOUTHERN,BROOKLYN,1


In [39]:
sample = crime_df.sample(10)
sample

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Weapon,District,Neighborhood,Total Incidents
20571,01/14/2019,09:18:00,3AJF,5900 YORK RD,ROBBERY - CARJACKING,FIREARM,NORTHERN,CHINQUAPIN PARK/BELV,1
5819,05/22/2019,03:45:00,3AF,1800 N HOWARD ST,ROBBERY - STREET,FIREARM,CENTRAL,CHARLES NORTH,1
225,07/04/2019,17:15:00,4D,6700 REISTERSTOWN RD,AGG. ASSAULT,HANDS,NORTHWEST,REISTERSTOWN STATION,1
8256,05/02/2019,16:30:00,7A,3800 ROCKFIELD AVE,AUTO THEFT,,NORTHWEST,ARLINGTON,1
9966,04/18/2019,09:00:00,6E,3400 EDNOR RD,LARCENY,,NORTHEAST,EDNOR GARDENS-LAKESI,1
6776,05/15/2019,21:27:00,4E,500 W HOFFMAN ST,COMMON ASSAULT,,CENTRAL,UPTON,1
3564,06/08/2019,14:15:00,6J,400 S HIGHLAND AVE,LARCENY,,SOUTHEAST,HUDSON-HIGHLANDTOWN,1
14188,03/14/2019,15:50:00,9S,900 SEAGULL AV,SHOOTING,FIREARM,SOUTHERN,CHERRY HILL,1
11234,04/08/2019,18:13:00,7A,5600 SINCLAIR LN,AUTO THEFT,,NORTHEAST,FRANKFORD,1
15649,02/27/2019,19:57:00,1A,1600 N DURHAM ST,HOMICIDE,FIREARM,EASTERN,BROADWAY EAST,1


In [40]:
cctv_df.head()

Unnamed: 0,cameraLocation,cameraNumber,cameraProject,Location 1,latitude,longitude,neighborhood
0,Eutaw and Lexington Market,1,Downtown,"(39.290996, -76.621073999999993)",39.290996,-76.621074,NorthEutawStreet
1,Eutaw and Fayette,2,Downtown,"(39.29048796517317, -76.623665143680341)",39.290488,-76.623665,RidgleysDelight
2,Eutaw and Baltimore,3,Downtown,"(39.289324000000001, -76.620985000000005)",39.289324,-76.620985,WestBaltimoreStreet
3,Eutaw and Redwood,4,Downtown,"(39.288677999999997, -76.620947999999999)",39.288678,-76.620948,RidgleysDelight
4,Eutaw and Lombard,5,Downtown,"(39.287528000000002, -76.620853999999994)",39.287528,-76.620854,RidgleysDelight


# Create database connection


In [41]:
connection_string = "postgres:postgres@localhost:5432/postgres"
engine = create_engine(f'postgresql://{connection_string}')

# Confirm tables
engine.table_names()

['crime', 'cctv']

# Confirm tables

In [42]:
crime_df.to_sql(name='crime', con=engine, if_exists='append', index=True)

In [43]:
cctv_df.to_sql(name='cctv', con=engine, if_exists='append', index=False)

## TRANSFORM

In [44]:
# Changing "Location 1" form cctv_df to 'latitude' and 'longitiude'
# This is required for geocoding which we want to use to find addresses of deployed CCTV

# Create two lists for the loop results to be placed
lat = []
lon = []

# For each row in a varible,
for row in cctv_df['Location 1']:
    # Try to,
    try:
        #Remove ( from the string
        row = row.replace("(", "")
        row = row.replace(")", "")
        # Split the row by comma, convert to float, and append
        # everything before the comma to lat
        lat.append(float(row.split(',')[0]))
        # Split the row by comma, convert to float, and append
        # everything after the comma to lon
        lon.append(float(row.split(',')[1]))
    # But if you get an error
    except:
        # append a missing value to lat
        lat.append(np.NaN)
        # append a missing value to lon
        lon.append(np.NaN)

# Create two new columns from lat and lon
cctv_df['latitude'] = lat
cctv_df['longitude'] = lon

In [45]:
cctv_df.head()

Unnamed: 0,cameraLocation,cameraNumber,cameraProject,Location 1,latitude,longitude,neighborhood
0,Eutaw and Lexington Market,1,Downtown,"(39.290996, -76.621073999999993)",39.290996,-76.621074,NorthEutawStreet
1,Eutaw and Fayette,2,Downtown,"(39.29048796517317, -76.623665143680341)",39.290488,-76.623665,RidgleysDelight
2,Eutaw and Baltimore,3,Downtown,"(39.289324000000001, -76.620985000000005)",39.289324,-76.620985,WestBaltimoreStreet
3,Eutaw and Redwood,4,Downtown,"(39.288677999999997, -76.620947999999999)",39.288678,-76.620948,RidgleysDelight
4,Eutaw and Lombard,5,Downtown,"(39.287528000000002, -76.620853999999994)",39.287528,-76.620854,RidgleysDelight


In [46]:
#dropping the columns we used to define neighborhood
cctv_df = cctv_df.drop(columns=['Location 1', 'latitude', 'longitude'])
cctv_df.head(10)

Unnamed: 0,cameraLocation,cameraNumber,cameraProject,neighborhood
0,Eutaw and Lexington Market,1,Downtown,NorthEutawStreet
1,Eutaw and Fayette,2,Downtown,RidgleysDelight
2,Eutaw and Baltimore,3,Downtown,WestBaltimoreStreet
3,Eutaw and Redwood,4,Downtown,RidgleysDelight
4,Eutaw and Lombard,5,Downtown,RidgleysDelight
5,Eutaw and Camden,6,Downtown,Baltimore
6,Paca and Pratt,7,Downtown,RidgleysDelight
7,Greene and Lombard,8,Downtown,RidgleysDelight
8,Greene and Baltimore,9,Downtown,RidgleysDelight
9,Greene and Fayette,10,Downtown,RidgleysDelight


In [20]:
folium_map = folium.Map(location=[39.29, -76.61],
                        zoom_start=13)
                        #tiles="CartoDB dark_matter")

for cameraNumber in cctv_df:
 #radius of circles
    # choose the color of the marker
    if int(cctv_df.loc[key, "cameraNumber"])<2:
        color="#39cc28" # green
    elif int(cctv_df.loc[key, "cameraNumber"])>=2:
        color="#2743cc" #blue
    else:
        color="#ea1f09" # green
    radius = int(cctv_df.loc[key, "cameraNumber"])
    marker = folium.CircleMarker(location=cctv_df[key],
                                 radius=radius,
                                 color=color).add_to(folium_map)

folium_map

NameError: name 'key' is not defined

In [15]:
map_osm = folium.Map(location=[39.29, -76.61], zoom_start=11)
map_osm