In [1]:
# Dependencies and Setup
import pandas as pd
import csv
import numpy as np
from geopy.geocoders import Nominatim


# EXTRACT

#### Real DATASETS

In [None]:
# Files to load (full datasets; commented out while the "-head" samples below are used to test the API calls)
#crime_to_load = "Resources/Baltimore_2019_Crime_Only.csv"
#cctv_to_load = "Resources/Baltimore_CCTV_Locations.csv"

#### Test for API calls

In [2]:
# Files to load: the "-head" variants of the crime and CCTV CSVs.
# Presumably these are short samples of the full datasets, used so that the
# geocoding test below only issues a handful of API calls -- TODO confirm.
crime_to_load = "Resources/Baltimore_2019_Crime_Only-head.csv"
cctv_to_load = "Resources/Baltimore_CCTV_Locations-head.csv"

In [3]:
# Read the crime and CCTV CSV files into DataFrames.
# Paths are relative, so the notebook must run from the directory that
# contains the Resources/ folder.
crime_df = pd.read_csv(crime_to_load)
cctv_df = pd.read_csv(cctv_to_load)

In [4]:
crime_df

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Weapon,District,Neighborhood,Total Incidents
0,07/06/2019,01:30:00,5B,0 HILLSIDE RD,BURGLARY,,NORTHERN,ROLAND PARK,1
1,07/06/2019,09:00:00,7A,1600 APPLETON ST,AUTO THEFT,,WESTERN,EASTERWOOD,1
2,07/06/2019,07:03:00,6E,2500 EDGECOMBE CIR N,LARCENY,,NORTHERN,PARKLANE,1
3,07/06/2019,02:30:00,3AK,PL & N HIGHLAND AV,ROBBERY - STREET,KNIFE,SOUTHEAST,,1
4,07/06/2019,10:05:00,6C,700 WASHINGTON BLVD,LARCENY,,SOUTHERN,WASHINGTON VILLAGE,1
5,07/06/2019,19:00:00,4B,2100 32ND ST,AGG. ASSAULT,KNIFE,NORTHEAST,,1
6,07/06/2019,17:31:00,4C,3000 GLENMORE AVE,AGG. ASSAULT,OTHER,NORTHEAST,,1
7,07/06/2019,16:38:00,4C,3700 2ND ST,AGG. ASSAULT,OTHER,SOUTHERN,BROOKLYN,1
8,07/06/2019,15:21:00,4E,1700 RUSSELL ST,COMMON ASSAULT,,SOUTHERN,CARROLL - CAMDEN INDUSTRIAL AREA,1


In [5]:
cctv_df

Unnamed: 0,cameraLocation,cameraNumber,cameraProject,Location 1
0,Eutaw and Lexington Market,1,Downtown,"(39.290996, -76.621073999999993)"
1,Eutaw and Fayette,2,Downtown,"(39.29048796517317, -76.623665143680341)"
2,Eutaw and Baltimore,3,Downtown,"(39.289324000000001, -76.620985000000005)"
3,Eutaw and Redwood,4,Downtown,"(39.288677999999997, -76.620947999999999)"
4,Eutaw and Lombard,5,Downtown,"(39.287528000000002, -76.620853999999994)"
5,Eutaw and Camden,6,Downtown,"(39.285245000000003, -76.620675000000006)"
6,Paca and Pratt,7,Downtown,"(39.286256999999999, -76.622129000000001)"
7,Greene and Lombard,8,Downtown,"(39.287415000000003, -76.623615999999998)"
8,Greene and Baltimore,9,Downtown,"(39.289222000000002, -76.623694999999998)"


# TRANSFORM

In [6]:
# Split the "Location 1" column of cctv_df into numeric 'latitude' and
# 'longitude' columns. This is required for reverse geocoding, which we use
# to find the addresses of the deployed CCTV cameras.


def _parse_lat_lon(value):
    """Parse a "(lat, lon)" string into a (latitude, longitude) float pair.

    Returns (nan, nan) when the value is missing or malformed, so every
    input row always yields exactly one latitude and one longitude.
    """
    try:
        # Drop the surrounding parentheses, then split on the comma.
        parts = value.replace("(", "").replace(")", "").split(',')
        # Convert BOTH halves before returning: a row with a valid latitude
        # but a broken longitude must not contribute a stray latitude.
        return float(parts[0]), float(parts[1])
    except (AttributeError, TypeError, ValueError, IndexError):
        # AttributeError/TypeError: value is not a string (e.g. a NaN cell)
        # ValueError: a piece is not a number; IndexError: no comma present
        return np.nan, np.nan


# Collect the parsed coordinates, one entry per row of cctv_df
lat = []
lon = []

for row in cctv_df['Location 1']:
    row_lat, row_lon = _parse_lat_lon(row)
    lat.append(row_lat)
    lon.append(row_lon)

# Create two new columns from lat and lon
cctv_df['latitude'] = lat
cctv_df['longitude'] = lon

In [7]:
cctv_df

Unnamed: 0,cameraLocation,cameraNumber,cameraProject,Location 1,latitude,longitude
0,Eutaw and Lexington Market,1,Downtown,"(39.290996, -76.621073999999993)",39.290996,-76.621074
1,Eutaw and Fayette,2,Downtown,"(39.29048796517317, -76.623665143680341)",39.290488,-76.623665
2,Eutaw and Baltimore,3,Downtown,"(39.289324000000001, -76.620985000000005)",39.289324,-76.620985
3,Eutaw and Redwood,4,Downtown,"(39.288677999999997, -76.620947999999999)",39.288678,-76.620948
4,Eutaw and Lombard,5,Downtown,"(39.287528000000002, -76.620853999999994)",39.287528,-76.620854
5,Eutaw and Camden,6,Downtown,"(39.285245000000003, -76.620675000000006)",39.285245,-76.620675
6,Paca and Pratt,7,Downtown,"(39.286256999999999, -76.622129000000001)",39.286257,-76.622129
7,Greene and Lombard,8,Downtown,"(39.287415000000003, -76.623615999999998)",39.287415,-76.623616
8,Greene and Baltimore,9,Downtown,"(39.289222000000002, -76.623694999999998)",39.289222,-76.623695


### Test geolocator

In [8]:
# Check how geolocator.reverse works, using one hard-coded "lat, lon" string
# (the coordinates of the first camera row displayed above).
# This performs a live request to the Nominatim service.
geolocator = Nominatim(user_agent="Baltimore_Crime")
location = geolocator.reverse('39.290996, -76.621074')

In [9]:
location.address

'King Of Lexington Jewelry, 110, North Eutaw Street, Seton Hill, Baltimore, Maryland, 21201, USA'

In [10]:
# Pair up each camera's coordinates as a list of tuples.
# NOTE: despite the variable name, each tuple is ordered (latitude, longitude).
lon_lat_list = list(zip(cctv_df['latitude'], cctv_df['longitude']))
# Number of coordinate pairs built
len(lon_lat_list)

9

In [11]:
# Turn the first tuple into a plain "lat, lon" string by stripping the parentheses
str(lon_lat_list[0]).replace('(', '').replace(')', '')

'39.290996, -76.621074'

### Translate latitudes and longitudes into addresses (to derive the neighborhood)

In [12]:
def convert_to_address(row, geolocator=None) -> 'str | None':
    """Reverse-geocode one row's coordinates into an address string.

    Parameters
    ----------
    row : mapping
        Anything indexable by 'latitude' and 'longitude' (e.g. a DataFrame
        row passed by ``DataFrame.apply(..., axis=1)``).
    geolocator : optional
        Object exposing ``reverse(query)`` whose result has an ``address``
        attribute. When omitted, a ``Nominatim`` client with the
        "Baltimore_Crime" user agent is created for this call. Passing one
        shared client avoids rebuilding it for every row.

    Returns
    -------
    str or None
        The address reported by the geocoder, or ``None`` when the row has
        no usable coordinates or the geocoder finds no match.

    Notes
    -----
    Each call with real coordinates performs one lookup, and the address
    found is printed as a progress trace.
    """
    lat = row['latitude']
    lon = row['longitude']

    # Rows whose coordinates failed to parse carry NaN; there is nothing to
    # look up, and sending "nan,nan" to the service would only error out.
    if pd.isna(lat) or pd.isna(lon):
        return None

    if geolocator is None:
        geolocator = Nominatim(user_agent="Baltimore_Crime")

    input_lon_lat = str(lat) + ',' + str(lon)
    location = geolocator.reverse(input_lon_lat)

    # reverse() yields None when no address matches the coordinates.
    if location is None:
        return None

    print(location.address)
    return location.address

In [13]:
# Peek at the first (latitude, longitude) pair
sample = lon_lat_list[0]
sample

(39.290996, -76.621074)

In [14]:
# Reverse-geocode every camera row (one lookup per row) and store the result
cctv_df['address'] = cctv_df.apply(convert_to_address, axis=1)

King Of Lexington Jewelry, 110, North Eutaw Street, Seton Hill, Baltimore, Maryland, 21201, USA
University of Maryland Baltimore, West Baltimore Street, Ridgleys Delight, Baltimore, Maryland, 21223, USA
Medusa, 401, West Baltimore Street, Ridgleys Delight, Baltimore, Maryland, 21201, USA
400, West Redwood Street, Ridgleys Delight, Baltimore, Maryland, 21201, USA
The Goddess, West Lombard Street, Ridgleys Delight, Baltimore, Maryland, 21201, USA
South Eutaw Street, Ridgleys Delight, Baltimore, Maryland, 21201, USA
511, West Pratt Street, Ridgleys Delight, Baltimore, Maryland, 21201, USA
University of Maryland Baltimore, West Baltimore Street, Ridgleys Delight, Baltimore, Maryland, 21223, USA
University of Maryland Baltimore, West Baltimore Street, Ridgleys Delight, Baltimore, Maryland, 21223, USA


In [16]:
cctv_df

Unnamed: 0,cameraLocation,cameraNumber,cameraProject,Location 1,latitude,longitude,address
0,Eutaw and Lexington Market,1,Downtown,"(39.290996, -76.621073999999993)",39.290996,-76.621074,"King Of Lexington Jewelry, 110, North Eutaw St..."
1,Eutaw and Fayette,2,Downtown,"(39.29048796517317, -76.623665143680341)",39.290488,-76.623665,"University of Maryland Baltimore, West Baltimo..."
2,Eutaw and Baltimore,3,Downtown,"(39.289324000000001, -76.620985000000005)",39.289324,-76.620985,"Medusa, 401, West Baltimore Street, Ridgleys D..."
3,Eutaw and Redwood,4,Downtown,"(39.288677999999997, -76.620947999999999)",39.288678,-76.620948,"400, West Redwood Street, Ridgleys Delight, Ba..."
4,Eutaw and Lombard,5,Downtown,"(39.287528000000002, -76.620853999999994)",39.287528,-76.620854,"The Goddess, West Lombard Street, Ridgleys Del..."
5,Eutaw and Camden,6,Downtown,"(39.285245000000003, -76.620675000000006)",39.285245,-76.620675,"South Eutaw Street, Ridgleys Delight, Baltimor..."
6,Paca and Pratt,7,Downtown,"(39.286256999999999, -76.622129000000001)",39.286257,-76.622129,"511, West Pratt Street, Ridgleys Delight, Balt..."
7,Greene and Lombard,8,Downtown,"(39.287415000000003, -76.623615999999998)",39.287415,-76.623616,"University of Maryland Baltimore, West Baltimo..."
8,Greene and Baltimore,9,Downtown,"(39.289222000000002, -76.623694999999998)",39.289222,-76.623695,"University of Maryland Baltimore, West Baltimo..."


### Function to get neighborhood name from address