##### Dependencies
____

In [90]:
import numpy as np
import pandas as pd
import requests 
import time
import geocoder

from sqlalchemy import create_engine

##### Database connection
____

In [91]:
g_params = {
    'dbuser'   : 'csc4710',
    'dbpwd'    : 'password',
    'dbserver' : 'localhost',
    'dbport'   : 3306,
    'dbname'   : 'CSC4710'
}

disk_engine = create_engine(f'mysql+pymysql://{g_params["dbuser"]}:{g_params["dbpwd"]}@{g_params["dbserver"]}:{g_params["dbport"]}/{g_params["dbname"]}', echo=False)
disk_engine.connect().connection.ping()

##### Load Datasource
____

In [92]:
source_csv = '../resources/COBRA-YTD2017.csv'
columns = ['location', 'x', 'y']

crimes = pd.read_csv(source_csv, usecols=columns, low_memory=False, encoding='ISO-8859-1')
crimes.head(10)

Unnamed: 0,location,x,y
0,43 JESSE HILL JR DR NE,-84.38013,33.75582
1,1169 ATLANTIC DR NW,-84.39745,33.78674
2,633 PRYOR ST SW,-84.39486,33.7376
3,333 NELSON ST SW,-84.39887,33.75156
4,2348 CASCADE RD SW,-84.46522,33.72146
5,1245 GLENWOOD AVE SE,-84.3466,33.74006
6,351 CHEROKEE AVE SE,-84.37373,33.74505
7,461 PONCE DE LEON AVE NE,-84.3719,33.77303
8,437 MEMORIAL DR SE,-84.37285,33.74639
9,1053 LINAM ST SE,-84.38625,33.72579


#### Sampling
____

In [93]:
master = pd.DataFrame(crimes['location'].sample(n=100, replace=False))

master['city'] = ''
master['state'] = 'GA'
master['zipcode'] = ''
master['latitude'] = np.NaN
master['longitude'] = np.NaN

columns = { 
    'location' : 'street'
}

master.rename(columns=columns, inplace=True)

master.head(10)

Unnamed: 0,street,city,state,zipcode,latitude,longitude
1359,2175 PIEDMONT RD NE,,GA,,,
17523,306 LUCKIE ST NW,,GA,,,
22736,227 WELLINGTON ST SW,,GA,,,
10923,1125 CASCADE CIR SW @ZONE 4,,GA,,,
14108,469 FAIR ST SW,,GA,,,
21162,3210 ROSWELL RD NW,,GA,,,
12011,2871 NORMANDY DR NW,,GA,,,
23929,1275 CAROLINE ST NE @TARGET - CAROLINE,,GA,,,
23102,3393 PEACHTREE RD NE,,GA,,,
8055,2124 CHESHIRE BRIDGE RD NE,,GA,,,


In [94]:
# https://developer.mapquest.com

API_KEY='YOUR API KEY'

In [95]:
for ind in master.index:
    if np.isnan(master['latitude'][ind]) or np.isnan(master['longitude'][ind]):
        g = geocoder.mapquest(f'{master["street"][ind]}, {master["state"][ind]}', key=API_KEY)  
        
        if g.ok:
            master.loc[ind, 'latitude'] = g.json['lat'] if "lat" in g.json.keys() else np.NaN
            master.loc[ind, 'longitude'] = g.json['lng'] if "lng" in g.json.keys() else np.NaN
            master.loc[ind, 'city'] = g.json['city'] if "city" in g.json.keys() else ''
            master.loc[ind, 'zipcode'] = g.json['postal'] if "postal" in g.json.keys() else ''
        
        time.sleep(15)

In [96]:
master.head(10)

Unnamed: 0,street,city,state,zipcode,latitude,longitude
1359,2175 PIEDMONT RD NE,Atlanta,GA,30324-4128,33.817098,-84.366785
17523,306 LUCKIE ST NW,Atlanta,GA,30313-1706,33.762858,-84.395977
22736,227 WELLINGTON ST SW,Atlanta,GA,30314-2240,33.749008,-84.438294
10923,1125 CASCADE CIR SW @ZONE 4,Atlanta,GA,30311-2817,33.724948,-84.449931
14108,469 FAIR ST SW,Atlanta,GA,30313-1203,33.748725,-84.403206
21162,3210 ROSWELL RD NW,Atlanta,GA,30305-1822,33.84282,-84.379005
12011,2871 NORMANDY DR NW,Atlanta,GA,30305-2824,33.833359,-84.402408
23929,1275 CAROLINE ST NE @TARGET - CAROLINE,Caroline Park,GA,,32.5144,-84.976898
23102,3393 PEACHTREE RD NE,Atlanta,GA,30326-1109,33.8489,-84.36448
8055,2124 CHESHIRE BRIDGE RD NE,Atlanta,GA,30324-4276,33.813291,-84.354007


##### Export Dataframe to DB
____

In [97]:
master.to_sql(name='address', con=disk_engine, if_exists='append',index=False)