# Dependencies & Read Data

In [15]:
#Imports
import pandas as pd
import requests
import gmaps
import re
import numpy as np

# Import API key
from config import g_key

# Configure gmaps API key
gmaps.configure(api_key=g_key)

In [16]:
#Read the CSV & Create DF
geocoded_df = pd.read_csv("unique_restaurants_geocoded.csv")
geocoded_df.head()

Unnamed: 0,facilityId,facilityName,siteAddress,streetNumber,streetDirection,streetName,streetType,streetUnit,city,state,...,violationCode,violation,violationPoints,violationType,violationStatus,inspectionScore,location,concat_address,split_coords,google_coords
0,FA0003323,RUNZA,1743 MAIN ST,1743,,MAIN,ST,,LONGMONT,CO,...,01B,Wholesome Free of Spoilage,0,Critical,In,27,"1743 MAIN ST\r\nLONGMONT, CO 80501\r\n(40.1897...",1743 MAIN ST LONGMONT CO USA,"40.189746, -105.102332","40.1897717,-105.1026674"
1,FA0000616,FRANKS CHOP HOUSE,921 WALNUT ST,921,,WALNUT,ST,,BOULDER,CO,...,02E,Smoking Eating Drinking,0,Critical,In,20,"921 WALNUT ST\r\nBOULDER, CO 80302\r\n(40.0162...",921 WALNUT ST BOULDER CO USA,"40.016235, -105.282759","40.0165533,-105.2828453"
2,FA0004494,ROASTED TOAD BBQ THE,229 HWY 119,229,,HWY 119,,,NEDERLAND,CO,...,02C,Hands Washed As Needed,0,Critical,In,45,"229 HWY 119\r\nNEDERLAND, CO 80466",229 HWY 119 NEDERLAND CO USA,,"39.9631805,-105.5095941"
3,FA0003893,FAIR ISLE COFFEE CO @ LGMT FARM MRKT,237 COLLYER ST,237,,COLLYER,ST,,Longmont,CO,...,08B,Properly Labeled,0,Critical,In,5,"237 COLLYER ST\r\nLongmont, CO 80501\r\n(40.16...",237 COLLYER ST Longmont CO USA,"40.163078, -105.098175","40.1630767,-105.0985565"
4,FA0003472,TODS ESPRESSO CAFE,6558 LOOKOUT RD,6558,,LOOKOUT,RD,,BOULDER,CO,...,FC51,Plumbing installed; proper backflow devices,10,,Not Observed,10,"6558 LOOKOUT RD\r\nBOULDER, CO 80301\r\n(40.07...",6558 LOOKOUT RD BOULDER CO USA,"40.072805, -105.20104","40.0721707,-105.2008731"


# Data Sort/Clean

In [17]:
#Drop unnecessary Columns
clean_geocoded = geocoded_df[["facilityId", "facilityName", "concat_address","typeOfFacility", "categoryOfFacility","split_coords","google_coords"]].copy()
clean_geocoded.head()

Unnamed: 0,facilityId,facilityName,concat_address,typeOfFacility,categoryOfFacility,split_coords,google_coords
0,FA0003323,RUNZA,1743 MAIN ST LONGMONT CO USA,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,"40.189746, -105.102332","40.1897717,-105.1026674"
1,FA0000616,FRANKS CHOP HOUSE,921 WALNUT ST BOULDER CO USA,RESTAURANT MORE THAN 200 SEATS,FULL SERVICE FULL MENU,"40.016235, -105.282759","40.0165533,-105.2828453"
2,FA0004494,ROASTED TOAD BBQ THE,229 HWY 119 NEDERLAND CO USA,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,,"39.9631805,-105.5095941"
3,FA0003893,FAIR ISLE COFFEE CO @ LGMT FARM MRKT,237 COLLYER ST Longmont CO USA,SPECIAL EVENT,SPECIAL EVENT,"40.163078, -105.098175","40.1630767,-105.0985565"
4,FA0003472,TODS ESPRESSO CAFE,6558 LOOKOUT RD BOULDER CO USA,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,"40.072805, -105.20104","40.0721707,-105.2008731"


# Splitting Lat/Long from Original Dataset

In [18]:
#split lat/long from original dataset into two columns
for index, row in clean_geocoded.iterrows():
    try: 
        #split the data in the location column
        messy = clean_geocoded.loc[index, "split_coords"]
        lat = messy.split(',')[0]   
        long = messy.split(',')[1]

        #drop the lat into its own cell
        clean_geocoded.loc[index, "origin_lat"] = lat
        clean_geocoded.loc[index, "origin_long"] = long
        
    except (AttributeError):
        continue
        
clean_geocoded.head()

Unnamed: 0,facilityId,facilityName,concat_address,typeOfFacility,categoryOfFacility,split_coords,google_coords,origin_lat,origin_long
0,FA0003323,RUNZA,1743 MAIN ST LONGMONT CO USA,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,"40.189746, -105.102332","40.1897717,-105.1026674",40.189746,-105.102332
1,FA0000616,FRANKS CHOP HOUSE,921 WALNUT ST BOULDER CO USA,RESTAURANT MORE THAN 200 SEATS,FULL SERVICE FULL MENU,"40.016235, -105.282759","40.0165533,-105.2828453",40.016235,-105.282759
2,FA0004494,ROASTED TOAD BBQ THE,229 HWY 119 NEDERLAND CO USA,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,,"39.9631805,-105.5095941",,
3,FA0003893,FAIR ISLE COFFEE CO @ LGMT FARM MRKT,237 COLLYER ST Longmont CO USA,SPECIAL EVENT,SPECIAL EVENT,"40.163078, -105.098175","40.1630767,-105.0985565",40.163078,-105.098175
4,FA0003472,TODS ESPRESSO CAFE,6558 LOOKOUT RD BOULDER CO USA,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,"40.072805, -105.20104","40.0721707,-105.2008731",40.072805,-105.20104


# Splitting Lat/Long from Google Dataset

In [19]:
#split lat/long from Geoogle Geocoding dataset into two columns
for index, row in clean_geocoded.iterrows():
    try: 
        #split the data in the location column
        messy = clean_geocoded.loc[index, "google_coords"]
        lat = messy.split(',')[0]   
        long = messy.split(',')[1]

        #drop the lat into its own cell
        clean_geocoded.loc[index, "google_lat"] = lat
        clean_geocoded.loc[index, "google_long"] = long
        
    except (AttributeError):
        continue
        
clean_geocoded.head()

Unnamed: 0,facilityId,facilityName,concat_address,typeOfFacility,categoryOfFacility,split_coords,google_coords,origin_lat,origin_long,google_lat,google_long
0,FA0003323,RUNZA,1743 MAIN ST LONGMONT CO USA,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,"40.189746, -105.102332","40.1897717,-105.1026674",40.189746,-105.102332,40.1897717,-105.1026674
1,FA0000616,FRANKS CHOP HOUSE,921 WALNUT ST BOULDER CO USA,RESTAURANT MORE THAN 200 SEATS,FULL SERVICE FULL MENU,"40.016235, -105.282759","40.0165533,-105.2828453",40.016235,-105.282759,40.0165533,-105.2828453
2,FA0004494,ROASTED TOAD BBQ THE,229 HWY 119 NEDERLAND CO USA,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,,"39.9631805,-105.5095941",,,39.9631805,-105.5095941
3,FA0003893,FAIR ISLE COFFEE CO @ LGMT FARM MRKT,237 COLLYER ST Longmont CO USA,SPECIAL EVENT,SPECIAL EVENT,"40.163078, -105.098175","40.1630767,-105.0985565",40.163078,-105.098175,40.1630767,-105.0985565
4,FA0003472,TODS ESPRESSO CAFE,6558 LOOKOUT RD BOULDER CO USA,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,"40.072805, -105.20104","40.0721707,-105.2008731",40.072805,-105.20104,40.0721707,-105.2008731


# Drop combined coord columns

In [21]:
clean_geocoded.drop(['split_coords','google_coords'], inplace=True,axis=1)
clean_geocoded.head()

Unnamed: 0,facilityId,facilityName,concat_address,typeOfFacility,categoryOfFacility,origin_lat,origin_long,google_lat,google_long
0,FA0003323,RUNZA,1743 MAIN ST LONGMONT CO USA,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,40.189746,-105.102332,40.1897717,-105.1026674
1,FA0000616,FRANKS CHOP HOUSE,921 WALNUT ST BOULDER CO USA,RESTAURANT MORE THAN 200 SEATS,FULL SERVICE FULL MENU,40.016235,-105.282759,40.0165533,-105.2828453
2,FA0004494,ROASTED TOAD BBQ THE,229 HWY 119 NEDERLAND CO USA,RESTAURANT 0 TO 100 SEATS,FULL MENU LIMITED SERVICE,,,39.9631805,-105.5095941
3,FA0003893,FAIR ISLE COFFEE CO @ LGMT FARM MRKT,237 COLLYER ST Longmont CO USA,SPECIAL EVENT,SPECIAL EVENT,40.163078,-105.098175,40.1630767,-105.0985565
4,FA0003472,TODS ESPRESSO CAFE,6558 LOOKOUT RD BOULDER CO USA,LIMITED FOOD SERVICE CONVENIENCE OTHER,FAST FOOD LIMITED MENU,40.072805,-105.20104,40.0721707,-105.2008731


# Export cleaned facility data

In [23]:
# Export unique address dataset
output_data = "cleaned_faciliy_data.csv"
clean_geocoded.to_csv(output_data,header=True,index=False)