In [1]:
import json
import os

import numpy as np
import pandas as pd
import requests

# Silence pandas' chained-assignment warning for the rest of the notebook.
pd.options.mode.chained_assignment = None

# Re-format the data: load the raw CSV, rename the two source columns, keep
# only the three columns listed in `new_columns`, and tag every row with a
# constant country.
new_columns = ["restaurant", "street", "address"]
df = (
    pd.read_csv(
        "restaurants.csv",
        encoding="ISO-8859-1",
        on_bad_lines="warn",  # malformed lines are reported and skipped
        header=0,
    )
    .rename(columns={"restaurant_type": "restaurant", "name": "street"})
    .loc[:, new_columns]
    .assign(country="USA")
)

In [3]:
# Format all restaurants in the same way
# Format McDonalds restaurants
# .copy() gives this cell its own frame, so the column assignments below are
# plain writes to `mcdonalds` rather than chained assignments onto a slice of
# `df` (which only ran quietly because the warning is disabled globally).
mcdonalds = df[df["restaurant"] == "McDonalds"].copy()

# With commas removed, the first two space-separated tokens of `address` are
# taken as city and state; everything after them goes to `zip`.
# NOTE(review): a multi-word city (e.g. "Fort Worth") would be split across
# city/state by this scheme — confirm against the data.
mcdonalds["address"] = mcdonalds["address"].str.replace(",", "", regex=False)
mcdonalds[["city", "state", "zip"]] = mcdonalds["address"].str.split(" ", n=2, expand=True)

# Keep only the first five characters of the remainder as the zip code.
mcdonalds["zip"] = mcdonalds["zip"].str.slice(0, 5)

# Selecting the final column order also discards the raw `address` column,
# so no separate drop is needed.
new_columns = ["restaurant", "street", "city", "state", "zip", "country"]
mcdonalds = mcdonalds[new_columns]
mcdonalds.tail()

Unnamed: 0,restaurant,street,city,state,zip,country
277,McDonalds,705 W Jefferson,Dallas,TX,75208,USA
278,McDonalds,3026 Hampton Rd,Dallas,TX,75212,USA
279,McDonalds,1415 E Illinois,Dallas,TX,75212,USA
280,McDonalds,2747 Fort Worth Ave,Dallas,TX,75211,USA
281,McDonalds,1717 W Mockingbird Ln,Dallas,TX,75038,USA


In [4]:
# Format Subway restaurants
# .copy() makes `subway` an independent frame so later column assignments in
# this cell are not chained assignments onto a filtered slice of `df`.
subway = df[df["restaurant"] == "Subway"].copy()

def parse_mcdonalds_address(address):
    """Split a whitespace-delimited address string into its components.

    Despite the name, this notebook applies the function to the Subway rows.

    Tokens are read by position from the end of the string:
      * last token      -> ignored
      * 2nd from last   -> zip code (text before its first comma)
      * 3rd from last   -> state
      * 4th from last   -> city (commas removed)
      * 5th from last   -> ignored
      * everything else -> street, re-joined with single spaces

    Args:
        address: The raw address text.

    Returns:
        A ``(street, city, state, zip_code)`` tuple. If anything goes wrong
        (non-string input, too few tokens, ...) the exception is printed and
        ``(None, None, None, None)`` is returned instead.
    """
    try:
        tokens = address.split()
        city_token, state, zip_token = tokens[-4], tokens[-3], tokens[-2]
        street = " ".join(tokens[:-5])
        city = city_token.replace(",", "")
        zip_code = zip_token.split(",")[0]
    except Exception as e:
        # Best-effort parsing: report the problem and yield an all-None row.
        print(e)
        return None, None, None, None
    return street, city, state, zip_code

# Replace the combined `street` text of every Subway row with its parsed
# street / city / state / zip parts (one pd.Series per row).
subway[["street", "city", "state", "zip"]] = subway["street"].apply(
    lambda raw_address: pd.Series(parse_mcdonalds_address(raw_address))
)

# Final column layout; this list is also reused by the Starbucks cell.
new_columns = ["restaurant", "street", "city", "state", "zip", "country"]
subway = subway.drop(columns="address")[new_columns]

In [5]:
# Format Starbucks restaurants
# .copy() makes `starbucks` an independent frame so later column assignments
# in this cell are not chained assignments onto a filtered slice of `df`.
starbucks = df[df["restaurant"] == "Starbucks"].copy()

def parse_starbucks_address(address):
    """Split a Starbucks address string into street, city, state and zip.

    The last three whitespace-separated tokens are taken as city (commas
    removed), state and zip code. The street is the text before the first
    comma of the whitespace-normalised address after its final five
    characters have been cut off.

    Args:
        address: The raw address text.

    Returns:
        A ``(street, city, state, zip_code)`` tuple. On any exception the
        error is printed and ``(None, None, None, None)`` is returned.
    """
    try:
        tokens = address.split()
        city_token, state, zip_code = tokens[-3], tokens[-2], tokens[-1]
        normalised = " ".join(tokens)
        # Cut the trailing five characters, then keep what precedes the
        # first comma (the whole remainder when there is no comma).
        street = normalised[:-5].partition(",")[0]
        city = city_token.replace(",", "")
    except Exception as e:
        # Best-effort parsing: report the problem and yield an all-None row.
        print(e)
        return None, None, None, None
    return street, city, state, zip_code
 
# Expand every Starbucks `address` into street / city / state / zip columns
# (one pd.Series per row), then drop the raw text.
starbucks[["street", "city", "state", "zip"]] = starbucks["address"].apply(
    lambda raw_address: pd.Series(parse_starbucks_address(raw_address))
)
# `new_columns` is the column layout defined in the Subway cell.
starbucks = starbucks.drop(columns="address")[new_columns]

In [6]:
# Stack the three per-chain frames and add empty (NaN) coordinate columns
# for the geocoding step to fill in.
restaurants = pd.concat([mcdonalds, subway, starbucks]).assign(
    lat=np.nan,
    long=np.nan,
)


In [7]:
# Find latitudes and longitudes with the MapQuest geocoding API.
#
# The API key is read from the MAPQUEST_API_KEY environment variable instead
# of being hardcoded in the notebook. The key that was previously committed in
# this cell should be treated as exposed and rotated.
key = os.environ.get("MAPQUEST_API_KEY")
if not key:
    raise RuntimeError(
        "Set the MAPQUEST_API_KEY environment variable before running this cell."
    )

# HTTPS keeps the key out of plaintext traffic.
geocode_url = "https://www.mapquestapi.com/geocoding/v1/address"

for i, row in restaurants.iterrows():
    # Same "street,zip,city,country" query string as before, built from the row.
    api_address = ",".join(
        str(row[column]) for column in ("street", "zip", "city", "country")
    )
    parameters = {
        "key": key,
        "location": api_address,
    }

    # A timeout stops one stalled request from hanging the notebook, and
    # raise_for_status surfaces HTTP errors (bad key, quota, ...) explicitly.
    response = requests.get(geocode_url, params=parameters, timeout=10)
    response.raise_for_status()

    try:
        lat_lng = response.json()["results"][0]["locations"][0]["latLng"]
    except (KeyError, IndexError):
        # No usable match for this address: leave lat/long as NaN and carry on
        # rather than aborting the whole run.
        print(f"No geocoding result for row {i}: {api_address}")
        continue

    restaurants.at[i, "lat"] = lat_lng["lat"]
    restaurants.at[i, "long"] = lat_lng["lng"]


In [8]:
# Persist the geocoded table; the row index is written as the first column.
restaurants.to_csv(path_or_buf="restaurants_geo.csv", index=True)