In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

## Read the ManhattanLotLocs data into the lot_df DataFrame.
## read_csv already returns a DataFrame, so its result is used directly
## instead of being passed through the DataFrame constructor again.
lot_df = pd.read_csv('ManhattanLotLocs.txt')

In [None]:
## Preview the first ten rows of the lot table
lot_df.head(10)

In [None]:
## Number of distinct values in the 'lot_id' column
len(pd.unique(lot_df['lot_id']))

In [None]:
## (rows, columns) of lot_df. Author's note: all lot_id values are unique,
## i.e. the row count here is expected to equal the distinct lot_id count.
lot_df.shape

In [None]:
## Keep the column labels of lot_df as an array and display them
lotdf_colindex = lot_df.columns.values
lotdf_colindex

In [None]:
## Number of distinct values in the 'Lot' column.
## Author's note: lot ids are composed as block_lot.
len(pd.unique(lot_df['Lot']))

In [None]:
## lot_id_df is lot_df indexed by lot_id.
## Passing the column name to set_index moves that column into the index, so
## no separate drop is needed. The previous drop('lot_id', 1) passed the axis
## positionally, which pandas >= 2.0 no longer accepts.
lot_id_df = lot_df.set_index('lot_id')
lot_id_df.head()

### Calculate the distance, in miles, from each lot to each pizza shop

In [None]:
## Load Pizza Map location data (from Ksenia).
## read_csv already returns a DataFrame, so its result is used directly.
pizza_df = pd.read_csv('pizza_locations_google.csv')

## 'Unnamed: 0' is the label pandas gives a column whose header is blank --
## presumably a row index saved with the CSV (TODO confirm). Use it as the index.
pizza_df = pizza_df.set_index('Unnamed: 0', drop=True)

pizza_df_colindex = pizza_df.columns.values

pizza_df.head()

In [None]:
## Manhattan Distance Function

## nearest listing in terms of 1) Specified Distance (user input function) 
                            ## 2) Specified Number of Pizza Shops
                            ## 3) Specified Neighborhood

In [None]:
## DataFrame: rows = lots, columns = pizza shops

In [None]:
## Creating a DataFrame to store latitude-longitude data for Manhattan Lots
lot_indexvector = list(map(str, lot_id_df.index.values))
lot_lat_vector, lot_lng_vector = lot_id_df['latitude'], lot_id_df['longitude']

## NOTE(review): the two Series are aligned against the stringified labels, so
## this presumes the lot_id labels are already strings -- confirm.
lot_latlng = pd.DataFrame(
    dict(latitude=lot_lat_vector, longitude=lot_lng_vector),
    index=lot_indexvector,
)

lot_latlng.head()

In [None]:
## Creating a DataFrame to store latitude-longitude data for Manhattan Pizza Shops
pizza_indexvector = pizza_df.index.values
pizza_lat_vector, pizza_lng_vector = pizza_df['lat'], pizza_df['lng']

pizza_latlng = pd.DataFrame(
    dict(latitude=pizza_lat_vector, longitude=pizza_lng_vector),
    index=pizza_indexvector,
)

## Re-key the table by place_id (as strings) instead of pizza_df's own index
pizza_latlng['pizza_id'] = pizza_df['place_id']
pizza_latlng['pizza_id'] = list(map(str, pizza_latlng['pizza_id']))
pizza_latlng = pizza_latlng.set_index('pizza_id')
pizza_latlng.head()

### Manhattan Distance with Haversine Formula

The Haversine formula gives the great-circle distance between two points on a sphere from their latitudes and longitudes:

a = sin²(Δφ/2) + cos φ1 ⋅ cos φ2 ⋅ sin²(Δλ/2) 

c = 2 ⋅ asin( √a )

d = R ⋅ c

where R = 6371 km (3959 miles) is Earth's mean radius, φ is latitude, and λ is longitude, with all angles expressed in radians.



In [None]:
from math import sin
from math import cos
from math import atan2
from math import sqrt
from math import asin
from math import radians


def ManhattanDist(loc_lot, loc_pizza, lot_locs=None, pizza_locs=None):
    '''Great-circle (Haversine) distance in miles between one lot and one pizza shop.

    Despite the name, this is not a taxicab (L1) distance: it is the
    distance over the sphere between the two coordinates.

    Parameters
    ----------
    loc_lot : label
        Row label of the lot in ``lot_locs``.
    loc_pizza : label
        Row label of the pizza shop in ``pizza_locs``.
    lot_locs, pizza_locs : DataFrame, optional
        Tables with 'latitude' and 'longitude' columns in decimal degrees.
        They default to the notebook-level ``lot_latlng`` and ``pizza_latlng``.

    Returns
    -------
    float
        Distance in miles (Earth radius taken as 3959 miles).

    Raises
    ------
    KeyError
        If either label is missing from its table.
    '''
    if lot_locs is None:
        lot_locs = lot_latlng
    if pizza_locs is None:
        pizza_locs = pizza_latlng

    # Convert every angle to radians once. The earlier version passed the
    # latitudes to cos() in degrees, which gave wrong distances whenever the
    # two points differed in longitude.
    lot_lat = radians(lot_locs.loc[loc_lot, 'latitude'])
    lot_lng = radians(lot_locs.loc[loc_lot, 'longitude'])
    pizza_lat = radians(pizza_locs.loc[loc_pizza, 'latitude'])
    pizza_lng = radians(pizza_locs.loc[loc_pizza, 'longitude'])

    r = 3959  # Earth's mean radius in miles

    a = (sin((pizza_lat - lot_lat) / 2)**2
         + cos(lot_lat) * cos(pizza_lat) * sin((pizza_lng - lot_lng) / 2)**2)

    # Rounding can push `a` marginally above 1, which would make asin fail.
    c = 2 * asin(sqrt(min(1.0, a)))

    return r * c
                                                                                       
## Spot-check the distance for a few hand-picked (lot, pizza shop) pairs
sample_pairs = [
    ('2_2', 'ChIJy6G-JHj2wokRiLxKme5PyBQ'),
    ('3_10', 'ChIJy6G-JHj2wokRiLxKme5PyBQ'),
    ('7_29', 'ChIJ57jXPxP2wokRRrcwDoksk18'),
    ('3_1', 'ChIJbWfIg3j2wokRJ22QKbeFRu8'),
]
for sample_lot, sample_pizza in sample_pairs:
    print(ManhattanDist(sample_lot, sample_pizza))

In [None]:
def list_distance(pizza):
    '''Map each lot label in lot_latlng (as a string) to its ManhattanDist from `pizza`.

    `pizza` is passed to ManhattanDist as the pizza shop label. Returns a dict
    with one entry per label in lot_latlng.index.
    '''
    return {
        str(lot_label): ManhattanDist(lot_label, pizza)
        for lot_label in lot_latlng.index
    }

## One dict of lot -> distance per pizza shop, in pizza_df['place_id'] order
pizza_dict_to_df = [list_distance(pizza) for pizza in pizza_df['place_id']]

## Build the table straight from the list of records: one row per pizza shop,
## one column per lot. DataFrame.append, used here before, was removed in
## pandas 2.0.
distance_df = pd.DataFrame(pizza_dict_to_df)
distance_df.head()

In [None]:
## Label each row of distance_df with its pizza shop's place_id.
## The rows were generated in pizza_df['place_id'] order, so the ids are attached
## by position (.values) rather than by index alignment.
## set_index(..., inplace=True) returns None, so its result must not be assigned
## back; the non-inplace form is used instead so distance_df stays a DataFrame.
distance_df = distance_df.set_index(
    pd.Index(pizza_df['place_id'].values, name='place_id')
)

In [None]:
## Write the distance table to 'lot_to_pizza.csv' (path relative to the working directory)
distance_df.to_csv('lot_to_pizza.csv')