In [1]:
import numpy as np
import pandas as pd
from geopy.distance import geodesic
import folium
from folium.features import DivIcon
from sklearn import preprocessing

Dublin city-centre point used to centre the map representations

In [6]:
# Project scope boundaries: longitude (x) and latitude (y) extremes.
xmin, xmax = -6.295547, -6.227464
ymin, ymax = 53.322556, 53.356207

# The map centre is the midpoint of the bounding box on each axis.
dub_map_center_lat = ymin + (ymax - ymin) / 2
dub_map_center_lon = xmin + (xmax - xmin) / 2

print('Dublin City Center Latitude: {}'.format(dub_map_center_lat))
print('Dublin City Center Longitude: {}'.format(dub_map_center_lon))



Dublin City Center Latitude: 53.3393815
Dublin City Center Longitude: -6.2615055


Project Scope Boundaries

In [8]:
# Outline of the project scope as [latitude, longitude] vertices.
# The first vertex is repeated at the end so the outline is closed.
boundaries = [
    [53.356207, -6.287982],
    [53.356207, -6.235029],
    [53.3514, -6.227464],
    [53.327363, -6.227464],
    [53.322556, -6.235029],
    [53.322556, -6.287982],
    [53.327363, -6.295547],
    [53.3514, -6.295547],
    [53.356207, -6.287982],
]

### <a id="data_ac">Data Acquisition</a>

### Liffey coordinates: 5 points (P0-P4) defining 4 segments: P0-P1, P1-P2, P2-P3, P3-P4
- P0 (53.34723, -6.30832)
- P1 (53.34744, -6.29184)
- P2 (53.34526, -6.27215)
- P3 (53.34792, -6.25496)
- P4 (53.34401, -6.20390)


In [2]:
# Control points of the Liffey polyline, listed by increasing longitude.
liffey_points = {
    'Name': ['P0', 'P1', 'P2', 'P3', 'P4'],
    'Latitude': [53.34723, 53.34744, 53.34526, 53.34792, 53.34401],
    'Longitude': [-6.30832, -6.29184, -6.27215, -6.25496, -6.20390],
}

# One row per control point; column order follows the dict's key order.
liffey = pd.DataFrame(data=liffey_points, columns=list(liffey_points))

In [17]:
liffey

Unnamed: 0,Name,Latitude,Longitude
0,P0,53.34723,-6.30832
1,P1,53.34744,-6.29184
2,P2,53.34526,-6.27215
3,P3,53.34792,-6.25496
4,P4,53.34401,-6.2039


Next we create a dataframe `liffey_points` with new points that divide each segment into 10 equal parts

In [3]:
# Empty two-column frame; the interpolation cell below stores the sampled
# river points under this name (it replaces the dict of the same name above).
liffey_points = pd.DataFrame(columns=['Latitude', 'Longitude'])

In [4]:
# Densify the Liffey polyline: every segment P[i] -> P[i+1] of `liffey` is cut
# into SEGMENT_STEPS equal steps and the start of each step is kept, i.e.
# P[i] + k * (P[i+1] - P[i]) / SEGMENT_STEPS for k = 0 .. SEGMENT_STEPS - 1.
# Built in one vectorised pass so the columns are float64 (filling an empty
# frame cell by cell with `.at` leaves object-dtype columns and is slow).
# NOTE(review): as in the original loop, the very last control point of
# `liffey` is not emitted - confirm that is intended.
SEGMENT_STEPS = 10

control_points = liffey[['Latitude', 'Longitude']].to_numpy(dtype=float)
segment_starts = control_points[:-1]
segment_steps = np.diff(control_points, axis=0) / SEGMENT_STEPS

# Shape (segments, steps, 2); flattening keeps segment order, then step order.
step_index = np.arange(SEGMENT_STEPS).reshape(1, -1, 1)
sampled = segment_starts[:, None, :] + segment_steps[:, None, :] * step_index

liffey_points = pd.DataFrame(sampled.reshape(-1, 2), columns=['Latitude', 'Longitude'])

In [54]:
liffey_points.head()

Unnamed: 0,Latitude,Longitude
0,53.3472,-6.30832
1,53.3473,-6.30667
2,53.3473,-6.30502
3,53.3473,-6.30338
4,53.3473,-6.30173


In [9]:
# Base map centred on the project area, showing the sampled river points
# and the project scope outline.
map_dublin = folium.Map(
    location=[dub_map_center_lat, dub_map_center_lon],
    tiles='CartoDB dark_matter',
    zoom_start=13,
)
poly_line = []

# Every sampled river point is drawn with the same marker styling.
river_marker_style = dict(
    radius=3,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)
for point_lat, point_lng in zip(liffey_points['Latitude'], liffey_points['Longitude']):
    river_marker = folium.CircleMarker([point_lat, point_lng], **river_marker_style)
    river_marker.add_to(map_dublin)

scope_outline = folium.PolyLine(boundaries, color='darkblue', weight=1)
scope_outline.add_to(map_dublin)
map_dublin

Importing restaurants dataset 

In [11]:
# Load the in-scope restaurant list. The cells below read `rest_df`
# ('Latitude', 'Longitude', 'Name', ...), so it must be defined here for the
# notebook to run top to bottom on a fresh kernel.
# NOTE(review): path restored from the commented-out line - confirm the file
# is present relative to the notebook's working directory.
rest_df = pd.read_csv('Features_csv/Restaurant_List_scope.csv')

### <center><a href="./a.Companies.ipynb#data_ac">NEXT CHAPTER</a></center>

#### <center> <a href="./../0.Table_of_Contents_Code.ipynb">Table of Contents</a><br></center>

### <a id="feat_gen">Feature Generation</a>

Now we calculate the distance from each restaurant to every Liffey point and add the minimum distance to the restaurant dataframe

In [13]:
# For every restaurant, store the geodesic distance in metres to the nearest
# sampled river point, floored at MIN_DISTANCE_M.
# The floor is a fixed minimum distance so the weights derived from it later
# stay consistent.
MIN_DISTANCE_M = 200.0

river_coords = list(zip(liffey_points['Latitude'], liffey_points['Longitude']))
if not river_coords:
    raise ValueError('liffey_points is empty: no river points to measure against')

# One pass per restaurant: take the minimum once, after all river points have
# been measured, instead of re-sorting the partial list on every iteration.
rest_df['Distance'] = [
    max(
        MIN_DISTANCE_M,
        min(geodesic((rest_lat, rest_lng), river_point).m for river_point in river_coords),
    )
    for rest_lat, rest_lng in zip(rest_df['Latitude'], rest_df['Longitude'])
]

In [65]:
rest_df.sample(2)

Unnamed: 0,Name,Ranking,Reviews,Rating,Price,Cuisines,Address,Latitude,Longitude,Phone,Link,Distance,Distance Proximity
59,Umi Falafel,73,1008,4.5,Cheap,"['Lebanese', 'Fast food', 'Mediterranean']","Callaghan House 13 Dame Street, Dublin D02HX67...",53.324603,-6.26524,+353 1 670 6866,/Restaurant_Review-g186605-d5003310-Reviews-Um...,2344.58,Farest
229,The Brewer's Dining Hall - Guinness Storehouse,267,494,4.5,Average,"['Irish', 'European', 'Vegetarian Friendly']","St. James Gate Saint James's Gate, Dublin Dubl...",53.344112,-6.285172,+353 1 408 4800,/Restaurant_Review-g186605-d1554254-Reviews-Th...,284.904,Farest


Let's classify distances and show them on the map

In [14]:
# Bucket the distance to the river (metres) into four proximity classes.
# Conditions are evaluated in order and the first match wins, so the bands are
# <=250 'Closest', <=750 'Close', <=1500 'Far', anything else 'Farest'.
# Vectorised so it does not depend on rest_df having a 0..n-1 index.
distance_m = rest_df['Distance'].to_numpy(dtype=float)
rest_df['Distance Proximity'] = np.select(
    [distance_m <= 250, distance_m <= 750, distance_m <= 1500],
    ['Closest', 'Close', 'Far'],
    default='Farest',
)

In [15]:
# Map of the river control points, the restaurants coloured by proximity
# class, the project scope outline and the river line.
map_dublin = folium.Map(
    location=[dub_map_center_lat, dub_map_center_lon],
    tiles='CartoDB dark_matter',
    zoom_start=14,
)

# Marker outline colour for each proximity class.
proximity = {'Far':'orange', 'Close':'blue','Farest':'red', 'Closest':'green'}

# River control points: one marker each, collected as vertices of the river line.
poly_line = []
for river_lat, river_lng in zip(liffey['Latitude'], liffey['Longitude']):
    river_marker = folium.CircleMarker(
        [river_lat, river_lng],
        radius=3,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    river_marker.add_to(map_dublin)
    poly_line.append([river_lat, river_lng])

# Restaurants: popup shows the name, outline colour encodes the proximity class.
restaurant_rows = zip(
    rest_df['Latitude'],
    rest_df['Longitude'],
    rest_df['Name'],
    rest_df['Distance Proximity'],
)
for rest_lat, rest_lng, rest_name, proximity_class in restaurant_rows:
    restaurant_marker = folium.CircleMarker(
        [rest_lat, rest_lng],
        radius=2,
        popup=folium.Popup(rest_name, parse_html=True),
        color=proximity.get(proximity_class),
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    restaurant_marker.add_to(map_dublin)

folium.PolyLine(boundaries, color='darkblue', weight=1).add_to(map_dublin)
folium.PolyLine([poly_line], weight=5).add_to(map_dublin)
map_dublin

In [85]:
rest_df.describe()

Unnamed: 0,Ranking,Reviews,Rating,Latitude,Longitude,Distance
count,1188.0,1188.0,1188.0,1188.0,1188.0,1188.0
mean,867.664141,277.626263,4.141835,53.341764,-6.260744,762.503896
std,583.001783,555.012944,0.568664,0.007607,0.012098,648.587962
min,1.0,1.0,1.0,53.32258,-6.295349,200.0
25%,353.75,12.0,4.0,53.337302,-6.265706,252.670755
50%,782.5,71.0,4.0,53.343006,-6.262388,514.833512
75%,1353.75,299.25,4.5,53.347322,-6.25504,1057.797971
max,2045.0,6164.0,5.0,53.356205,-6.227568,2741.908314


Now we take the inverse of the Distance (so that a higher weight corresponds to a positive correlation, i.e. being closer to the river) and normalize it

In [86]:
# Inverse distance: a smaller distance to the river gives a larger value.
# Computed as one column operation rather than a row-by-row `.at` loop, so it
# does not depend on rest_df having a 0..n-1 index.
rest_df['Distance inv'] = 1 / rest_df['Distance']

In [90]:
# Scale the inverse distances to unit variance without centring
# (with_mean=False), giving the 'Liffey' weight column.
weight_nor = rest_df[['Distance inv']].to_numpy(dtype=float)
weight_nor = preprocessing.StandardScaler(with_mean=False, with_std=True).fit_transform(weight_nor)

# Reuse rest_df's own index so the index-on-index merge pairs each weight with
# the row it was computed from, whatever index rest_df carries.
weight_nor_df = pd.DataFrame(data=weight_nor, columns=['Liffey'], index=rest_df.index)

# Keep only the columns that are not dropped here (e.g. 'Link') plus 'Liffey'.
# Non-inplace drop, so re-running the cell always starts from the merge result.
rest_df_liffey = pd.merge(rest_df, weight_nor_df, left_index=True, right_index=True).drop(
    columns=['Name', 'Ranking', 'Reviews', 'Rating', 'Price', 'Cuisines', 'Address',
             'Latitude', 'Longitude', 'Phone', 'Distance',
             'Distance Proximity', 'Distance inv'])

In [92]:
rest_df_liffey.describe()

Unnamed: 0,Liffey
count,1188.0
mean,1.465103
std,1.000421
min,0.220071
25%,0.570445
50%,1.17206
75%,2.388153
max,3.017075


Keeping 'Link' column in order to merge with the main Dataset later on

In [93]:
rest_df_liffey.sample(5)

Unnamed: 0,Link,Liffey
849,/Restaurant_Review-g186605-d12791573-Reviews-S...,2.931228
665,/Restaurant_Review-g186605-d2068885-Reviews-Ry...,2.049117
944,/Restaurant_Review-g186605-d14292776-Reviews-C...,2.354223
1171,/Restaurant_Review-g186605-d19712415-Reviews-B...,3.017075
1128,/Restaurant_Review-g186605-d10446859-Reviews-G...,0.253091


In [96]:
# Write the river-proximity feature table to CSV without the row index.
rest_df_liffey.to_csv('Features_csv/liffey_NOR.csv', index=False)

### <center><a href="./g.Weights_features_in_range.ipynb#feat_gen">NEXT CHAPTER</a></center>

#### <center> <a href="./../0.Table_of_Contents_Code.ipynb">Table of Contents</a><br></center>