In [1]:
import pandas as pd
import numpy as np
import re

from math import sin, cos, sqrt, atan2, radians
import matplotlib.pyplot as plt

import googlemaps

In [2]:
def distance_between_points(lat1,lon1,lat2,lon2):
    
# approximate radius of earth in km
    R = 6378.0

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c
    return distance

In [3]:
# Read Dataset
poi1_df = pd.read_csv("Datasets/Project/Landmarks_and_places_of_interest__including_schools__theatres__health_services__sports_facilities__places_of_worship__galleries_and_museums. (1).csv")
poi2_df = pd.read_csv("Datasets/Project/EventVenuePointsExternal.csv")
bbq_df = pd.read_csv("Datasets/Project/Public_barbecues.csv")

### Clean up POI1 dataset

Section Information :
<table style="margin-left:0">
    <tr>
        <td>Date</td>
        <td>: </td>
        <td> 9 April 2020 </td>
    </tr>
     <tr>
        <td>Time Start</td>
        <td>: </td>
        <td> 10.50 PM </td>
    </tr>
    </table>

In [4]:
poi1_df.head()

Unnamed: 0,Theme,Sub Theme,Feature Name,Co-ordinates
0,Transport,Railway Station,Flemington Bridge Railway Station,"(-37.7881645889621, 144.939277838304)"
1,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),"(-37.8142591432011, 144.966638432727)"
2,Place Of Assembly,Library,The Melbourne Athenaeum Library,"(-37.8148855756416, 144.967291289941)"
3,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,"(-37.8060684577258, 144.971266479841)"
4,Place of Worship,Church,St Francis Church,"(-37.8118847831837, 144.962422614541)"


In [5]:
# Separate Co-ordinates
poi1_df['lat']  = [coord.split('(')[1].split(', ')[0] for coord in poi1_df['Co-ordinates']]
poi1_df['lon'] = [coord.split(')')[0].split(', ')[1] for coord in poi1_df['Co-ordinates']]
poi1_df.head()

Unnamed: 0,Theme,Sub Theme,Feature Name,Co-ordinates,lat,lon
0,Transport,Railway Station,Flemington Bridge Railway Station,"(-37.7881645889621, 144.939277838304)",-37.7881645889621,144.939277838304
1,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),"(-37.8142591432011, 144.966638432727)",-37.8142591432011,144.966638432727
2,Place Of Assembly,Library,The Melbourne Athenaeum Library,"(-37.8148855756416, 144.967291289941)",-37.8148855756416,144.967291289941
3,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,"(-37.8060684577258, 144.971266479841)",-37.8060684577258,144.971266479841
4,Place of Worship,Church,St Francis Church,"(-37.8118847831837, 144.962422614541)",-37.8118847831837,144.962422614541


In [8]:
set(poi1_df['Sub Theme'])

{'Aquarium',
 'Art Gallery/Museum',
 'Bridge',
 'Casino',
 'Cemetery',
 'Church',
 'Cinema',
 'Current Construction Site',
 'Current Construction Site - Commercial',
 'Department Store',
 'Dwelling (House)',
 'Film & RV Studio',
 'Fire Station',
 'Function/Conference/Exhibition Centre',
 'Further Education',
 'Government Building',
 'Gymnasium/Health Club',
 'Hostel',
 'Indoor Recreation Facility',
 'Industrial (Manufacturing)',
 'Informal Outdoor Facility (Park/Garden/Reserve)',
 'Library',
 'Major Sports & Recreation Facility',
 'Marina',
 'Medical Services',
 'Observation Tower/Wheel',
 'Office',
 'Outdoor Recreation Facility (Zoo, Golf Course)',
 'Police Station',
 'Primary Schools',
 'Private Hospital',
 'Private Sports Club/Facility',
 'Public Buildings',
 'Public Hospital',
 'Railway Station',
 'Retail',
 'Retail/Office',
 'Retail/Office/Carpark',
 'Retail/Office/Residential/Carpark',
 'Retail/Residential',
 'School - Primary and Secondary Education',
 'Secondary Schools',
 'Sto

In [9]:
# Set USE = nan
poi1_df['Use'] = 'nan'

# Leisure/ Recreation
poi1_df.loc[poi1_df['Theme'] == 'Leisure/Recreation','Use'] = 1

# From Community Use, we will only take the public buildings, exclude courts
## 1. Use = 0 for non Community Use
poi1_df.loc[(poi1_df['Theme'] == 'Community Use') & (poi1_df['Sub Theme']!= 'Public Buildings') , 'Use'] = 0

## 2. Use = 1 for other community use  excluding courts
for idx in poi1_df.loc[(poi1_df['Theme'] == 'Community Use') & (poi1_df['Use']!= 0) ].index:
    # Find court in the Feature name
    if(re.search('court', poi1_df.loc[idx]['Feature Name'].lower()) != None ):
        poi1_df.loc[idx,'Use'] = 0
    else :
        poi1_df.loc[idx,'Use'] = 1
        
# Cannot Use POI:
poi1_df.loc[poi1_df['Theme'].isin(["Education Centre",
                                    "Office",
                                    "Industrial", 
                                    "Office", 
                                    "Residential Accommodation", 
                                    "Specialist Residential Accommodation", 
                                    "Vacant Land", 
                                    "Warehouse/Store"]),'Use'] = 0

# Can use POI: 
poi1_df.loc[poi1_df['Theme'].isin(["Health Services",
                                    "Mixed Use",
                                    "Place Of Assembly",
                                    "Place of Worship",
                                    "Purpose Built",
                                    "Retail",
                                    "Transport"]),'Use'] = 1

In [10]:
poi1_final = poi1_df[poi1_df['Use']==1][['Theme','Sub Theme','Feature Name','lat','lon']]
poi1_final['lon'] = pd.to_numeric(poi1_final['lon'])
poi1_final['lat'] = pd.to_numeric(poi1_final['lat'])
# Set poi_id 
poi1_final = poi1_final.reset_index()
poi1_final['poi_id'] = [i+1 for i in poi1_final.index]
print("Number of POI from poi1:" , len(poi1_final))
poi1_final_csv = poi1_final[['poi_id','Feature Name','Theme', 'Sub Theme', 'lat','lon']]
poi1_final_csv.columns= ['poi_id','Name','Theme', 'Sub Theme', 'lat','lon']

poi1_final_csv.to_csv('Datasets/Project/Final/POI_FINAL.csv',index=False)

Number of POI from poi1: 198


### Clean up and Join Public Toilet Dataset

Section Information :
<table style="margin-left:0">
    <tr>
        <td>Date</td>
        <td>: </td>
        <td> 10 April 2020 </td>
    </tr>
     <tr>
        <td>Time Start</td>
        <td>: </td>
        <td> 8.40 PM </td>
    </tr>
     <tr>
        <td>End Start</td>
        <td>: </td>
        <td> 8.40 PM </td>
    </tr>
    </table>

In [11]:
toilet_df1 = pd.read_csv("Datasets/Project/Public_toilets.csv")
toilet_df1['lon'] = pd.to_numeric(toilet_df1['lon'])
toilet_df1['lat'] = pd.to_numeric(toilet_df1['lat'])
print(toilet_df1.head())
print(toilet_df1.describe())

                                                name female male wheelchair  \
0  Public Toilet - Toilet 140 - Queensberry Stree...     no  yes         no   
1  Public Toilet - Toilet 106 - Kings Domain Gove...    yes  yes         no   
2  Public Toilet - Queen Victoria Market (153 Vic...    yes  yes         no   
3  Public Toilet - Victoria Harbour, Shed 3 (Nort...     no  yes         no   
4  Public Toilet - Toilet 6 - Elizabeth Street (T...    yes   no         no   

            operator baby_facil        lat         lon  
0  City of Melbourne         no -37.803995  144.959091  
1  City of Melbourne         no -37.826916  144.974648  
2  City of Melbourne         no -37.806121  144.956538  
3  City of Melbourne         no -37.819796  144.937665  
4  City of Melbourne         no -37.813838  144.963097  
             lat         lon
count  74.000000   74.000000
mean  -37.810676  144.961615
std     0.014212    0.014872
min   -37.845207  144.921106
25%   -37.819286  144.955233
50%   -37

In [9]:
poi1_final

Unnamed: 0,index,Theme,Sub Theme,Feature Name,lat,lon,poi_id
0,0,Transport,Railway Station,Flemington Bridge Railway Station,-37.788165,144.939278,1
1,1,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),-37.814259,144.966638,2
2,2,Place Of Assembly,Library,The Melbourne Athenaeum Library,-37.814886,144.967291,3
3,3,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,-37.806068,144.971266,4
4,4,Place of Worship,Church,St Francis Church,-37.811885,144.962423,5
...,...,...,...,...,...,...,...
193,236,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Lincoln Square,-37.802792,144.962761,194
194,237,Health Services,Private Hospital,Epworth Freemasons Hospital,-37.810971,144.983700,195
195,239,Health Services,Medical Services,Mercy Private Hospital,-37.811897,144.984436,196
196,240,Place Of Assembly,Art Gallery/Museum,The Museum Of Australian Chinese History,-37.810769,144.969234,197


In [12]:
def get_toilets(lat,lon,poi_id):
    '''
    Given the latitude, longitude, and the respective poi_id
    output : dataframe of the nearest public toilet
    '''
    toilet_df = toilet_df1
    # Get the differences between points
    toilet_df['distance'] =np.vectorize(distance_between_points)(lat,lon,toilet_df['lat'], toilet_df['lon'])
    
    toilet_sort_df = toilet_df.sort_values(by='distance').reset_index()
    # Make sure atleast there is 1 female, male, and wheelchair accessible toilet
    show = 3
    while ((len(toilet_sort_df.head(show).loc[toilet_sort_df['female'] == 'yes']) < 1) &
          (len(toilet_sort_df.head(show).loc[toilet_sort_df['male'] == 'yes']) < 1) &
          (len(toilet_sort_df.head(show).loc[toilet_sort_df['wheelchair'] == 'yes']) <1)):
        show = show+1
        if show > 6:
            break
    toilet_sort_df['poi_id'] = poi_id
    
    return toilet_sort_df.head(show)
    


In [13]:

for i in poi1_final.index:
    res_df = get_toilets(poi1_final.loc[i]['lat'],
                         poi1_final.loc[i]['lon'],
                         poi1_final.loc[i]['poi_id'])

    if i==0:
        toilet_final_df = res_df
    else:
        toilet_final_df = toilet_final_df.append(res_df)
            
# np.vectorize(get_toilets)(poi1_final['lat'],poi1_final['lat'],poi1_final['Feature Name'])
# get_toilets(-37.8069431,144.9588825, 'Queen Victoria Market')


In [14]:
toilet_final_df.head()

Unnamed: 0,index,name,female,male,wheelchair,operator,baby_facil,lat,lon,distance,poi_id
0,35,Public Toilet - Toilet 131 - Royal Park (Flemi...,yes,yes,yes,City of Melbourne,no,-37.790174,144.943669,30.6981,1
1,50,"Public Toilet - Toilet 120 - Royal Park, Wetla...",yes,yes,yes,City of Melbourne,no,-37.783032,144.941856,36.604172,1
2,17,Public Toilet - Toilet 177 - Bellair Street (O...,yes,yes,yes,City of Melbourne,no,-37.793732,144.930352,66.893662,1
0,30,Public Toilet - Town Hall Melbourne (200 Colli...,yes,yes,yes,City of Melbourne,yes,-37.815216,144.966943,6.39973,2
1,61,Public Toilet - Toilet 46 - Bourke Street (opp...,yes,yes,yes,City of Melbourne,no,-37.813245,144.966897,6.669144,2


In [15]:
toilet_final_df.to_csv('Datasets/Project/Final/Nearest_public_toilet.csv', index=False)

In [14]:
get_toilets(-37.8069431,144.9588825, 'Queen Victoria Market')

Unnamed: 0,index,name,female,male,wheelchair,operator,baby_facil,lat,lon,distance,poi_id
0,46,Public Toilet - Queen Victoria Market - (Meat ...,yes,yes,no,City of Melbourne,no,-37.806821,144.958747,1.158336,Queen Victoria Market
1,52,Public Toilet - Queen Victoria Market (Food Co...,yes,yes,no,City of Melbourne,no,-37.80638,144.959058,3.761852,Queen Victoria Market
2,57,Public Toilet - Queen Victoria Market (465 Que...,yes,yes,yes,City of Melbourne,yes,-37.807831,144.957955,8.164046,Queen Victoria Market


In [31]:
# Save the dataset
# toilet_final_df = toilet_final_df.reset_index()
toilet_final_df['toilet_id'] = [i+1 for i in toilet_final_df.index]
toilet_final_df = toilet_final_df[['toilet_id','poi_id','name','female','male','wheelchair','lat','lon','distance']]
toilet_final_df.to_csv('Datasets/Project/Final/Nearest_public_toilet.csv', index=False)

## DONE HERE for ITERATION 1

### Fixing Double Entry

In [15]:
# Find double entry
for i in poi1_final.index:
    near_poi = [poi1_final.loc[j,'Feature Name']  for j in poi1_final.index if (distance_between_points(poi1_final.loc[i,'lat'],
                                                            poi1_final.loc[j,'lon'],
                                                            poi1_final.loc[j,'lat'],
                                                            poi1_final.loc[i,'lon']) < 6) & (i!= j )]
    
    poi_idx = [j for j in poi1_final.index if (distance_between_points(poi1_final.loc[i,'lat'],
                                                            poi1_final.loc[j,'lon'],
                                                            poi1_final.loc[j,'lat'],
                                                            poi1_final.loc[i,'lon']) < 6) & (i!= j )]
    
    
    poi1_final.loc[i,'Near'] = str(near_poi)
    poi1_final.loc[i,'poi_idx'] = str(poi_idx)
    poi1_final.loc[i,'Count'] = len(near_poi)

In [16]:
poi1_final.loc[100]

index                                                         125
Theme                                               Community Use
Sub Theme                                        Public Buildings
Feature Name                            Melbourne Theatre Company
lat                                                      -37.8239
lon                                                       144.968
poi_id                                                        101
Near            ['MTC Theatre', 'Elisabeth Murdoch Hall', 'Mel...
poi_idx                                           [101, 102, 103]
Count                                                           3
Name: 100, dtype: object

### GOOGLE MAPS

In [17]:
# gmaps = googlemaps.Client(key='AIzaSyAJA2i0GRCZV3EweKSBfMggeLuP9iZ3zhA')
# res = gmaps.reverse_geocode((-37.810769,144.969234))
# res[0]['formatted_address']

### Adding Address to Dataset
https://data.melbourne.vic.gov.au/Property/Street-addresses/a7rp-xtya

In [18]:
add_df = pd.read_csv('Datasets/Project/Address_Points.csv')

In [19]:
add_df2 = add_df[['LATITUDE','LONGITUDE','AddressPnt']]
add_df2.columns = ['lat','lon','Address']
# add_df2['lon'] = pd.to_numeric(add_df2['lon']).round(4)
# add_df2['lat'] = pd.to_numeric(add_df2['lat']).round(4)

In [20]:
def get_address(lat,lon):
    '''
    Given the latitude and longitude
    output : Address - string
    '''
    address_df = add_df2
    # Get the differences between points
    address_df['distance'] =np.vectorize(distance_between_points)(lat,lon,address_df['lat'], address_df['lon'])
    address_sort_df = address_df.sort_values(by='distance').reset_index()
    
    # Uncomment to debug, the distance between point and address
    # print(address_sort_df.loc[0]['distance'], address_sort_df.loc[0]['Address'])
    return address_sort_df.loc[0]['Address']


In [21]:
# Get the address
poi1_final['Address'] = np.vectorize(get_address)(poi1_final['lat'],poi1_final['lon'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [22]:
poi1_final

Unnamed: 0,index,Theme,Sub Theme,Feature Name,lat,lon,poi_id,Near,poi_idx,Count,Address
0,0,Transport,Railway Station,Flemington Bridge Railway Station,-37.788165,144.939278,1,[],[],0.0,68 Racecourse Road North Melbourne
1,1,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),-37.814259,144.966638,2,"['The Melbourne Athenaeum Library', 'Melbourne...","[2, 71]",2.0,236 Little Collins Street Melbourne
2,2,Place Of Assembly,Library,The Melbourne Athenaeum Library,-37.814886,144.967291,3,"['Council House 2 (CH2)', 'Collins Street Bapt...","[1, 28, 71, 138]",4.0,190 Collins Street Melbourne
3,3,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,-37.806068,144.971266,4,[],[],0.0,Carlton Gardens South Victoria Street Carlton
4,4,Place of Worship,Church,St Francis Church,-37.811885,144.962423,5,[],[],0.0,274 Elizabeth Street Melbourne
...,...,...,...,...,...,...,...,...,...,...,...
193,236,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Lincoln Square,-37.802792,144.962761,194,[],[],0.0,24 Lincoln Square South Carlton
194,237,Health Services,Private Hospital,Epworth Freemasons Hospital,-37.810971,144.983700,195,[],[],0.0,182 Clarendon Street East Melbourne
195,239,Health Services,Medical Services,Mercy Private Hospital,-37.811897,144.984436,196,['Melbourne Unitarian Church'],[20],1.0,149 Grey Street East Melbourne
196,240,Place Of Assembly,Art Gallery/Museum,The Museum Of Australian Chinese History,-37.810769,144.969234,197,"[""Her Majesty's Theatre""]",[145],1.0,20 Cohen Place Melbourne


In [23]:
poi1_final[['poi_id','Feature Name','Theme','Sub Theme','Address','lat','lon']].to_csv('Datasets/Project/Final/POI_FINAL.csv',index=False)

In [24]:
pd.DataFrame(set(poi1_final['Sub Theme'])).to_csv('subtheme.csv')
# set(poi1_final[['Sub Theme','Theme']])

In [32]:
set(poi1_final['Theme'])

{'Community Use',
 'Health Services',
 'Leisure/Recreation',
 'Mixed Use',
 'Place Of Assembly',
 'Place of Worship',
 'Purpose Built',
 'Retail',
 'Transport'}

In [33]:
set(poi1_final[poi1_final['Theme'] == 'Leisure/Recreation']['Sub Theme'])

{'Gymnasium/Health Club',
 'Indoor Recreation Facility',
 'Informal Outdoor Facility (Park/Garden/Reserve)',
 'Major Sports & Recreation Facility',
 'Observation Tower/Wheel',
 'Outdoor Recreation Facility (Zoo, Golf Course)',
 'Private Sports Club/Facility'}

In [38]:
poi1_final[poi1_final['Sub Theme']=='Film & RV Studio']

Unnamed: 0,index,Theme,Sub Theme,Feature Name,lat,lon,poi_id
81,101,Purpose Built,Film & RV Studio,Central City Studios,-37.814232,144.935655,82
82,102,Purpose Built,Film & RV Studio,Channel 7 - Melbourne Broadcast Centre,-37.815857,144.945823,83
