In [1]:
import ast
import os
import sys

import geocoder
import numpy as np
import pandas as pd

# Make the project root importable before pulling in the local package.
sys.path.append(os.path.dirname(os.path.abspath(os.path.abspath(''))))
from yumspeak_ml.params import *

In [2]:
# Load the batch-1 restaurant scrape and drop the columns pandas labelled
# 'Unnamed: 9' through 'Unnamed: 16', then preview the first rows transposed.
raw_b1_csv = '/Users/triciascy/code/scytricia/yumspeak/raw_data/batch1_restaurants_dataset.csv'
unnamed_columns = [
    'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12',
    'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16',
]
b1_restaurant_df = pd.read_csv(raw_b1_csv).drop(columns=unnamed_columns)
b1_restaurant_df.head().T

Unnamed: 0,0,1,2,3,4
place_id,ChIJ8Y3ZDL8R2jERZdF0_9U5XSo,ChIJuXEcL-gX2jEReUwbpfL8C4c,ChIJebVvXAAZ2jERRlzs4R2Qsm4,ChIJLSQEyKwZ2jERsJYlxkDMr1g,ChIJLTGN8D4Z2jERP4VNFXTnIpQ
name,(Fu Chan) Harummanis Muslim Food,#01-22 Wufu Hakka Noodle,116 PHỐ ĐÊM,123 ZÔ - Ẩm Thực Việt,123 Zô The Việt Kitchen
reviews,35,1,3,252,19
main_category,Malaysian restaurant,Noodle shop,Vietnamese restaurant,Restaurant,Vietnamese restaurant
categories,['Malaysian restaurant'],['Noodle shop'],['Vietnamese restaurant'],['Restaurant'],['Vietnamese restaurant']
main_rating,2.1,5.0,5.0,4.4,3.7
address,"145 Teck Whye Ln, Singapore 680145","590 Upper Thomson Rd, #01-22, Singapore 574419","116 Mackenzie Rd, Singapore 228710","12 Gopeng St, #01-45, 49-51 icon village, Sing...","18 Raffles Quay, #01-48 Lau Pa Sat, Singapore ..."
link,https://www.google.com/maps/place/(Fu+Chan)+Ha...,https://www.google.com/maps/place/%2301-22+Wuf...,https://www.google.com/maps/place/116+PH%E1%BB...,https://www.google.com/maps/place/123+Z%C3%94+...,https://www.google.com/maps/place/123+Z%C3%B4+...
review_photos,['https://lh5.googleusercontent.com/p/AF1QipNq...,[],['https://lh5.googleusercontent.com/p/AF1QipP1...,['https://lh5.googleusercontent.com/p/AF1QipNM...,['https://lh5.googleusercontent.com/p/AF1QipNI...


### Drop Duplicates

In [3]:
# Print the frame's shape, then count rows that are exact duplicates of an
# earlier row across all columns (duplicated() is called without a subset).
print(b1_restaurant_df.shape)
b1_restaurant_df.duplicated().sum()

(1356, 9)


173

In [4]:
# Two rows describe the same place when these four fields agree; keep the
# first occurrence and work on an independent copy.
dedup_keys = ['place_id', 'name', 'reviews', 'address']
b1 = b1_restaurant_df.drop_duplicates(subset=dedup_keys).copy()
b1.shape

(1134, 9)

In [5]:
# Drop rows whose main_category matches an entry of CAT_TO_REMOVE, comparing
# case-insensitively. Rows with a missing main_category never match and are kept.
cats_to_remove = [category.lower() for category in CAT_TO_REMOVE]

main_category_lower = b1['main_category'].str.lower()
mask = main_category_lower.isin(cats_to_remove)
b1_r_df = b1.loc[~mask].copy()

In [6]:
# Shape of the frame after the category filter.
b1_r_df.shape

(882, 9)

In [7]:
# Dtypes and non-null counts per column, to see which columns contain missing values.
b1_r_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 1355
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   place_id       882 non-null    object 
 1   name           882 non-null    object 
 2   reviews        882 non-null    int64  
 3   main_category  863 non-null    object 
 4   categories     863 non-null    object 
 5   main_rating    882 non-null    float64
 6   address        873 non-null    object 
 7   link           882 non-null    object 
 8   review_photos  882 non-null    object 
dtypes: float64(1), int64(1), object(7)
memory usage: 68.9+ KB


In [8]:
# Fill missing main_category values with 'unknown'.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on `b1_r_df.main_category`: that chained in-place call
# operates on an intermediate Series, emits a FutureWarning on recent pandas
# and leaves the frame unchanged once Copy-on-Write is enabled.
b1_r_df['main_category'] = b1_r_df['main_category'].fillna('unknown')
b1_r_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 1355
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   place_id       882 non-null    object 
 1   name           882 non-null    object 
 2   reviews        882 non-null    int64  
 3   main_category  882 non-null    object 
 4   categories     863 non-null    object 
 5   main_rating    882 non-null    float64
 6   address        873 non-null    object 
 7   link           882 non-null    object 
 8   review_photos  882 non-null    object 
dtypes: float64(1), int64(1), object(7)
memory usage: 68.9+ KB


In [9]:
# Fill missing categories with ['unknown'] and turn the stringified lists read
# from the CSV into real Python lists.
# ast.literal_eval only accepts Python literals, so text coming from the
# scraped file cannot execute arbitrary code the way eval() would. Values that
# are already parsed (e.g. when this cell is re-run) are passed through as-is.
b1_r_df['categories'] = (
    b1_r_df['categories']
    .fillna("['unknown']")
    .apply(lambda value: ast.literal_eval(value) if isinstance(value, str) else value)
)

In [10]:
# Re-check non-null counts after filling main_category and categories.
b1_r_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 1355
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   place_id       882 non-null    object 
 1   name           882 non-null    object 
 2   reviews        882 non-null    int64  
 3   main_category  882 non-null    object 
 4   categories     882 non-null    object 
 5   main_rating    882 non-null    float64
 6   address        873 non-null    object 
 7   link           882 non-null    object 
 8   review_photos  882 non-null    object 
dtypes: float64(1), int64(1), object(7)
memory usage: 68.9+ KB


### Generate coordinates (latitude, longitude)

In [11]:
import re

# The links in this dataset carry the place position as "!3d<lat>!4d<lng>".
# Raw string so the \d escapes reach the regex engine untouched; the decimal
# part is optional for both numbers.
_COORDINATE_PATTERN = re.compile(r'!3d(-?\d+(?:\.\d+)?)!4d(-?\d+(?:\.\d+)?)')


def get_coordinates(link):
    """Extract ``[latitude, longitude]`` from a Google Maps place link.

    Parameters
    ----------
    link : str
        URL containing a ``!3d<lat>!4d<lng>`` segment.

    Returns
    -------
    list of float
        ``[latitude, longitude]`` parsed from the link.

    Raises
    ------
    ValueError
        If the link does not contain a ``!3d<lat>!4d<lng>`` segment.
    """
    match = _COORDINATE_PATTERN.search(link)
    if match is None:
        # Fail with the offending link instead of an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(f'No "!3d<lat>!4d<lng>" coordinates found in link: {link!r}')
    return [float(match.group(1)), float(match.group(2))]

In [12]:
# Parse every link into a [latitude, longitude] pair.
link_column = b1_r_df['link']
b1_r_df['coordinates'] = link_column.apply(get_coordinates)
b1_r_df.head()

Unnamed: 0,place_id,name,reviews,main_category,categories,main_rating,address,link,review_photos,coordinates
0,ChIJ8Y3ZDL8R2jERZdF0_9U5XSo,(Fu Chan) Harummanis Muslim Food,35,Malaysian restaurant,[Malaysian restaurant],2.1,"145 Teck Whye Ln, Singapore 680145",https://www.google.com/maps/place/(Fu+Chan)+Ha...,['https://lh5.googleusercontent.com/p/AF1QipNq...,"[1.3813371, 103.7524994]"
1,ChIJuXEcL-gX2jEReUwbpfL8C4c,#01-22 Wufu Hakka Noodle,1,Noodle shop,[Noodle shop],5.0,"590 Upper Thomson Rd, #01-22, Singapore 574419",https://www.google.com/maps/place/%2301-22+Wuf...,[],"[1.3725339, 103.8290593]"
2,ChIJebVvXAAZ2jERRlzs4R2Qsm4,116 PHỐ ĐÊM,3,Vietnamese restaurant,[Vietnamese restaurant],5.0,"116 Mackenzie Rd, Singapore 228710",https://www.google.com/maps/place/116+PH%E1%BB...,['https://lh5.googleusercontent.com/p/AF1QipP1...,"[1.3066408, 103.8472818]"
3,ChIJLSQEyKwZ2jERsJYlxkDMr1g,123 ZÔ - Ẩm Thực Việt,252,Restaurant,[Restaurant],4.4,"12 Gopeng St, #01-45, 49-51 icon village, Sing...",https://www.google.com/maps/place/123+Z%C3%94+...,['https://lh5.googleusercontent.com/p/AF1QipNM...,"[1.2754032, 103.8445304]"
4,ChIJLTGN8D4Z2jERP4VNFXTnIpQ,123 Zô The Việt Kitchen,19,Vietnamese restaurant,[Vietnamese restaurant],3.7,"18 Raffles Quay, #01-48 Lau Pa Sat, Singapore ...",https://www.google.com/maps/place/123+Z%C3%B4+...,['https://lh5.googleusercontent.com/p/AF1QipNI...,"[1.2807598, 103.8504144]"


In [13]:
# Split each [latitude, longitude] pair into two scalar columns.
# The 'longtitude' spelling is kept because later cells reference it.
for position, column in enumerate(['latitude', 'longtitude']):
    b1_r_df[column] = b1_r_df['coordinates'].apply(lambda pair, i=position: pair[i])
b1_r_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 1355
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   place_id       882 non-null    object 
 1   name           882 non-null    object 
 2   reviews        882 non-null    int64  
 3   main_category  882 non-null    object 
 4   categories     882 non-null    object 
 5   main_rating    882 non-null    float64
 6   address        873 non-null    object 
 7   link           882 non-null    object 
 8   review_photos  882 non-null    object 
 9   coordinates    882 non-null    object 
 10  latitude       882 non-null    float64
 11  longtitude     882 non-null    float64
dtypes: float64(3), int64(1), object(8)
memory usage: 89.6+ KB


In [34]:
def add_lat_lng(df):
    """Add ``coordinates``, ``latitude`` and ``longtitude`` columns parsed from ``link``.

    The frame is modified in place and the same object is returned.
    ``coordinates`` holds the ``[latitude, longitude]`` pair produced by
    ``get_coordinates``; the other two columns hold its first and second item.
    The ``longtitude`` spelling is part of the column contract.
    """
    df['coordinates'] = df['link'].apply(get_coordinates)
    for position, column in enumerate(('latitude', 'longtitude')):
        # Bind the position as a default so each lambda picks its own item.
        df[column] = df['coordinates'].apply(lambda pair, i=position: pair[i])
    return df

In [35]:
# add_lat_lng writes its columns onto the frame it receives and returns that
# same frame, so test_b1 refers to the same object as b1_r_df here.
test_b1 = add_lat_lng(b1_r_df)
test_b1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 882 entries, 0 to 1355
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   place_id       882 non-null    object 
 1   name           882 non-null    object 
 2   reviews        882 non-null    int64  
 3   main_category  882 non-null    object 
 4   categories     882 non-null    object 
 5   main_rating    882 non-null    float64
 6   address        873 non-null    object 
 7   link           882 non-null    object 
 8   review_photos  882 non-null    object 
 9   coordinates    882 non-null    object 
 10  latitude       882 non-null    float64
 11  longtitude     882 non-null    float64
dtypes: float64(3), int64(1), object(8)
memory usage: 89.6+ KB


In [36]:
# Frequency of each latitude value; used to spot values far outside the
# expected range (the output below ends with 52.159920).
test_b1['latitude'].sort_values(ascending=True).value_counts()

1.292072     4
1.291135     3
1.292811     2
1.304580     2
1.321491     2
            ..
1.291109     1
1.291111     1
1.291333     1
1.291374     1
52.159920    1
Name: latitude, Length: 863, dtype: int64

In [37]:
# Keep only places inside the Singapore bounding box (inclusive bounds):
#   latitude   1.129 .. 1.493
#   longitude  103.557 .. 104.131
# Rows with a missing latitude/longitude fail the test and are dropped too.
inside_lat = test_b1['latitude'].between(left=1.129, right=1.493)
inside_lng = test_b1['longtitude'].between(left=103.557, right=104.131)
test_b1 = test_b1[inside_lat & inside_lng].reset_index(drop=True)
print(test_b1.shape)
test_b1.head()

(867, 12)


Unnamed: 0,place_id,name,reviews,main_category,categories,main_rating,address,link,review_photos,coordinates,latitude,longtitude
0,ChIJ8Y3ZDL8R2jERZdF0_9U5XSo,(Fu Chan) Harummanis Muslim Food,35,Malaysian restaurant,[Malaysian restaurant],2.1,"145 Teck Whye Ln, Singapore 680145",https://www.google.com/maps/place/(Fu+Chan)+Ha...,['https://lh5.googleusercontent.com/p/AF1QipNq...,"[1.3813371, 103.7524994]",1.381337,103.752499
1,ChIJuXEcL-gX2jEReUwbpfL8C4c,#01-22 Wufu Hakka Noodle,1,Noodle shop,[Noodle shop],5.0,"590 Upper Thomson Rd, #01-22, Singapore 574419",https://www.google.com/maps/place/%2301-22+Wuf...,[],"[1.3725339, 103.8290593]",1.372534,103.829059
2,ChIJebVvXAAZ2jERRlzs4R2Qsm4,116 PHỐ ĐÊM,3,Vietnamese restaurant,[Vietnamese restaurant],5.0,"116 Mackenzie Rd, Singapore 228710",https://www.google.com/maps/place/116+PH%E1%BB...,['https://lh5.googleusercontent.com/p/AF1QipP1...,"[1.3066408, 103.8472818]",1.306641,103.847282
3,ChIJLSQEyKwZ2jERsJYlxkDMr1g,123 ZÔ - Ẩm Thực Việt,252,Restaurant,[Restaurant],4.4,"12 Gopeng St, #01-45, 49-51 icon village, Sing...",https://www.google.com/maps/place/123+Z%C3%94+...,['https://lh5.googleusercontent.com/p/AF1QipNM...,"[1.2754032, 103.8445304]",1.275403,103.84453
4,ChIJLTGN8D4Z2jERP4VNFXTnIpQ,123 Zô The Việt Kitchen,19,Vietnamese restaurant,[Vietnamese restaurant],3.7,"18 Raffles Quay, #01-48 Lau Pa Sat, Singapore ...",https://www.google.com/maps/place/123+Z%C3%B4+...,['https://lh5.googleusercontent.com/p/AF1QipNI...,"[1.2807598, 103.8504144]",1.28076,103.850414


### Get postal / district code

In [18]:
# Shape of the subset of rows that have no address.
test_b1[test_b1['address'].isna()].shape

(8, 12)

In [19]:
# Number of rows with a missing coordinates value.
test_b1.coordinates.isna().sum()

0

In [20]:
# Take the coordinates of the row labelled 0 as a sample input for reverse geocoding.
coor_1 = test_b1.coordinates[0]
coor_1

[1.3813371, 103.7524994]

In [21]:
# Reverse-geocode the sample coordinates with Mapbox and keep the first two
# characters of the 'postal' field of the JSON result.
# NOTE(review): MAP_API is not defined in this notebook; presumably it comes
# from the star import of yumspeak_ml.params — confirm.
postal = geocoder.mapbox(coor_1, method='reverse', key=MAP_API).json['postal'][0:2]

In [39]:
# For rows without an address, compute district_code from the coordinates.
# The assignment is index-aligned, so rows that do have an address get NaN.
# NOTE(review): get_district_code is not defined in any visible cell — confirm
# where it comes from; otherwise this cell raises NameError on a fresh kernel.
test_b1['district_code'] = test_b1[test_b1['address'].isna()].coordinates.apply(lambda x: get_district_code(x))
test_b1

Unnamed: 0,place_id,name,reviews,main_category,categories,main_rating,address,link,review_photos,coordinates,latitude,longtitude,district_code
0,ChIJ8Y3ZDL8R2jERZdF0_9U5XSo,(Fu Chan) Harummanis Muslim Food,35,Malaysian restaurant,[Malaysian restaurant],2.1,"145 Teck Whye Ln, Singapore 680145",https://www.google.com/maps/place/(Fu+Chan)+Ha...,['https://lh5.googleusercontent.com/p/AF1QipNq...,"[1.3813371, 103.7524994]",1.381337,103.752499,
1,ChIJuXEcL-gX2jEReUwbpfL8C4c,#01-22 Wufu Hakka Noodle,1,Noodle shop,[Noodle shop],5.0,"590 Upper Thomson Rd, #01-22, Singapore 574419",https://www.google.com/maps/place/%2301-22+Wuf...,[],"[1.3725339, 103.8290593]",1.372534,103.829059,
2,ChIJebVvXAAZ2jERRlzs4R2Qsm4,116 PHỐ ĐÊM,3,Vietnamese restaurant,[Vietnamese restaurant],5.0,"116 Mackenzie Rd, Singapore 228710",https://www.google.com/maps/place/116+PH%E1%BB...,['https://lh5.googleusercontent.com/p/AF1QipP1...,"[1.3066408, 103.8472818]",1.306641,103.847282,
3,ChIJLSQEyKwZ2jERsJYlxkDMr1g,123 ZÔ - Ẩm Thực Việt,252,Restaurant,[Restaurant],4.4,"12 Gopeng St, #01-45, 49-51 icon village, Sing...",https://www.google.com/maps/place/123+Z%C3%94+...,['https://lh5.googleusercontent.com/p/AF1QipNM...,"[1.2754032, 103.8445304]",1.275403,103.844530,
4,ChIJLTGN8D4Z2jERP4VNFXTnIpQ,123 Zô The Việt Kitchen,19,Vietnamese restaurant,[Vietnamese restaurant],3.7,"18 Raffles Quay, #01-48 Lau Pa Sat, Singapore ...",https://www.google.com/maps/place/123+Z%C3%B4+...,['https://lh5.googleusercontent.com/p/AF1QipNI...,"[1.2807598, 103.8504144]",1.280760,103.850414,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,ChIJK9FuLKEZ2jERZYDXi6XtZ2k,Coleman's Cafe,54,Cafe,[Cafe],3.7,"3 Coleman St, Singapore 179805",https://www.google.com/maps/place/Coleman's+Ca...,['https://lh5.googleusercontent.com/p/AF1QipPa...,"[1.2918776, 103.8500267]",1.291878,103.850027,
863,ChIJ02kZpakZ2jERDQVTq10I8b0,Colony,2078,Restaurant,"[Restaurant, Buffet restaurant]",4.6,"7 Raffles Ave., Singapore 039799",https://www.google.com/maps/place/Colony/data=...,['https://lh5.googleusercontent.com/p/AF1QipMF...,"[1.2908195, 103.86009]",1.290820,103.860090,
864,ChIJKdihA68Z2jER07IZ_I3hEVI,Colony Capital Asia Pacific Pte Ltd,0,unknown,[unknown],0.0,,https://www.google.com/maps/place/Colony+Capit...,[],"[1.2956745, 103.8589973]",1.295675,103.858997,03
865,ChIJ0eEWGp8Z2jERU0GVV6u922I,Dutch Colony Coffee Co.,382,Coffee shop,[Coffee shop],4.2,"81 Clemenceau Ave, #01-21, Singapore 239917",https://www.google.com/maps/place/Dutch+Colony...,['https://lh5.googleusercontent.com/p/AF1QipMJ...,"[1.2922763, 103.8425461]",1.292276,103.842546,


In [23]:
# Select the first row as a one-row DataFrame (list indexer) and show its index.
row = test_b1.iloc[[0]]
row.index

Int64Index([0], dtype='int64')

In [41]:
# Postal lookup strategy: read the 6-digit postal code from the address when
# there is one; otherwise reverse-geocode the coordinates with Mapbox.
def _reverse_geocode_postal(coordinates):
    """Return the 'postal' field of the Mapbox reverse-geocoding result for ``coordinates``."""
    result = geocoder.mapbox(coordinates, method='reverse', key=MAP_API)
    return result.json['postal']


def get_postal_code(row):
    """Return a copy of ``row`` with a ``postal_code`` entry added.

    Despite its name, ``postal_code`` holds only the first two characters of
    the postal code, which is the key used by ``postal_to_district``.

    Resolution order:

    1. If ``row['address']`` is a string containing a standalone 6-digit
       number, the first such number is used and the address is left as is.
    2. Otherwise (address missing, or no 6-digit number in it) the
       coordinates are reverse-geocoded with Mapbox. On success
       ``row['address']`` is replaced by the postal string that was returned.
    3. If the lookup fails for any reason, an ``Error: <address>`` line is
       printed, ``postal_code`` is set to ``None`` and the address is left
       untouched.

    Parameters
    ----------
    row : mapping with ``copy()`` (e.g. a pandas Series from ``DataFrame.apply(axis=1)``)
        Must provide ``'address'`` and ``'coordinates'``.

    Returns
    -------
    Same type as ``row``
        Always a row, never a bare scalar, so the result of
        ``DataFrame.apply(get_postal_code, axis=1)`` stays a DataFrame.
    """
    row = row.copy()  # do not modify the caller's object
    address = row['address']

    if isinstance(address, str):
        match = re.search(r'\b\d{6}\b', address)
        if match is not None:
            row['postal_code'] = match.group(0)[:2]
            return row

    try:
        postal = _reverse_geocode_postal(row['coordinates'])
        # Slice before assigning anything so a bad response leaves the row intact.
        sector = postal[:2]
    except Exception:
        # Best effort: report the row and carry on with the remaining rows.
        print(f"Error: {row['address']}")
        row['postal_code'] = None
        return row

    row['address'] = postal
    row['postal_code'] = sector
    return row

In [52]:
# Run get_postal_code on every row (axis=1). Rows that cannot be resolved from
# the address text make one Mapbox reverse-geocoding call each.
postal_b1 = test_b1.apply(get_postal_code, axis=1)
postal_b1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 867 entries, 0 to 866
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   place_id       867 non-null    object 
 1   name           867 non-null    object 
 2   reviews        867 non-null    int64  
 3   main_category  867 non-null    object 
 4   categories     867 non-null    object 
 5   main_rating    867 non-null    float64
 6   address        867 non-null    object 
 7   link           867 non-null    object 
 8   review_photos  867 non-null    object 
 9   coordinates    867 non-null    object 
 10  latitude       867 non-null    float64
 11  longtitude     867 non-null    float64
 12  postal_code    867 non-null    object 
dtypes: float64(3), int64(1), object(9)
memory usage: 88.2+ KB


In [53]:
# Number of rows still missing an address after the postal lookup.
postal_b1['address'].isna().sum()

0

In [54]:
# Number of rows for which no postal_code value was produced.
postal_b1['postal_code'].isna().sum()

0

In [55]:
# Distinct values stored in postal_code.
postal_b1.postal_code.unique()

array(['68', '57', '22', '07', '04', '32', '06', '17', '05', '18', '10',
       '19', '31', '23', '27', '24', '73', '40', '14', '56', '76', '53',
       '52', '33', '20', '54', '16', '78', '21', '03', '08', '26', '15',
       '36', '67', '47', '38', '59', '82', '60', '25', '42', '65', '81',
       '48', '01', '12', '13', '51', '34', '39', '09', '28', '79', '46',
       '58', '55', '44', '30', '43', '45', '66', '41', '11', '64', '75',
       '80'], dtype=object)

In [59]:
# Save the frame with the postal_code column as an intermediate CSV, without the index.
# NOTE(review): absolute, user-specific path — as written this only runs on the author's machine.
postal_b1.to_csv('/Users/triciascy/code/scytricia/yumspeak/name_data/postal_restaurant_b1.csv',index=False, header=True)

In [None]:
# Next step: map postal_code to district_code
# Then: map district_code to region

In [56]:
# Postal sectors (first two digits of a postal code) grouped by the district
# code they belong to. Sector '74' has no entry.
_district_sectors = {
    '01': ('01', '02', '03', '04', '05', '06'),
    '02': ('07', '08'),
    '03': ('14', '15', '16'),
    '04': ('09', '10'),
    '05': ('11', '12', '13'),
    '06': ('17',),
    '07': ('18', '19'),
    '08': ('20', '21'),
    '09': ('22', '23'),
    '10': ('24', '25', '26', '27'),
    '11': ('28', '29', '30'),
    '12': ('31', '32', '33'),
    '13': ('34', '35', '36', '37'),
    '14': ('38', '39', '40', '41'),
    '15': ('42', '43', '44', '45'),
    '16': ('46', '47', '48'),
    '17': ('49', '50', '81'),
    '18': ('51', '52'),
    '19': ('53', '54', '55', '82'),
    '20': ('56', '57'),
    '21': ('58', '59'),
    '22': ('60', '61', '62', '63', '64'),
    '23': ('65', '66', '67', '68'),
    '24': ('69', '70', '71'),
    '25': ('72', '73'),
    '26': ('77', '78'),
    '27': ('75', '76'),
    '28': ('79', '80'),
}

# Flat lookup used with Series.map: postal sector -> district code.
postal_to_district = {
    sector: district
    for district, sectors in _district_sectors.items()
    for sector in sectors
}

In [64]:
# District codes grouped by the region label used in this project.
_region_districts = {
    'City': ('01', '02', '06', '07'),
    'South': ('03', '04'),
    'West': ('05', '21', '22', '23', '24'),
    'Central': ('08', '09', '10', '11', '12'),
    'East': ('13', '14', '15', '16', '17', '18'),
    'North': ('19', '20', '25', '26', '27', '28'),
}

# Flat lookup used with Series.map: district code -> region label.
district_to_region = {
    district: region
    for region, districts in _region_districts.items()
    for district in districts
}

In [57]:
# Translate each two-character postal_code into a district code; values that
# are not keys of postal_to_district become NaN.
postal_b1['district_code'] = postal_b1['postal_code'].map(postal_to_district)
postal_b1.head()

Unnamed: 0,place_id,name,reviews,main_category,categories,main_rating,address,link,review_photos,coordinates,latitude,longtitude,postal_code,district_code
0,ChIJ8Y3ZDL8R2jERZdF0_9U5XSo,(Fu Chan) Harummanis Muslim Food,35,Malaysian restaurant,[Malaysian restaurant],2.1,"145 Teck Whye Ln, Singapore 680145",https://www.google.com/maps/place/(Fu+Chan)+Ha...,['https://lh5.googleusercontent.com/p/AF1QipNq...,"[1.3813371, 103.7524994]",1.381337,103.752499,68,23
1,ChIJuXEcL-gX2jEReUwbpfL8C4c,#01-22 Wufu Hakka Noodle,1,Noodle shop,[Noodle shop],5.0,"590 Upper Thomson Rd, #01-22, Singapore 574419",https://www.google.com/maps/place/%2301-22+Wuf...,[],"[1.3725339, 103.8290593]",1.372534,103.829059,57,20
2,ChIJebVvXAAZ2jERRlzs4R2Qsm4,116 PHỐ ĐÊM,3,Vietnamese restaurant,[Vietnamese restaurant],5.0,"116 Mackenzie Rd, Singapore 228710",https://www.google.com/maps/place/116+PH%E1%BB...,['https://lh5.googleusercontent.com/p/AF1QipP1...,"[1.3066408, 103.8472818]",1.306641,103.847282,22,9
3,ChIJLSQEyKwZ2jERsJYlxkDMr1g,123 ZÔ - Ẩm Thực Việt,252,Restaurant,[Restaurant],4.4,"12 Gopeng St, #01-45, 49-51 icon village, Sing...",https://www.google.com/maps/place/123+Z%C3%94+...,['https://lh5.googleusercontent.com/p/AF1QipNM...,"[1.2754032, 103.8445304]",1.275403,103.84453,7,2
4,ChIJLTGN8D4Z2jERP4VNFXTnIpQ,123 Zô The Việt Kitchen,19,Vietnamese restaurant,[Vietnamese restaurant],3.7,"18 Raffles Quay, #01-48 Lau Pa Sat, Singapore ...",https://www.google.com/maps/place/123+Z%C3%B4+...,['https://lh5.googleusercontent.com/p/AF1QipNI...,"[1.2807598, 103.8504144]",1.28076,103.850414,4,1


In [66]:
# Translate each district code into its region label via district_to_region.
postal_b1['region'] = postal_b1['district_code'].map(district_to_region)
postal_b1

Unnamed: 0,place_id,name,reviews,main_category,categories,main_rating,address,link,review_photos,coordinates,latitude,longtitude,postal_code,district_code,region
0,ChIJ8Y3ZDL8R2jERZdF0_9U5XSo,(Fu Chan) Harummanis Muslim Food,35,Malaysian restaurant,[Malaysian restaurant],2.1,"145 Teck Whye Ln, Singapore 680145",https://www.google.com/maps/place/(Fu+Chan)+Ha...,['https://lh5.googleusercontent.com/p/AF1QipNq...,"[1.3813371, 103.7524994]",1.381337,103.752499,68,23,West
1,ChIJuXEcL-gX2jEReUwbpfL8C4c,#01-22 Wufu Hakka Noodle,1,Noodle shop,[Noodle shop],5.0,"590 Upper Thomson Rd, #01-22, Singapore 574419",https://www.google.com/maps/place/%2301-22+Wuf...,[],"[1.3725339, 103.8290593]",1.372534,103.829059,57,20,North
2,ChIJebVvXAAZ2jERRlzs4R2Qsm4,116 PHỐ ĐÊM,3,Vietnamese restaurant,[Vietnamese restaurant],5.0,"116 Mackenzie Rd, Singapore 228710",https://www.google.com/maps/place/116+PH%E1%BB...,['https://lh5.googleusercontent.com/p/AF1QipP1...,"[1.3066408, 103.8472818]",1.306641,103.847282,22,09,Central
3,ChIJLSQEyKwZ2jERsJYlxkDMr1g,123 ZÔ - Ẩm Thực Việt,252,Restaurant,[Restaurant],4.4,"12 Gopeng St, #01-45, 49-51 icon village, Sing...",https://www.google.com/maps/place/123+Z%C3%94+...,['https://lh5.googleusercontent.com/p/AF1QipNM...,"[1.2754032, 103.8445304]",1.275403,103.844530,07,02,City
4,ChIJLTGN8D4Z2jERP4VNFXTnIpQ,123 Zô The Việt Kitchen,19,Vietnamese restaurant,[Vietnamese restaurant],3.7,"18 Raffles Quay, #01-48 Lau Pa Sat, Singapore ...",https://www.google.com/maps/place/123+Z%C3%B4+...,['https://lh5.googleusercontent.com/p/AF1QipNI...,"[1.2807598, 103.8504144]",1.280760,103.850414,04,01,City
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,ChIJK9FuLKEZ2jERZYDXi6XtZ2k,Coleman's Cafe,54,Cafe,[Cafe],3.7,"3 Coleman St, Singapore 179805",https://www.google.com/maps/place/Coleman's+Ca...,['https://lh5.googleusercontent.com/p/AF1QipPa...,"[1.2918776, 103.8500267]",1.291878,103.850027,17,06,City
863,ChIJ02kZpakZ2jERDQVTq10I8b0,Colony,2078,Restaurant,"[Restaurant, Buffet restaurant]",4.6,"7 Raffles Ave., Singapore 039799",https://www.google.com/maps/place/Colony/data=...,['https://lh5.googleusercontent.com/p/AF1QipMF...,"[1.2908195, 103.86009]",1.290820,103.860090,03,01,City
864,ChIJKdihA68Z2jER07IZ_I3hEVI,Colony Capital Asia Pacific Pte Ltd,0,unknown,[unknown],0.0,038989,https://www.google.com/maps/place/Colony+Capit...,[],"[1.2956745, 103.8589973]",1.295675,103.858997,03,01,City
865,ChIJ0eEWGp8Z2jERU0GVV6u922I,Dutch Colony Coffee Co.,382,Coffee shop,[Coffee shop],4.2,"81 Clemenceau Ave, #01-21, Singapore 239917",https://www.google.com/maps/place/Dutch+Colony...,['https://lh5.googleusercontent.com/p/AF1QipMJ...,"[1.2922763, 103.8425461]",1.292276,103.842546,23,09,Central


In [67]:
# Distinct postal_code values, for comparison against the mapping keys.
postal_b1['postal_code'].unique()

array(['68', '57', '22', '07', '04', '32', '06', '17', '05', '18', '10',
       '19', '31', '23', '27', '24', '73', '40', '14', '56', '76', '53',
       '52', '33', '20', '54', '16', '78', '21', '03', '08', '26', '15',
       '36', '67', '47', '38', '59', '82', '60', '25', '42', '65', '81',
       '48', '01', '12', '13', '51', '34', '39', '09', '28', '79', '46',
       '58', '55', '44', '30', '43', '45', '66', '41', '11', '64', '75',
       '80'], dtype=object)

In [68]:
# Distinct district codes produced by the mapping.
postal_b1.district_code.unique()

array(['23', '20', '09', '02', '01', '12', '06', '07', '04', '10', '25',
       '14', '03', '27', '19', '18', '08', '26', '13', '16', '21', '22',
       '15', '17', '05', '11', '28'], dtype=object)

In [69]:
# Distinct region labels produced by the mapping.
postal_b1.region.unique()

array(['West', 'North', 'Central', 'City', 'South', 'East'], dtype=object)

In [70]:
# Snapshot the enriched frame as final_b1, an independent copy of postal_b1.
final_b1 = postal_b1.copy()
final_b1

Unnamed: 0,place_id,name,reviews,main_category,categories,main_rating,address,link,review_photos,coordinates,latitude,longtitude,postal_code,district_code,region
0,ChIJ8Y3ZDL8R2jERZdF0_9U5XSo,(Fu Chan) Harummanis Muslim Food,35,Malaysian restaurant,[Malaysian restaurant],2.1,"145 Teck Whye Ln, Singapore 680145",https://www.google.com/maps/place/(Fu+Chan)+Ha...,['https://lh5.googleusercontent.com/p/AF1QipNq...,"[1.3813371, 103.7524994]",1.381337,103.752499,68,23,West
1,ChIJuXEcL-gX2jEReUwbpfL8C4c,#01-22 Wufu Hakka Noodle,1,Noodle shop,[Noodle shop],5.0,"590 Upper Thomson Rd, #01-22, Singapore 574419",https://www.google.com/maps/place/%2301-22+Wuf...,[],"[1.3725339, 103.8290593]",1.372534,103.829059,57,20,North
2,ChIJebVvXAAZ2jERRlzs4R2Qsm4,116 PHỐ ĐÊM,3,Vietnamese restaurant,[Vietnamese restaurant],5.0,"116 Mackenzie Rd, Singapore 228710",https://www.google.com/maps/place/116+PH%E1%BB...,['https://lh5.googleusercontent.com/p/AF1QipP1...,"[1.3066408, 103.8472818]",1.306641,103.847282,22,09,Central
3,ChIJLSQEyKwZ2jERsJYlxkDMr1g,123 ZÔ - Ẩm Thực Việt,252,Restaurant,[Restaurant],4.4,"12 Gopeng St, #01-45, 49-51 icon village, Sing...",https://www.google.com/maps/place/123+Z%C3%94+...,['https://lh5.googleusercontent.com/p/AF1QipNM...,"[1.2754032, 103.8445304]",1.275403,103.844530,07,02,City
4,ChIJLTGN8D4Z2jERP4VNFXTnIpQ,123 Zô The Việt Kitchen,19,Vietnamese restaurant,[Vietnamese restaurant],3.7,"18 Raffles Quay, #01-48 Lau Pa Sat, Singapore ...",https://www.google.com/maps/place/123+Z%C3%B4+...,['https://lh5.googleusercontent.com/p/AF1QipNI...,"[1.2807598, 103.8504144]",1.280760,103.850414,04,01,City
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,ChIJK9FuLKEZ2jERZYDXi6XtZ2k,Coleman's Cafe,54,Cafe,[Cafe],3.7,"3 Coleman St, Singapore 179805",https://www.google.com/maps/place/Coleman's+Ca...,['https://lh5.googleusercontent.com/p/AF1QipPa...,"[1.2918776, 103.8500267]",1.291878,103.850027,17,06,City
863,ChIJ02kZpakZ2jERDQVTq10I8b0,Colony,2078,Restaurant,"[Restaurant, Buffet restaurant]",4.6,"7 Raffles Ave., Singapore 039799",https://www.google.com/maps/place/Colony/data=...,['https://lh5.googleusercontent.com/p/AF1QipMF...,"[1.2908195, 103.86009]",1.290820,103.860090,03,01,City
864,ChIJKdihA68Z2jER07IZ_I3hEVI,Colony Capital Asia Pacific Pte Ltd,0,unknown,[unknown],0.0,038989,https://www.google.com/maps/place/Colony+Capit...,[],"[1.2956745, 103.8589973]",1.295675,103.858997,03,01,City
865,ChIJ0eEWGp8Z2jERU0GVV6u922I,Dutch Colony Coffee Co.,382,Coffee shop,[Coffee shop],4.2,"81 Clemenceau Ave, #01-21, Singapore 239917",https://www.google.com/maps/place/Dutch+Colony...,['https://lh5.googleusercontent.com/p/AF1QipMJ...,"[1.2922763, 103.8425461]",1.292276,103.842546,23,09,Central


In [72]:
# Write the final dataset to CSV without the index column.
# NOTE(review): absolute, user-specific path — as written this only runs on the author's machine.
final_b1.to_csv('/Users/triciascy/code/scytricia/yumspeak/name_data/final_restaurant_b1.csv', index=False, header=True)

In [74]:
# Read the saved file back as a round-trip check. postal_code and district_code
# are forced to str so values such as '07' are not parsed as numbers.
# NOTE(review): the categories column appears to come back as text
# (e.g. "['Cafe']") rather than as lists — confirm before relying on it.
test = pd.read_csv('/Users/triciascy/code/scytricia/yumspeak/name_data/final_restaurant_b1.csv', dtype={'postal_code':str, 'district_code':str})
test

Unnamed: 0,place_id,name,reviews,main_category,categories,main_rating,address,link,review_photos,coordinates,latitude,longtitude,postal_code,district_code,region
0,ChIJ8Y3ZDL8R2jERZdF0_9U5XSo,(Fu Chan) Harummanis Muslim Food,35,Malaysian restaurant,['Malaysian restaurant'],2.1,"145 Teck Whye Ln, Singapore 680145",https://www.google.com/maps/place/(Fu+Chan)+Ha...,['https://lh5.googleusercontent.com/p/AF1QipNq...,"[1.3813371, 103.7524994]",1.381337,103.752499,68,23,West
1,ChIJuXEcL-gX2jEReUwbpfL8C4c,#01-22 Wufu Hakka Noodle,1,Noodle shop,['Noodle shop'],5.0,"590 Upper Thomson Rd, #01-22, Singapore 574419",https://www.google.com/maps/place/%2301-22+Wuf...,[],"[1.3725339, 103.8290593]",1.372534,103.829059,57,20,North
2,ChIJebVvXAAZ2jERRlzs4R2Qsm4,116 PHỐ ĐÊM,3,Vietnamese restaurant,['Vietnamese restaurant'],5.0,"116 Mackenzie Rd, Singapore 228710",https://www.google.com/maps/place/116+PH%E1%BB...,['https://lh5.googleusercontent.com/p/AF1QipP1...,"[1.3066408, 103.8472818]",1.306641,103.847282,22,09,Central
3,ChIJLSQEyKwZ2jERsJYlxkDMr1g,123 ZÔ - Ẩm Thực Việt,252,Restaurant,['Restaurant'],4.4,"12 Gopeng St, #01-45, 49-51 icon village, Sing...",https://www.google.com/maps/place/123+Z%C3%94+...,['https://lh5.googleusercontent.com/p/AF1QipNM...,"[1.2754032, 103.8445304]",1.275403,103.844530,07,02,City
4,ChIJLTGN8D4Z2jERP4VNFXTnIpQ,123 Zô The Việt Kitchen,19,Vietnamese restaurant,['Vietnamese restaurant'],3.7,"18 Raffles Quay, #01-48 Lau Pa Sat, Singapore ...",https://www.google.com/maps/place/123+Z%C3%B4+...,['https://lh5.googleusercontent.com/p/AF1QipNI...,"[1.2807598, 103.8504144]",1.280760,103.850414,04,01,City
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,ChIJK9FuLKEZ2jERZYDXi6XtZ2k,Coleman's Cafe,54,Cafe,['Cafe'],3.7,"3 Coleman St, Singapore 179805",https://www.google.com/maps/place/Coleman's+Ca...,['https://lh5.googleusercontent.com/p/AF1QipPa...,"[1.2918776, 103.8500267]",1.291878,103.850027,17,06,City
863,ChIJ02kZpakZ2jERDQVTq10I8b0,Colony,2078,Restaurant,"['Restaurant', 'Buffet restaurant']",4.6,"7 Raffles Ave., Singapore 039799",https://www.google.com/maps/place/Colony/data=...,['https://lh5.googleusercontent.com/p/AF1QipMF...,"[1.2908195, 103.86009]",1.290820,103.860090,03,01,City
864,ChIJKdihA68Z2jER07IZ_I3hEVI,Colony Capital Asia Pacific Pte Ltd,0,unknown,['unknown'],0.0,038989,https://www.google.com/maps/place/Colony+Capit...,[],"[1.2956745, 103.8589973]",1.295675,103.858997,03,01,City
865,ChIJ0eEWGp8Z2jERU0GVV6u922I,Dutch Colony Coffee Co.,382,Coffee shop,['Coffee shop'],4.2,"81 Clemenceau Ave, #01-21, Singapore 239917",https://www.google.com/maps/place/Dutch+Colony...,['https://lh5.googleusercontent.com/p/AF1QipMJ...,"[1.2922763, 103.8425461]",1.292276,103.842546,23,09,Central
