In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Download a file based on its file ID.
#
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
file_id = '1bYu4i2mQN9pJ6z-yy-rgA3QAZVrT6hwS'
downloaded = drive.CreateFile({'id': file_id})
#print('Downloaded content "{}"'.format(downloaded.GetContentString()))
# Save the Drive file locally as mrts_sample.csv; the routing cell reads it
# back under exactly this name.
downloaded.GetContentFile('mrts_sample.csv')

# NOTE(review): the share link below carries a different ID than file_id
# above -- confirm which file is the intended input.
#https://drive.google.com/file/d/1zatbOuqAN27hTgfVOgKd0AEZL3DqQX5h/view?usp=sharing


In [0]:
# Row window of the input CSV handled in this run. The routing cell slices the
# table with infile[front_index:last_index], so last_index itself is excluded.
# Both values are also embedded in the name of the result file.
front_index = 3000
last_index = 3500

In [3]:
#####################
# This script queries the OneMap routing API for travel times.
# Route types available: walk, pt (public transport), drive
# onemap2 beta: https://beta.onemap.sg/main/v2/
# docs: https://docs.onemap.sg/
#####################

import requests
import pandas as pd
import random
import time
from google.colab import files

def get_tokens(timeout=30):
    """Request a fresh public session token from the OneMap API.

    Sends a GET to the public-session endpoint, decodes the JSON body and then
    pauses for a random 0.9-4.5 s so that back-to-back calls are spaced out.

    Args:
        timeout: seconds to wait for the server before requests gives up.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The decoded JSON response. The routing loop reads its 'access_token'
        and 'expiry_timestamp' entries.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    baseurl = "https://developers.onemap.sg/publicapi/publicsessionid"
    result = requests.get(baseurl, timeout=timeout).json()
    # throttle after each token request
    time.sleep(random.uniform(0.9, 4.5))
    return result

#convert SVY21 to WGS
def SVY_WGS(x, y, timeout=30):
    """Convert one coordinate pair through the OneMap 3414to4326 endpoint.

    Args:
        x: value sent as the 'X' query parameter.
        y: value sent as the 'Y' query parameter.
        timeout: seconds to wait for the server before requests gives up, so
            a stalled connection cannot block the notebook indefinitely.

    Returns:
        The decoded JSON response of the conversion service.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    conv_url= 'https://developers.onemap.sg/commonapi/convert/3414to4326'
    payload = {'X' : x, 'Y' : y}
    return requests.get(conv_url, params = payload, timeout = timeout).json()

#build the initial token pool: one session token per slot, tok_count slots
tok_count = 20

tokens = dict()
for slot in range(tok_count):
    #get_tokens() sleeps after each request, so this loop is throttled
    tokens[slot] = get_tokens()
    print('got token ' + str(slot))

got token 0
got token 1
got token 2
got token 3
got token 4
got token 5
got token 6
got token 7
got token 8
got token 9
got token 10
got token 11
got token 12
got token 13
got token 14
got token 15
got token 16
got token 17
got token 18
got token 19


In [4]:


#####################
# Query the OneMap routing service once per origin/destination row and store
# the returned travel times in four new columns of `infile`.
#
# Relies on names created by earlier cells: front_index / last_index,
# tokens / tok_count / get_tokens, and the requests, pandas (pd), random and
# time imports.
#####################

baseurl = "https://developers.onemap.sg/publicapi/routingsvc/route?"
#specify the routing type
routeType= "pt"

#maximum number of request attempts per row before giving up on that row
max_attempts = 8
#seconds to wait for a response before an attempt counts as failed
request_timeout = 30
#value stored in every result column when the service reports an error
no_route_value = -9

#(output column, itinerary field that fills it)
result_fields = [
    ('total_pt_time', 'duration'),
    ('transit_time', 'transitTime'),
    ('walking_time', 'walkTime'),
    ('waiting_time', 'waitingTime'),
]


def _first_itinerary(response):
    """Return the first itinerary dict of a routing response, or None."""
    if not isinstance(response, dict):
        return None
    plan = response.get('plan')
    if not isinstance(plan, dict):
        return None
    itineraries = plan.get('itineraries')
    if not isinstance(itineraries, list) or len(itineraries) == 0:
        return None
    first = itineraries[0]
    return first if isinstance(first, dict) else None


#open input file
infile = pd.read_csv(r"mrts_sample.csv", sep = ";" , header = 0)

#slice to rows desired; copy so the new columns are added to an independent
#frame instead of a slice of the full table (avoids SettingWithCopy issues)
infile = infile[front_index:last_index].copy()

#new columns to save data
for column, field in result_fields:
    infile[column] = None

#make call per row
for index, row in infile.iterrows():

    #randomise token slot for each call
    ran_t = random.randint(0,tok_count-1)
    #refresh the slot if its token has expired, or at random (slot number
    #mod 10 equals a random digit) so that tokens get rotated over time.
    #int() guards against the timestamp arriving as a string.
    if (int(tokens[ran_t]['expiry_timestamp']) < int(time.time())) or (ran_t % 10 == random.randint(0,9)):
        tokens[ran_t] = get_tokens()
        print('updated token')
    #always send the token string itself, also right after a refresh
    curr_token = tokens[ran_t]['access_token']

    #INPUT SHOULD BE WGS ALRDY
    #create url call
    payload = {'start' : str(row['OY']) +','+ str(row['OX']),
               'end' : str(row['DY']) +','+ str(row['DX']),
               'routeType': routeType ,
               'token': curr_token,
               'date': '2017-11-21',
               'time': '12:00:00',
               'mode' : 'TRANSIT',
               'numItineraries': 1}

    #reset per row so a failed call can never reuse the previous row's answer
    result = None

    #try up to max_attempts times for connection error catching
    for i in range(0, max_attempts):
        try:
            result = requests.get(baseurl, params = payload, timeout = request_timeout).json()
        except (requests.exceptions.RequestException, ValueError):
            #network failure, timeout or a body that is not JSON: wait, retry
            time.sleep(random.uniform(5, 10)) # generous sleep time
            print('CALL FAILED, internet problem?')
            print('attempt ' + str(i) )
            continue
        #success: stop retrying so each row costs a single request
        break

    if result is None:
        #every attempt failed; the row keeps its None placeholders
        print(str(index) + ' gave up after ' + str(max_attempts) + ' attempts')
    elif isinstance(result, dict) and 'error' in result:
        #if route not found
        for column, field in result_fields:
            infile.at[index, column] = no_route_value
    else:
        #FOR PT: if route found, copy the first itinerary's times
        itin = _first_itinerary(result)
        if itin is not None:
            for column, field in result_fields:
                infile.at[index, column] = itin.get(field)

    print (str(index) + ' done')

    #sleep to not crash server, max is 4 call per sec
    time.sleep(random.uniform(0.5, 6))
    




3000 done
3001 done
3002 done
3003 done
3004 done
3005 done
3006 done
3007 done
3008 done
3009 done
3010 done
3011 done
3012 done
3013 done




3014 done
3015 done
3016 done
3017 done
3018 done
3019 done
3020 done
updated token
3021 done
3022 done
3023 done
3024 done
3025 done
3026 done
3027 done
updated token
3028 done
3029 done
3030 done
updated token
3031 done
3032 done
3033 done
updated token
3034 done
3035 done
3036 done
3037 done
3038 done
3039 done
3040 done
updated token
3041 done
3042 done
3043 done
updated token
3044 done
3045 done
3046 done
3047 done
3048 done
3049 done
updated token
3050 done
3051 done
updated token
3052 done
3053 done
3054 done
3055 done
3056 done
3057 done
3058 done
3059 done
3060 done
3061 done
3062 done
3063 done
3064 done
3065 done
updated token
3066 done
3067 done
3068 done
3069 done
3070 done
3071 done
3072 done
3073 done
3074 done
3075 done
3076 done
3077 done
3078 done
3079 done
3080 done
updated token
3081 done
3082 done
updated token
3083 done
3084 done
3085 done
3086 done
3087 done
3088 done
3089 done
3090 done
updated token
3091 done
3092 done
3093 done
updated token
3094 done
3095 don

3134 done
3135 done
3136 done
3137 done
3138 done
3139 done
3140 done
3141 done
updated token
3142 done
3143 done
3144 done
3145 done
3146 done
3147 done
updated token
3148 done
3149 done
3150 done
3151 done
3152 done
3153 done
3154 done
3155 done
3156 done
3157 done
3158 done
3159 done
3160 done
3161 done
3162 done
updated token
3163 done
3164 done
3165 done
updated token
3166 done
3167 done
3168 done
3169 done
3170 done
3171 done
3172 done
3173 done
3174 done
3175 done
3176 done
3177 done
3178 done
3179 done
updated token
3180 done
updated token
3181 done
3182 done
3183 done
3184 done
3185 done
3186 done
3187 done
3188 done
3189 done
3190 done
3191 done
3192 done
3193 done
3194 done
3195 done
3196 done
updated token
3197 done
3198 done
3199 done
3200 done
updated token
3201 done
3202 done
3203 done
3204 done
3205 done
3206 done
3207 done
3208 done
3209 done
3210 done
3211 done
3212 done
3213 done
3214 done
3215 done
updated token
3216 done
3217 done
3218 done
3219 done
3220 done
3221

3257 done
3258 done
3259 done
3260 done
3261 done
3262 done
3263 done
3264 done
updated token
3265 done
3266 done
3267 done
3268 done
3269 done
3270 done
3271 done
3272 done
3273 done
3274 done
3275 done
3276 done
3277 done
3278 done
3279 done
3280 done
3281 done
3282 done
3283 done
3284 done
3285 done
3286 done
updated token
3287 done
updated token
3288 done
3289 done
3290 done
3291 done
3292 done
3293 done
3294 done
3295 done
3296 done
updated token
3297 done
3298 done
updated token
3299 done
3300 done
3301 done
3302 done
updated token
3303 done
3304 done
3305 done
3306 done
3307 done
3308 done
3309 done
updated token
3310 done
3311 done
3312 done
updated token
3313 done
3314 done
3315 done
3316 done
3317 done
3318 done
3319 done
3320 done
3321 done
3322 done
3323 done
3324 done
3325 done
3326 done
updated token
3327 done
3328 done
3329 done
updated token
3330 done
3331 done
3332 done
3333 done
3334 done
3335 done
3336 done
3337 done
3338 done
3339 done
3340 done
3341 done
3342 done


3380 done
updated token
3381 done
3382 done
3383 done
3384 done
3385 done
3386 done
3387 done
3388 done
3389 done
updated token
3390 done
3391 done
3392 done
3393 done
3394 done
3395 done
3396 done
3397 done
3398 done
3399 done
3400 done
3401 done
updated token
3402 done
3403 done
3404 done
3405 done
updated token
3406 done
3407 done
3408 done
3409 done
3410 done
3411 done
3412 done
3413 done
3414 done
3415 done
3416 done
3417 done
3418 done
updated token
3419 done
3420 done
3421 done
3422 done
3423 done
3424 done
3425 done
3426 done
3427 done
3428 done
3429 done
updated token
3430 done
3431 done
3432 done
3433 done
3434 done
3435 done
3436 done
3437 done
3438 done
3439 done
updated token
3440 done
updated token
3441 done
3442 done
3443 done
3444 done
3445 done
3446 done
3447 done
3448 done
3449 done
3450 done
3451 done
3452 done
3453 done
3454 done
3455 done
3456 done
3457 done
3458 done
updated token
3459 done
3460 done
3461 done
3462 done
3463 done
3464 done
3465 done
3466 done
3467

In [0]:
#write the processed rows to a CSV named after the row window, then
#hand the file to the browser for download
of_name = "".join(["result", str(front_index), "_", str(last_index), ".csv"])
infile.to_csv(of_name, index=False)
files.download(of_name)

In [0]:
    # NOTE(review): this cell is an indented fragment with no enclosing loop in
    # the notebook, `ind_front` is not assigned in any cell shown, and the
    # target is a local Windows path. It looks like a leftover from a loop
    # body -- TODO confirm whether it should be removed or moved into the
    # routing loop.
    #checkpoint whenever index is a non-zero multiple of 1000
    if index != 0 and index % 1000 == 0:
        #save the slice infile[ind_front:index] to its own CSV
        infile[ind_front:index].to_csv(r"C:\Users\kelmanc\Desktop\datasets\MRT_PT_mine\result_"+ str(index) +".csv" , index = False)
        #update the top index for the output
        ind_front = index