In [37]:
!pip install geopandas
!pip install pyshp
!pip install descartes
!pip install fiona
!pip install shapely
!pip install pyproj
!pip install "rtree>=0.8,<0.9"
!sudo apt install python3-rtree -y

import datetime
import json
import os
from io import BytesIO
from math import radians, cos, sin, asin, sqrt, atan2, pi
from zipfile import ZipFile

import pyarrow.parquet as pq
import pandas as pd
import shapefile
from shapely.geometry import shape, Point, Polygon
import rtree
import geopandas as gpd
import numpy as np
import requests

Reading package lists... Done
Building dependency tree       
Reading state information... Done
python3-rtree is already the newest version (0.8.2+ds-2).
The following packages were automatically installed and are no longer required:
  cmake-data grub-pc-bin libarchive13 libjsoncpp1
Use 'sudo apt autoremove' to remove them.
0 upgraded, 0 newly installed, 0 to remove and 27 not upgraded.


### Defining some helper functions

In [35]:
def TimeGroup(hour):
    """Map an hour of the day to a coarse time-of-day label.

    Each label covers every hour up to and including its upper bound:
    <= 7 'late night', <= 9 'morning peak', <= 18 'day',
    <= 20 'evening peak', <= 23 'night'.

    Returns:
        The label string, or None when `hour` is greater than 23.
    """
    upper_bounds = (
        (7, 'late night'),
        (9, 'morning peak'),
        (18, 'day'),
        (20, 'evening peak'),
        (23, 'night'),
    )
    # Bounds are ascending, so the first one that fits decides the label.
    for limit, label in upper_bounds:
        if hour <= limit:
            return label
    return None

def GetDistance(orilat, orilng, deslat, deslng):
    """Great-circle (haversine) distance between two points, in metres.

    Inputs:
        orilat, orilng: latitude and longitude of the origin, in degrees
        deslat, deslng: latitude and longitude of the destination, in degrees

    Returns:
        float: distance along a sphere of radius 6371e3 metres.
    """
    lon1, lat1, lon2, lat2 = map(radians, [orilng, orilat, deslng, deslat])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # For (nearly) antipodal points floating-point rounding can push `a` a hair
    # above 1, which would make asin() raise "math domain error"; clamp it.
    c = 2 * asin(min(1.0, sqrt(a)))
    r = 6371e3  # sphere radius in metres
    return c * r

def GetSubregion(long, lat, gdf):
    """Return the PLN_AREA_N value of the first row of `gdf` whose geometry contains the point.

    Inputs:
        long, lat: coordinates used to build Point(long, lat)
        gdf: frame with a `geometry` column and a `PLN_AREA_N` column

    Returns:
        The PLN_AREA_N entry of the first matching row (rows are checked in
        positional order), or 'BISHAN' when no geometry contains the point.
    """
    location = Point(long, lat)
    first_hit = next(
        (row for row in range(len(gdf)) if location.within(gdf.geometry.iloc[row])),
        None,
    )
    if first_hit is None:
        # BISHAN is deemed to have a central location in Singapore and therefore used as the 'average'
        return 'BISHAN'
    return gdf.PLN_AREA_N.iloc[first_hit]

def GetWeather(day, month, subregion, df):
    """Look up the Rainfall value for a given day, month and subregion.

    Inputs:
        day, month: compared against df.weather_day and df.weather_month
        subregion: compared against df.subregion
        df: frame with weather_day, weather_month, subregion and Rainfall columns

    Returns:
        * exactly one matching row  -> that row's Rainfall
        * no matching row           -> 0
        * more than one matching row -> mean Rainfall of ALL rows for that
          day/month, regardless of subregion
        NOTE(review): the last two branches look as if they might be swapped
        relative to the intent; behaviour is kept as-is - confirm against the
        way the training features were built.
    """
    on_date = (df.weather_day == day) & (df.weather_month == month)
    matches = df.loc[on_date & (df.subregion == subregion)]
    n_matches = len(matches)

    if n_matches == 0:
        return 0
    if n_matches == 1:
        return matches.Rainfall.iloc[0]
    # Ambiguous subregion match: fall back to the day's mean across every subregion.
    return df.loc[on_date].Rainfall.mean()

### Importing spatial and weather data

In [49]:
# Rainfall table consulted by GetWeather().
weather = pd.read_csv('may_apr_weather.csv')

# Pull the shapefile components out of the archive into memory, then close the
# archive straight away (the context manager releases the file handle).
expected_parts = ('dbf', 'prj', 'shp', 'shx')
with ZipFile('planning-area-census2010-shp.zip') as zipfile:
    filenames = [y for y in sorted(zipfile.namelist()) if y.endswith(expected_parts)]
    # Key every component by its own extension instead of trusting sort order.
    parts = {filename[-3:]: BytesIO(zipfile.read(filename)) for filename in filenames}

if len(filenames) != len(expected_parts) or set(parts) != set(expected_parts):
    raise ValueError(
        'expected exactly one file per extension %s in the archive, found %s'
        % (list(expected_parts), filenames)
    )

dbf, prj, shp, shx = (parts[ending] for ending in expected_parts)
r = shapefile.Reader(shp=shp, shx=shx, dbf=dbf)

# Build one attribute dict and one shapely geometry per shape record.
attributes, geometry = [], []
# r.fields[1:] skips the first field entry (in pyshp this is the DeletionFlag
# placeholder, not a real attribute column).
field_names = [field[0] for field in r.fields[1:]]
for row in r.shapeRecords():
    geometry.append(shape(row.shape.__geo_interface__))
    attributes.append(dict(zip(field_names, row.record)))

# The shapes are declared as EPSG:3414; reproject the whole frame to EPSG:4326
# so that the frame's CRS metadata and its geometry stay in agreement and the
# polygons can be tested against Point(longitude, latitude).
gdf = gpd.GeoDataFrame(data=attributes, geometry=geometry, crs='epsg:3414')
gdf = gdf.to_crs(epsg=4326)

### Use the following function to get a prediction for one input at a time

In [75]:
def predict(test, endpoint, timeout=30, fallback=1252):
    '''Returns a prediction for 1 data point at a time

    Builds a single feature row from `test`, POSTs it to `endpoint` as
    {"data": [row]} and returns the rounded first entry of the "result" list.

    Inputs:
        test: single json object (dict) with the keys below. The spelling,
              including "lattitude", is exactly what this function reads:
            lattitude_origin
            longitude_origin
            lattitude_destination
            longitude_destination
            timestamp
            hour_of_day
            day_of_week
        endpoint: endpoint string
        timeout: seconds to wait for the endpoint; requests raises a Timeout
            exception when it is exceeded instead of blocking forever
        fallback: value returned when the endpoint answers with a status
            other than 200

    Returns:
        int: the rounded prediction, or `fallback` on a non-200 response.

    Reads the module-level `gdf` and `weather` objects.
    '''

    # Convert the timestamp once; fromtimestamp() interprets it in the local
    # time zone of the machine running the notebook.
    # NOTE(review): confirm that zone matches the one used in the weather table.
    moment = datetime.datetime.fromtimestamp(test["timestamp"])
    day, month = moment.day, moment.month

    origin_rain = GetWeather(
        day,
        month,
        GetSubregion(test["longitude_origin"], test["lattitude_origin"], gdf),
        weather,
    )
    destination_rain = GetWeather(
        day,
        month,
        GetSubregion(test["longitude_destination"], test["lattitude_destination"], gdf),
        weather,
    )

    # Feature order matters: it is the order the row is sent to the endpoint in.
    x = [
        test["day_of_week"],
        1 if test["day_of_week"] <= 4 else 0,  # flag set when day_of_week <= 4
        test["hour_of_day"],
        TimeGroup(test["hour_of_day"]),
        origin_rain,
        destination_rain,
        test["lattitude_origin"],
        test["longitude_origin"],
        test["lattitude_destination"],
        test["longitude_destination"],
        GetDistance(test["lattitude_origin"],
                    test["longitude_origin"],
                    test["lattitude_destination"],
                    test["longitude_destination"]),
    ]

    # Convert the single-row batch to JSON format
    input_json = json.dumps({"data": [x]})

    # Set the content type for the request
    headers = {"Content-Type": "application/json"}

    # Send the request; the timeout keeps an unresponsive endpoint from hanging the notebook
    response = requests.post(endpoint, input_json, headers=headers, timeout=timeout)

    if response.status_code != 200:
        return fallback

    # The body may be a JSON document that itself contains a JSON-encoded
    # string; decode the inner layer only when it is present.
    y = response.json()
    if isinstance(y, str):
        y = json.loads(y)
    return int(round(y["result"][0], 0))

### Testing

In [79]:
endpoint = 'http://f735fbd3-2bf1-4505-99f3-ea2fac35cef9.southeastasia.azurecontainer.io/score'

# Input data should be a list of json objects.
# Each json object should have these attributes:
#     lattitude_origin
#     longitude_origin
#     lattitude_destination
#     longitude_destination
#     timestamp
#     hour_of_day
#     day_of_week

# All three sample trips share the same coordinates and day_of_week; only the
# timestamp and hour_of_day differ, so each one is built from that pair.
test_data = [
    {
        "lattitude_origin": -6.141255,
        "longitude_origin": 106.692710,
        "lattitude_destination": -6.141150,
        "longitude_destination": 106.693154,
        "timestamp": trip_timestamp,
        "hour_of_day": trip_hour,
        "day_of_week": 1,
    }
    for trip_timestamp, trip_hour in (
        (1590487113, 9),
        (1590488113, 23),
        (1590487113, 9),
    )
]

In [78]:
# Populate the predictions list, one entry per test trip, so that RMSE can be
# calculated later on.
predictions = [predict(trip, endpoint) for trip in test_data]

print(predictions)

[1289.0, 1242.0, 1289.0]