In [50]:
import sys
from flask import Flask, request, jsonify
import traceback
import joblib
import numpy as np
import pandas as pd

from pipelines.DataPipeline import DataPipeline
from utils.Common import Config

import geopandas as gpd
from shapely.geometry import Polygon, Point

# Flask application object; the @app.route decorators in this cell register their handlers on it.
app = Flask(__name__)

def predict(json_, classifier):
    """Run ``classifier`` on the records in ``json_`` and build the HTTP response.

    Parameters
    ----------
    json_ : object accepted by ``pd.DataFrame``
        Decoded request body. It must yield every column named in
        ``Config.cat_attribs``, ``Config.num_attribs`` and
        ``Config.binary_columns``.
    classifier : object or None
        Estimator exposing ``predict``; ``None`` means no model is available.

    Returns
    -------
    flask.Response, str, or (flask.Response, int)
        ``{"prediction": "<list as text>"}`` on success, the plain string
        ``"No model here to use"`` when ``classifier`` is ``None``, or
        ``{"trace": <traceback text>}`` with HTTP status 500 when anything
        raises while predicting.
    """
    # Test for None explicitly; truthiness of an arbitrary estimator object is
    # not a reliable "model is loaded" signal.
    if classifier is None:
        print("Train the model first")
        return "No model here to use"

    try:
        print(classifier.__class__.__name__)
        # convert to dataframe and keep only the feature columns
        df = pd.DataFrame(json_)
        X = df[Config.cat_attribs + Config.num_attribs + Config.binary_columns]

        # pass features to the pipeline and convert them to numerical data
        X = pd.DataFrame(datapipeline.transform(X))

        # .tolist() yields native Python scalars, so str() renders "[0, 1]"
        # instead of numpy scalar reprs such as "[np.int64(0)]" (NumPy >= 2).
        prediction = np.asarray(classifier.predict(X)).tolist()
        payload = {"prediction": str(prediction)}
        print(payload)
        return jsonify(payload)
    except Exception:
        # Report the failure with an error status instead of 200. The trace
        # key is kept for existing clients.
        # NOTE(review): returning tracebacks to clients exposes internals.
        return jsonify({"trace": traceback.format_exc()}), 500


@app.route("/")
def hello_world():
    """Root endpoint: reply with a fixed greeting string."""
    greeting = "Hello World!"
    return greeting


# use decorator pattern for the route
@app.route("/predict/svc", methods=["GET", "POST"])
def predictSVC():
    """Echo the request's JSON body to stdout, then predict with ``svc_clf``."""
    payload = request.json
    print(payload)
    return predict(payload, svc_clf)


# use decorator pattern for the route
@app.route("/predict/rf", methods=["GET", "POST"])
def predictRF():
    """Hand the request's JSON body to ``predict`` together with ``rf_clf``."""
    return predict(request.json, rf_clf)


# use decorator pattern for the route
@app.route("/predict/knn", methods=["GET", "POST"])
def predictKNN():
    """Hand the request's JSON body to ``predict`` together with ``knn_clf``."""
    return predict(request.json, knn_clf)


# use decorator pattern for the route
@app.route("/predict/ada", methods=["GET", "POST"])
def predictAda():
    """Hand the request's JSON body to ``predict`` together with ``ada_clf``."""
    return predict(request.json, ada_clf)

@app.route("/predict/vote_hard", methods=["GET", "POST"])
def predictVotingHard():
    """Hand the request's JSON body to ``predict`` together with ``voting_hard_clf``."""
    return predict(request.json, voting_hard_clf)

@app.route("/predict/vote_soft", methods=["GET", "POST"])
def predictVotingSoft():
    """Hand the request's JSON body to ``predict`` together with ``voting_soft_clf``."""
    return predict(request.json, voting_soft_clf)

@app.route("/spatialquery", methods=["GET", "POST"])
def getCoordinates():
    """Return the ``raw_data`` rows whose point lies within the posted polygon.

    The request body must be a GeoJSON FeatureCollection; its first feature's
    geometry is used as the query shape. The body may arrive as a JSON object,
    as a JSON-encoded string, or as raw text without a JSON content type.

    Returns
    -------
    flask.Response or (flask.Response, int)
        A JSON array of ``{"INDEX_", "LONGITUDE", "LATITUDE"}`` records, or
        ``{"error": ...}`` with HTTP status 400 when the body is missing or
        cannot be turned into a polygon.
    """
    import json  # local import: this cell's import block does not provide it

    # Take the polygon from the request. The handler used to read a global
    # named json_ that this cell never defines, so the posted body was ignored.
    payload = request.get_json(silent=True)
    if payload is None:
        payload = request.get_data(as_text=True)
    print(payload)

    try:
        polyjson = json.loads(payload) if isinstance(payload, str) else payload
        polygon = gpd.GeoDataFrame.from_features(polyjson, crs='EPSG:4326')
        query_shape = polygon.geometry.iloc[0]
    except (ValueError, TypeError, KeyError, IndexError, AttributeError) as exc:
        return jsonify({"error": "invalid polygon GeoJSON: {}".format(exc)}), 400

    # Build all points in one vectorised call instead of one Point per row.
    points = gpd.GeoSeries(
        gpd.points_from_xy(raw_data['LONGITUDE'], raw_data['LATITUDE']),
        index=raw_data.index,
        crs='EPSG:4326',
    )
    contains = points.within(query_shape)
    result_points = raw_data.loc[contains, ['INDEX_', 'LONGITUDE', 'LATITUDE']].to_json(orient='records')
    # The body is already JSON text, so label it as such.
    return app.response_class(result_points, mimetype="application/json")

# Port: first command-line argument when it parses as an integer, otherwise
# 12345. Only the two expected failures are caught (argument missing, or not
# numeric) so unrelated errors are not silently swallowed.
try:
    port = int(sys.argv[1])  # This is for a command-line input
except (IndexError, ValueError):
    port = 12345  # If you don't provide any port the port will be set to 12345


# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files that come from a trusted source.
datapipeline = joblib.load("../models/datapipeline.pkl")
svc_clf = joblib.load("../models/best_model_svc.pkl")
rf_clf = joblib.load("../models/best_model_random_forest.pkl")
knn_clf = joblib.load("../models/best_model_knn.pkl")
ada_clf = joblib.load("../models/best_model_adaboost.pkl")
voting_hard_clf = joblib.load("../models/best_model_voting_hard.pkl")
voting_soft_clf = joblib.load("../models/best_model_voting_soft.pkl")
# Table of point records queried by the /spatialquery endpoint.
raw_data = pd.read_csv("../data/raw/KSI.csv")


print("Model loaded")
# Blocks this cell until the server is interrupted.
app.run(port=port)

Model loaded
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:12345/ (Press CTRL+C to quit)


{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-79.46321135810555, 43.72212571942046], [-79.29704314521611, 43.79601968703417], [-79.29017669013803, 43.70128006022296], [-79.46801787666021, 43.66304418751821], [-79.46321135810555, 43.72212571942046]]]}, "bbox": [-79.46801787666021, 43.66304418751821, -79.29017669013803, 43.79601968703417]}], "bbox": [-79.46801787666021, 43.66304418751821, -79.29017669013803, 43.79601968703417]}


127.0.0.1 - - [07/Apr/2023 23:48:39] "POST /spatialquery HTTP/1.1" 200 -


# Testing on KSI prediction

In [108]:
from transformers.Preprocessing import Preprocessing


svc_clf = joblib.load("../models/best_model_svc.pkl")
rf_clf = joblib.load("../models/best_model_random_forest.pkl")
knn_clf = joblib.load("../models/best_model_knn.pkl")
#json_ = [{'VEHTYPE': 'Bicycle', 'ROAD_CLASS': 'Expressway', 'LOCCOORD': '', 'DISTRICT': 'North York', 'TRAFFCTL': 'No Control', 'LIGHT': 'Daylight', 'RDSFCOND': 'Dry', 'INVTYPE': 'Motorcycle Driver', 'IMPACTYPE': 'Approaching', 'INVAGE': '70 to 74', 'YEAR': 2023, 'TIME': 1420, 'LATITUDE': 43.785132, 'LONGITUDE': -79.164089, 'DATE': '2023/04/10 05:00:00+00', 'PEDESTRIAN': '', 'CYCLIST': 'Yes', 'AUTOMOBILE': '', 'TRUCK': '', 'TRSN_CITY_VEH': '', 'PASSENGER': '', 'SPEEDING': 'Yes', 'AG_DRIV': ''}]
json_ = [{'VEHTYPE': 'Bicycle', 'ROAD_CLASS': 'Expressway', 'LOCCOORD': 'Intersection', 'DISTRICT': 'North York', 'TRAFFCTL': 'No Control', 'LIGHT': 'Daylight', 'RDSFCOND': 'Dry', 'INVTYPE': 'Motorcycle Driver', 'IMPACTYPE': 'Approaching', 'INVAGE': '70 to 74', 'YEAR': 2023, 'TIME': 14, 'LATITUDE': 43.785132, 'LONGITUDE': -79.164089, 'MONTH': 4, 'DAY': 1, 'PEDESTRIAN': 0, 'CYCLIST': 1, 'AUTOMOBILE': 0, 'TRUCK': 0, 'TRSN_CITY_VEH': 0, 'PASSENGER': 0, 'SPEEDING': 1, 'AG_DRIV': 0}]

# convert to dataframe and keep only the feature columns
df = pd.DataFrame(json_)
X = df[Config.cat_attribs + Config.num_attribs + Config.binary_columns]

# Pass the selected features to the pipeline and convert them to numerical
# data. Transforming X rather than the whole df makes this check follow the
# same path as predict(); previously the selection above was never used.
datapipeline = joblib.load("../models/datapipeline.pkl")
X = pd.DataFrame(datapipeline.transform(X))

Y_pred = svc_clf.predict(X)
print(Y_pred)

[0]


# Convert polygon to json string
Coordinate pair format: (lon, lat)

The coordinates of a polygon must form a closed ring, e.g. [(1,1),(1,2),(2,2),(2,1),(1,1)],

where (1,1) is both the start point and the end point.


In [48]:
from shapely.geometry import Polygon, mapping
import json
import geopandas as gpd

# Vertices of the polygon as (lon, lat) pairs; the first pair is repeated at
# the end so the ring is closed.
polygon_coords = [
    (-79.46321135810555, 43.72212571942046),
    (-79.29704314521611, 43.79601968703417),
    (-79.29017669013803, 43.70128006022296),
    (-79.46801787666021, 43.66304418751821),
    (-79.46321135810555, 43.72212571942046),
]
# One-element GeoSeries holding the polygon, declared and kept in EPSG:4326
polygon = gpd.GeoSeries([Polygon(polygon_coords)], crs='EPSG:4326').to_crs('EPSG:4326')

# Serialise the series to a JSON string and show it
json_str = polygon.to_json()
print(json_str)

{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-79.46321135810555, 43.72212571942046], [-79.29704314521611, 43.79601968703417], [-79.29017669013803, 43.70128006022296], [-79.46801787666021, 43.66304418751821], [-79.46321135810555, 43.72212571942046]]]}, "bbox": [-79.46801787666021, 43.66304418751821, -79.29017669013803, 43.79601968703417]}], "bbox": [-79.46801787666021, 43.66304418751821, -79.29017669013803, 43.79601968703417]}


# Testing on the Spatial Query function

In [49]:
import geopandas as gpd
import json

def getCoordinates(polygon_json=None, data=None):
    """Return, as a JSON string, the rows of ``data`` that lie within a polygon.

    Parameters
    ----------
    polygon_json : str or dict, optional
        GeoJSON FeatureCollection (text or already decoded); the first
        feature's geometry is the query shape. Defaults to the global
        ``json_`` so existing zero-argument calls keep working.
    data : pandas.DataFrame, optional
        Frame with ``INDEX_``, ``LONGITUDE`` and ``LATITUDE`` columns.
        Defaults to the global ``raw_data``.

    Returns
    -------
    str
        JSON array (``orient='records'``) of the matching rows' ``INDEX_``,
        ``LONGITUDE`` and ``LATITUDE`` values.

    Raises
    ------
    ValueError
        If the feature collection contains no features.
    """
    if polygon_json is None:
        polygon_json = json_
    if data is None:
        data = raw_data
    print(polygon_json)

    # load data from the GeoJSON payload
    polyjson = json.loads(polygon_json) if isinstance(polygon_json, str) else polygon_json
    polygon = gpd.GeoDataFrame.from_features(polyjson, crs='EPSG:4326')
    if len(polygon) == 0:
        raise ValueError("polygon_json contains no features")

    # Build all points in one vectorised call; this also removes the need for
    # shapely's Point, which this cell does not import.
    points = gpd.GeoSeries(
        gpd.points_from_xy(data['LONGITUDE'], data['LATITUDE']),
        index=data.index,
        crs='EPSG:4326',
    )
    contains = points.within(polygon.geometry.iloc[0])
    result_points = data.loc[contains, ['INDEX_', 'LONGITUDE', 'LATITUDE']].to_json(orient='records')
    return result_points

  
# Query polygon as GeoJSON text. getCoordinates() is called without arguments
# below and picks this value up through the global name json_.
json_ = '{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-79.46321135810555, 43.72212571942046], [-79.29704314521611, 43.79601968703417], [-79.29017669013803, 43.70128006022296], [-79.46801787666021, 43.66304418751821], [-79.46321135810555, 43.72212571942046]]]}, "bbox": [-79.46801787666021, 43.66304418751821, -79.29017669013803, 43.79601968703417]}], "bbox": [-79.46801787666021, 43.66304418751821, -79.29017669013803, 43.79601968703417]}'
# getCoordinates() likewise picks this frame up through the global name raw_data.
raw_data = pd.read_csv("../data/raw/KSI.csv")
# JSON text listing the rows that fall inside the polygon
points = getCoordinates()
print(points)


{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {}, "geometry": {"type": "Polygon", "coordinates": [[[-79.46321135810555, 43.72212571942046], [-79.29704314521611, 43.79601968703417], [-79.29017669013803, 43.70128006022296], [-79.46801787666021, 43.66304418751821], [-79.46321135810555, 43.72212571942046]]]}, "bbox": [-79.46801787666021, 43.66304418751821, -79.29017669013803, 43.79601968703417]}], "bbox": [-79.46801787666021, 43.66304418751821, -79.29017669013803, 43.79601968703417]}
[{"INDEX_":3389067,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389068,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389069,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389070,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389071,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389072,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389073,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389074,"LONGITUDE":-

# Spatial Query (get points within a polygon)

In [19]:
import geopandas as gpd
from shapely.geometry import Polygon, Point

# create a GeoSeries object for the polygon
# (the ring is closed: the first coordinate pair is repeated at the end)
polygon_coords = [(-79.46321135810555, 43.72212571942046), (-79.29704314521611, 43.79601968703417)
                  , (-79.29017669013803, 43.70128006022296), (-79.46801787666021, 43.66304418751821), (-79.46321135810555, 43.72212571942046)]
# Already declared as EPSG:4326, so no reprojection to the same CRS is needed.
polygon = gpd.GeoSeries([Polygon(polygon_coords)], crs='EPSG:4326')


raw_data = pd.read_csv("../data/raw/KSI.csv")
# Build every point geometry in one vectorised call instead of a row-wise apply.
points = gpd.points_from_xy(raw_data['LONGITUDE'], raw_data['LATITUDE'])
gdf = gpd.GeoDataFrame(raw_data, geometry=points, crs='EPSG:4326')
# Boolean mask: True for rows whose point lies within the polygon
contains = gdf.within(polygon.geometry.iloc[0])

print(raw_data[contains][['INDEX_','LONGITUDE','LATITUDE']].to_json(orient='records'))

[{"INDEX_":3389067,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389068,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389069,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389070,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389071,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389072,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389073,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3389074,"LONGITUDE":-79.318797,"LATITUDE":43.699595},{"INDEX_":3532897,"LONGITUDE":-79.29829,"LATITUDE":43.744545},{"INDEX_":3532898,"LONGITUDE":-79.29829,"LATITUDE":43.744545},{"INDEX_":3532899,"LONGITUDE":-79.29829,"LATITUDE":43.744545},{"INDEX_":3532900,"LONGITUDE":-79.29829,"LATITUDE":43.744545},{"INDEX_":3532901,"LONGITUDE":-79.29829,"LATITUDE":43.744545},{"INDEX_":3449528,"LONGITUDE":-79.31389,"LATITUDE":43.758145},{"INDEX_":3449529,"LONGITUDE":-79.31389,"LATITUDE":43.758145},{"INDEX_":3449530,"LONGITUDE":-79.31389,"LATITUDE":43.758145}