In [1]:
# Standard library, numerical stack and the geodesic-distance helper from geopy.
import os
import sys

import numpy as np
import pandas as pd
from geopy import distance
import json

import tensorflow as tf

# NOTE(review): this silences every Python warning for the whole session;
# consider narrowing the filter to the specific categories that are noisy.
import warnings
warnings.filterwarnings('ignore')

# The model below is written against the TF1-style graph API
# (tf.compat.v1 placeholders and Session), so eager execution is switched
# off before any graph op is created.
tf.compat.v1.disable_eager_execution()

# Declare Project Root Directory

In [2]:
# Absolute path of the directory one level above the notebook's working directory.
root_path = os.path.abspath(os.pardir)

# Read Dataset

In [5]:
# Location of the driver dataset, relative to `root_path`.
FILE_DIR = 'datasets/kotlite_driver_dataset_KWB_with_ket.csv'

# Load the driver table, then summarise every column (numeric and non-numeric).
df = pd.read_csv(filepath_or_buffer=os.path.join(root_path, FILE_DIR))
df.describe(include='all')

Unnamed: 0,driver_id,pickuptime,start_lat,start_long,end_lat,end_long,routes,ket_start,ket_end
count,20.0,20,20.0,20.0,20.0,20.0,20,20,20
unique,,13,,,,,20,19,17
top,,2021-05-19 07:00:00,,,,,"[[-7.834021300000001, 112.5285001], [-7.835748...",SMA N 1 Batu,among tani
freq,,4,,,,,1,2,3
mean,27.75,,-7.881579,112.539215,-7.88849,112.54015,,,
std,16.814389,,0.020704,0.023684,0.023849,0.029264,,,
min,6.0,,-7.916181,112.50847,-7.952293,112.509902,,,
25%,11.75,,-7.89516,112.52674,-7.905046,112.518075,,,
50%,25.5,,-7.879073,112.53063,-7.879073,112.529459,,,
75%,43.25,,-7.867929,112.556356,-7.868989,112.556356,,,


In [6]:
# Preview a bounded number of rows instead of dumping the whole table.
df.head(10)

Unnamed: 0,driver_id,pickuptime,start_lat,start_long,end_lat,end_long,routes,ket_start,ket_end
0,51,2021-05-19 07:00:00,-7.893189,112.57662,-7.866979,112.511189,"[[-7.8927323, 112.5767376], [-7.8926213, 112.5...",pendem,among tani
1,12,2021-05-19 06:30:00,-7.865714,112.555438,-7.87844,112.527121,"[[-7.865687899999999, 112.5553653], [-7.864676...",giripurno,SMA N 1 Batu
2,29,2021-05-19 07:00:00,-7.868246,112.526869,-7.904076,112.56518,"[[-7.868244199999999, 112.5268463], [-7.868492...",sisir,pascasarjana UIN
3,9,2021-05-19 07:30:00,-7.890602,112.537934,-7.952293,112.612813,"[[-7.890659699999999, 112.5379368], [-7.890700...",pokopek,UB
4,11,2021-05-19 07:45:00,-7.916181,112.583958,-7.882215,112.511487,"[[-7.9161186, 112.5838508], [-7.91579580000000...",Dau,Museum Angkut
5,13,2021-05-19 07:15:00,-7.861497,112.50847,-7.879957,112.53115,"[[-7.862087499999999, 112.5083638], [-7.862186...",sumberejo,wonderland
6,22,2021-05-19 08:00:00,-7.834043,112.528245,-7.866979,112.511189,"[[-7.834021300000001, 112.5285001], [-7.835748...",Punten,among tani
7,52,2021-05-19 07:00:00,-7.854332,112.526352,-7.925165,112.587923,"[[-7.854327499999999, 112.5263389], [-7.853611...",Bumiaji,UMM
8,35,2021-05-19 07:45:00,-7.869237,112.509902,-7.899685,112.533016,"[[-7.8692712, 112.5099041], [-7.8692561, 112.5...",Pesanggrahan,Balijestro
9,37,2021-05-19 07:00:00,-7.879706,112.539196,-7.910102,112.559109,"[[-7.8796909, 112.5390926], [-7.8794179, 112.5...","Wukir, temas",Krematorium


# Build the function

## Route parser

In [7]:
def route_parser(data):
    """Convert one positional driver record into ``(id, waypoints)``.

    ``data`` is indexed by position only:

    * ``data[0]`` is returned unchanged as the record id,
    * ``data[2]``/``data[3]`` form the first waypoint,
    * ``data[6]`` is a JSON string whose decoded items are the intermediate
      waypoints, in order,
    * ``data[4]``/``data[5]`` form the last waypoint.

    Returns
    -------
    tuple
        ``(id, waypoints)`` where ``waypoints`` is a list starting with the
        first waypoint and finishing with the last one.
    """
    record_id = data[0]
    waypoints = [
        [data[2], data[3]],
        *json.loads(data[6]),
        [data[4], data[5]],
    ]
    return record_id, waypoints

## Recommendation System

In [10]:
class NearestNeighbor():
    """Brute-force nearest-neighbour search using Manhattan (L1) distance.

    Typical use is ``fit_transform(train, query)``. Afterwards ``values``
    holds the distances and ``indices`` the row positions in ``train`` of
    the ``k`` closest points for each query point, both flattened to 1-D in
    query order. ``result`` is the ``(values, indices)`` pair.
    """

    def __init__(self, k=1):
        """Create an unfitted model.

        Parameters
        ----------
        k : int, default 1
            Number of closest training points kept per query point.
        """
        # K value
        self.k = k

        # Every instance builds its ops in a private graph, so creating many
        # models (e.g. one per driver in a loop) does not keep growing the
        # process-wide default graph.
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.init = tf.compat.v1.global_variables_initializer()

        # Data
        self.train = None
        self.query = None

        # Graph Input
        self.xtr = None
        self.xqe = None

        # Output
        self.values = None
        self.indices = None

        self.result = self.values, self.indices

    def fit(self, train, query):
        """Store the data and declare the graph inputs.

        Parameters
        ----------
        train : sequence of equal-length numeric sequences
            Candidate points to search among.
        query : sequence of equal-length numeric sequences
            Points for which the nearest candidates are wanted.

        Raises
        ------
        ValueError
            If ``train`` or ``query`` is empty.
        """
        if len(train) == 0 or len(query) == 0:
            raise ValueError('train and query must each contain at least one point')

        self.train = train
        self.query = query

        # The feature width is taken from the first point of each input.
        with self.graph.as_default():
            self.xtr = tf.compat.v1.placeholder('float', [None, len(self.train[0])])
            self.xqe = tf.compat.v1.placeholder('float', [None, len(self.query[0])])

    def transform(self):
        """Run the search on the data given to ``fit``.

        Fills ``values``, ``indices`` and ``result`` and returns ``result``.

        Raises
        ------
        RuntimeError
            If ``fit`` has not been called yet.
        """
        if self.xtr is None or self.xqe is None:
            raise RuntimeError('fit() must be called before transform()')

        with self.graph.as_default():
            # Manhattan distance between every query point and every train point;
            # expand_dims lets the subtraction broadcast to (query, train, feature).
            l1_dist = tf.reduce_sum(tf.abs(tf.subtract(self.xtr, tf.expand_dims(self.xqe, axis=1))), axis=2)

            # Nearest Data: top_k picks the largest entries, so negate to get the
            # smallest distances and negate back afterwards.
            values, indices = tf.nn.top_k(tf.negative(l1_dist), k=self.k)
            values = tf.negative(values)

        with tf.compat.v1.Session(graph=self.graph) as sess:
            sess.run(self.init)

            self.values, self.indices = sess.run([values, indices], feed_dict={self.xtr: self.train,
                                                                               self.xqe: self.query})

        self.values = self.values.reshape(-1)
        self.indices = self.indices.reshape(-1)
        self.result = self.values, self.indices
        return self.result

    def fit_transform(self, train, query):
        """Call ``fit(train, query)`` followed by ``transform()``."""
        self.fit(train, query)
        return self.transform()

    def fit_train(self, train, query):
        """Backward-compatible alias of ``fit_transform``."""
        return self.fit_transform(train, query)

In [11]:
data = df.copy()
query = [[-7.8838611,112.5381295], [-7.8786821,112.524145]] # from MAN 1 Batu to GOR Gajah Mada

# Largest geodesic distance (km) allowed between a query point and the
# matched point on a driver's route.
MAX_MATCH_KM = 0.7

# One entry per usable driver:
# (summed Manhattan distance, id, matched pick-up point, matched drop-off point)
dist = []

for dt in data.values:
    idx, route = route_parser(dt)
    model = NearestNeighbor()
    model.fit_transform(route, query)
    pick_idx, drop_idx = model.indices[0], model.indices[1]
    # Keep the driver only when the pick-up match comes earlier on the route
    # than the drop-off match, i.e. the driver travels in the passenger's direction.
    if pick_idx < drop_idx:
        dist.append((model.values[0] + model.values[1], idx,
                     route[pick_idx], route[drop_idx]))

# Closest overall match first.
sorted_dist = sorted(dist)

recommendation = []

for sd in sorted_dist:
    pick_dist = distance.distance(query[0], sd[2]).km
    drop_dist = distance.distance(query[1], sd[3]).km
    # Both ends of the trip must be within the allowed distance.
    if pick_dist <= MAX_MATCH_KM and drop_dist <= MAX_MATCH_KM:
        recommendation.append(sd[1])

recommendation

[11, 19]

In [70]:
# Full records of the drivers whose id is in `recommendation`.
df.loc[df['driver_id'].isin(recommendation)]

Unnamed: 0,driver_id,pickuptime,start_lat,start_long,end_lat,end_long,routes,ket_start,ket_end
4,11,2021-05-19 07:45:00,-7.916181,112.583958,-7.882215,112.511487,"[[-7.9161186, 112.5838508], [-7.91579580000000...",Dau,Museum Angkut
11,19,2021-05-19 08:30:00,-7.893652,112.578221,-7.87416,112.520271,"[[-7.893488899999999, 112.5781829], [-7.892989...",Pendem,RSUD Batu


# Result Analysis

The recommendation system was queried for a passenger who wants to travel from MAN 1 Kota Batu at `(-7.8838611, 112.5381295)` to GOR Gajah Mada Kota Batu at `(-7.8786821, 112.524145)`. Using a threshold of 0.7 km from the nearest point, the system recommends 2 drivers who have a similar route, namely the drivers with IDs `[11, 19]`. We want to see how effective the system is at providing driver recommendations to passengers, so in this case we use Google Maps to inspect and assess the result. The testing scenario is that the driver picks up the passenger and delivers them to their destination first, before the driver continues to his own final destination.

## Passenger with Driver_id 11

The driver with ID 11 travels from his home in the Dau District area to his workplace at the Transportation Museum (Museum Angkut). Here is the route Google Maps suggests for the driver to get to work.

![driver_id 11 routes](assets/driver_11_routes.png)

It can be seen that, when travelling alone, the driver with ID 11 is estimated to cover 12.1 km with an estimated time of 27 minutes.

![driver_id 11 with passenger routes](assets/Driver11_with_pass.png)

However, if the driver with ID 11 picks up the passenger and delivers them to the passenger's destination, the distance covered will be 12.5 km with an estimated travel time of 28 minutes. This is a good result: the system can search for and recommend drivers who are heading in the same direction as the passenger. The recommendations are also not burdensome or detrimental to drivers, because the maximum pick-up distance is limited to 0.7 km from a point on the route provided by the maps.