# **Library Imports**

In [None]:
import math
import datetime
import numpy as np
import pandas as pd
import haversine as hs
import plotly.express as px
from collections import Counter
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim

pd.options.mode.chained_assignment = None

# **Encoding Functions**

In [None]:
def get_time_zone(time):
    """Map a 24-hour ``"HH:MM"`` string to its half-hour slot index.

    Each hour is split into two slots: minutes 0-29 fall in slot ``2 * hour``
    and minutes 30-59 in slot ``2 * hour + 1``, so a full day spans 0..47.

    Args:
        time: Time of day as text, hours and minutes separated by ``':'``.

    Returns:
        The integer slot index.

    Raises:
        ValueError: If the text has no ``':'`` or a part is not an integer.
    """
    hour_text, _, minute_text = time.partition(':')
    hour = int(hour_text)
    minute = int(minute_text)
    # Minute 30 opens the second half of the hour; the previous ``> 30`` test
    # left it in the first half, making the two slots 31 and 29 minutes wide.
    return 2 * hour + (1 if minute >= 30 else 0)


def get_day_order(date):
    """Return the weekday index of a ``MM/DD/YYYY`` date string.

    The index follows ``datetime.weekday()``: Monday is 0 and Sunday is 6.
    """
    parsed = datetime.datetime.strptime(date, '%m/%d/%Y')
    return parsed.weekday()


def encode_day_and_time():
    """Encode the trip date and time columns of the global ``dataset``.

    Every ``date_of_trip`` value is converted with ``get_day_order`` and every
    ``time_of_trip`` value with ``get_time_zone``.

    Returns:
        A ``(days, slots)`` tuple of object-dtype NumPy arrays, one entry per
        row of ``dataset`` and in the same row order.
    """
    # Work on explicit copies: the array handed back by ``to_numpy()`` may be
    # a view of the frame's own data (read-only under pandas Copy-on-Write),
    # so writing into it in place is either an error or a hidden mutation.
    days = dataset.date_of_trip.to_numpy(dtype=object, copy=True)
    slots = dataset.time_of_trip.to_numpy(dtype=object, copy=True)
    for row in range(len(days)):
        days[row] = get_day_order(days[row])
        slots[row] = get_time_zone(slots[row])
    return days, slots

# **Data Pre-processing**

In [None]:
def get_last_index(column, i, a):
    """Return the index just past the run of ``a`` values starting at ``i``.

    Scans ``column`` forward from position ``i`` and stops at the first entry
    that is not equal to ``a``.  If ``i`` is already at or beyond the end of
    ``column`` it is returned unchanged.
    """
    size = len(column)
    for pos in range(i, size):
        if not (column[pos] == a):
            return pos
    # Either the run reached the end of the column, or there was nothing to
    # scan because ``i`` was already out of range.
    return max(i, size)


def in_array():
    """Bucket every trip of the global ``dataset`` into ``arr[day][slot]``.

    The date and time columns are first replaced by their encoded values (see
    ``encode_day_and_time``) and the frame is sorted by day, then slot, so the
    rows of each bucket form one contiguous run.  Each bucket receives the
    values of columns 2 onward for its rows, as a list of row lists.

    Side effects:
        Rewrites and sorts ``dataset`` in place and fills the global ``arr``.
    """
    dataset.date_of_trip, dataset.time_of_trip = encode_day_and_time()
    dataset.sort_values(by=['date_of_trip', 'time_of_trip'], inplace=True)

    # One combined key per row, built once.  Matching on the time slot alone
    # let rows of the *next* day leak into the current day's bucket whenever
    # the current day had no trips left; the key also carries the day.
    bucket_keys = [
        day * 48 + slot
        for day, slot in zip(dataset['date_of_trip'].to_numpy(),
                             dataset['time_of_trip'].to_numpy())
    ]

    start_row = 0
    for day in range(7):
        for slot in range(48):
            end_row = get_last_index(bucket_keys, start_row, day * 48 + slot)
            arr[day][slot] = dataset.iloc[start_row:end_row, 2:].values.tolist()
            start_row = end_row

# **Clustering Mechanism**

In [None]:
#Decides the number of clusters based on the data size
def get_optimal_clusters(data):
    """Return the cluster count for ``data``: one cluster per ten samples.

    Non-empty inputs with fewer than ten samples get a single cluster rather
    than zero, because zero is not a usable cluster count.  Empty input still
    yields 0.

    Args:
        data: Any sized collection of samples.

    Returns:
        The number of clusters as an ``int``.
    """
    length = len(data)
    if length == 0:
        return 0
    return max(1, length // 10)


#Forms the clusters
def get_clusters(data):
    """Cluster ``data`` with k-means and return the fitted result.

    The number of clusters comes from ``get_optimal_clusters(data)``; the
    random seed is fixed so repeated runs give the same clustering.

    Returns:
        A two-item list: ``[labels, cluster_centers]`` of the fitted model.
    """
    model = KMeans(
        n_clusters=get_optimal_clusters(data),
        init='k-means++',
        random_state=42,
        n_init='auto',
    )
    model.fit(data)
    return [model.labels_, model.cluster_centers_]

# **Get Coordinates**

In [None]:
#Converts a logical (street) address to coordinates
def get_Coordinates(position):
    """Geocode ``position`` with Nominatim.

    Args:
        position: The address text to look up.

    Returns:
        A ``(latitude, longitude)`` tuple for the geocoder's match.

    Raises:
        ValueError: If the geocoder finds no match for ``position``.
    """
    geolocator = Nominatim(user_agent="this_is_a_very_long_weird_ass_string")
    location = geolocator.geocode(position)
    # geocode() yields None when nothing matches; fail with a clear message
    # instead of an AttributeError on the attribute access below.
    if location is None:
        raise ValueError(f"Could not find coordinates for address: {position!r}")
    return (location.latitude, location.longitude)

# **Distance from cluster centroids**

In [None]:
def get_Distance(cp,position):
    """Measure the distance from ``position`` to every centroid in ``cp``.

    Args:
        cp: Iterable of centroids; items 0 and 1 of each entry are passed to
            the haversine function as the second point.
        position: Address text, resolved through ``get_Coordinates``.

    Returns:
        ``[distances, labels]``: haversine distances rounded to two decimals,
        each labelled ``'Cluster Head'``, followed by one final entry for the
        current location itself (placeholder distance ``1``, label
        ``'Current Location'``).

    Side effects:
        Appends the current location as an extra row to the global
        ``zones[1]``.
    """
    here = get_Coordinates(position)
    zones[1] = np.append(zones[1], [[here[0], here[1]]], axis=0)

    distances = [round(hs.haversine(here, (c[0], c[1])), 2) for c in cp]
    labels = ['Cluster Head'] * len(distances)

    # Trailing entry describes the current location rather than a centroid.
    distances.append(1)
    labels.append('Current Location')
    return [distances, labels]

# **Data Plotting**

In [None]:
def plot_data(data1, data2):
    """Show two map scatter plots: the clusters and the centroid distances.

    Args:
        data1: Table with ``Latitude``, ``Longitude`` and ``Zone`` columns;
            points are coloured by zone.
        data2: Table with ``Centroid Latitude``, ``Centroid Longitude``,
            ``Distance``, ``Point`` and ``Utility`` columns; points are
            coloured by ``Point`` and sized by ``Utility``.
    """
    # Settings shared by both figures.
    map_options = dict(size_max=20, zoom=10, mapbox_style="carto-positron")

    #Plot-1
    cluster_fig = px.scatter_mapbox(
        data1,
        lat="Latitude",
        lon="Longitude",
        color="Zone",
        color_continuous_scale=px.colors.cyclical.IceFire,
        title='Cluster Formations',
        **map_options,
    )

    #Plot-2
    distance_fig = px.scatter_mapbox(
        data2,
        lat="Centroid Latitude",
        lon="Centroid Longitude",
        labels={'Distance': 'Distance from current location(km): '},
        hover_data=['Distance'],
        color='Point',
        size='Utility',
        title='Distance from Cluster Heads',
        **map_options,
    )

    cluster_fig.show()
    distance_fig.show()

# **Get Day Code**

In [None]:
#Converts the user's day-name input to its day code
def getdayCode(day):
    """Return the day code for a weekday name, compared case-insensitively.

    Monday through Saturday map to 0 through 5.  Every other input, Sunday
    included, maps to 6.
    """
    named_days = {
        'monday': 0,
        'tuesday': 1,
        'wednesday': 2,
        'thursday': 3,
        'friday': 4,
        'saturday': 5,
    }
    return named_days.get(day.lower(), 6)

# **Utility Calculation**

In [None]:
def getProbability(clusters):
    """Return each cluster label's share of ``clusters``.

    The result holds one fraction per distinct label, ordered by ascending
    label value.  An empty input gives an empty list.
    """
    total = len(clusters)
    tally = Counter(clusters)
    return [tally[label] / total for label in sorted(tally)]

def getUtility(probability,distance):
    """Compute the utility (probability / distance) of every point.

    A trailing probability of 0 is added for the final, extra entry of
    ``distance``, so the result has ``len(probability) + 1`` items.

    Args:
        probability: Per-cluster probabilities.  Not modified.
        distance: Distances, one per probability plus one trailing entry.

    Returns:
        A list of utilities rounded to four decimals.

    Raises:
        IndexError: If ``distance`` has fewer than ``len(probability) + 1``
            entries.
    """
    # Pad a copy: appending to the argument itself grew the caller's list on
    # every call.
    padded = list(probability) + [0]
    return [round(p / distance[i], 4) for i, p in enumerate(padded)]
  


# **Storing the data in Array**

In [None]:
# Load the trip records; in_array() reads and rewrites this global frame.
dataset = pd.read_csv("./dataset.csv")
dataset = dataset.iloc[:, :]

# 7 x 48 grid of empty buckets (first index 0..6, second index 0..47) that
# in_array() fills.
arr = [[[] for _slot in range(48)] for _day in range(7)]
in_array()

# **Main Execution**

In [None]:
# --- Collect the query from the user ---------------------------------------
_day_ = getdayCode(input("Enter the Day: ").strip())
_time_ = get_time_zone(input("Enter the Time(24 hr): "))
_pos_ = input("Enter your current location(address): ")

# --- Cluster the trips recorded for that day and time slot -----------------
zones = get_clusters(np.array(arr[_day_][_time_]))
probability = getProbability(zones[0])

# get_Distance() also appends the current location to zones[1], so it has to
# run before the centroid table is built below.
dist = get_Distance(zones[1],_pos_)

utility = getUtility(probability,dist[0])
# The last entry stands for the current location; give it the mean utility.
utility[-1] = sum(utility)/len(utility)

# --- Pick the nearest and the most useful zone (current location excluded) -
min_dist = min(dist[0][:-1])
nearest_cluster = dist[0][:-1].index(min_dist)

max_util = max(utility[:-1])
max_util_zone = utility[:-1].index(max_util)

print("The nearest zone from the current location is Zone-", nearest_cluster, 'at a distance of', min_dist, 'km')
print("The maximum utility zone is Zone-",max_util_zone,'with utility of',max_util)

# --- Assemble the two tables that plot_data() expects ----------------------
data1 = pd.concat(
    [
        pd.DataFrame(arr[_day_][_time_], columns=['Latitude', 'Longitude']),
        pd.DataFrame(zones[0], columns=['Zone']),
    ],
    axis=1,
)

data2 = pd.concat(
    [
        pd.DataFrame(zones[1], columns=['Centroid Latitude', 'Centroid Longitude']),
        pd.DataFrame(dist[0], columns=['Distance']),
        pd.DataFrame(dist[1], columns=['Point']),
        pd.DataFrame(utility, columns=['Utility']),
    ],
    axis=1,
)

plot_data(data1, data2)
    