In [77]:
import geopy
from geopy.geocoders import Nominatim
import requests
import folium
import itertools
import pandas as pd
import networkx as nx
from sklearn.cluster import KMeans

pd.options.mode.chained_assignment = None

## LOAD DATA

In [27]:
# Read the cast type names, one per line; each name selects a CSV file under data/.
with open('cast_types.txt', 'r') as cast_types_file:
    # Strip surrounding whitespace and skip blank lines so that a trailing empty
    # line or a Windows line ending does not turn into a bogus cast type.
    cast_types = [line.strip() for line in cast_types_file if line.strip()]

In [34]:
# One DataFrame per cast type, read from data/<cast type>.csv and indexed by its 'name' column.
data = {
    cast_type: pd.read_csv(f'data/{cast_type}.csv').set_index('name')
    for cast_type in cast_types
}

In [35]:
# Row counts of the drivers and cast members tables.
driver_num = len(data['drivers'])
cast_num = len(data['cast_members'])

## DIRECTIONS API

In [36]:
# Load the openrouteservice API key from a local file.
with open('gitignore/api_key.txt') as fh:
    # Strip whitespace: a trailing newline in the file would otherwise be sent
    # as part of the key in every request URL.
    api_key = fh.read().strip()

In [37]:
# Shared Nominatim geocoder, used as the default geocoder by the helpers below.
geolocator = Nominatim(user_agent="Transport_planner")

def geocode_address(address, geolocator=geolocator):
    """Look up `address` with `geolocator` and return whatever its `geocode` call returns."""
    result = geolocator.geocode(address)
    return result

def get_duration(loc_1, loc_2, loc=True, api_key=api_key):
    """Return the duration, in seconds, of the driving route from loc_1 to loc_2.

    When `loc` is truthy the endpoints are objects exposing `.longitude` and
    `.latitude`; otherwise they are indexables holding (longitude, latitude).
    """
    fetch_route = get_directions_locations if loc else get_directions_coords
    route = fetch_route(loc_1, loc_2, api_key=api_key)
    first_feature = route['features'][0]
    # Only the first segment of the first feature is read.
    return first_feature['properties']['segments'][0]['duration']

def get_directions_locations(loc_1, loc_2, api_key=api_key):
    """Return the driving directions between two geocoded locations as parsed GeoJSON.

    Parameters
    ----------
    loc_1, loc_2 : objects exposing `.longitude` and `.latitude`
        Start and end of the route.
    api_key : str
        openrouteservice API key.

    Returns
    -------
    The decoded JSON body of the directions response.
    """
    # Delegate to the coordinate-based helper so the request is built in one place.
    start = (loc_1.longitude, loc_1.latitude)
    end = (loc_2.longitude, loc_2.latitude)
    return get_directions_coords(start, end, api_key=api_key)

def get_directions_coords(coords_1, coords_2, api_key=api_key):
    """Return the driving directions between two coordinate pairs as parsed GeoJSON.

    Parameters
    ----------
    coords_1, coords_2 : indexable
        Start and end of the route; element 0 is the longitude and element 1
        the latitude.
    api_key : str
        openrouteservice API key.

    Returns
    -------
    The decoded JSON body of the directions response.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    requests.Timeout
        If the service does not answer within the timeout.
    """
    response = requests.get(
        "https://api.openrouteservice.org/v2/directions/driving-car",
        # Let requests build and encode the query string instead of formatting it by hand.
        params={
            "api_key": api_key,
            "start": f"{coords_1[0]},{coords_1[1]}",
            "end": f"{coords_2[0]},{coords_2[1]}",
        },
        # requests waits indefinitely by default; bound the wait explicitly.
        timeout=30,
    )
    # Fail here with the HTTP error rather than later with a KeyError on the error payload.
    response.raise_for_status()
    return response.json()

In [38]:
# Geocode the 'address' of every row in every table, keyed by the row's index label.
locations = {}
unresolved_names = []  # rows whose address could not be geocoded
for table in data.values():
    for ind, row in table.iterrows():
        try:
            location = geocode_address(row['address'])
        except Exception:
            # Best effort: skip rows that fail, but no longer swallow
            # KeyboardInterrupt/SystemExit as the bare `except:` did.
            location = None
        if location is None:
            # A lookup without a result is not stored, so later code can rely on
            # every entry having .longitude/.latitude.
            unresolved_names.append(ind)
        else:
            locations[ind] = location

In [39]:
locations

{'Apci': Location(Szent István körút, Újlipótváros, 13. kerület, Budapest, Közép-Magyarország, 1137, Magyarország, (47.5110476, 19.05431, 0.0)),
 'Bius': Location(Golden élelmiszerbolt, 58, Wesselényi utca, Ligetváros, Erzsébetváros, 7. kerület, Budapest, Közép-Magyarország, 1077, Magyarország, (47.5027025, 19.0723412, 0.0)),
 'Boci': Location(Örs vezér tere, Örs vezér téri lakótelep, Rákosfalva, 14. kerület, Budapest, Közép-Magyarország, 1148, Magyarország, (47.5056186, 19.1356264, 0.0)),
 'Coci': Location(Ludovika tér, Losonci negyed, 8. kerület, Budapest, Közép-Magyarország, 1083, Magyarország, (47.4821204, 19.0850836, 0.0)),
 'Dani': Location(Klauzál tér, Ligetváros, Erzsébetváros, 7. kerület, Budapest, Közép-Magyarország, 1072, Magyarország, (47.5004821, 19.0637521, 0.0)),
 'Deni': Location(Hegyalja út, Gellérthegy, 1. kerület, Budapest, Közép-Magyarország, 1016, Magyarország, (47.4882875, 19.0306139, 0.0)),
 'Dressing': Location(ELTE PPK Egészségfejlesztési és Sporttudományi Inté

## CREATE OBJECT

In [82]:
class TransportPlanner:
    """Holds the tables of a transport plan and geocodes the addresses in them.

    Parameters
    ----------
    supervisors, drivers, cast_members, destinations : pandas.DataFrame
        Tables with an 'address' column. `drivers` must also have a
        'capacity' column.
    geolocator : object with a `geocode(address)` method, optional
        Geocoder used by `get_locations`. When omitted, a Nominatim geocoder
        with the user agent "Transport_planner" is created, so the class no
        longer depends on a notebook global existing at definition time.

    Attributes
    ----------
    all_locs : list of the four tables, in constructor order.
    min_clus : largest driver capacity plus one.
    cast_types : lines of 'cast_types.txt' with the newline removed.
    locations : dict filled by `get_locations` (index label -> geocoder result).
    unresolved : index labels whose address could not be geocoded.
    coords : DataFrame filled by `get_coordinates`.
    """

    def __init__(self, supervisors, drivers, cast_members, destinations, geolocator=None):
        self.supervisors = supervisors
        self.drivers = drivers
        self.cast_members = cast_members
        self.destinations = destinations
        self.all_locs = [self.supervisors, self.drivers, self.cast_members, self.destinations]
        if geolocator is None:
            geolocator = Nominatim(user_agent="Transport_planner")
        self.geolocator = geolocator
        self.min_clus = drivers['capacity'].max() + 1
        # Initialised here so get_coordinates() is safe to call before get_locations().
        self.locations = {}
        self.unresolved = []
        with open('cast_types.txt', 'r') as cast_types_file:
            self.cast_types = [line.strip('\n') for line in cast_types_file]

    def __str__(self):
        return 'This is a transport planner object.'

    def get_locations(self):
        """Geocode the 'address' of every row of every table into `self.locations`.

        Rows whose lookup raises or yields no result are left out of
        `self.locations` and their index labels are collected in
        `self.unresolved`.
        """
        self.locations = {}
        self.unresolved = []
        for df in self.all_locs:
            for ind, row in df.iterrows():
                try:
                    location = self.geolocator.geocode(row['address'])
                except Exception:
                    # Best effort, but without swallowing KeyboardInterrupt/SystemExit.
                    location = None
                if location is None:
                    self.unresolved.append(ind)
                else:
                    self.locations[ind] = location

    def get_coordinates(self):
        """Build `self.coords`: one row per location, column 0 = longitude, column 1 = latitude."""
        self.coords = pd.DataFrame.from_dict(
            {name: (loc.longitude, loc.latitude) for name, loc in self.locations.items()},
            orient='index',
        )

In [83]:
# The tables are passed positionally, so the order of names in cast_types must match
# the constructor's parameter order.
tp = TransportPlanner(*(data[cast_type] for cast_type in cast_types))

In [88]:
tp.min_clus

4

In [84]:
# Geocode every address held by the planner; the results are stored on tp.locations.
tp.get_locations()

In [85]:
tp.locations

{'Apci': Location(Szent István körút, Újlipótváros, 13. kerület, Budapest, Közép-Magyarország, 1137, Magyarország, (47.5110476, 19.05431, 0.0)),
 'Bius': Location(Golden élelmiszerbolt, 58, Wesselényi utca, Ligetváros, Erzsébetváros, 7. kerület, Budapest, Közép-Magyarország, 1077, Magyarország, (47.5027025, 19.0723412, 0.0)),
 'Boci': Location(Örs vezér tere, Örs vezér téri lakótelep, Rákosfalva, 14. kerület, Budapest, Közép-Magyarország, 1148, Magyarország, (47.5056186, 19.1356264, 0.0)),
 'Coci': Location(Ludovika tér, Losonci negyed, 8. kerület, Budapest, Közép-Magyarország, 1083, Magyarország, (47.4821204, 19.0850836, 0.0)),
 'Dani': Location(Klauzál tér, Ligetváros, Erzsébetváros, 7. kerület, Budapest, Közép-Magyarország, 1072, Magyarország, (47.5004821, 19.0637521, 0.0)),
 'Deni': Location(Hegyalja út, Gellérthegy, 1. kerület, Budapest, Közép-Magyarország, 1016, Magyarország, (47.4882875, 19.0306139, 0.0)),
 'Dressing': Location(ELTE PPK Egészségfejlesztési és Sporttudományi Inté

In [86]:
# Build tp.coords from tp.locations (requires the get_locations() call above).
tp.get_coordinates()

In [87]:
tp.coords

Unnamed: 0,0,1
Laci,19.067784,47.492366
Feri,19.115424,47.56049
Dani,19.063752,47.500482
Deni,19.030614,47.488287
Bius,19.072341,47.502702
Zoli,19.069,47.497386
Peti,19.112558,47.539564
Nori,19.092024,47.522544
Boci,19.135626,47.505619
Coci,19.085084,47.48212


In [52]:
tp.cast_members

Unnamed: 0_level_0,address,destination,mailto
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bius,1077 Budapest Wesselényi utca 58.,Shooting,mor.kapronczay@gmail.com
Zoli,1072 Budapest Akácfa utca 4.,Dressing,mor.kapronczay@gmail.com
Peti,1155 Budapest Perczel Mór utca 13.,Shooting,mor.kapronczay@gmail.com
Nori,1142 Budapest Szőnyi út 2.,Shooting,mor.kapronczay@gmail.com
Boci,"Budapest, Örs vezér tere 22, 1148",Dressing,mor.kapronczay@gmail.com
Coci,"Budapest, Ludovika tér 2, 1083",Dressing,mor.kapronczay@gmail.com
Loci,Lónyay u. 13 Budapest 1093,Shooting,mor.kapronczay@gmail.com
Apci,"Budapest, Szent István körút 14, 1137",Shooting,mor.kapronczay@gmail.com


In [53]:
# Split the cast members by destination. Each group is an explicit copy so that
# columns can be added to it later without writing into a slice of tp.cast_members.
cast_groups = {
    dest: tp.cast_members.loc[tp.cast_members['destination'] == dest].copy()
    for dest in tp.cast_members['destination'].unique()
}

In [55]:
cast_groups['Shooting']

Unnamed: 0_level_0,address,destination,mailto
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bius,1077 Budapest Wesselényi utca 58.,Shooting,mor.kapronczay@gmail.com
Peti,1155 Budapest Perczel Mór utca 13.,Shooting,mor.kapronczay@gmail.com
Nori,1142 Budapest Szőnyi út 2.,Shooting,mor.kapronczay@gmail.com
Loci,Lónyay u. 13 Budapest 1093,Shooting,mor.kapronczay@gmail.com
Apci,"Budapest, Szent István körút 14, 1137",Shooting,mor.kapronczay@gmail.com


In [59]:
tp.coords.loc[cast_groups['Shooting'].index]

Unnamed: 0_level_0,0,1
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bius,19.072341,47.502702
Peti,19.112558,47.539564
Nori,19.092024,47.522544
Loci,19.061479,47.486542
Apci,19.05431,47.511048


In [60]:
# Number of cast members travelling to each destination.
cast_groups_load = {dest: len(members) for dest, members in cast_groups.items()}

In [61]:
cast_groups_load

{'Dressing': 3, 'Shooting': 5}

In [99]:
# Cluster the coordinates of each destination group with k-means, store the cluster
# label of every member in a 'clus' column, and keep the cluster centres.
clus_centers = {}
for key, group in list(cast_groups.items()):
    group_coords = tp.coords.loc[group.index]
    n_members = len(group)
    # One cluster while the group is smaller than tp.min_clus, then one more for each
    # further tp.min_clus members (same counts as the former if/else), capped at the
    # group size because KMeans cannot fit more clusters than samples.
    n_clusters = min(int(n_members // tp.min_clus) + 1, n_members)
    # Fixed seed so that re-running the notebook reproduces the same labels.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(group_coords)
    # Rebind a new frame rather than assigning into what may be a slice of tp.cast_members.
    cast_groups[key] = group.assign(clus=kmeans.labels_)
    clus_centers[key] = kmeans.cluster_centers_

In [100]:
clus_centers

{'Dressing': array([[19.09656996, 47.4950416 ]]),
 'Shooting': array([[19.10229102, 47.53105396],
        [19.06271017, 47.5000972 ]])}

In [97]:
cast_groups['Dressing']

Unnamed: 0_level_0,address,destination,mailto,clus
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Zoli,1072 Budapest Akácfa utca 4.,Dressing,mor.kapronczay@gmail.com,0
Boci,"Budapest, Örs vezér tere 22, 1148",Dressing,mor.kapronczay@gmail.com,0
Coci,"Budapest, Ludovika tér 2, 1083",Dressing,mor.kapronczay@gmail.com,0


In [98]:
cast_groups['Shooting']

Unnamed: 0_level_0,address,destination,mailto,clus
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bius,1077 Budapest Wesselényi utca 58.,Shooting,mor.kapronczay@gmail.com,0
Peti,1155 Budapest Perczel Mór utca 13.,Shooting,mor.kapronczay@gmail.com,1
Nori,1142 Budapest Szőnyi út 2.,Shooting,mor.kapronczay@gmail.com,1
Loci,Lónyay u. 13 Budapest 1093,Shooting,mor.kapronczay@gmail.com,0
Apci,"Budapest, Szent István körút 14, 1137",Shooting,mor.kapronczay@gmail.com,0


## ALGORITHM

In [None]:
# (longitude, latitude) per location name; entries without a geocoding result are
# skipped because None has no .longitude/.latitude.
coords = {
    name: (location.longitude, location.latitude)
    for name, location in locations.items()
    if location is not None
}

In [None]:
# One row per location name; column 0 holds the longitude and column 1 the latitude.
coords_df = pd.DataFrame.from_dict(coords, orient='index')

In [None]:
# Coordinates of the cast members. The rows are selected through the cast_members
# table: the index holds plain names, so the former substring test `'cast' in name`
# matched nothing and produced an empty frame.
cast_df = coords_df.loc[coords_df.index.isin(data['cast_members'].index)]

In [None]:
cast_df

In [None]:
# One cluster per driver; the seed is fixed so that re-running gives the same clusters.
clus = KMeans(n_clusters=driver_num, random_state=0)

In [None]:
# Fit the k-means model on the cast members' (longitude, latitude) rows.
clus.fit(cast_df)

In [None]:
clus.labels_

In [None]:
clus.cluster_centers_

In [None]:
# Coordinates of the drivers. The rows are selected through the drivers table: the
# index holds plain names, so the former substring test `'driver' in name` matched
# nothing and produced an empty frame.
drivers_df = coords_df.loc[coords_df.index.isin(data['drivers'].index)]

In [None]:
drivers_df

In [None]:
# Cluster centres as a DataFrame: one row per cluster, same column order as cast_df.
clusters_df = pd.DataFrame(clus.cluster_centers_)

In [None]:
clusters_df

In [None]:
get_duration(clusters_df.loc[0], drivers_df.iloc[0], loc=False)

In [None]:
get_duration(clusters_df.loc[0], drivers_df.iloc[1], loc=False)

In [None]:
# Match every driver with the cluster centre that has the shortest driving duration.
# Note: this issues one directions request per (driver, cluster) pair.
matching_dict = {}
for driver in drivers_df.index:
    best_duration = float('inf')
    for clus_cent in clusters_df.index:
        dur = get_duration(drivers_df.loc[driver], clusters_df.loc[clus_cent], loc=False)
        if dur < best_duration:
            matching_dict[driver] = clus_cent
            best_duration = dur

# Clusters that no driver was matched with; only filled when at least two drivers
# share a cluster, i.e. the matching is not one-to-one.
assigned_clusters = set(matching_dict.values())
left_out = []
if len(matching_dict) != len(assigned_clusters):
    left_out = [c for c in clusters_df.index if c not in assigned_clusters]

## TODO: handle drivers that were given the same cluster:
# - a cluster matched to several drivers goes to the closest of them
# - each remaining driver gets the closest cluster from left_out, except clusters
#   that were already matched several times
# - if conflicts remain, repeat until the matching is 1:1

In [None]:
len(matching_dict.values()) != len(set(matching_dict.values()))

In [None]:
matching_dict

In [None]:
# All ordered pairs of distinct location names, leaving out the pairs in which both
# names start with 'drivers'.
# NOTE(review): the location names shown earlier carry no 'drivers' prefix, so this
# filter may currently exclude nothing; confirm the intended naming.
relations = [
    (start, end)
    for start, end in itertools.product(locations.keys(), repeat=2)
    if start != end and not (start.startswith('drivers') and end.startswith('drivers'))
]


In [None]:
# NOTE(review): `edgelist` is first assigned in a later cell (CREATE GRAPH section), so
# this cell raises NameError on a fresh top-to-bottom run. The 'drivers_' prefix also
# does not appear in the location names shown earlier; confirm the expected key.
edgelist[edgelist['start'] == 'drivers_Laci']

## CREATE GRAPH

In [None]:
# Keep one direction of every pair: a pair is dropped when its reverse was already kept.
graph_edges = []
kept_pairs = set()  # mirrors graph_edges for O(1) membership tests
for first, second in relations:
    if (second, first) not in kept_pairs:
        kept_pairs.add((first, second))
        graph_edges.append((first, second))

In [None]:
# Edge table: one row per kept pair, with the two endpoint names in 'start' and 'end'.
edgelist = pd.DataFrame(graph_edges, columns = ['start', 'end'])

In [None]:
def get_distance(row):
    """Return the get_duration() result between the row's 'start' and 'end' locations.

    Despite the name, the value is the route duration returned by get_duration().
    """
    origin = locations[row['start']]
    target = locations[row['end']]
    return get_duration(origin, target)

In [None]:
# Calls get_distance once per edge row and stores the result in the 'dist' column.
edgelist['dist'] = edgelist.apply(get_distance, axis = 1)

In [None]:
# Distinct endpoint names, sorted so that the vertex numbering is the same on every run
# (iterating a set of strings gives a run-dependent order).
vertices = sorted(set(edgelist['start']) | set(edgelist['end']))

In [None]:
# Map each integer vertex id to an attribute dict holding its name.
# Note: this rebinds `vertices` from a list to a dict, so the cell must not be re-run
# without first re-running the cell that builds the list.
vertices = {i: {'name': vertices[i]} for i in range(len(vertices))}

In [None]:
# Reverse lookup: vertex name -> integer vertex id.
vertex_indices = {attrs['name']: vertex_id for vertex_id, attrs in vertices.items()}

In [None]:
# (start id, end id, weight) triples, one per row of the edge table.
edges = [
    (vertex_indices[start], vertex_indices[end], dist)
    for start, end, dist in zip(edgelist['start'], edgelist['end'], edgelist['dist'])
]

In [None]:
# Empty undirected graph; nodes and weighted edges are added in the next cells.
G = nx.Graph()

In [None]:
# Add one node per vertex id and attach its name as the 'name' node attribute.
for vertex_name, vertex_id in vertex_indices.items():
    G.add_node(vertex_id, name=vertex_name)

In [None]:
# Add the (start id, end id, weight) triples as weighted edges.
G.add_weighted_edges_from(edges)

In [None]:
# Create a map positioned at `feriloc` and add `geojson` to it as a GeoJson layer.
# NOTE(review): neither `feriloc` nor `geojson` is assigned in any visible cell, so this
# cell raises NameError on a fresh run; define both before this cell.
m = folium.Map(location=(feriloc.latitude, feriloc.longitude))

folium.GeoJson(
    geojson,
    name='geojson'
).add_to(m)

In [None]:
m