In [1]:
import itertools
import warnings

import folium
import geopy
import networkx as nx
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans

## LOAD DATA

In [4]:
# One cast-type name per line; only the trailing newline is removed.
with open('cast_types.txt', 'r') as types_file:
    cast_types = [line.strip('\n') for line in types_file]

In [5]:
# Map every cast type to its CSV table, indexed by the table's `name` column.
data = {
    cast_: pd.read_csv(f'data/{cast_}.csv').set_index('name')
    for cast_ in cast_types
}

In [6]:
# Number of rows in the drivers and cast-member tables.
driver_num = len(data['drivers'])
cast_num = len(data['cast_members'])

## DIRECTIONS API

In [7]:
# The key is interpolated straight into the request URL, so surrounding
# whitespace (typically the file's trailing newline) must not come along.
with open('gitignore/api_key.txt') as fh:
    api_key = fh.read().strip()

In [8]:
geolocator = Nominatim(user_agent="Transport_planner")

def geocode_address(address, geolocator=geolocator):
    """Look up ``address`` with ``geolocator`` and return whatever its ``geocode`` call yields."""
    result = geolocator.geocode(address)
    return result

def get_duration(loc_1, loc_2, loc=True, api_key=api_key):
    """Return the duration, in seconds, of the planned route from ``loc_1`` to ``loc_2``.

    loc: when true the endpoints are objects exposing ``longitude`` and
        ``latitude``; otherwise they are indexable pairs whose items 0 and 1
        are passed on in that order.
    api_key: key forwarded to the directions request.

    The value is read from the first segment of the first feature of the
    directions response.
    """
    fetch_route = get_directions_locations if loc else get_directions_coords
    route = fetch_route(loc_1, loc_2, api_key=api_key)
    first_feature = route['features'][0]
    first_segment = first_feature['properties']['segments'][0]
    return first_segment['duration']

def _request_directions(start, end, api_key, timeout):
    """Request a driving route between two (longitude, latitude) pairs and return the parsed JSON.

    Raises ``requests.HTTPError`` when the service answers with an error
    status, rather than handing an error payload back to the caller.
    """
    url = (
        "https://api.openrouteservice.org/v2/directions/driving-car"
        f"?api_key={api_key}"
        f"&start={start[0]!s},{start[1]!s}"
        f"&end={end[0]!s},{end[1]!s}"
    )
    # Without a timeout a stalled connection would block the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()

def get_directions_locations(loc_1, loc_2, api_key=api_key, timeout=30):
    """Get directions as GeoJSON between two objects exposing ``longitude`` / ``latitude``.

    timeout: seconds to wait for the routing service before giving up.
    """
    return _request_directions(
        (loc_1.longitude, loc_1.latitude),
        (loc_2.longitude, loc_2.latitude),
        api_key,
        timeout,
    )

def get_directions_coords(coords_1, coords_2, api_key=api_key, timeout=30):
    """Get directions as GeoJSON between two indexable (longitude, latitude) pairs.

    timeout: seconds to wait for the routing service before giving up.
    """
    return _request_directions(coords_1, coords_2, api_key, timeout)

In [9]:
# Geocode every address in every table, keyed by the row's index label.
locations = {}
for value in data.values():
    for ind, row in value.iterrows():
        try:
            location = geocode_address(row['address'])
        except Exception as err:
            # Best effort: one bad row must not abort the run, but report which one failed.
            warnings.warn(f'Could not geocode {ind!r}: {err!r}')
            continue
        if location is None:
            # The geocoder found no match; storing None would break later
            # .longitude / .latitude access.
            warnings.warn(f'No geocoding result for {ind!r}')
            continue
        locations[ind] = location

In [10]:
locations

{'Laci': Location(Rajk László Szakkollégium, 6, Horánszky utca, Palotanegyed, 8. kerület, Budapest, Közép-Magyarország, 1085, Magyarország, (47.49236565, 19.0677840526337, 0.0)),
 'Feri': Location(18, Dobó utca, MÁV-telep, Rákospalota, 15. kerület, Budapest, Közép-Magyarország, 1153, Magyarország, (47.5604898333333, 19.1154243333333, 0.0)),
 'Bius': Location(Golden élelmiszerbolt, 58, Wesselényi utca, Ligetváros, Erzsébetváros, 7. kerület, Budapest, Közép-Magyarország, 1077, Magyarország, (47.5027025, 19.0723412, 0.0)),
 'Zoli': Location(4, Akácfa utca, Ligetváros, Erzsébetváros, 7. kerület, Budapest, Közép-Magyarország, 1072, Magyarország, (47.4973858, 19.0689998786508, 0.0)),
 'Peti': Location(13, Perczel Mór utca, MÁV-telep, Rákospalota, 15. kerület, Budapest, Közép-Magyarország, 1155, Magyarország, (47.5395641666667, 19.1125582, 0.0)),
 'Nori': Location(Laky Károly Sportuszoda (BVSC-Zugló), 2, Szőnyi út, Herminamező, 14. kerület, Budapest, Közép-Magyarország, 1142, Magyarország, (4

## CREATE OBJECT

In [34]:
class TransportPlanner:
    """Hold the tables of a transport plan and geocode the addresses in them.

    The four tables are iterated row by row: each row's index label becomes
    the key and its ``address`` value is what gets geocoded.
    """

    def __init__(self, supervisors, drivers, cast_members, destinations, geolocator=geolocator):
        """Store the tables and the geocoder, and read the cast-type names.

        ``cast_types.txt`` is opened relative to the current working directory.
        """
        self.supervisors = supervisors
        self.drivers = drivers
        self.cast_members = cast_members
        self.destinations = destinations
        self.all_locs = [self.supervisors, self.drivers, self.cast_members, self.destinations]
        self.geolocator = geolocator
        # Filled by get_locations() / get_coordinates(); they start empty so
        # the attributes exist whatever order the methods are called in.
        self.locations = {}
        self.coords = {}
        with open('cast_types.txt', 'r') as types_file:
            self.cast_types = [line.strip('\n') for line in types_file]

    def __str__(self):
        return 'This is a transport planner object.'

    def get_locations(self):
        """Geocode every row's address into ``self.locations``.

        Rows whose lookup raises or finds no match are skipped with a warning.
        A label that appears in several tables keeps the result of the last
        table that contains it.
        """
        self.locations = {}
        for df in self.all_locs:
            for ind, row in df.iterrows():
                try:
                    location = self.geolocator.geocode(row['address'])
                except Exception as err:
                    # Best effort: keep going, but report which row failed.
                    warnings.warn(f'Could not geocode {ind!r}: {err!r}')
                    continue
                if location is None:
                    # No match for this address; storing None would break get_coordinates().
                    warnings.warn(f'No geocoding result for {ind!r}')
                    continue
                self.locations[ind] = location

    def get_coordinates(self):
        """Build ``self.coords`` as ``{label: (longitude, latitude)}`` from ``self.locations``."""
        self.coords = {
            k: (v.longitude, v.latitude)
            for k, v in self.locations.items()
            if v is not None
        }
    
    
    
    
    
        

In [35]:
# Positional unpacking: the order of the names in cast_types must match the
# constructor's parameter order.
tp = TransportPlanner(*(data[cast_type] for cast_type in cast_types))

In [36]:
tp.get_locations()

In [37]:
tp.get_coordinates()

In [38]:
tp.coords

{'Laci': (19.0677840526337, 47.49236565),
 'Feri': (19.1154243333333, 47.5604898333333),
 'Bius': (19.0723412, 47.5027025),
 'Zoli': (19.0689998786508, 47.4973858),
 'Peti': (19.1125582, 47.5395641666667),
 'Nori': (19.0920238413811, 47.52254375),
 'Shooting': (18.978789991874, 47.51999175),
 'Dressing': (19.0576071, 47.4730609)}

In [31]:
# NOTE(review): the rendered table includes e-mail addresses; clear this output before sharing.
tp.cast_members

Unnamed: 0_level_0,address,destination,mailto
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bius,1077 Budapest Wesselényi utca 58.,Shooting,mor.kapronczay@gmail.com
Zoli,1072 Budapest Akácfa utca 4.,Dressing,mor.kapronczay@gmail.com
Peti,1155 Budapest Perczel Mór utca 13.,Shooting,mor.kapronczay@gmail.com
Nori,1142 Budapest Szőnyi út 2.,Dressing,mor.kapronczay@gmail.com


## ALGORITHM

In [None]:
# (longitude, latitude) per name; entries without a geocoding result are skipped
# instead of raising AttributeError on None.
coords = {k: (v.longitude, v.latitude) for k, v in locations.items() if v is not None}

In [None]:
# One row per name; the tuple items become columns 0 (longitude) and 1 (latitude).
coords_df = pd.DataFrame.from_dict(coords, orient='index')

In [None]:
# Select the cast members' rows through the cast_members table: the location
# keys are bare names, so a substring test for 'cast' on the key matches nothing.
cast_df = coords_df.loc[coords_df.index.isin(data['cast_members'].index)]

In [None]:
cast_df

In [None]:
# One cluster per driver; the fixed seed keeps the grouping identical across runs.
clus = KMeans(n_clusters=driver_num, random_state=42)

In [None]:
clus.fit(cast_df)

In [None]:
clus.labels_

In [None]:
clus.cluster_centers_

In [None]:
# Select the drivers' rows through the drivers table: the location keys are
# bare names, so a substring test for 'driver' on the key matches nothing.
drivers_df = coords_df.loc[coords_df.index.isin(data['drivers'].index)]

In [None]:
drivers_df

In [None]:
# Wrap the fitted centres in a DataFrame so they can be addressed with .loc like the driver rows.
clusters_df = pd.DataFrame(clus.cluster_centers_)

In [None]:
clusters_df

In [None]:
get_duration(clusters_df.loc[0], drivers_df.iloc[0], loc=False)

In [None]:
get_duration(clusters_df.loc[0], drivers_df.iloc[1], loc=False)

In [None]:
# Greedy first pass: every driver gets the cluster centre with the shortest route duration.
matching_dict = {}
for driver in drivers_df.index:
    min_ = float('inf')
    for clus_cent in clusters_df.index:
        dur = get_duration(drivers_df.loc[driver], clusters_df.loc[clus_cent], loc=False)
        if dur < min_:
            matching_dict[driver] = clus_cent
            min_ = dur

# Cluster centres nobody picked; only computed when at least two drivers share a centre.
left_out = []
if len(matching_dict.values()) != len(set(matching_dict.values())):
    left_out = [f for f in clusters_df.index if f not in matching_dict.values()]

## TODO: handle several drivers being matched to the same cluster:
# a cluster picked by more than one driver goes to the nearest of them,
# the remaining drivers take the nearest cluster from left_out (never the contested one),
# and this repeats until the matching is 1:1.

In [None]:
len(matching_dict.values()) != len(set(matching_dict.values()))

In [None]:
matching_dict

In [None]:
# Ordered pairs of distinct places, leaving out driver-to-driver legs.
# Drivers are identified through the drivers table because the location keys
# are bare names; a 'drivers' prefix test on the key never matches.
driver_names = set(data['drivers'].index)
relations = [
    (start, end)
    for start, end in itertools.permutations(locations, 2)
    if not (start in driver_names and end in driver_names)
]


In [None]:
# NOTE(review): `edgelist` is first assigned in the CREATE GRAPH section further down,
# so this cell raises NameError on a fresh Restart & Run All; it belongs after that cell.
# The geocoded names shown earlier carry no 'drivers_' prefix, so this filter
# probably needs the bare name — TODO confirm.
edgelist[edgelist['start'] == 'drivers_Laci']

## CREATE GRAPH

In [None]:
# Keep one direction per unordered pair: the first orientation seen wins.
# A set makes the reverse-pair lookup constant-time instead of scanning the list.
graph_edges = []
seen_edges = set()
for first, second in relations:
    if (second, first) not in seen_edges:
        seen_edges.add((first, second))
        graph_edges.append((first, second))

In [None]:
# One row per undirected edge, endpoints stored by name.
edge_columns = ['start', 'end']
edgelist = pd.DataFrame(graph_edges, columns=edge_columns)

In [None]:
def get_distance(row):
    """Return the edge weight for one edge-list row.

    Despite the name, the value is the route duration reported by
    ``get_duration`` for the row's ``start`` and ``end`` places.
    """
    origin = locations[row['start']]
    target = locations[row['end']]
    return get_duration(origin, target)

In [None]:
# get_distance issues one routing request per row, so this cell needs network
# access and its run time grows with the number of edges.
edgelist['dist'] = edgelist.apply(get_distance, axis = 1)

In [None]:
# Every name that occurs as an edge endpoint. Sorted so the vertex numbering is
# the same on every run; plain set order for strings can change between kernels.
vertices = sorted(set(edgelist['start']) | set(edgelist['end']))

In [None]:
# Number the vertices: id -> attribute dict holding the name.
vertices = {i: {'name': name} for i, name in enumerate(vertices)}

In [None]:
# Reverse lookup: vertex name -> vertex id.
vertex_indices = {attrs['name']: idx for idx, attrs in vertices.items()}

In [None]:
# (start vertex id, end vertex id, weight) triples, in edge-list row order.
edges = [
    (vertex_indices[start], vertex_indices[end], dist)
    for start, end, dist in zip(edgelist['start'], edgelist['end'], edgelist['dist'])
]

In [None]:
G = nx.Graph()

In [None]:
# Add every vertex id as a node carrying its name as an attribute.
G.add_nodes_from(
    (idx, {'name': name}) for name, idx in vertex_indices.items()
)

In [None]:
G.add_weighted_edges_from(edges)

In [None]:
# NOTE(review): neither `feriloc` nor `geojson` is assigned anywhere in the visible
# notebook, so this cell depends on leftover kernel state — TODO define both
# (presumably a geocoded location and a directions response) before use.
m = folium.Map(location=(feriloc.latitude, feriloc.longitude))

# Add the GeoJSON object to the map as a named layer.
folium.GeoJson(
    geojson,
    name='geojson'
).add_to(m)

In [None]:
m