In [1]:
import osmnx as ox
import networkx as nx
import pandas as pd

In [2]:
data_df = pd.read_csv('data/people-with-companies.csv')
company_df = pd.read_csv('data/ratisbona-companies.csv')

# merge columns into one string
data_df["address"] = data_df['street'] + ' ' + data_df['street_number'] + ', ' + data_df['city']
company_df["address"] = company_df['street'] + ' ' + company_df['street_number'] + ', ' + company_df['city']

data_df = data_df[data_df["city"] == "Regensburg"]
# drop columns
data_df = data_df.drop(columns=['street', 'street_number', 'city', 'gender_y', 'postal_code'])
company_df = company_df.drop(columns=['street', 'street_number', 'city', 'postal_code'])

clean_data_df = data_df.merge(company_df, left_on='company_id', right_on='id', how='left', suffixes=('', '_company'))

clean_data_df = clean_data_df.drop(columns=['id_company',  'company_id'])

In [62]:
clean_data_df.to_csv('data/people-with-companies-clean.csv', index=False)

In [7]:
#clean_data_df.to_csv('data/people-with-companies-clean.csv', index=False)

In [8]:
ox.distance.great_circle_vec(49.004139227404124, 12.09650733163438, 49.00819304523213, 12.118651650304834)

1676.95822158135

In [3]:
clean_data_df["address"].values[0]

'Am Kreuzhof 2, Regensburg'

In [3]:
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
import pandas as pd
import osmnx as ox

# Define the car driver's departure and arrival addresses
car_driver_departure_address = clean_data_df["address"].values[0]
car_driver_arrival_address = clean_data_df["address_company"].values[0]

# Create a DataFrame with other people's details

# Retrieve the street network for the car driver's area
G = ox.graph_from_address(car_driver_arrival_address, network_type='drive', dist=500)

In [4]:
car_driver_departure_location = ox.geocode(car_driver_departure_address)
car_driver_arrival_location = ox.geocode(car_driver_arrival_address)


# Calculate distances between car driver's departure and arrival locations
car_driver_departure_node = ox.distance.nearest_nodes(G, car_driver_departure_location[0], car_driver_departure_location[1])
car_driver_arrival_node = ox.distance.nearest_nodes(G, car_driver_arrival_location[0], car_driver_arrival_location[1])

In [6]:
# Create a graph representing the connections
graph = nx.Graph()

# Add nodes for car driver, passengers, and destination
graph.add_node("Car Driver", pos=car_driver_departure_location)
graph.add_node("Destination", pos=car_driver_arrival_location)

# Add nodes for passengers
try:
    for _, row in clean_data_df.iterrows():
        graph.add_node(row['first_name'], pos=ox.geocode(row['address']))
    print("node finished")
    # Connect car driver to passengers
    for _, row in clean_data_df.iterrows():
        graph.add_edge("Car Driver", row['first_name'])
    print("edge finished")
    # Connect passengers to the destination
    for _, row in clean_data_df.iterrows():
        graph.add_edge(row['first_name'], "address_company")
    print("edge finished")
except:
    print("error")
    pass
# Find the shortest path connecting all nodes
shortest_path = nx.shortest_path(graph, source="Car Driver", target="Destination", weight='length')

# Print the shortest path
print("Shortest path:", shortest_path)

error


NetworkXNoPath: No path between Car Driver and Destination.

In [None]:


# Calculate distances between each person's departure and arrival locations and car driver's locations
clean_data_df['Departure Distance'] = clean_data_df['address'].apply(lambda x: nx.shortest_path_length(G, car_driver_departure_node, ox.distance.nearest_nodes(G, *ox.geocode(x).values()), weight='length'))
clean_data_df['Arrival Distance'] = clean_data_df['address_company'].apply(lambda x: nx.shortest_path_length(G, ox.distance.nearest_nodes(G, *ox.geocode(x).values()), car_driver_arrival_node, weight='length'))
clean_data_df
# Filter the DataFrame to find people who are closer to the destination than the car driver
can_take_with_us = people_df[(people_df['Departure Distance'] <= people_df['Arrival Distance'])]

# Create a list of locations (car driver's departure node and people's departure nodes)
locations = [car_driver_departure_node] + can_take_with_us['Departure Address'].apply(lambda x: ox.distance.nearest_nodes(G, *ox.geocode(x).values())).tolist()

# Calculate distance matrix between locations
dist_matrix = []
for i in range(len(locations)):
    row = []
    for j in range(len(locations)):
        if i == j:
            row.append(0)
        else:
            row.append(nx.shortest_path_length(G, locations[i], locations[j], weight='length'))
    dist_matrix.append(row)

# Create a routing model
num_locations = len(locations)
num_vehicles = 1
depot = 0

routing = pywrapcp.RoutingModel(num_locations, num_vehicles, depot)
search_parameters = pywrapcp.DefaultRoutingSearchParameters()

# Set the distance callback
def distance_callback(from_index, to_index):
    return dist_matrix[from_index][to_index]

transit_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

# Solve the TSP
search_parameters.first_solution_strategy = (
    routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)

solution = routing.SolveWithParameters(search_parameters)

# Extract the route
route = []
index = routing.Start(0)
while not routing.IsEnd(index):
    node_index = routing.IndexToNode(index)
    route.append(locations[node_index])
    index = solution.Value(routing.NextVar(index))

# Print the route
print("Route:", route)

In [41]:
from networkx.classes.multidigraph import MultiDiGraph
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import osmnx
from typing import Tuple, List
from pandarallel import pandarallel

def get_location_from_address(address: str):

    try:
        locator = Nominatim(user_agent = "roadio")
        location = locator.geocode(address)
    except GeocoderTimedOut:
        return get_location_from_address(address)

    return location.latitude, location.longitude

def get_graph(address_orig: str, address_dest: str):

    MARGIN = 0.1

    # find location by address
    location_orig = get_location_from_address(address_orig)
    location_dest = get_location_from_address(address_dest)

    north = max(location_orig[0],location_dest[0]) + MARGIN
    south = min(location_orig[0],location_dest[0]) - MARGIN
    west = max(location_orig[1],location_dest[1]) + MARGIN
    east = min(location_orig[1],location_dest[1]) - MARGIN

    graph = osmnx.graph.graph_from_bbox(north, south, east, west, network_type='drive', clean_periphery=False)

    return graph, location_orig, location_dest


def get_graph_from_mode(address_orig: str, address_dest: str, mode: str, city: str="Brussels", dist: float=1000.):

    assert mode in ['place', 'address']

    # find location by address
    location_orig = get_location_from_address(address_orig)
    location_dest = get_location_from_address(address_dest)

    if mode == 'place':
        graph = osmnx.graph_from_place(city, network_type = 'drive')
    else:
        graph = osmnx.graph.graph_from_address(address_orig, dist=dist, dist_type='bbox', network_type = 'drive')

    return graph, location_orig, location_dest


def find_shortest_path(graph: MultiDiGraph, location_orig: Tuple[float], location_dest: Tuple[float], optimizer: str) -> List[int]:
    # find the nearest node to the departure and arrival location
    node_orig = osmnx.get_nearest_node(graph, location_orig)
    node_dest = osmnx.get_nearest_node(graph, location_dest)
    
    route = nx.shortest_path(graph, node_orig, node_dest, weight=optimizer.lower())
    return route, node_orig, node_dest

In [42]:
def get_route(address_orig: str, address_dest: str) -> List[int]:
    graph, location_orig, location_dest = get_graph(address_orig, address_dest)
    route, node_orig, node_dest = find_shortest_path(graph, location_orig, location_dest, 'time')
    return route, node_orig, node_dest


In [50]:
clean_data_df

Unnamed: 0,id,first_name,last_name,email,gender,lat,lon,address,lat_company,lon_company,name,address_company
0,1,Linnet,Willoughby,lwilloughby0@apache.org,Female,49.006584,12.182435,"Am Kreuzhof 2, Regensburg",49.016493,12.127319,Autolackiererei Baumer GmbH,"Auweg 11 b, Regensburg"
1,2,Aaron,Zanussii,azanussii1@hao123.com,Male,49.004452,11.975334,"Regensburger Straße 12, Sinzing",49.025878,12.116306,Tom + Peter Gastro GmbH + Co. KG,"Weichser Weg 5, Regensburg"
2,3,Ansley,Lemmon,alemmon2@redcross.org,Female,49.004848,11.980031,"Regensburger Straße 21, Sinzing",49.014438,12.092961,Thurn + Taxis Immobilien Service GmbH + Co. Ob...,"Emmeramsplatz 5, Regensburg"
3,4,Brad,Pettis,bpettis3@unesco.org,Male,49.005052,12.115240,"Hornstraße 6, Regensburg",49.028822,12.099478,Eco Management Solarrente 2 GmbH,"Drehergasse 20, Regensburg"
4,5,Gertrude,Sarson,gsarson4@cpanel.net,Female,49.005403,12.117833,"Kleiberstraße 2, Regensburg",49.019810,12.089037,DSVW GmbH + Co. KG,"Arnulfsplatz 4, Regensburg"
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,Maure,Looks,mlooksrn@miitbeian.gov.cn,Female,49.019350,12.043527,"Kurt-Schumacher-Straße 21, Regensburg",49.028822,12.099478,Eco Invest Solarrente 4 GmbH + Co. KG,"Drehergasse 20, Regensburg"
9996,9997,Shepard,Stannislawski,sstannislawskiro@columbia.edu,Male,49.019266,12.044349,"Zirklstraße 22, Regensburg",49.019775,12.104956,K+P Krankenhausplanungsgruppe GmbH,"Donaulände 7, Regensburg"
9997,9998,Madeline,Crux,mcruxrp@stanford.edu,Agender,49.018586,12.043858,"Kurt-Schumacher-Straße 23, Regensburg",49.020347,12.091675,Blochberger + Weiss GmbH,"Am Römling 6, Regensburg"
9998,9999,Colet,Swadlen,cswadlenrq@privacy.gov.au,Male,49.019221,12.044345,"Zirklstraße 24, Regensburg",49.019948,12.094743,FORUM TOURISMUS REGENSBURG e.V.,"Rathausplatz 3, Regensburg"


In [43]:
names = ["Linnet Willoughby", "Brad Pettis", "Gertrude Sarson", "Minni	Narracott"]

filtered_first_names = [name.split()[0] for name in names]
filtered_last_names = [name.split()[1] for name in names]

# Filtering the dataframe
filtered_df = clean_data_df[(clean_data_df['first_name'].isin(filtered_first_names)) & (clean_data_df['last_name'].isin(filtered_last_names))]

# Displaying the filtered dataframe
filtered_df.head()

Unnamed: 0,id,first_name,last_name,email,gender,lat,lon,address,lat_company,lon_company,name,address_company
0,1,Linnet,Willoughby,lwilloughby0@apache.org,Female,49.006584,12.182435,"Am Kreuzhof 2, Regensburg",49.016493,12.127319,Autolackiererei Baumer GmbH,"Auweg 11 b, Regensburg"
3,4,Brad,Pettis,bpettis3@unesco.org,Male,49.005052,12.11524,"Hornstraße 6, Regensburg",49.028822,12.099478,Eco Management Solarrente 2 GmbH,"Drehergasse 20, Regensburg"
4,5,Gertrude,Sarson,gsarson4@cpanel.net,Female,49.005403,12.117833,"Kleiberstraße 2, Regensburg",49.01981,12.089037,DSVW GmbH + Co. KG,"Arnulfsplatz 4, Regensburg"
5,6,Minni,Narracott,mnarracott5@blogger.com,Female,48.994003,12.109968,"Franz-Hartl-Straße 1, Regensburg",49.018992,12.097724,LEDA Handelsgesellschaft mbH,"Domplatz 4, Regensburg"


In [44]:
filtered_df[["routes", "node_orig", "node_dest"]] =  filtered_df.apply(lambda row: get_route(row["address"], row["address_company"]), axis=1, result_type="expand")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df[["routes", "node_orig", "node_dest"]] =  filtered_df.apply(lambda row: get_route(row["address"], row["address_company"]), axis=1, result_type="expand")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df[["routes", "node_orig", "node_dest"]] =  filtered_df.apply(lambda row: get_route(row["address"], row["address_company"]), axis=1, result_type="expand")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in

In [47]:
filtered_df

Unnamed: 0,id,first_name,last_name,email,gender,lat,lon,address,lat_company,lon_company,name,address_company,routes,node_orig,node_dest
0,1,Linnet,Willoughby,lwilloughby0@apache.org,Female,49.006584,12.182435,"Am Kreuzhof 2, Regensburg",49.016493,12.127319,Autolackiererei Baumer GmbH,"Auweg 11 b, Regensburg","[61273644, 61273529, 61273544, 1022965332, 839...",61273644,76718269
3,4,Brad,Pettis,bpettis3@unesco.org,Male,49.005052,12.11524,"Hornstraße 6, Regensburg",49.028822,12.099478,Eco Management Solarrente 2 GmbH,"Drehergasse 20, Regensburg","[73159996, 60538399, 6678200612, 247646199, 27...",73159996,52061185
4,5,Gertrude,Sarson,gsarson4@cpanel.net,Female,49.005403,12.117833,"Kleiberstraße 2, Regensburg",49.01981,12.089037,DSVW GmbH + Co. KG,"Arnulfsplatz 4, Regensburg","[268458030, 268458027, 60538393, 60538394, 605...",268458030,1219408575
5,6,Minni,Narracott,mnarracott5@blogger.com,Female,48.994003,12.109968,"Franz-Hartl-Straße 1, Regensburg",49.018992,12.097724,LEDA Handelsgesellschaft mbH,"Domplatz 4, Regensburg","[134670791, 506484375, 68747114, 272849805, 10...",134670791,725358683


In [48]:
filtered_df.to_csv('data/filtered.csv', index=False)

In [None]:
import streamlit_authenticator as stauth

hashed_passwords = stauth.Hasher(['1234', '1234', "1234", "1234"]).generate()
print(hashed_passwords)

['$2b$12$1OXeDoHxxOvpyvVRgUfjeuQ/zxJpPzph0.NOyoI9jW5fAKyR0I2zC', 
 '$2b$12$imW3I4NK7wBh82LxFap0x..qTu4iwK2inlbCNO8hTvpqT.42IMJ6C', 
 '$2b$12$YC2nOXJ8L7R3t.4I9ngzlu3uS10tQAYv9WhPUE7VkNmu6NKDrpJXS', 
 '$2b$12$rMu2b3vIzIvWIcKNZ76Q7.TtQcYAm9vneyGmL2CqKS0DAXl29/4zS' 