In [1]:
import osmnx as ox
import geopandas as gpd
import pandas as pd
import numpy as np

import re
from shapely import wkt
import ast

In [2]:
# Function to extract geo data from OSM
def geodata_to_df(country, city):

    G = ox.graph_from_place(city, network_type='bike')  # download raw geospatial data from OSM

    nodes, edges = ox.graph_to_gdfs(G, nodes=True, edges=True)
    nodes["city"], edges["city"] = city, city
    nodes["country"], edges["country"] = country, country

    edges["lat_long"] = edges["geometry"].apply(lambda x: re.sub(r'[^0-9., ]', "", str([re.sub(r'[^0-9. ]', '', str(i)) for i in list(zip(x.xy[1], x.xy[0]))])))
    edges["geometry"] = edges["geometry"].apply(lambda x: wkt.dumps(x))

    edges["highway"] = edges["highway"].apply(lambda x: ", ".join(x) if x.__class__.__name__=="list" else x)
    edges["name"] = edges["name"].apply(lambda x: ", ".join(x) if x.__class__.__name__=="list" else x)
    edges["maxspeed"] = edges["maxspeed"].apply(lambda x: ", ".join(x) if x.__class__.__name__ == "list" else x)
    edges["ref"] = edges["ref"].apply(lambda x: ", ".join(x) if x.__class__.__name__ == "list" else x)
    edges["reversed"] = edges["reversed"].apply(lambda x: x[0] if x.__class__.__name__ == "list" else x)
    edges["oneway"] = edges["oneway"].apply(lambda x: x[0] if x.__class__.__name__ == "list" else x)

    edges.fillna(-99, inplace=True)
    nodes.fillna(-99, inplace=True)
    edges["name"] = edges["name"].astype(str).replace("-99", None)

    # nodes_and_edges = gpd.sjoin(edges, nodes, how="left", predicate="intersects")

    return G, nodes, edges

In [3]:
G, nodes, edges = geodata_to_df('Germany', 'Stuttgart')

  edges["geometry"] = edges["geometry"].apply(lambda x: wkt.dumps(x))


In [216]:
# Function to extract OSM features
def osm_features(city):
    # Get type of roads
    road_types = edges.reset_index()[['osmid','highway']]

    # Get road width
    width = edges.reset_index()[['osmid', 'width']]

    # Create a GeoDataFrame for intersections
    # intersections = gpd.GeoDataFrame(geometry=nodes.geometry)

    # Get nodes with the highway=traffic_signals tag (intersections with traffic lights)
    traffic_nodes = ox.features_from_place(city, tags={"highway": "traffic_signals"}).reset_index()[['osmid','highway']].rename(columns={'highway': 'trafficSignals'})

    # Get spots with bicycle parking
    bicycle_parking = ox.features_from_place(city, tags={"amenity": "bicycle_parking"}).reset_index()[['osmid','amenity']].rename(columns={'amenity': 'bicycleParking'})

    # Public transit options
    # Get tram stops
    transit_tram = ox.features_from_place(city, tags={"railway": 'tram_stop'}).reset_index()[['osmid','railway']].rename(columns={'railway': 'tramStop'})
    # Get bus stops
    transit_bus = ox.features_from_place(city, tags={"highway": 'bus_stop'}).reset_index()[['osmid','highway']].rename(columns={'highway': 'busStop'})

    # On street parking
    street_parking_right = ox.features_from_place(city, tags={"parking:right": True})['parking:right'].reset_index()[['osmid','parking:right']]
    street_parking_left = ox.features_from_place(city, tags={"parking:left": True})['parking:left'].reset_index()[['osmid','parking:left']]
    street_parking_both = ox.features_from_place(city, tags={"parking:both": True})['parking:both'].reset_index()[['osmid','parking:both']]
    
    # Merge all features
    geodfs_to_merge = [bicycle_parking, transit_tram, transit_bus,
                   street_parking_right, street_parking_left, street_parking_both]

    # Initial merge with nodes_and_edges
    merged_osm = traffic_nodes

    # Perform outer merges in a loop
    for geodf in geodfs_to_merge:
        merged_osm = merged_osm.merge(geodf, on='osmid', how='outer')
        
    return merged_osm

In [217]:
merged_osm = osm_features('Stuttgart')

merged_osm['rawScore'] = 0

# Function to calculate the raw score
def calculate_raw_score(row):
    raw_score = 0
    if row['trafficSignals'] == 'traffic_signals':
        raw_score += 1
    if row['bicycleParking'] == 'bicycle_parking':
        raw_score += 1
    if pd.isna(row['tramStop']):
        raw_score += 1
    if pd.isna(row['busStop']):
        raw_score += 1
    if pd.isna(row['parking:right']) or row['parking:right'] == 'no':
        raw_score += 1
    if pd.isna(row['parking:left']) or row['parking:left'] == 'no':
        raw_score += 1
    if pd.isna(row['parking:both']) or row['parking:both'] == 'no':
        raw_score += 1

    return raw_score

In [218]:
merged_osm['rawScore'] = merged_osm.apply(calculate_raw_score, axis=1)
merged_osm['scaledScore'] = merged_osm['rawScore'] / 7
merged_osm

Unnamed: 0,osmid,trafficSignals,bicycleParking,tramStop,busStop,parking:right,parking:left,parking:both,rawScore,scaledScore
0,483750,traffic_signals,,,,,,,6,0.857143
1,483761,traffic_signals,,,,,,,6,0.857143
2,490552,traffic_signals,,,,,,,6,0.857143
3,490721,traffic_signals,,,,,,,6,0.857143
4,498263,traffic_signals,,,,,,,6,0.857143
...,...,...,...,...,...,...,...,...,...,...
4525,1209687037,,,,,,,street_side,4,0.571429
4526,1209694031,,,,,,,no,5,0.714286
4527,1213878443,,,,,,,no,5,0.714286
4528,1213878444,,,,,,,no,5,0.714286


In [220]:
road_types = edges[['osmid','highway']]

# Function to map road type to score
def road_type_to_score(road_type):
    if re.search(r'\[.*residential.*\]', road_type):
        return 0.7
    elif re.search(r'\[.*service.*\]', road_type) or re.search(r'\[.*track.*\]', road_type):
        return 0.1
    elif re.search(r'\[.*living_street.*\]', road_type):
        return 0.7
    elif re.search(r'\[.*pedestrian.*\]', road_type):
        return 0.8
    elif re.search(r'\[.*cycleway.*\]', road_type):
        return 1
    elif re.search(r'\[.*primary.*\]', road_type) or re.search(r'\[.*primary_link.*\]', road_type):
        return 0.2
    elif re.search(r'\[.*tertiary.*\]', road_type) or re.search(r'\[.*tertiary_link.*\]', road_type):
        return 0.5
    elif re.search(r'\[.*secondary.*\]', road_type) or re.search(r'\[.*secondary_link.*\]', road_type):
        return 0.4
    elif road_type in ['service', 'track']:
        return 0.1
    elif road_type in ['primary', 'primary_link']:
        return 0.2
    elif road_type in ['secondary', 'secondary_link']:
        return 0.4
    elif road_type in ['tertiary', 'tertiary_link']:
        return 0.5
    elif road_type == 'unclassified':
        return 0.6
    elif road_type in ['residential', 'living_street']:
        return 0.7
    elif road_type in ['pedestrian', 'path']:
        return 0.8
    elif road_type == 'cycleway':
        return 1
    else:
        return 0

In [221]:
road_types['type_score'] = road_types['highway'].apply(road_type_to_score)
road_types

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  road_types['score'] = road_types['highway'].apply(map_road_type_to_score)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,highway,score
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
433908,1420150732,0,4067183,residential,0.7
433908,274695255,0,4067183,residential,0.7
433908,1420150685,0,128522353,service,0.1
433912,260753480,0,8115675,unclassified,0.6
433912,6053439226,0,8879915,unclassified,0.6
...,...,...,...,...,...
11375635419,11375635423,0,1226745579,track,0.1
11375635423,129997079,0,868287818,track,0.1
11375635423,129997091,0,868287818,track,0.1
11375635423,11375635419,0,1226745579,track,0.1


In [289]:
width = edges.reset_index()[['osmid', 'width']]

# Function to calculate the mean width
def calculate_mean_width(width):
    if isinstance(width, list):
        # Extract numeric values from the list and calculate the mean
        values = [float(re.search(r'-?\d+\.\d+', str(val)).group()) for val in width if re.search(r'-?\d+\.\d+', str(val))]
        if values:
            return np.mean(values)
    else:
        # Handle single numeric value or other cases
        return float(re.search(r'-?\d+\.\d+', str(width)).group()) if re.search(r'-?\d+\.\d+', str(width)) else np.nan
    
width['mean_width'] = width['width'].apply(calculate_mean_width)

In [290]:
# Function to map width to score
def width_score(width):
    if width <= 10:
        return width / 10
    elif width > 10:
        return 1
    else:
        return None

In [291]:
width['width_score'] = width['mean_width'].apply(width_score)
width['width_score'].value_counts(dropna=False)

width_score
NaN         78634
0.250000     2509
0.350000      904
0.150000      729
0.050000      649
            ...  
0.965000        1
0.475000        1
0.855000        1
0.935000        1
0.883333        1
Name: count, Length: 136, dtype: int64

In [10]:
probeCount = pd.read_csv('probeCount.csv')
probeCount

mergedtom = pd.merge(edges, probeCount, left_on='name', right_on='streetName', how='outer')

MemoryError: Unable to allocate 4.61 GiB for an array with shape (618434407, 1) and data type object

In [8]:
edges

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,osmid,name,highway,maxspeed,oneway,reversed,length,geometry,service,access,...,lanes,ref,bridge,tunnel,width,est_width,area,city,country,lat_long
u,v,key,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
433908,1420150732,0,4067183,Friolzheimer Straße,residential,50,False,False,31.438,LINESTRING (9.0915728999999992 48.821332499999...,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.8213325 9.0915729, 48.8214058 9.0912874, 48..."
433908,274695255,0,4067183,Friolzheimer Straße,residential,50,False,True,40.027,LINESTRING (9.0915728999999992 48.821332499999...,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.8213325 9.0915729, 48.8212212 9.0919702, 48..."
433908,1420150685,0,128522353,,service,-99,False,False,43.644,LINESTRING (9.0915728999999992 48.821332499999...,parking_aisle,permissive,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.8213325 9.0915729, 48.8212719 9.0915312, 48..."
433912,260753480,0,8115675,Kannengießerstraße,unclassified,50,False,True,29.361,LINESTRING (9.0943289000000007 48.823427899999...,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.8234279 9.0943289, 48.8234074 9.0941412, 48..."
433912,6053439226,0,8879915,Ingersheimer Straße,unclassified,50,False,True,171.820,LINESTRING (9.0943289000000007 48.823427899999...,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.8234279 9.0943289, 48.8233771 9.0943193, 48..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11375635419,11375635423,0,1226745579,,track,-99,False,True,93.968,LINESTRING (9.2221171999999996 48.695236999999...,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.695237 9.2221172, 48.6955522 9.2220367, 48...."
11375635423,129997079,0,868287818,,track,-99,False,False,3.503,LINESTRING (9.2218698000000003 48.696065599999...,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.6960656 9.2218698, 48.6960645 9.2218221"
11375635423,129997091,0,868287818,,track,-99,False,True,147.179,LINESTRING (9.2218698000000003 48.696065599999...,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.6960656 9.2218698, 48.6961117 9.2238739"
11375635423,11375635419,0,1226745579,,track,-99,False,False,93.968,LINESTRING (9.2218698000000003 48.696065599999...,-99,-99,...,-99,-99,-99,-99,-99,-99,-99,Stuttgart,Germany,"48.6960656 9.2218698, 48.6959218 9.2219268, 48..."
