In [None]:
import pandas as pd
df = pd.read_csv('/Users/tamasmakos/dev/survey_methods_room/data_files/output_files/address_geocoded.csv')

In [5]:
import osmnx as ox
import pandas as pd
import geopandas as gpd
import networkx as nx
import pickle
from shapely.geometry import Point

def save_hungarian_network():
    ox.config(use_cache=True)
    G = ox.graph_from_place('Hungary', network_type='drive')
    with open('hungary_network.pkl', 'wb') as f:
        pickle.dump(G, f)

def load_hungarian_network():
    with open('hungary_network.pkl', 'rb') as f:
        return pickle.load(f)

def analyze_hungarian_network(df, lat_col, lon_col):
    # Load the pre-saved road network of Hungary
    G = load_hungarian_network()

    # Convert the graph to a GeoDataFrame
    gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)

    # Take a DataFrame with lat and lon and convert to GeoDataFrame
    df['geometry'] = df.apply(lambda row: Point(row[lon_col], row[lat_col]), axis=1)
    geo_df = gpd.GeoDataFrame(df, geometry='geometry')

    # Calculate centrality measures
    centrality_measures = {
        'closeness_centrality': nx.closeness_centrality(G),
        'betweenness_centrality': nx.betweenness_centrality(G),
        'degree_centrality': nx.degree_centrality(G),
        'load_centrality': nx.load_centrality(G),
        'eigen_centrality': nx.eigenvector_centrality(G),
        'katz_centrality': nx.katz_centrality(G),
        'harmonic_centrality': nx.harmonic_centrality(G),
        'clustering': nx.clustering(G),
        'average_neighbor_degree': nx.average_neighbor_degree(G),
        'square_clustering': nx.square_clustering(G),
        'pagerank': nx.pagerank(G),
        'communicability_betweenness_centrality': nx.communicability_betweenness_centrality(G),
    }

    # Map centrality values to the nodes in the GeoDataFrame
    for measure, values in centrality_measures.items():
        gdf_nodes[measure] = gdf_nodes.index.map(values)

    # Perform spatial join to associate centrality with points in geo_df
    for measure in centrality_measures.keys():
        geo_df_with_centrality = gpd.sjoin(geo_df, gdf_nodes[[measure]], how='left', op='nearest')
        geo_df[measure] = geo_df_with_centrality[measure]

    return geo_df

In [6]:
save_hungarian_network()

  ox.config(use_cache=True)


In [7]:
result_gdf_net = analyze_hungarian_network(df.head(10), 'lat', 'lon')
print(result_gdf_net)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['geometry'] = df.apply(lambda row: Point(row[lon_col], row[lat_col]), axis=1)


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/tamasmakos/Library/Python/3.11/lib/python/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/sb/_vwyhkqj2rqfjwlpwnlh2fzc0000gp/T/ipykernel_93583/852151362.py", line 1, in <module>
    result_gdf_net = analyze_hungarian_network(df.head(10), 'lat', 'lon')
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/sb/_vwyhkqj2rqfjwlpwnlh2fzc0000gp/T/ipykernel_93583/2655409572.py", line 31, in analyze_hungarian_network
    'closeness_centrality': nx.closeness_centrality(G),
                            ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/networkx/algorithms/centrality/closeness.py", line 122, in closeness_centrality
    sp = path_length(G, n)
         ^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/networkx/classes/backends.py", line 148, in wrapper


In [None]:
import osmnx as ox
import geopandas as gpd
from geopy.geocoders import Nominatim
from shapely.geometry import Point, Polygon

def add_building_footprints(df, address_col):
    """
    Add building footprints to a dataframe containing addresses.
    
    Parameters:
    df (pandas.DataFrame): DataFrame containing addresses.
    address_col (str): Column name for addresses.

    Returns:
    geopandas.GeoDataFrame: GeoDataFrame with an additional column for building footprints.
    """
    # Initialize geocoder
    geolocator = Nominatim(user_agent="geoapiExercises")

    # Geocode addresses
    df['coords'] = df[address_col].apply(lambda x: geolocator.geocode(x))
    df['geometry'] = df['coords'].apply(lambda x: Point(x.longitude, x.latitude) if x else None)

    # Convert DataFrame to GeoDataFrame
    gdf = gpd.GeoDataFrame(df, geometry='geometry')

    # Initialize column for building footprints
    gdf['building_footprint'] = None

    for index, row in gdf.iterrows():
        if row['geometry']:
            # Query OSM for building footprints around the point
            buildings = ox.geometries_from_point(center_point=(row['geometry'].y, row['geometry'].x), tags={'building': True}, dist=100)
            
            # Find the building footprint containing the geocoded point
            for _, building in buildings.iterrows():
                if isinstance(building['geometry'], Polygon) and building['geometry'].contains(row['geometry']):
                    gdf.at[index, 'building_footprint'] = building['geometry']
                    break

    # Drop unnecessary columns
    gdf = gdf.drop(columns=['coords'])

    return gdf

In [None]:
result_gdf_net_bfp = add_building_footprints(result_gdf_net, 'Address')
print(result_gdf_net_bfp)

In [None]:
import osmnx as ox
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

def count_pois_around_addresses(df, lat_col, lon_col, distance=1000):
    """
    Count Points of Interest around addresses in the DataFrame.
    
    Parameters:
    df (pandas.DataFrame): DataFrame containing latitude and longitude.
    lat_col (str): Column name for latitude.
    lon_col (str): Column name for longitude.
    distance (int): Distance around the point to consider for counting POIs (in meters).
    
    Returns:
    pandas.DataFrame: DataFrame with an additional column for POI counts.
    """
    # Convert DataFrame to GeoDataFrame
    df['geometry'] = [Point(xy) for xy in zip(df[lon_col], df[lat_col])]
    gdf = gpd.GeoDataFrame(df, geometry='geometry')

    # Initialize POI count column
    gdf['poi_count'] = 0

    # Create a buffer around each point
    gdf['buffer'] = gdf.geometry.buffer(distance)

    for index, row in gdf.iterrows():
        # Query OSM for POIs within the buffer
        pois = ox.geometries_from_point(center_point=(row[lat_col], row[lon_col]), tags={'amenity': True}, dist=distance)
        # Count POIs and update the DataFrame
        gdf.at[index, 'poi_count'] = len(pois)

    # Drop the buffer column
    gdf = gdf.drop(columns=['buffer'])

    return gdf

# Example usage:
# df = pd.DataFrame({'Latitude': [47.4979, 47.1625], 'Longitude': [19.0402, 19.5033]})
# result_df = count_pois_around_addresses(df, 'Latitude', 'Longitude', distance=1000)
# print(result_df)
