In [None]:
import os
import sys
import pickle
import torch
import math
from tqdm import tqdm 

import osmnx as ox
import geopandas as gpd

sys.path.append("../..")

from cellspace import CellSpace
from node2vec import train_node2vec

from pipelines.utils import ROOT_DIR, load_config
from models.utils import meters2lonlat, lonlat2meters

In [None]:
# Load the dataset configuration for 'porto'; later cells read the bounding
# box (min/max lon/lat), 'cellspace_buffer' and 'cell_size' from it.
data_config = load_config(name='porto', ctype="dataset")

In [None]:
# Convert the configured lon/lat corners with lonlat2meters, then widen the
# resulting box by the configured buffer on all four sides.
cellspace_buffer = data_config['cellspace_buffer']
x_min, y_min = lonlat2meters(data_config['min_lon'], data_config['min_lat'])
x_max, y_max = lonlat2meters(data_config['max_lon'], data_config['max_lat'])
x_min, y_min = x_min - cellspace_buffer, y_min - cellspace_buffer
x_max, y_max = x_max + cellspace_buffer, y_max + cellspace_buffer

In [None]:
# The same (integer) size is passed for both cell dimensions, followed by the
# buffered bounding box computed above.
cell_size = int(data_config['cell_size'])
cs = CellSpace(
    cell_size, cell_size,
    x_min, y_min,
    x_max, y_max,
)

In [None]:
# TODO: save the CellSpace

In [None]:
# Obtain for each cell the POIs inside it
# def get_cell_pois(cs):
#     for i in range(cs.x_size):
#         # obtain (x_i, y_i) of cell i
#         x_i, y_i = cs.get_xyidx_by_cellid(i)
#         # Get bbox
#         x1,y1, x2, y2 = cs.get_mbr(x_i, y_i)
#         # Get lon, lats
#         west_lon, south_lat = meters2lonlat(x1, y1)
#         east_lon, north_lat = meters2lonlat(x2, y2)
#         # Get POIs
#         tags = {"amenity": True, "building": True, "craft": True, "healthcare": True, 'highway':True, 'landuse':True, "office": True, "leisure": True, "shop": True, "tourism": True, "sport": True}
#         ox.features.features_from_bbox(north_lat, south_lat, east_lon, west_lon,  tags = tags)



In [None]:
# tags = {"amenity": True, "building": True, 'landuse':True, "office": True, "leisure": True, "shop": True, "tourism": True}
# pois_tag = ox.features.features_from_place("Porto, Portugal", tags = tags)

### New Approach

Obtain a DataFrame with one row per POI, holding its category and its geometry.

In [None]:
# Table of grid cells; later cells rely on its 'cell_id', 'cell_tuple' and
# 'geometry' columns.
cell_gdf = cs.get_celldf()

In [None]:
# Number of rows in the cell table.
len(cell_gdf)

In [None]:
# Inspect the last row of the cell table.
cell_gdf.iloc[-1]

#### Get POI for whole grid

In [None]:
# https://wiki.openstreetmap.org/wiki/Map_Features
def preprocess_poi(poi_df, tags = ["healthcare", "amenity", "craft", "tourism", "office", "leisure", "shop", "building"]  ):
    """Reduce a raw OSM feature table to one labelled row per POI.

    For every row, the first non-null value among the ``tags`` columns (in the
    order given) becomes ``poi`` and the name of the column it came from
    becomes ``category``.

    Parameters
    ----------
    poi_df : DataFrame-like
        Must contain a ``geometry`` column; OSM keys are expected as columns.
    tags : list of str
        OSM keys in priority order. Keys that are not columns of ``poi_df``
        are ignored, so a query restricted to a subset of keys still works.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``poi`` (categorical), ``category`` and ``geometry`` with CRS
        EPSG:4326. Rows without any tag value or without geometry are dropped.

    Raises
    ------
    KeyError
        If none of ``tags`` is a column of ``poi_df`` or ``geometry`` is missing.
    """
    # Selecting a tag column that the download did not return would raise a
    # KeyError, so restrict the priority list to the columns that exist.
    available_tags = [tag for tag in tags if tag in poi_df.columns]
    if not available_tags:
        raise KeyError(f"none of the requested tags {list(tags)} is a column of poi_df")

    poi_df_cat = poi_df[available_tags + ["geometry"]].copy()
    tag_values = poi_df_cat[available_tags]

    # bfill along the columns moves the first non-null tag value of each row
    # into the first column; idxmax on the not-null mask names its source column.
    poi_df_cat["poi"] = tag_values.bfill(axis=1).iloc[:, 0]
    poi_df_cat["category"] = tag_values.notnull().idxmax(axis=1)

    # Rows with no tag at all have a null ``poi`` and are removed here. The
    # categorical conversion uses a plain column replacement because writing
    # through ``.loc[:, "poi"]`` can keep the existing object dtype.
    poi_df_cat = (
        poi_df_cat[["poi", "category", "geometry"]]
        .dropna(axis=0)
        .assign(poi=lambda df: df["poi"].astype("category"))
    )

    # to gdf
    return gpd.GeoDataFrame(poi_df_cat, geometry='geometry', crs='EPSG:4326')

In [None]:
# Need to select which tags we use, and if we divide into subgroups, especially for amenity
tags = {"healthcare": True, "amenity": True, "craft": True, "tourism": True, "office": True, "leisure": True, "shop": True, "building": True}
# NOTE: the full tag set above is overridden here, so only amenities are downloaded.
tags = {"amenity": True}
# features_from_bbox takes its bounds in the order north, south, east, west.
poi_df = ox.features.features_from_bbox(
    data_config['max_lat'],  # north
    data_config['min_lat'],  # south
    data_config['max_lon'],  # east
    data_config['min_lon'],  # west
    tags,
)

In [None]:
poi_df_procesed = preprocess_poi(poi_df)

In [None]:
# Row count of the cell table before the spatial join (sanity check).
len(cell_gdf)

In [None]:
# Distinct values of the 'category' column in poi_df_procesed
categories = poi_df_procesed['category'].unique()

# Pair every grid cell with the POIs it intersects. The left join keeps cells
# without any POI (their 'category' is NaN). `predicate` is the current name of
# the keyword formerly called `op`, which newer geopandas releases reject.
spatial_join = gpd.sjoin(cell_gdf, poi_df_procesed, how='left', predicate='intersects')

# Per cell, count the POIs of each category: one row per 'cell_id', one column
# per category. NaN categories are not counted, so cells without POIs do not
# appear in this table.
category_counts = (
    spatial_join.groupby('cell_id')['category']
    .value_counts()
    .unstack()
    .fillna(0)
    .astype(int)
)

In [None]:
# Row count of the cell table before the category counts are merged in.
len(cell_gdf)

In [None]:
# Attach the per-category POI counts to the grid cells; cells missing from
# category_counts get 0. Count columns left over from an earlier run of this
# cell are dropped first, so re-running it does not create duplicated
# "_x"/"_y" columns.
cell_gdf = (
    cell_gdf
    .drop(columns=list(category_counts.columns), errors='ignore')
    .merge(category_counts, left_on='cell_id', right_index=True, how='left')
    .fillna(0)
)

In [None]:
# Row count after the merge; compare with the value shown before it.
len(cell_gdf)

In [None]:
# The two components of each cell's 'cell_tuple' are also used as features:
# element 0 goes to column 'x', element 1 to column 'y'.
cell_gdf['x'] = [cell_tuple[0] for cell_tuple in cell_gdf['cell_tuple']]
cell_gdf['y'] = [cell_tuple[1] for cell_tuple in cell_gdf['cell_tuple']]


In [None]:
# Display the cell table with the POI counts and x / y columns added above.
cell_gdf

In [None]:
# Final cell feature matrix: every column from position 4 onwards as an array.
# NOTE(review): relies on the first four columns being the non-feature ones — confirm.
cell_gdf.iloc[:,4:].values

In [None]:
#### Show cells which do not have POIs

import folium
from folium import GeoJson

# Only the POI count columns decide whether a cell is empty. A positional slice
# such as iloc[:, 4:] would also include the 'x' / 'y' columns, which are
# non-zero for almost every cell.
poi_count_cols = [col for col in category_counts.columns if col in cell_gdf.columns]
cell_gdf_nopois = cell_gdf[cell_gdf[poi_count_cols].eq(0).all(axis=1)]

if cell_gdf_nopois.empty:
    # Nothing to draw; avoid indexing into an empty selection.
    m = None
    print("Every cell contains at least one POI.")
else:
    # Get the centroid of the first polygon
    first_polygon = cell_gdf_nopois.iloc[0].geometry
    centroid = first_polygon.centroid

    # Create a Folium map centered at the centroid of the first polygon
    m = folium.Map(location=[centroid.y, centroid.x], zoom_start=13)

    # Add the polygons from the geometry column to the map
    GeoJson(cell_gdf_nopois).add_to(m)

# Display the map
#m


In [None]:
# Inspect a single cell by position. The original referenced `cell_gdf2`, which
# is not defined anywhere in this notebook and fails on a fresh kernel.
# NOTE(review): assumes `cell_gdf` was meant and that it has more than 35364 rows — confirm.
cell_gdf.iloc[35364]

In [None]:
# Display the per-cell, per-category POI count table.
category_counts

In [None]:
# List the column names of the cell table.
cell_gdf.columns

In [None]:
# Display the cell table.
cell_gdf

In [None]:
# Display the processed POI table (poi, category, geometry).
poi_df_procesed

### Try TF-IDF approach

In [None]:
# Create a list for each cell, containing the categories of the POIs that lie
# within that cell. Iterating over the geometry column (instead of the
# iterrows() generator) gives tqdm a length, so the bar shows real progress.
# NOTE: this tests every POI against every cell and is slow on large grids.
grid_poi_list = []
for cell_geometry in tqdm(cell_gdf['geometry']):
    mask = poi_df_procesed.within(cell_geometry)
    grid_poi_list.append(list(poi_df_procesed.loc[mask].category))

In [None]:
# Count the cells that have at least one POI; the remaining cells are empty.
count = sum(1 for cell_pois in grid_poi_list if len(cell_pois) != 0)
count_empty = len(grid_poi_list) - count
print(count)
print(count_empty)

In [None]:
# Build the corpus: one space-separated string of POI categories per cell.
# Cells without POIs are skipped, so the corpus can be shorter than
# grid_poi_list and its positions do not line up with cell positions.
corpus = [' '.join(cell_poi) for cell_poi in grid_poi_list if len(cell_poi) != 0]

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# `corpus` holds one document per non-empty cell (built in the previous cell).

# Initialize the TfidfVectorizer with its default settings
vectorizer = TfidfVectorizer()

# Fit the vocabulary on the corpus and compute its TF-IDF vectors in one step
tfidf_vectors = vectorizer.fit_transform(corpus)

# The resulting vectors are stored in a sparse matrix; densify only for printing
print(tfidf_vectors.todense())


In [None]:
# Shape of the dense TF-IDF matrix: (documents in corpus, vocabulary terms).
tfidf_vectors.todense().shape

In [None]:
# Next:
# use the output as feats vector
# we need further feats: x_i, y_i, Space2vec?

# Get flows?
# Embedding model, similar to the road one, or even the same