In [None]:
import geopandas as gpd
import pandas as pd
from shapely import geometry
import os
import time

from srai.constants import REGIONS_INDEX, WGS84_CRS
from srai.embedders import CountEmbedder
from srai.joiners import IntersectionJoiner
from srai.loaders.osm_loaders import OSMOnlineLoader, OSMPbfLoader
from srai.loaders.osm_loaders.filters import HEX2VEC_FILTER
from srai.regionizers import H3Regionizer, SlippyMapRegionizer
from srai.utils import geocode_to_region_gdf

In [None]:

def get_embedding_for_region(region_name: str, zoom: int = 16) -> pd.DataFrame:
    """Compute a count-based embedding of OSM features for a named region.

    Pipeline, in order:

    1. ``geocode_to_region_gdf`` turns ``region_name`` into a GeoDataFrame
       describing the area of interest.
    2. ``SlippyMapRegionizer`` splits that area into regions at the given
       ``zoom`` level.
    3. ``OSMPbfLoader`` loads the features selected by ``HEX2VEC_FILTER`` for
       the area, using ``data/pbfs/<region_name>`` as its download directory.
    4. ``IntersectionJoiner`` matches regions with features.
    5. ``CountEmbedder`` turns the regions, features and their join into the
       final embedding.

    Args:
        region_name: Name of the place to geocode. It is also used verbatim as
            the sub-directory name under ``data/pbfs``.
        zoom: Zoom level passed to ``SlippyMapRegionizer``. Defaults to 16,
            the value that was previously hard-coded.

    Returns:
        The object returned by ``CountEmbedder.transform`` (annotated as a
        ``pd.DataFrame``; presumably one row per region - verify against the
        srai documentation).
    """
    bbox_gdf = geocode_to_region_gdf(region_name)
    regionizer = SlippyMapRegionizer(zoom=zoom)
    regions_gdf = regionizer.transform(bbox_gdf)

    path = f"data/pbfs/{region_name}"
    # exist_ok=True replaces the racy "check, then create" pair and is a
    # no-op when the directory is already there.
    os.makedirs(path, exist_ok=True)

    loader = OSMPbfLoader(download_directory=path)
    features_gdf = loader.load(bbox_gdf, tags=HEX2VEC_FILTER)

    joiner = IntersectionJoiner()
    joint_gdf = joiner.transform(regions_gdf, features_gdf)

    embedder = CountEmbedder()
    embedding = embedder.transform(regions_gdf, features_gdf, joint_gdf)
    return embedding

In [None]:
from tqdm import tqdm
from datetime import datetime

# Retry policy for failures while building an embedding (the original code
# slept 600 seconds between attempts and retried without any upper bound).
MAX_ATTEMPTS = 6
RETRY_DELAY_SECONDS = 600
EMBEDDINGS_DIR = "data/embeddings"

# The pickles are written into this directory; create it up front so a fresh
# checkout does not fail on every `to_pickle` call.
os.makedirs(EMBEDDINGS_DIR, exist_ok=True)

# (city, repr(exception)) for every city that still failed after MAX_ATTEMPTS.
errors = []
with open('data/cities_v2.txt', 'r') as cities_file:
    for line in cities_file:
        city = line.rstrip("\n")
        if not city.strip():
            # Blank line: there is nothing to geocode.
            continue
        print(city)

        path = f"{EMBEDDINGS_DIR}/{city}.pkl"
        pbf_dir = f"data/pbfs/{city}"
        # A city counts as done only when both its embedding pickle and its
        # PBF download directory are present.
        if os.path.exists(path) and os.path.exists(pbf_dir):
            continue

        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                df = get_embedding_for_region(city)
                df.to_pickle(path)
                break
            except Exception as e:
                current_time = datetime.now().strftime("%H:%M:%S")
                print(
                    f"{current_time}: failed at {city} "
                    f"(attempt {attempt}/{MAX_ATTEMPTS}): {e!r}"
                )
                print()
                if attempt == MAX_ATTEMPTS:
                    # Give up on this city so one permanent failure cannot
                    # block the remaining cities; re-running the cell retries it.
                    errors.append((city, repr(e)))
                else:
                    time.sleep(RETRY_DELAY_SECONDS)

if errors:
    print(f"{len(errors)} cities could not be processed:")
    for failed_city, reason in errors:
        print(f"  {failed_city}: {reason}")