In [1]:
from dotenv import load_dotenv
from kaggle.api.kaggle_api_extended import KaggleApi

import pandas as pd
import h3
import geopandas as gpd
from shapely.geometry import Polygon
import folium
import branca.colormap as cm

import numpy as np

In [None]:
# Descargar dataset

# Cargar el archivo .env
load_dotenv()

# Inicializa la API de Kaggle
api = KaggleApi()
api.authenticate()

# Descarga un dataset
dataset = 'gidutz/autotel-shared-car-locations'

dataset_folder = dataset.split(sep='/')[-1]

output_path = f'datasets/{dataset_folder}/'  # Carpeta de descarga
api.dataset_download_files(dataset, path=output_path, unzip=True)

In [2]:
df = pd.read_csv(f'datasets/autotel-shared-car-locations/2020_02_25.csv')
print(df.shape)
df = df[df['total_cars'] >= 2]
print(df.shape)
# Convertir a listas asegurando que todos los valores sean listas
df['cars_list'] = df['cars_list'].apply(lambda x:
                                        eval(x) if isinstance(x, str)
                                        else (x if isinstance(x, list) else []))

df['timestamp'] = pd.to_datetime(df['timestamp'])
df['month'] = df['timestamp'].dt.strftime('%Y-%m')

(20049198, 5)
(1742287, 5)


In [5]:
df['h3_index'] = df.apply(lambda row: h3.geo_to_h3(row['latitude'],
                                                   row['longitude'], 9), axis=1)

print(df.shape)
df.head()

(1742287, 7)


Unnamed: 0,latitude,longitude,total_cars,cars_list,timestamp,month,h3_index
18,32.095042,34.792425,2,"[227, 243]",2020-02-25 10:48:02+00:00,2020-02,892db0cce97ffff
19,32.111344,34.801714,2,"[246, 272]",2020-02-25 10:48:02+00:00,2020-02,892db0ccc2bffff
20,32.1097,34.8408,2,"[134, 235]",2020-02-25 10:48:02+00:00,2020-02,892db0ccb87ffff
49,32.083,34.7806,2,"[45, 66]",2020-02-25 10:48:02+00:00,2020-02,892db0cc147ffff
50,32.086848,34.788229,2,"[52, 139]",2020-02-25 10:48:02+00:00,2020-02,892db0cc16fffff


In [3]:
df['h3_index'] = np.vectorize(h3.geo_to_h3)(df['latitude'].values,
                                            df['longitude'].values, 9)

print(df.shape)
df.head()

(1742287, 7)


Unnamed: 0,latitude,longitude,total_cars,cars_list,timestamp,month,h3_index
18,32.095042,34.792425,2,"[227, 243]",2020-02-25 10:48:02+00:00,2020-02,892db0cce97ffff
19,32.111344,34.801714,2,"[246, 272]",2020-02-25 10:48:02+00:00,2020-02,892db0ccc2bffff
20,32.1097,34.8408,2,"[134, 235]",2020-02-25 10:48:02+00:00,2020-02,892db0ccb87ffff
49,32.083,34.7806,2,"[45, 66]",2020-02-25 10:48:02+00:00,2020-02,892db0cc147ffff
50,32.086848,34.788229,2,"[52, 139]",2020-02-25 10:48:02+00:00,2020-02,892db0cc16fffff


In [4]:
df['h3_index'] = [h3.geo_to_h3(lat, lon, 9) for lat, lon in zip(df['latitude'],
                                                                df['longitude'])]

print(df.shape)
df.head()

(1742287, 7)


Unnamed: 0,latitude,longitude,total_cars,cars_list,timestamp,month,h3_index
18,32.095042,34.792425,2,"[227, 243]",2020-02-25 10:48:02+00:00,2020-02,892db0cce97ffff
19,32.111344,34.801714,2,"[246, 272]",2020-02-25 10:48:02+00:00,2020-02,892db0ccc2bffff
20,32.1097,34.8408,2,"[134, 235]",2020-02-25 10:48:02+00:00,2020-02,892db0ccb87ffff
49,32.083,34.7806,2,"[45, 66]",2020-02-25 10:48:02+00:00,2020-02,892db0cc147ffff
50,32.086848,34.788229,2,"[52, 139]",2020-02-25 10:48:02+00:00,2020-02,892db0cc16fffff


In [3]:
df_month = df.groupby(['h3_index', 'month']).apply(
                lambda group: pd.Series({
                    'n_bookings': round(group['total_cars'].sum(), 1),
                    'cars_list': list(set(sum(group['cars_list'], []))),
                }), include_groups=False
            ).reset_index()

df_month['num_cars'] = df_month['cars_list'].apply(len)

df_month.head()

Unnamed: 0,h3_index,month,n_bookings,cars_list,num_cars
0,892db01b657ffff,2018-11,330,"[80, 125]",2
1,892db01b657ffff,2019-09,3088,"[89, 4, 229, 13, 207, 272, 177, 248, 153, 250,...",11
2,892db01b657ffff,2019-10,3790,"[1, 130, 3, 6, 271, 17, 157, 42, 172, 53, 55, ...",20
3,892db01b657ffff,2019-11,1340,"[33, 2, 132, 229, 37, 136, 203, 11, 85, 247, 1...",12
4,892db01b657ffff,2019-12,6289,"[128, 129, 2, 4, 262, 271, 146, 148, 150, 27, ...",37


In [4]:
n_months = df_month['month'].nunique()

df_group = df_month.groupby(['h3_index']).apply(
                lambda group: pd.Series({
                    'Avg_Cars': round(group['num_cars'].sum() / n_months, 1),
                    'Cars': list(set(sum(group['cars_list'], []))),
                    'Avg_Bookings': round(group['n_bookings'].mean(), 1),
                }), include_groups=False
            ).reset_index()

df_group['Num_Cars'] = df_group['Cars'].apply(len)

df_group['Geometry'] = [Polygon(h3.h3_to_geo_boundary(h, geo_json=True))
                        for h in df_group['h3_index']]
df_group.head()

Unnamed: 0,h3_index,Avg_Cars,Cars,Avg_Bookings,Num_Cars,Geometry
0,892db01b657ffff,10.9,"[1, 2, 3, 4, 6, 10, 11, 13, 17, 27, 29, 33, 37...",2958.4,96,"POLYGON ((34.79678625834936 32.14472869105405,..."
1,892db01b6c3ffff,17.2,"[2, 4, 5, 11, 13, 14, 20, 22, 24, 26, 27, 30, ...",4599.9,128,"POLYGON ((34.78899730734896 32.14571167617894,..."
2,892db01b6cbffff,7.2,"[130, 131, 7, 138, 267, 13, 269, 271, 272, 22,...",1677.6,64,POLYGON ((34.790485892443336 32.14257259613436...
3,892db01b6cfffff,3.7,"[130, 261, 7, 13, 150, 25, 27, 156, 32, 162, 3...",675.7,35,"POLYGON ((34.79289184283307 32.1452202383227, ..."
4,892db0cc00fffff,15.8,"[5, 10, 13, 14, 22, 23, 25, 29, 30, 33, 34, 35...",1768.1,120,POLYGON ((34.761511673765646 32.07243523182685...


In [5]:
feature = 'Avg_Cars'

# Crear un GeoDataFrame
gdf = gpd.GeoDataFrame(df_group, geometry='Geometry')

gdf['Center_Coordinates'] = gdf['h3_index'].apply(lambda x: h3.h3_to_geo(x))

# Separate into columns of latitude and longitude
gdf['latitude'] = gdf['Center_Coordinates'].apply(lambda x: x[0])
gdf['longitude'] = gdf['Center_Coordinates'].apply(lambda x: x[1])
gdf = gdf.drop(columns=['Center_Coordinates'])

center = (gdf['latitude'].quantile(0.5), gdf['longitude'].quantile(0.5))
m = folium.Map(location=center, zoom_start=12, height=900)

vmin = gdf[feature].quantile(0.1)
vmax = gdf[feature].quantile(0.9)
colormap = cm.LinearColormap(['green', 'yellow', 'red'], vmin=vmin, vmax=vmax).add_to(m)

# Convertir el GeoDataFrame a JSON y añadirlo como una sola capa GeoJson
folium.GeoJson(
    gdf.to_json(),
    style_function=lambda features: {
        'fillColor': colormap(features['properties'][feature]),
        'color': 'black',
        'weight': 0,
        'fillOpacity': 0.6
    },
    tooltip=folium.GeoJsonTooltip(
        fields=['h3_index', feature],
        aliases=['H3 Index', feature]
    )
).add_to(m)

# Mostrar el mapa
m