# Analysis of Fluxes Applied on Vodafone Geometries

In [None]:
from global_import import *

## Platform Initialization

In [None]:
# Get (or create) the platform project used by this notebook.
project_tourism = dh.get_or_create_project("overtourism1")

# S3-compatible endpoint handed to boto3 (a MinIO service, judging by the host name).
endpoint_url = "http://minio:9000"
s3 = boto3.resource("s3", endpoint_url=endpoint_url)

# Bucket handle passed to the file-listing helper further down.
bucket = s3.Bucket("datalake")


## Set Configuration

In [None]:

# Build the run configuration. Every argument is positional, so the order below
# must stay aligned with the signature of `set_config` (defined outside this notebook).
config = set_config(
    str_name_project,                   # project name
    str_dir_data_path,                  # data path: input data (shapefile, gtfs_data, istat_data)
    str_dir_output_path,                # output path: base folder for plots and processed data, branched per analysis condition
    complete_path_Istat_population,     # path of the Istat data
    str_prefix_complete_path,           # prefix used to build the "complete path" keys of the config
    str_start_time_window_interest,     # time (str): start of the window of interest
    str_end_time_window_interest,       # time (str): end of the window of interest
    int_number_people_per_bus,          # number of people per bus
    str_name_file_gtfs_zip,             # name of the GTFS zip file, e.g. google_transit_extraurbano.zip
    str_name_dataset_gtfs,              # name of the GTFS dataset
    str_name_gdf_transport,             # name of the transport GeoDataFrame (road network)
    str_name_graph_transport,           # name of the transport graph (graph of the road network)
    str_name_grid,                      # name of the grid
    # The remaining arguments carry no description in the original notebook.
    str_name_grid_2_city,
    str_name_shape_city,
    str_name_centroid_city,
    str_route_idx,
    str_trip_idx,
    str_stop_idx,
    str_transport_idx,
    str_grid_idx,
    str_name_stop_2_trip,
    str_name_stop_2_route,
    str_name_grid_2_stop,
    str_name_grid_2_route,
    str_name_graph_2_route,
    str_name_route_2_graph,
    str_dir_plots_path,
    int_hour_start_window_interest,
    int_hour_end_window_interest,
    int_min_aggregation_OD,
    Lx,
    Ly,
)


# GTFS Data

In [None]:
# Load the GTFS feed when the dataset is already on disk and record whether a
# feed is available for the rest of the analysis.
path_gtfs_dataset = config[f"{str_prefix_complete_path}_{str_name_dataset_gtfs}"]
if os.path.exists(path_gtfs_dataset):
    print("GTFS file exists, loading it")
    # Original note: the feed object is the one typical of gtfs_kit.
    feed = Preprocessing_gtfs(path_gtfs_dataset)
    # The flag used to be False in both branches, so a feed that had just been
    # loaded was still reported as unavailable.
    is_gtfs_available = True
else:
    # NOTE(review): the message announces a download, but nothing is fetched in this cell.
    print("GTFS file does not exist, downloading it")
    is_gtfs_available = False
    feed = None

# Comuni -> Shape file + Istat (Gravity Model)

In [None]:
# ------------------  Extract Cities of interest from Vodafone ------------------ #                                                                                 # NOTE: here you can define what is the list of comuni you want to consider
print("Compute comuni polygons...")

# Disabled: selection of the comuni from the Vodafone attendance tables (`data_handler` is not created in the visible part of this notebook).
#attendences_foreigners = data_handler.vodafone_attendences_df.join(data_handler.vodafone_aree_df, on='locId', how='left',coalesce=True)                                                 # joining dataframes (just to have the needed columns)
#names_zones_to_consider = attendences_foreigners.filter(pl.col("locName").is_in(["comune"])).unique("locDescr")["locDescr"].to_list()                               # pick the comune
complete_path_shape_gdf = config[f"{str_prefix_complete_path}_{str_name_shape_city}"]                                                                               # path to the shape file of the cities
complete_path_centroid_gdf = config[f"{str_prefix_complete_path}_{str_name_centroid_city}"]                                                                         # path to the centroids associated to the cities
# Disabled: within this cell the two paths above are referenced only by this commented-out call.
#centroids_gdf,cities_gdf = pipeline_extract_boundary_and_centroid_gdf_from_name_comuni(names_zones_to_consider,                                                     # list of the comuni to consider        
#                                                                                       complete_path_shape_gdf,                                                     #
#                                                                                       complete_path_centroid_gdf)

# ---------------- Extract Population From Istat ------------------ #
print("Upload Istat data...")
Istat_obj = Istat_population_data(complete_path_Istat_population)

# NOTE: Read the city geometries from a shapefile located under <current working directory>/Data
cities_gdf = gpd.read_file(os.path.join(os.getcwd(),"Data","mavfa-fbk_AIxPA_tourism-delivery_2025.08.22-zoning","fbk-aixpa-turismo.shp"))
# Alternative shapefile kept for reference (disabled).
#cities_gdf = gpd.read_file(os.path.join(os.getcwd(),"Data","shapefile_fbk_2025.05.21","fbk.shp"))

# NOTE: Join the population information (from Istat) with the geometries of the cities
cities_gdf = simple_join_cities_with_population(cities_gdf, 
                                    Istat_obj.population_by_comune,
                                    str_col_comuni_istat = str_col_comuni_istat,
                                    str_col_popolazione_totale = str_population_col_grid,
                                    str_col_city_name = str_col_comuni_name,
                                    is_Vodafone_Trento_ZDT = True)                                                                                        # NOTE: here we join the cities with the population data from Istat
# NOTE: When a city appears in subdivisions (like Trento, Rovereto, etc.) we need to know how much of the population is in each division

# NOTE(review): 3857 / 4326 are passed as projected / geographic CRS codes; how the helper uses them is not visible here - confirm.
cities_gdf = add_column_area_and_fraction(cities_gdf, 
                                str_col_comuni_name,
                                str_col_area = str_col_area,
                                str_col_tot_area = str_col_tot_area,
                                str_col_fraction = str_col_fraction,
                                crs_proj = 3857, 
                                crs_geographic = 4326)
# NOTE: When there are multiple subdivisions we add an integer suffix

cities_gdf = add_suffix_to_repeated_values(cities_gdf, str_col_comuni_name)

# NOTE: The population of a sub-region is the fraction of its area times the total population of the city
cities_gdf = redistribute_population_by_fraction(cities_gdf, 
                                    str_col_popolazione_totale = str_population_col_grid, 
                                    str_col_fraction = str_col_fraction,
                                    str_col_comuni_name = str_col_comuni_name,
                                    conserve_total=True)

# ------- Users Profiles ------- #                                                                                                                                  # NOTE: here you can define what is the list of user profiles you want to consider
# Disabled: `UserProfiles` is not built here; later cells still read it, so it presumably comes from the star import - TODO confirm.
#UserProfiles = attendences_foreigners[str_user_profile_vodafone_col].unique().to_list()                                                                             # list of the user profiles ['VISITOR', 'TOURIST', 'COMMUTER', 'INHABITANT']
#UserProfiles.append("AGGREGATED")                                                                                                                                   # add the aggregated user profile -> NOTE: the analysis for fluxes will be done on all these.


# Road Network  

In [None]:
# Disabled cell: the `if False:` guard means none of the statements below ever run.
if False:
    import logging
    import osmnx as ox
    ox.utils.log('INFO')
    # NOTE(review): `cities_gdf_trentino` and `crs` are not assigned anywhere in the visible part of this
    # notebook; re-enabling this cell may fail unless they come from the star import - confirm.
    config.update(extract_bbox_with_buffer(cities_gdf_trentino, buffer_degrees=0.1))
    complete_path_transport_gdf = config[f"{str_prefix_complete_path}_{str_name_gdf_transport}"]                                                                        # path to the gdf transport
    complete_path_transport_graph = config[f"{str_prefix_complete_path}_{str_name_graph_transport}"]                                                                    # path to the graph transport
    gdf_road_network, G_road_network = pipeline_get_transport_network(config,                                                                                           # get the road network, save it in the output folder                     
                                                                    crs,                                                                                                # coordinate reference system                          
                                                                    str_transport_idx,                                                                                  # transport index               
                                                                    complete_path_transport_gdf,                                                                        # complete path to the transport gdf
                                                                    complete_path_transport_graph)                                                                      # compute the transport network, save it in the output folder 


# Distance Matrix

In [None]:
# ---------- Distance and Direction Matrix ------------ #
# Register the parquet path of the distance matrix in the config and keep a local handle on it.
complete_path_direction_distance_df = os.path.join(
    config[str_dir_output],
    f"{str_name_distance_matrix}.parquet",
)
config[str_name_distance_matrix] = complete_path_direction_distance_df
print(f"Compute distance and direction matrix: {complete_path_direction_distance_df}")

# NOTE(review): the x-column receives the latitude name and the y-column the longitude name;
# check against the helper that this pairing is intended.
direction_matrix, distance_matrix = compute_direction_matrix_optimized(
    cities_gdf,
    str_centroid_x_col=str_centroid_lat,
    str_centroid_y_col=str_centroid_lon,
    complete_path_direction_distance_df=complete_path_direction_distance_df,
)

# Combine both matrices into one dataframe; the same path is handed over again.
df_distance_matrix = direction_distance_2_df(
    direction_matrix,
    distance_matrix,
    complete_path_direction_distance_df,
)





# Dictionaries Handling Data

In [None]:
print("Extract list of files from bucket...")
(
    list_files_od,
    list_files_presenze,
    list_str_dates_yyyymm,
) = extract_filenames_and_date_from_bucket(bucket)

print("Extract days available for analysis flows...")
# NOTE: Needed to create dict_column_flows,grid for post-processing
list_all_avaliable_days_flows = extract_all_days_available_analysis_flows_from_raw_dataset(
    list_files_od,
    col_str_day_od,
    str_period_id_presenze,
    col_str_is_week,
    s3,
)

# Disabled: the dictionaries holding the results are initialized later, once per aggregation case.
#dict_column_flows, dict_column_grid, dict_output_hotspot_analysis = initialize_dicts_that_hold_grid_flows_columns_and_hotspot_analysis(list_all_avaliable_days_flows = list_all_avaliable_days_flows, 
#                                                                                                                                        list_time_intervals = list_time_intervals,
#                                                                                                                                        UserProfiles = UserProfiles,
#                                                                                                                                        week_days = week_days,
#                                                                                                                                        case_2_is_in_flow = case_2_is_in_flow,
#                                                                                                                                        case_pipeline = CASE_PIPELINE_AGGREGATION_DAY_HOUR_USER_WEEKDAY)


# Null Day

In [None]:
# Map each index of `cities_gdf` to the value of its area-code column.
map_idx_cities_gdf_2_area_code = dict(zip(cities_gdf.index, cities_gdf[str_area_id_presenze]))  # create a map from the index of the cities gdf to the area code
# NOTE: Add the AREA_CODE columns, which characterize df_presenze and df_od but not the flows generated by the gravity model
df_distance_matrix = add_column_area_code_OD_df_distance(df_distance_matrix,
                                                        map_idx_cities_gdf_2_area_code,
                                                        str_col_origin=str_col_origin,
                                                        str_col_destination=str_col_destination,
                                                        str_area_code_origin_col=str_area_code_origin_col,
                                                        str_area_code_destination_col=str_area_code_destination_col
                                                        )  # add the area code to the origin and destination columns of the distance matrix

# NOTE: Extract the null day
print("Initialize null day OD-presenze...")
# NOTE(review): the meaning of the literal 2 passed to both extractors is not visible here - confirm against the helpers.
df_presenze_null_days = extract_presences_vodafone_from_bucket(s3,list_files_presenze, 2)                                                                                                      # NOTE: download the file from the bucket
# NOTE: Extract OD
df_od_null_days = extract_od_vodafone_from_bucket(s3,list_files_od, 2) 
# Add the day and week-type columns to the null-day OD frame (flagged with is_null_day = True).
df_od_null_days = add_column_is_week_and_str_day(df_od = df_od_null_days,
                                str_period_id_presenze = str_period_id_presenze,
                                col_str_day_od = col_str_day_od,
                                col_str_is_week = col_str_is_week,
                                is_null_day = True)


# Join the null-day OD flows with the distance matrix through the area-code columns added above.
df_od_null_days = join_Tij_Vodafone_with_distance_matrix(df_od=df_od_null_days,
                                                df_distance_matrix = df_distance_matrix,
                                                str_origin_od = str_origin_od,                                   # NOTE: origin area (ITA.<code>)
                                                str_destination_od = str_destination_od,                              # NOTE: destination area (ITA.<code>)    
                                                str_area_code_origin_col = str_area_code_origin_col,
                                                str_area_code_destination_col = str_area_code_destination_col)


# The triple-quoted string below is disabled code kept as a bare string expression: it is evaluated and discarded.
"""
Tij_dist_baseline_init = pipeline_initial_df_flows_aggregation_on_dat_hour_user_weekday(df = Tij_dist_baseline_init,
                                                                                    tuple_filters = (pl.col(str_trip_type_od) != "out-out",
                                                                                                        pl.col(str_trip_type_od) != "in-out"),
                                                                                    message_filters = (f"{str_trip_type_od} != out-out",
                                                                                                        f"{str_trip_type_od} != in-out"),
                                                                                    list_columns_groupby = conditioning_2_columns_to_hold_when_aggregating["hour_user_weekday"],            # NOTE: T_{origin}_{destination}_{hour}_{user_profile}_{str_day}_{is_weekday}
                                                                                    str_col_trips_to_be_aggregated = "TRIPS",
                                                                                    str_col_name_aggregated = "TRIPS",
                                                                                    method_aggregation="sum"
                                                                                    )
"""
# Memory management: Force garbage collection after loading large datasets
gc.collect()   


In [None]:
# Show the column names of a previously written parquet file.
# NOTE(review): the path is absolute and tied to one user's home directory, so this cell only runs on that machine.
pl.read_parquet("/home/aamaduzzi/aixpa-overtourism-backend/scripts_overtoruism_analysis/Output/Vodafone-Data/grid_all_columns_user.parquet").columns

In [None]:
# Show the column names of a previously written parquet file.
# NOTE(review): the path is absolute and tied to one user's home directory, so this cell only runs on that machine.
pl.read_parquet("/home/aamaduzzi/aixpa-overtourism-backend/scripts_overtoruism_analysis/Output/Vodafone-Data/grid_all_columns_weekday.parquet").columns

# Analysis Average

In [None]:
%matplotlib inline
import polars as pl
is_normalize = False
# Example filters
is_weekday = "Feriale"   # could also be ["Feriale", "Prefestivo"]
hour_filter = [7, 8, 25]     # list of allowed hours
col_output_total_trips = "total_trips"
df_concat = concat_df_od_and_add_columns(list_files_od, s3, str_period_id_presenze, col_str_day_od, col_str_is_week)  
Tij_dist = df_concat.filter(pl.col(str_trip_type_od) != "out-out",
                            pl.col(str_trip_type_od) != "in-out")
# Tij_dist = Tij_dist.filter(pl.col(col_str_is_week) == is_weekday)                                        # 1. filter the dataframe to keep only the rows with the correct weekday/weekend

Tij_dist = Tij_dist.group_by([str_origin_od, str_destination_od, str_departure_hour_od, str_origin_visitor_class_id_od, col_str_day_od, col_str_is_week]).agg(
        total_trips=pl.col("TRIPS").sum()                                                                                           # 2. aggregate trips across TRIP_TYPE and NATIONALITY_CLASS_ID
    ).group_by([str_origin_od, str_destination_od, str_departure_hour_od, str_origin_visitor_class_id_od, col_str_is_week]).agg(
        average_trips_over_days=pl.col(col_output_total_trips).mean()                                                               # 3. compute average over days
    )

for is_weekday in week_days:
    fig,ax = plt.subplots(figsize=(10,6))
    User2Marker = {user_profile: marker for user_profile, marker in zip(UserProfiles, ['o', 's', '^', 'D', 'x'])}
    Type_user_2_count = {user_profile: np.zeros(len(hour_ids)) for user_profile in UserProfiles}
    Average_count = np.zeros(len(hour_ids))
    n_type_users_considered = 0
    for i, user_profile in enumerate(UserProfiles):
        if user_profile == "AGGREGATED":
            continue
        else:
            for j, hour_id in enumerate(hour_ids):
                Type_user_2_count[user_profile][j] = Tij_dist.filter(pl.col(str_departure_hour_od) == hour_id,                                      # 
                                                                    pl.col(str_origin_visitor_class_id_od) == UserProfile2IndexVodafone[user_profile],
                                                                    pl.col("is_weekday") == is_weekday).select(pl.col("average_trips_over_days")).to_numpy().sum()
                Average_count[j] += Type_user_2_count[user_profile][j]
            if is_normalize:
                Type_user_2_count[user_profile] /= Type_user_2_count[user_profile].sum()
            ax.scatter(hour_ids, Type_user_2_count[user_profile], label=user_profile, marker=User2Marker[user_profile], s=50)
            ax.set_title(f"total trips per user profile {is_weekday}")
            ax.set_xlabel("Hour of Day")
            ax.set_ylabel("Fraction total Trips")
            n_type_users_considered += 1
    if n_type_users_considered > 0:
        Average_count /= n_type_users_considered
        if is_normalize:
            Average_count /= Average_count.sum()
        ax.scatter(hour_ids, Average_count, label="Average", marker='*', s=200, color='black')
    ax.grid(True)
    plt.legend()
    dir_output_average = os.path.join(config["dir_output"],is_weekday)
    os.makedirs(dir_output_average, exist_ok=True)
    plt.savefig(os.path.join(dir_output_average,f"total_trips_per_user_profile_{is_weekday}.png"))
    plt.show()
    plt.close()






# Pipeline Average over all days and Hours

In [None]:

        # ------------------------------------------------------
        # ⬇️ Put your full analysis pipeline here (the big block)
        # Replace your hardcoded values with these loop vars:
        # - str_day
        # - time_interval
        # - user_profile
        # - is_weekday
        # ------------------------------------------------------


In [None]:
# NOTE: Initialize the dataframe flows from the concatenation of all the OD files


df_concat = concat_df_od_and_add_columns(list_files_od, 
                                         s3, 
                                         str_period_id_presenze, 
                                         col_str_day_od, 
                                         col_str_is_week)  
list_unique_days_od = df_concat[col_str_day_od].unique().to_list()

# NOTE: Prepare the dataframe to give to the prepare_flow_dataframe_for_hierarchical_prcedure: essentially here it considers just the in-in and out-in trips, and joins with the distance matrix -> i,j col are given
df_od_with_just_in_in_out_in_trips = default_initial_preparation_common_to_all_cases_df_flows_not_baseline(df_od = df_concat,
                                                                          df_distance_matrix = df_distance_matrix,
                                                                          str_origin_od = str_origin_od,
                                                                          str_destination_od = str_destination_od,
                                                                          str_area_code_origin_col = str_area_code_origin_col,
                                                                          str_area_code_destination_col = str_area_code_destination_col,
                                                                          str_trip_type_od = str_trip_type_od)


# NOTE: The pipeline case must be defined before it is interpolated into the output
# file name below; assigning it afterwards raises NameError on a fresh kernel or
# silently reuses a stale value left by an earlier cell.
case_pipeline = "weekday"

# Base columns to keep in the geopandas grid.
columns_2_hold_geopandas_base = [
    str_area_id_presenze,
    str_centroid_lat,
    str_centroid_lon,
    str_col_comuni_name,
    str_grid_idx,
    "geometry",
]
columns_hold_output_hotspot_df_base = [str_area_id_presenze]

# Save the base columns of the grid at the root of the output directory.
cities_gdf[str_grid_idx] = cities_gdf.index  # expose the grid index as a regular column
cities_gdf[columns_2_hold_geopandas_base].to_file(
    os.path.join(config[str_dir_output], f"cities_gdf_base_columns_{case_pipeline}.geojson"),
    driver="GeoJSON",
)

# Accumulators filled by the per-case loop further down.
global_Tij_holding_all_columns_flows = None
# NOTE(review): this is an alias of cities_gdf, not a copy, so in-place column
# assignments on the accumulator also modify cities_gdf until it is rebound.
global_cities_gdf_holding_all_columns_flows = cities_gdf


# NOTE: Create the three bookkeeping dictionaries (flow columns, grid columns,
# hotspot-analysis outputs) for the current pipeline case.
(
    dict_column_flows_aggregation_weekday,
    dict_column_grid_aggregation_weekday,
    dict_output_hotspot_analysis_aggregation_weekday,
) = initialize_dicts_that_hold_grid_flows_columns_and_hotspot_analysis(
    list_all_avaliable_days_flows=list_all_avaliable_days_flows,
    list_time_intervals=list_time_intervals,
    UserProfiles=UserProfiles,
    week_days=week_days,
    case_2_is_in_flow=case_2_is_in_flow,
    case_pipeline=case_pipeline,
)

# NOTE: Aggregate the flows (and the baseline built from df_od_null_days) at the
# level required by the current pipeline case.
Tij_dist_init, Tij_dist_baseline_init = prepare_flow_dataframe_for_hierarchical_prcedure(
    df_od_with_just_in_in_out_in_trips=df_od_with_just_in_in_out_in_trips,
    Tij_dist_baseline_init=df_od_null_days,
    case=case_pipeline,
    user_profile="",
)



# Run the hierarchical hotspot analysis once per day type (entries of week_days) and,
# within each, once per flow direction (entries of case_2_is_in_flow). Results are
# accumulated into global_Tij_holding_all_columns_flows and
# global_cities_gdf_holding_all_columns_flows; maps and bookkeeping dictionaries are
# written under a per-day-type output directory.
for is_weekday in week_days:
    # NOTE: One output directory per day type, created if it does not exist.
    str_dir_output_date = os.path.join(config[str_dir_output], is_weekday)
    Path(str_dir_output_date).mkdir(parents=True, exist_ok=True)
    idx_case = 0
    # Base columns to keep in the geopandas grid (rebuilt for every day type).
    columns_2_hold_geopandas_base = [
        str_area_id_presenze,
        str_centroid_lat,
        str_centroid_lon,
        str_col_comuni_name,
        str_grid_idx,
        "geometry",
    ]
    str_day = ""                        # NOTE: no specific day is considered here
    # NOTE(review): int_hour_start_window_interest is deliberately not assigned here
    # (the former self-assignment was a no-op); it keeps whatever value an earlier
    # cell left in scope.
    user_profile = "average_all_days"   # NOTE: no specific user profile is considered here
    time_interval = [0]                 # NOTE: no specific time interval is considered here
    str_t = ""
    str_t1 = ""
    for suffix_in, is_in_flows in case_2_is_in_flow.items():
        # NOTE: Select the flows (and baseline flows) matching the current conditions.
        Tij_dist, Tij_dist_baseline = filter_flows_by_conditions_from_cases(
            Tij_dist_init=Tij_dist_init,
            Tij_dist_baseline_init=Tij_dist_baseline_init,
            case_analysis=case_pipeline,
            is_weekday=is_weekday,
            day_id_of_interest=str_day,
            hour_id_of_interest=int_hour_start_window_interest,
            user_profile=user_profile,
        )
        # NOTE: Update the column dictionaries and obtain the column lists / join keys
        # used when merging this case into the global dataframes.
        (
            dict_column_flows_aggregation_weekday,
            dict_column_grid_aggregation_weekday,
            extension_columns_2_hold,
            columns_2_hold_geopandas_for_flows_plot,
            columns_flows_2_be_merged_2_keep,
            on_colums_flows_2_join,
            on_columns_grid_2_join,
        ) = define_columns_to_hold_and_merge_both_for_grid_and_flows_OD_analysis(
            columns_2_hold_geopandas_base=columns_2_hold_geopandas_base,
            str_col_origin=str_col_origin,
            str_col_destination=str_col_destination,
            str_grid_idx=str_grid_idx,
            dict_column_flows=dict_column_flows_aggregation_weekday,
            dict_column_grid=dict_column_grid_aggregation_weekday,
            str_day=str_day,
            time_interval=time_interval,
            user_profile=user_profile,
            is_weekday=is_weekday,
            suffix_in=suffix_in,
            str_t=str_t,
            str_t1=str_t1,
            case_pipeline=case_pipeline,
        )

        # NOTE: Names of the columns used by the difference-with-baseline step.
        (
            str_col_n_trips,
            str_col_n_trips_baseline,
            str_col_difference_baseline,
            str_col_total_flows_grid,
        ) = extract_name_columns_for_difference_pipeline(
            dict_column_flows=dict_column_flows_aggregation_weekday,
            dict_column_grid=dict_column_grid_aggregation_weekday,
            str_day=str_day,
            time_interval=time_interval,
            user_profile=user_profile,
            is_weekday=is_weekday,
            is_in_flows=is_in_flows,
            suffix_in=suffix_in,
            case_pipeline=case_pipeline,
        )
        # Copy TRIPS unchanged into the analysis column.
        # NOTE(review): the previous comment said the trips are divided by two to
        # account for both nationalities, but no division is applied — confirm
        # which behaviour is intended.
        Tij_dist = Tij_dist.with_columns(pl.col("TRIPS").alias(str_col_n_trips))
        # Same copy for the baseline flows, under the baseline column name.
        Tij_dist_baseline = Tij_dist_baseline.with_columns(pl.col("TRIPS").alias(str_col_n_trips_baseline))
        print(f"Compare Tij_dist to Tij_dist_baseline: {suffix_in}")
        Tij_dist = compute_difference_trips_col_day_baseline(
            Tij_dist=Tij_dist,
            Tij_dist_baseline=Tij_dist_baseline,
            str_col_n_trips=str_col_n_trips,
            str_col_n_trips_baseline=str_col_n_trips_baseline,
            str_col_difference_baseline=str_col_difference_baseline,
            str_col_origin=str_col_origin,
            str_col_destination=str_col_destination,
            on_colums_flows_2_join=on_colums_flows_2_join,
        )

        # NOTE: Step 1 - start the hierarchical analysis. The first case of each day
        # type starts from cities_gdf; later cases reuse the grid produced by the
        # previous case (mh.grid).
        if idx_case == 0:
            geojson_input_hierarchy = cities_gdf
        else:
            geojson_input_hierarchy = mh.grid
        # NOTE: Mobility hierarchy analysis; is_in_flows selects the flow direction
        # (True means incoming fluxes to the hotspot).
        (
            mh,
            hotspot_2_origin_idx_2_crit_dest_idx,
            hotspot_flows,
            list_indices_all_fluxes_for_colormap,
        ) = pipeline_mobility_hierarchy_time_day_type_trips(
            cities_gdf=geojson_input_hierarchy,
            Tij_dist_fit_gravity=Tij_dist,
            str_population_col_grid=str_population_col_grid,
            str_col_comuni_name=str_col_comuni_name,
            str_col_origin=str_col_origin,
            str_col_destination=str_col_destination,
            str_col_n_trips=str_col_n_trips,
            str_col_total_flows_grid=str_col_total_flows_grid,
            str_hotspot_prefix=str_hotspot_prefix,
            str_centroid_lat=str_centroid_lat,
            str_centroid_lon=str_centroid_lon,
            str_grid_idx=str_grid_idx,
            user_profile=user_profile,
            str_t=str_t,
            str_t1=str_t1,
            is_in_flows=is_in_flows,
            columns_2_hold_geopandas=columns_2_hold_geopandas_for_flows_plot,
            int_levels=7,
        )

        # Store the hotspot outputs of this case in the bookkeeping dictionary.
        dict_output_hotspot_analysis_aggregation_weekday = fill_dict_output_hotspot_analysis_OD_analysis_from_case_pipeline(
            dict_output_hotspot_analysis=dict_output_hotspot_analysis_aggregation_weekday,
            str_day=str_day,
            time_interval=time_interval,
            user_profile=user_profile,
            is_weekday=is_weekday,
            is_in_flows=is_in_flows,
            suffix_in=suffix_in,
            case_pipeline=case_pipeline,
            hotspot_2_origin_idx_2_crit_dest_idx=hotspot_2_origin_idx_2_crit_dest_idx,
            list_indices_all_fluxes_for_colormap=list_indices_all_fluxes_for_colormap,
            hotspot_flows=hotspot_flows,
        )
        # Read back the "hotspot_levels" entry for this case.
        hotspot_levels = get_values_from_case_pipeline_OD_analysis(
            dict_column_flows=None,
            dict_column_grid=None,
            dict_output_hotspot_analysis=dict_output_hotspot_analysis_aggregation_weekday,
            str_day=str_day,
            time_interval=time_interval,
            user_profile=user_profile,
            is_weekday=is_weekday,
            is_in_flows=is_in_flows,
            suffix_in=suffix_in,
            name_dict="dict_output_hotspot_analysis",
            name_key="hotspot_levels",
            case_pipeline=case_pipeline,
        )
        print(f"Map flux generated for {suffix_in} flows")
        # NOTE: Save the hierarchy outputs for this flow direction into the
        # per-day-type directory.
        save_output_mobility_hierarchy_dependent_is_in_fluxes(
            str_dir_output_date=str_dir_output_date,
            map_hierarchy=mh.fmap,
            user_profile=user_profile,
            hotspot_levels=hotspot_levels,
            hotspot_2_origin_idx_2_crit_dest_idx=hotspot_2_origin_idx_2_crit_dest_idx,
            str_t=str_t,
            str_t1=str_t1,
            is_in_flows=is_in_flows,
        )
        # Memory management: request a garbage-collection pass after saving
        # (no object is explicitly deleted here).
        gc.collect()
        # NOTE: Merge this case's flows and grid into the global dataframes.
        print(f"Case: {suffix_in} - mh.flows: {mh.flows.columns}\nflows merged: {Tij_dist.columns}\nmh.grid: {mh.grid.columns}")

        # Cast the area id column to str on both sides so the merge keys share a type.
        if str_area_id_presenze in global_cities_gdf_holding_all_columns_flows.columns:
            global_cities_gdf_holding_all_columns_flows[str_area_id_presenze] = global_cities_gdf_holding_all_columns_flows[str_area_id_presenze].astype(str)
        if str_area_id_presenze in mh.grid.columns:
            mh.grid[str_area_id_presenze] = mh.grid[str_area_id_presenze].astype(str)

        (
            global_Tij_holding_all_columns_flows,
            global_cities_gdf_holding_all_columns_flows,
        ) = merge_flows_and_grid_with_global_to_obtain_unique_dfs(
            global_Tij_holding_all_columns_flows=global_Tij_holding_all_columns_flows,
            flows_2_be_merged=Tij_dist,
            global_cities_gdf_holding_all_columns_flows=global_cities_gdf_holding_all_columns_flows,
            grid_single_case_2_be_merged=mh.grid,
            columns_join_global_geopandas=columns_2_hold_geopandas_for_flows_plot,
            columns_flows_2_be_merged_2_keep=columns_flows_2_be_merged_2_keep,
            on_columns_flows_2_join=on_colums_flows_2_join,
            on_columns_grid_2_join=on_columns_grid_2_join,
            message_geojson=f"average {suffix_in} grid",
            message_flows=f"average {suffix_in} flows",
        )
        # Geometry-free polars snapshot of the global grid (written to parquet after the loop).
        grid_global = pl.from_pandas(global_cities_gdf_holding_all_columns_flows.copy().drop("geometry", axis=1))
        # Memory management: request another garbage-collection pass.
        gc.collect()
        idx_case += 1
        print("Number columns global flows after join: ", len(global_Tij_holding_all_columns_flows.columns), " cities: ", len(global_cities_gdf_holding_all_columns_flows.columns))
    # NOTE: Visualize all outputs for this day type, one map per flow direction.
    for suffix_in, is_in_flows in case_2_is_in_flow.items():
        (
            hotspot_2_origin_idx_2_crit_dest_idx,
            str_col_total_flows_grid,
            str_col_hotspot_level,
            str_col_n_trips,
            str_caption_colormap,
            str_col_difference,
        ) = extract_name_columns_for_hierarchical_plot(
            dict_column_flows=dict_column_flows_aggregation_weekday,
            dict_column_grid=dict_column_grid_aggregation_weekday,
            dict_output_hotspot_analysis=dict_output_hotspot_analysis_aggregation_weekday,
            str_day=str_day,
            time_interval=time_interval,
            user_profile=user_profile,
            is_weekday=is_weekday,
            is_in_flows=is_in_flows,
            suffix_in=suffix_in,
            case_pipeline=case_pipeline,
        )

        map_flux = visualize_critical_fluxes_with_lines(
            grid=global_cities_gdf_holding_all_columns_flows,
            df_flows=global_Tij_holding_all_columns_flows,
            hotspot_2_origin_idx_2_crit_dest_idx=hotspot_2_origin_idx_2_crit_dest_idx,
            str_col_total_flows_grid=str_col_total_flows_grid,
            str_col_hotspot=str_col_hotspot_level,
            str_col_n_trips=str_col_n_trips,
            is_in_flows=is_in_flows,
            str_col_origin=str_col_origin,
            str_col_destination=str_col_destination,
            str_centroid_lat=str_centroid_lat,
            str_centroid_lon=str_centroid_lon,
            str_col_comuni_name=str_col_comuni_name,
            str_grid_idx=str_grid_idx,
            str_caption_colormap=str_caption_colormap,
            str_colormap='YlOrRd',
        )
        # Path of the HTML map. NOTE(review): str_day appears twice in the file
        # name; kept as is so existing outputs keep their names.
        complete_path_map = os.path.join(str_dir_output_date, f"map_fluxes_{user_profile}_{suffix_in}_{str_day}_{str_t}_{str_t1}_{str_day}.html")
        # Dump the three bookkeeping dictionaries next to the map.
        with open(os.path.join(str_dir_output_date, f"dict_column_flows_aggregation_weekday_{user_profile}_{suffix_in}_t_{str_t}_{str_t1}_d_{str_day}.json"), 'w') as f:
            json.dump(dict_column_flows_aggregation_weekday, f, indent=4)
        with open(os.path.join(str_dir_output_date, f"dict_column_grid_aggregation_weekday_{user_profile}_{suffix_in}_t_{str_t}_{str_t1}_d_{str_day}.json"), 'w') as f:
            json.dump(dict_column_grid_aggregation_weekday, f, indent=4)
        with open(os.path.join(str_dir_output_date, f"dict_output_hotspot_analysis_aggregation_weekday_{user_profile}_{suffix_in}_t_{str_t}_{str_t1}_d_{str_day}.json"), 'w') as f:
            json.dump(dict_output_hotspot_analysis_aggregation_weekday, f, indent=4)
        # Best effort: a failure to save the map is reported but does not stop the run.
        try:
            map_flux.save(complete_path_map)
        except Exception as e:
            print(f"Error saving map_flux: {e}")
# Persist the accumulated flows and the geometry-free grid as parquet files.
# NOTE(review): str_dir_output_date and grid_global are whatever the last iteration
# of the loop above left in scope, so both files land in the directory of the last
# day type processed — confirm this is the intended location.
global_Tij_holding_all_columns_flows.write_parquet(
    os.path.join(str_dir_output_date, f"Tij_all_columns_{case_pipeline}.parquet")
)
grid_global.write_parquet(
    os.path.join(str_dir_output_date, f"grid_all_columns_{case_pipeline}.parquet")
)
#    global_cities_gdf_holding_all_columns_flows.to_file(os.path.join(str_dir_output_date,f"global_cities_gdf_holding_all_columns_flows.geojson"),driver='GeoJSON')  # save the global grid

# Analysis of Fluxes: Hotspots Diffusion 1-2

##  Global Variables Pipeline - Setting the stage for Analysis

In [None]:

for i,file in tqdm(enumerate(list_files_od),desc="Files OD Vodafone"):                                                                                           # for each file in the list of files
    print("Processing file: ", file,f" iter: {i}")                                                                                                                                          # print the file name
    # NOTE: Define the null day being october 2024 -> Avoid the analysis on it since we are considering just the summer days in relation to it.
    if file == 'projects/tourism/_origin/vodafone-aixpa/od-mask_202410.parquet':
        is_null_day = True
    else:
        is_null_day = False
    if is_null_day:
        continue
    else:
        # NOTE: Extract the presenze and OD data from the bucket -> otherwise generate the flows from gravitational model
        if not is_generate_fluxes:
            # NOTE: Extract presenze
            df_presenze = extract_presences_vodafone_from_bucket(s3,list_files_presenze, i)                                                                                                      # NOTE: download the file from the bucket
            # NOTE: Extract OD
            df_od = extract_od_vodafone_from_bucket(s3,list_files_od, i) 
            # Add the column
            if "202410" in file:
                is_null_day = True
            else:
                is_null_day = False

            # NOTE: Fill info geodataframe with presences 
#            df_presenze_pd = df_presenze.to_pandas()  # Convert to pandas first: add -> AREA_CODE
#            df_presenze_unique = df_presenze_pd.drop_duplicates(subset=[str_area_id_presenze], keep='first')
#            cities_gdf = cities_gdf.merge(df_presenze_unique[str_area_id_presenze], left_on=str_area_id_presenze,right_on=str_area_id_presenze,how="left")
            # NOTE: Add the column is_week and str_day to the df_od
            df_od = add_column_is_week_and_str_day(df_od = df_od,
                                                    str_period_id_presenze = str_period_id_presenze,
                                                    col_str_day_od = col_str_day_od,
                                                    col_str_is_week = col_str_is_week,
                                                    is_null_day = is_null_day)  
            list_unique_days_od = df_od[col_str_day_od].unique().to_list()
            print(f"Extracted dates analysis: {list_unique_days_od}")            
            # NOTE: Join the distance matrix with the flows -> add the columns that hold information about the unit vector and the distance between O-D
            Tij_dist_init = join_Tij_Vodafone_with_distance_matrix(df_od=df_od,
                                                            df_distance_matrix = df_distance_matrix,
                                                            str_origin_od = str_origin_od,                                   # NOTE: origin area (ITA.<code>)
                                                            str_destination_od = str_destination_od,                              # NOTE: destination area (ITA.<code>)    
                                                            str_area_code_origin_col = str_area_code_origin_col,
                                                            str_area_code_destination_col = str_area_code_destination_col)
                        
            # NOTE: Initialize the flows dataframe so that the trips are summed over all observations but are still conditioned on the day, hour, user profile and weekday/weekend
            Tij_dist_init = pipeline_initial_df_flows_aggregation_on_dat_hour_user_weekday(df = Tij_dist_init,
                                                                                      tuple_filters = (pl.col(str_trip_type_od) != "out-out",
                                                                                                        pl.col(str_trip_type_od) != "in-out"),
                                                                                      message_filters = (f"{str_trip_type_od} != out-out",
                                                                                                         f"{str_trip_type_od} != in-out"),
                                                                                      list_columns_groupby = conditioning_2_columns_to_hold_when_aggregating["day_hour_user_weekday"],            # NOTE: T_{origin}_{destination}_{hour}_{user_profile}_{str_day}_{is_weekday}
                                                                                      str_col_trips_to_be_aggregated = "TRIPS",
                                                                                      str_col_name_aggregated = "TRIPS",
                                                                                      method_aggregation="sum"
                                                                                     )

            # Memory management: Delete intermediate dataframes after processing
#            del df_presenze_pd, df_presenze_unique
            gc.collect()
        else:
            list_unique_days_od = ["2024-09-09"]                                                                                                                                              # NOTE: the days of interest -> string format
            pass
        
        if not list_unique_days_od:
            print(f"Warning: No days found for file {file}. Skipping...")
            continue
        for str_day in list_unique_days_od:
             else:        
                datetime_day = pd.to_datetime(str_day)     
                # NOTE: Time intervals of interest
                # NOTE: Time intervals
                for time_interval in list_time_intervals:
                    print("Time interval: ", time_interval)                                                                                                                               # for each time interval
                    int_hour_start_window_interest = time_interval[0]                                                                                                                   # start time window of interest
                    int_hour_end_window_interest = time_interval[1]                                                                                                                     # end time window of interest
                    str_t = str(int_hour_start_window_interest)
                    str_t1 = str(int_hour_end_window_interest)
                    # Flag whether the fluxes of this window are treated as hourly: every start hour
                    # except 25 counts as hourly.
                    # NOTE(review): 25 looks like a sentinel for a non-hourly window -- confirm, and
                    # change it if the data source encodes that case differently.
                    is_fluxes_hourly = bool(int_hour_start_window_interest != 25)
                    for is_weekday in week_days:                                                                                                                                            # for each day type (weekday/weekend)
                        print(f"Processing {is_weekday}")                                                                                                                                              # print the day type
                        # NOTE: Days of interest
                        # NOTE: Profile user
                        print("Initialize flows and cities that will hold all columns analysis...")
                        global_cities_gdf_holding_all_columns_flows = cities_gdf.copy()                                                                                                                                # Create a global copy of cities_gdf to hold all columns: name_file_system: flows_all_analysis.parquet
                        global_Tij_holding_all_columns_flows = None                                                                                                                                                    # Create a global variable to hold Tij_dist with all columns:                         
                        for user_profile in UserProfiles:
                            Tij_dist = Tij_dist_init
                            Tij_dist_baseline = Tij_dist_baseline_init                                
                            print("Processing user profile: ", user_profile)                                                                                                                   # for each user profile
                            # Initialize variables that are relevant for the time analysis (time and user profile)
                            name_week_day = is_weekday                                                                                                                                  # name of the day type
                            # NOTE: These are the columns that we hold in the plot of the hierarchy (These are the ones that are in 
                            # the geojson produced by the MobilityHierarchy that are going to be important for the output plots)
                            columns_2_hold_geopandas_base = [str_area_id_presenze,str_centroid_lat,str_centroid_lon,str_col_comuni_name,str_grid_idx,"geometry"]                        # base columns to hold in the geopandas
                            # NOTE: Directory to save the output
                            str_dir_output_date = os.path.join(config[str_dir_output],str_day,f"{str_t}_{str_t1}",name_week_day)                                                                      # NOTE: create a directory for the date
                            Path(str_dir_output_date).mkdir(parents=True, exist_ok=True)                                                                                                # create the directory if it does not exist                
                            idx_case = 0
                            if WORK_IN_PROGRESS and os.path.exists(os.path.join(str_dir_output_date, f"most_critical_directions_{user_profile}_{str_t}_{str_t1}_{str_day}.html")):
                                print(f"Skipping analysis for {str_day} {str_t}-{str_t1} {user_profile} as output already exists.")
                                continue
                            else:
                                for suffix_in,is_in_flows in case_2_is_in_flow.items():
                                    case_pipeline = "day_hour_user_weekday" 
                                    dict_column_flows, dict_column_grid, extension_columns_2_hold, columns_2_hold_geopandas_for_flows_plot, columns_flows_2_be_merged_2_keep, on_colums_flows_2_join, on_columns_grid_2_join = define_columns_to_hold_and_merge_both_for_grid_and_flows_OD_analysis(columns_2_hold_geopandas_base = columns_2_hold_geopandas_base,
                                                                                                                                                                                                                                                                                                    str_col_origin = str_col_origin,
                                                                                                                                                                                                                                                                                                    str_col_destination = str_col_destination,
                                                                                                                                                                                                                                                                                                    str_grid_idx = str_grid_idx,
                                                                                                                                                                                                                                                                                                    dict_column_flows = dict_column_flows,
                                                                                                                                                                                                                                                                                                    dict_column_grid = dict_column_grid,
                                                                                                                                                                                                                                                                                                    str_day = str_day,
                                                                                                                                                                                                                                                                                                    time_interval = time_interval,
                                                                                                                                                                                                                                                                                                    user_profile = user_profile,
                                                                                                                                                                                                                                                                                                    is_weekday = is_weekday,
                                                                                                                                                                                                                                                                                                    suffix_in = suffix_in,
                                                                                                                                                                                                                                                                                                    str_t = str_t,
                                                                                                                                                                                                                                                                                                    str_t1 = str_t1,
                                                                                                                                                                                                                                                                                                    case_pipeline = case_pipeline)
                                    # NOTE: Columns that are held by the final geopandas -> The one used for all plots
                                    # Generation Fluxes                                                                                                                                           #
                                    if is_generate_fluxes:                                                                                                                         # if the fluxes are generated
                                        Tij_dist, Tij_dist_baseline = routine_generation_flows(df_distance_matrix,
                                                                                                cities_gdf,
                                                                                                str_col_i = str_col_origin,
                                                                                                str_col_j = str_col_destination,
                                                                                                str_population_col = str_population_col_grid,
                                                                                                str_population_i_col = str_population_i,
                                                                                                str_population_j_col = str_population_j)
                                        gc.collect()
                                    else:
                                        # NOTE: Process vodafone data to match the format of generated fluxes
                                        # NOTE: The analysis is unique for (df_od,df_presenze,str_col_n_trips,str_day) -> other variables are needed for
                                        # functions and filtering but these steps are needed for the comparison of the baseline and the day of analysis.
                                        Tij_dist,Tij_dist_baseline = filter_flows_by_conditions_from_cases(Tij_dist_init = Tij_dist_init,
                                                                                                Tij_dist_baseline_init = Tij_dist_baseline_init,
                                                                                                case_analysis = case_pipeline,
                                                                                                is_weekday = is_weekday,
                                                                                                day_id_of_interest = str_day,
                                                                                                hour_id_of_interest = int_hour_start_window_interest,
                                                                                                user_profile = user_profile
                                                                                                )
                                        # Only the AGGREGATED profile rescales its baseline; every other profile is left untouched.
                                        if user_profile == "AGGREGATED":
                                            # NOTE: Tij_dist_init already contains the sum over the days (Vodafone provided the sum
                                            # over 15 days) and the aggregated profile additionally sums over the user profiles.
                                            # The baseline trips are halved (rounded down) because both nationalities are counted.
                                            # TODO(review): confirm that a factor of two is the intended normalisation.
                                            Tij_dist_baseline = Tij_dist_baseline.with_columns((pl.col("TRIPS")/2).floor().alias("TRIPS"))
                                        # Resolve the column names (trips, baseline trips, difference, total grid flows) used for this case
                                        str_col_n_trips, str_col_n_trips_baseline, str_col_difference_baseline, str_col_total_flows_grid = extract_name_columns_for_difference_pipeline(dict_column_flows = dict_column_flows,
                                                                                                                                                                                        dict_column_grid = dict_column_grid,str_day = str_day,time_interval = time_interval,
                                                                                                                                                                                        user_profile = user_profile,is_weekday = is_weekday, 
                                                                                                                                                                                        is_in_flows = is_in_flows,suffix_in = suffix_in,case_pipeline = case_pipeline)
                                        # Expose the trips under the case-specific column names resolved above.
                                        # NOTE(review): assuming `pl` is polars, `with_columns(pl.col(...).alias(...))` adds a column
                                        # under the new name and keeps the original "TRIPS" column -- it is a copy, not a rename.
                                        Tij_dist = Tij_dist.with_columns(pl.col("TRIPS").alias(str_col_n_trips))                                  # trips of the day under analysis
                                        Tij_dist_baseline = Tij_dist_baseline.with_columns(pl.col("TRIPS").alias(str_col_n_trips_baseline))                # trips of the baseline
                                        print(f"Compare Tij_dist to Tij_dist_baseline: {str_t}-{str_t1}, {user_profile}, {suffix_in}")
                                        Tij_dist = compute_difference_trips_col_day_baseline(Tij_dist = Tij_dist,
                                                            Tij_dist_baseline = Tij_dist_baseline,
                                                            str_col_n_trips = str_col_n_trips,
                                                            str_col_n_trips_baseline = str_col_n_trips_baseline,
                                                            str_col_difference_baseline = str_col_difference_baseline,
                                                            str_col_origin = str_col_origin,
                                                            str_col_destination = str_col_destination,
                                                            on_colums_flows_2_join = on_colums_flows_2_join
                                                            )
                                        # Diagnostics for the day under analysis and its baseline: shapes, total trips, null counts.
                                        print(f"Resulting flows to analyze:  {Tij_dist.shape} & baseline: {Tij_dist_baseline.shape} ")                                                                                                                                                      # shapes of both dataframes
                                        print(f"Total number trips recorded: {Tij_dist[str_col_n_trips].sum()}, & baseline: {Tij_dist_baseline[str_col_n_trips_baseline].sum()}")                                                                                                            # sum of the trips columns
                                        print(f"Total number missing values: {Tij_dist.select(pl.col(str_col_n_trips).is_null().sum()).item()}, & baseline: {Tij_dist_baseline.select(pl.col(str_col_n_trips_baseline).is_null().sum()).item()}")                                                                                   # number of null entries in the trips columns
                                        
                                        
                                        # NOTE: Step 1 Start the Hierarchical Analysis
                                        if idx_case == 0:
                                            geojson_input_hierarchy = cities_gdf
                                        else:
                                            geojson_input_hierarchy = mh.grid
                                        # NOTE: Mobility Hierarchy Analysis - Inflows and Outflows -> Reformat Input and choose outside what it is going to be (Either in or out)
                                        mh, hotspot_2_origin_idx_2_crit_dest_idx, hotspot_flows, list_indices_all_fluxes_for_colormap = pipeline_mobility_hierarchy_time_day_type_trips(cities_gdf = geojson_input_hierarchy,
                                                                                                                                                                                        Tij_dist_fit_gravity = Tij_dist,
                                                                                                                                                                                        str_population_col_grid = str_population_col_grid,
                                                                                                                                                                                        str_col_comuni_name = str_col_comuni_name,
                                                                                                                                                                                        str_col_origin = str_col_origin,
                                                                                                                                                                                        str_col_destination = str_col_destination,
                                                                                                                                                                                        str_col_n_trips = str_col_n_trips,
                                                                                                                                                                                        str_col_total_flows_grid = str_col_total_flows_grid,
                                                                                                                                                                                        str_hotspot_prefix = str_hotspot_prefix,
                                                                                                                                                                                        str_centroid_lat = str_centroid_lat,
                                                                                                                                                                                        str_centroid_lon = str_centroid_lon,
                                                                                                                                                                                        str_grid_idx = str_grid_idx,
                                                                                                                                                                                        user_profile = user_profile,
                                                                                                                                                                                        str_t = str_t,
                                                                                                                                                                                        str_t1 = str_t1,
                                                                                                                                                                                        is_in_flows = is_in_flows,                                                                                                  # NOTE: is_in_flows = True means that we are considering the incoming fluxes to the hotspot
                                                                                                                                                                                        columns_2_hold_geopandas = None,
                                                                                                                                                                                            int_levels = 7)
                                        dict_output_hotspot_analysis = fill_dict_output_hotspot_analysis_OD_analysis_from_case_pipeline(
                                                                                                            dict_output_hotspot_analysis = dict_output_hotspot_analysis,
                                                                                                            str_day = str_day,
                                                                                                            time_interval = time_interval,
                                                                                                            user_profile = user_profile,
                                                                                                            is_weekday = is_weekday,
                                                                                                            is_in_flows = is_in_flows,
                                                                                                            suffix_in = suffix_in,
                                                                                                            case_pipeline = case_pipeline,
                                                                                                            hotspot_2_origin_idx_2_crit_dest_idx = hotspot_2_origin_idx_2_crit_dest_idx,
                                                                                                            list_indices_all_fluxes_for_colormap = list_indices_all_fluxes_for_colormap,
                                                                                                            hotspot_flows = hotspot_flows
                                                                                                            )   
                                        print(f"Map flux generated for {str_t}-{str_t1}, {user_profile}, {suffix_in} flows")
                                        hotspot_levels = get_values_from_case_pipeline_OD_analysis(dict_column_flows = None,dict_column_grid = None,dict_output_hotspot_analysis = dict_output_hotspot_analysis,str_day = str_day,time_interval = time_interval,user_profile = user_profile,is_weekday = is_weekday,is_in_flows = is_in_flows,suffix_in = suffix_in,name_dict = "dict_output_hotspot_analysis",name_key = "hotspot_levels",case_pipeline = case_pipeline)
                                        # NOTE: Save the hierarchy map of the current case (direction given by is_in_flows) under
                                        # str_dir_output_date, which is built from the day, the time window and the day type.
                                        save_output_mobility_hierarchy_dependent_is_in_fluxes(
                                                                                            str_dir_output_date = str_dir_output_date,
                                                                                            map_hierarchy = mh.fmap,
                                                                                            user_profile = user_profile,
                                                                                            hotspot_levels = hotspot_levels,
                                                                                            hotspot_2_origin_idx_2_crit_dest_idx = hotspot_2_origin_idx_2_crit_dest_idx,
                                                                                            str_t = str_t,
                                                                                            str_t1 = str_t1,
                                                                                            is_in_flows = is_in_flows                                                                                                  # NOTE: is_in_flows = True means that we are considering the incoming fluxes to the hotspot
                                                                                                )
                                        # Memory management: trigger a garbage-collection pass after saving (no variable is deleted here)
                                        gc.collect()                                        
                                        # NOTE: Global save the flows and cities_gdf by merging the str_day, str_t, str_t1, user_profile
                                        print(f"Day: {str_day}, Time: {str_t}-{str_t1}, User Profile: {user_profile}, Case: {suffix_in} - mh.flows: {mh.flows.columns}\nflows merged: {Tij_dist.columns}\nmh.grid: {mh.grid.columns}")
                                        
                                        # Ensure AREA_ID has the same type before merging
                                        if str_area_id_presenze in global_cities_gdf_holding_all_columns_flows.columns:
                                            global_cities_gdf_holding_all_columns_flows[str_area_id_presenze] = global_cities_gdf_holding_all_columns_flows[str_area_id_presenze].astype(str)
                                        if str_area_id_presenze in mh.grid.columns:
                                            mh.grid[str_area_id_presenze] = mh.grid[str_area_id_presenze].astype(str)

                                        global_Tij_holding_all_columns_flows, global_cities_gdf_holding_all_columns_flows = merge_flows_and_grid_with_global_to_obtain_unique_dfs(global_Tij_holding_all_columns_flows = global_Tij_holding_all_columns_flows,
                                                                                                                                                                                    flows_2_be_merged = Tij_dist,
                                                                                                                                                                                    global_cities_gdf_holding_all_columns_flows = global_cities_gdf_holding_all_columns_flows,
                                                                                                                                                                                    grid_single_case_2_be_merged = mh.grid,
                                                                                                                                                                                    columns_join_global_geopandas = columns_2_hold_geopandas_for_flows_plot,
                                                                                                                                                                                    columns_flows_2_be_merged_2_keep = columns_flows_2_be_merged_2_keep,
                                                                                                                                                                                    on_columns_flows_2_join = on_colums_flows_2_join,
                                                                                                                                                                                    on_columns_grid_2_join = on_columns_grid_2_join,
                                                                                                                                                                                    message_geojson = f"{str_day} {str_t}-{str_t1} {user_profile}",
                                                                                                                                                                                    message_flows = f"{str_day} {str_t}-{str_t1} {user_profile}"
                                                                                                                                                                                    )                                    
                                        # Memory management: trigger a garbage-collection pass after the merge (nothing is deleted explicitly)
                                        gc.collect()
                                        idx_case += 1
                                        print(f"Number columns global flows after join {str_t}-{str_t1} {user_profile}: ",len(global_Tij_holding_all_columns_flows.columns),f" cities: ",len(global_cities_gdf_holding_all_columns_flows.columns))
                                # NOTE: Visualize All Outputs
                                for suffix_in,is_in_flows in case_2_is_in_flow.items():                                                   
                                    # Plot!
                                    hotspot_2_origin_idx_2_crit_dest_idx, str_col_total_flows_grid, str_col_hotspot_level, str_col_n_trips, str_caption_colormap, str_col_difference = extract_name_columns_for_hierarchical_plot(dict_column_flows = dict_column_flows,dict_column_grid = dict_column_grid,dict_output_hotspot_analysis = dict_output_hotspot_analysis,
                                                                                                                                                                                                                                str_day = str_day,time_interval = time_interval,user_profile = user_profile,is_weekday = is_weekday,is_in_flows = is_in_flows,
                                                                                                                                                                                                                                suffix_in = suffix_in,case_pipeline = case_pipeline)
                                    # Destination file of the hierarchical-flows map (profile, time window and flow direction in the name).
                                    complete_path_map = os.path.join(
                                        str_dir_output_date,
                                        f"map_fluxes_{user_profile}_t_{str_t}_{str_t1}_{suffix_in}.html",
                                    )

                                    # Map Hierarchical Flows: built from the global grid / flows tables that hold all columns.
                                    map_flux = visualize_critical_fluxes_with_lines(
                                        grid=global_cities_gdf_holding_all_columns_flows,
                                        df_flows=global_Tij_holding_all_columns_flows,
                                        hotspot_2_origin_idx_2_crit_dest_idx=hotspot_2_origin_idx_2_crit_dest_idx,
                                        str_col_total_flows_grid=str_col_total_flows_grid,
                                        str_col_hotspot=str_col_hotspot_level,
                                        str_col_n_trips=str_col_n_trips,
                                        is_in_flows=is_in_flows,
                                        str_col_origin=str_col_origin,
                                        str_col_destination=str_col_destination,
                                        str_centroid_lat=str_centroid_lat,
                                        str_centroid_lon=str_centroid_lon,
                                        str_col_comuni_name=str_col_comuni_name,
                                        str_grid_idx=str_grid_idx,
                                        str_caption_colormap=str_caption_colormap,
                                        str_colormap='YlOrRd',
                                    )

                                    # Map Critical Flows: differences with respect to the baseline, column given by str_col_difference.
                                    fmap_baseline = plot_negative_differences_interactive(
                                        grid=geojson_input_hierarchy,
                                        flows_negative=global_Tij_holding_all_columns_flows,
                                        str_col_i=str_col_origin,
                                        str_col_j=str_col_destination,
                                        str_col_difference=str_col_difference,
                                        str_centroid_lat=str_centroid_lat,
                                        str_centroid_lon=str_centroid_lon,
                                        caption_colorbar=f"Excess in tourism with respect to baseline {user_profile}",
                                    )

                                    # A failure while writing the hierarchical map is only reported; the run continues.
                                    try:
                                        map_flux.save(complete_path_map)
                                    except Exception as err:
                                        print(f"Error saving map_flux: {err}")

                                    print(f"Map critical flows:  {str_t}-{str_t1}, {user_profile}")
                                    # The critical-directions map is written without a guard: an error here propagates.
                                    path_map_critical_directions = os.path.join(
                                        str_dir_output_date,
                                        f"most_critical_directions_{user_profile}_{str_t}_{str_t1}_{str_day}.html",
                                    )
                                    fmap_baseline.save(path_map_critical_directions)

                                    # Release both map objects before the next case is processed.
                                    del map_flux, fmap_baseline
                                    gc.collect()
                            
                            
                            
                            # NOTE: Initialize Pipeline for GTFS data
                            if is_gtfs_available:                                                                                                                         # if the gtfs is available
                                # Time bins of the OD aggregation between the start and end hour of the window of interest.
                                time_vector_OD = pd.timedelta_range(
                                    start=f"{int_hour_start_window_interest}h",
                                    end=f"{int_hour_end_window_interest}h",
                                    freq=f"{int_min_aggregation_OD}min",
                                )
                                # All bus column names share the "<str_t>_<str_t1>" suffix of the current time window.
                                str_window_suffix_bus = f"{str_t}_{str_t1}"
                                str_col_n_trips_bus = f"n_trips_bus_{str_window_suffix_bus}"            # number of bus trips per OD pair
                                str_col_n_users_bus = f"n_users_bus_{str_window_suffix_bus}"            # number of bus users per OD pair
                                str_col_difference_bus = f"difference_bus_{str_window_suffix_bus}"      # bus users minus OD trips
                                # Columns of the grid kept for the bus hierarchy analysis.
                                # NOTE(review): only the "in" hotspot-level column is listed although the loop below also
                                # runs the other direction - confirm this is intended.
                                columns_2_hold_geopandas_in_bus = [
                                    str_col_n_trips_bus,
                                    str_centroid_lat,
                                    str_centroid_lon,
                                    str_col_comuni_name,
                                    str_grid_idx,
                                    "geometry",
                                    str_col_hotspot_level_in,
                                ]
                                # NOTE(review): this rebinds int_number_people_per_bus to a fixed 50; a variable of the same
                                # name is also passed to set_config at the top of the notebook - confirm which value should win.
                                int_number_people_per_bus = 50
                                idx_case_bus = 0                                                        # counter of processed bus cases
                                for suffix_in,is_in_flow_buses in case_2_is_in_flow.items():                                                   
                                    # ------------ GTFS analysis for the current time window ------------- #
                                    # The stops GeoJSON path is stored in config under "<prefix>_<stops name>"; it embeds the
                                    # time window, so it changes from one window to the next.
                                    key_path_gdf_stops = f"{str_prefix_complete_path}_{str_name_gdf_stops}"
                                    config[key_path_gdf_stops] = os.path.join(
                                        str_dir_output_date,
                                        f"{str_name_gdf_stops}_{str_t}_{str_t1}.geojson",
                                    )
                                    print("Initializing gdf stops: ", config[key_path_gdf_stops])

                                    # Stops, trips and stop times of the feed for this day and time vector.
                                    (
                                        gdf_stops,
                                        trips_in_time_interval,
                                        stop_times_in_time_interval,
                                    ) = compute_routes_trips(
                                        feed,
                                        str_day,
                                        time_vector_OD,
                                        config,
                                        str_prefix_complete_path,
                                        str_name_gdf_stops,
                                        str_trip_idx,
                                    )
                                    gc.collect()

                                    # NOTE: Associate the GTFS stops / trips / routes with the grid (shape of the city); the
                                    # resulting mappings are used below to estimate the trips going from O -> D.
                                    # Both geojson_input_hierarchy and config are rebound to the values returned here.
                                    (
                                        geojson_input_hierarchy,
                                        config,
                                        grid_idx_2_route_idx,
                                        stop_id_2_trip_id,
                                        stop_id_2_route_id,
                                        grid_idx_2_stop_idx,
                                        stop_idx_2_grid_idx,
                                        name_stop_idx_2_grid_idx,
                                    ) = pipeline_associate_stops_trips_routes_2_grid(
                                        config,
                                        stop_times_in_time_interval,
                                        trips_in_time_interval,
                                        gdf_stops,
                                        geojson_input_hierarchy,
                                        str_grid_idx,
                                        str_stop_idx,
                                        str_trip_idx,
                                        str_route_idx,
                                        str_name_stop_id,
                                        type_grid_idx=type_grid_idx,
                                        type_stop_idx=type_stop_idx,
                                        type_trip_idx=type_trip_idx,
                                        type_route_idx=type_route_idx,
                                        str_col_n_stops=f"{str_col_n_stops}_{str_t}_{str_t1}",
                                        str_prefix_complete_path=str_prefix_complete_path,
                                        str_dir_output_date=str_dir_output_date,
                                        str_name_stop_2_trip=str_name_stop_2_trip,
                                        str_name_stop_2_route=str_name_stop_2_route,
                                        str_name_grid_2_stop=str_name_grid_2_stop,
                                        str_name_stop_2_grid=str_name_stop_2_grid,
                                        str_name_name_stop_2_grid=str_name_name_stop_2_grid,
                                        str_name_grid_2_route=str_name_grid_2_route,
                                    )
                                    
                                    # NOTE: Add the bus-trip column to the flows (it differs from the OD fluxes).
                                    mh.flows = pipeline_associate_route_trips_2_flows(
                                        mh.flows,
                                        grid_idx_2_route_idx,
                                        str_col_origin,
                                        str_col_destination,
                                        str_col_n_trips_bus,
                                    )

                                    # NOTE: Mobility hierarchy analysis on the bus trips, for the direction of this iteration.
                                    # The flows are handed over as a polars DataFrame and must contain the grid-name, origin,
                                    # destination and bus-trip columns; the total in/out flow columns are created inside the
                                    # function. Note that list_indices_all_fluxes_for_colormap is rebound by this call.
                                    (
                                        mh_bus,
                                        map_flux_bus,
                                        hotspot_2_origin_idx_2_crit_dest_idx_bus,
                                        hotspot_in_flows_bus,
                                        list_indices_all_fluxes_for_colormap,
                                    ) = pipeline_mobility_hierarchy_time_day_type_trips(
                                        geojson_input_hierarchy,
                                        pl.DataFrame(mh.flows),
                                        str_population_col_grid,
                                        str_col_comuni_name,
                                        str_col_origin,
                                        str_col_destination,
                                        str_col_n_trips_bus,
                                        str_col_total_in_flows_grid,
                                        str_col_total_out_flows_grid,
                                        str_hotspot_prefix,
                                        str_centroid_lat,
                                        str_centroid_lon,
                                        str_grid_idx,
                                        "bus",
                                        str_t,
                                        str_t1,
                                        is_in_flows=is_in_flow_buses,                 # True -> incoming fluxes to the hotspot
                                        columns_2_hold_geopandas=columns_2_hold_geopandas_in_bus,
                                        int_levels=5,
                                    )

                                    # NOTE: Save the maps and hotspot structures of the bus hierarchy analysis.
                                    save_output_mobility_hierarchy_dependent_is_in_fluxes(
                                        str_dir_output_date=str_dir_output_date,
                                        map_flux=map_flux_bus,
                                        map_hierarchy=mh_bus.fmap,
                                        user_profile="bus",
                                        hotspot_levels=hotspot_in_flows_bus,
                                        hotspot_2_origin_idx_2_crit_dest_idx=hotspot_2_origin_idx_2_crit_dest_idx_bus,
                                        str_t=str_t,
                                        str_t1=str_t1,
                                        is_in_flows=is_in_flow_buses,                 # True -> incoming fluxes to the hotspot
                                    )
                   
                                    if SAVE_SINGLE_GEOSPATIAL_FILES:
                                        # Optional per-case export of the bus flows and of the bus grid.
                                        # NOTE(review): the grid is sliced with columns_2_hold_geopandas, not with the
                                        # columns_2_hold_geopandas_in_bus list given to the bus pipeline - confirm that
                                        # every listed column exists in mh_bus.grid.
                                        save_output_hierarchy_analysis(
                                            config=config,
                                            str_dir_output_date=str_dir_output_date,
                                            flows=mh_bus.flows,
                                            grid=mh_bus.grid[columns_2_hold_geopandas],
                                            user_profile="bus",
                                            str_t=str_t,
                                            str_t1=str_t1,
                                        )


                                    # NOTE: Compare the bus supply with the demand contained in the flows.
                                    # Bus users per OD pair = number of bus trips * number of people per bus.
                                    mh_bus.flows = mh_bus.flows.with_columns(
                                        (pl.col(str_col_n_trips_bus) * int_number_people_per_bus).alias(str_col_n_users_bus)
                                    )

                                    # mh.flows is merged with the pandas API, so the bus flows are converted with
                                    # .to_pandas() first; the left join keeps every OD pair of mh.flows. The merged table
                                    # goes back to polars and receives the column (bus users - OD trips).
                                    flows_merged_bus = pl.DataFrame(
                                        mh.flows.merge(
                                            mh_bus.flows.to_pandas()[[str_col_origin, str_col_destination, str_col_n_users_bus]],
                                            on=[str_col_origin, str_col_destination],
                                            how="left",
                                        )
                                    ).with_columns(
                                        (pl.col(str_col_n_users_bus) - pl.col(str_col_n_trips)).alias(str_col_difference_bus)
                                    )

                                    # NOTE: Keep only the OD pairs whose difference is below -10, i.e. where the buses fall
                                    # short of the demand by more than 10 people.
                                    flows_negative_bus = flows_merged_bus.filter(pl.col(str_col_difference_bus) < -10)

                                    # Interactive map of the directions that need more bus supply, saved in the output folder.
                                    # NOTE(review): this call passes `title=` while the other call of this function in the
                                    # notebook passes `caption_colorbar=` - confirm the function accepts both.
                                    fmap = plot_negative_differences_interactive(
                                        mh.grid,
                                        flows_negative_bus,
                                        str_col_origin,
                                        str_col_destination,
                                        str_col_difference_bus,
                                        str_centroid_lat,
                                        str_centroid_lon,
                                        title="Direction with need for Bus Supply",
                                    )
                                    path_map_need_for_bus = os.path.join(str_dir_output_date, f"need_for_bus_{str_t}_{str_t1}_{str_day}.html")
                                    fmap.save(path_map_need_for_bus)

                                    # Dump the current configuration next to the outputs of this time window.
                                    path_config_json = os.path.join(str_dir_output_date, f"config_{str_t}_{str_t1}.json")
                                    with open(path_config_json, "w") as file_config:
                                        json.dump(config, file_config, indent=4)
                                    idx_case_bus += 1

                                    # Fold this case into the global flows table and the global cities GeoDataFrame.
                                    # NOTE(review): the flows merged here are `flows_merged`, not the `flows_merged_bus`
                                    # table built in this bus loop, and the messages carry `user_profile` while the print
                                    # below says "bus" - confirm this is intended.
                                    str_message_merge = f"{str_day} {str_t}-{str_t1} {user_profile}"
                                    (
                                        global_Tij_holding_all_columns_flows,
                                        global_cities_gdf_holding_all_columns_flows,
                                    ) = merge_flows_and_grid_with_global_to_obtain_unique_dfs(
                                        global_Tij_holding_all_columns_flows=global_Tij_holding_all_columns_flows,
                                        flows_2_be_merged=flows_merged,
                                        global_cities_gdf_holding_all_columns_flows=global_cities_gdf_holding_all_columns_flows,
                                        grid_single_case_2_be_merged=mh.grid,
                                        columns_join_global_geopandas=columns_join_global_geopandas,
                                        columns_flows_2_be_merged_2_keep=columns_flows_2_be_merged_2_keep,
                                        on_columns_flows_2_join=on_colums_flows_2_join,
                                        on_columns_grid_2_join=[str_grid_idx],
                                        message_geojson=str_message_merge,
                                        message_flows=str_message_merge,
                                        is_join_flows=True,
                                        is_join_grid=True,
                                    )

                                    # Report how wide the two global tables are after the join.
                                    n_columns_global_flows = len(global_Tij_holding_all_columns_flows.columns)
                                    n_columns_global_cities = len(global_cities_gdf_holding_all_columns_flows.columns)
                                    print(f"Number columns global flows after join {str_t}-{str_t1} bus: ", n_columns_global_flows, " cities: ", n_columns_global_cities)
                                    
                                    # Memory management: clear the per-iteration bus analysis variables.
                                    # str_col_difference_bus is deliberately NOT deleted: it is assigned once before the
                                    # `for ... in case_2_is_in_flow.items()` loop and read on every iteration, so deleting
                                    # it here raised NameError as soon as a second case was processed.
                                    del flows_merged_bus, flows_negative_bus, fmap
                                    del gdf_stops, trips_in_time_interval, stop_times_in_time_interval
                                    del grid_idx_2_route_idx, stop_id_2_trip_id, stop_id_2_route_id
                                    del grid_idx_2_stop_idx, stop_idx_2_grid_idx, name_stop_idx_2_grid_idx
                                    del mh_bus, map_flux_bus
                                    del hotspot_2_origin_idx_2_crit_dest_idx_bus, hotspot_in_flows_bus
                                    gc.collect()

                                    # TODO: Save the routes that need to be strengthened according to this criterion.
                                    # NOTE: Use grid_idx_2_route_idx to select those routes and build a dictionary with the number of trips to add to each of them.
                                    # NOTE: The number of trips to add is the absolute value of the difference, so the negative values tell how many trips are needed.
                            
                            # Memory management at the end of a time interval: drop the large objects if they exist.
                            # Deleting a name that was never bound raises NameError, which is ignored on purpose
                            # (same effect as checking for the name before deleting it).
                            try:
                                del mh
                            except NameError:
                                pass
                            try:
                                del Tij_dist
                            except NameError:
                                pass
                            try:
                                del Tij_dist_baseline
                            except NameError:
                                pass
                            try:
                                del hotspot_2_origin_idx_2_crit_dest_idx
                            except NameError:
                                pass
                            try:
                                del hotspot_in_flows
                            except NameError:
                                pass

                            # Close any matplotlib figure left open, then collect.
                            safe_close_figures()
                            gc.collect()

                        # Export the accumulated global tables for the current day: the cities grid as GeoJSON and
                        # the flows as parquet. The file names depend only on str_day, so a later write for the same
                        # day replaces the earlier file.
                        global_cities_gdf_holding_all_columns_flows.to_file(os.path.join(str_dir_output_date, f"global_cities_gdf_holding_all_columns_flows_{str_day}.geojson"))
                        global_Tij_holding_all_columns_flows.write_parquet(os.path.join(str_dir_output_date, f"global_Tij_holding_all_columns_flows_{str_day}.parquet"))
                    # Memory management: close figures and collect once the block above has finished
                    safe_close_figures()  # Close any figures from visualization
                    gc.collect()
            
                # Memory management: clear time interval variables.
                # Each name is deleted only if it is bound and not None (a name bound to None is left in place).
                if 'time_vector_OD' in locals() and time_vector_OD is not None:
                    del time_vector_OD
                if 'str_time_vector' in locals() and str_time_vector is not None:
                    del str_time_vector
                gc.collect()
                
            # Memory management: collect after the day-specific work (no variable is deleted at this level)
            gc.collect()
        
        # Memory management: clear file processing variables (same bound-and-not-None rule as above)
        if 'df_presenze' in locals() and df_presenze is not None:
            del df_presenze
        if 'df_od' in locals() and df_od is not None:
            del df_od  
        gc.collect()

# Final memory management: drop the remaining large module-level objects, if they are still bound.
print("Performing final memory cleanup...")
for _leftover_name in (
    "cities_gdf",
    "df_distance_matrix",
    "direction_matrix",
    "distance_matrix",
    "Istat_obj",
    "feed",
    "df_presenze_null_days",
    "df_od_null_days",
    "data_handler",
):
    # This code runs at module level, where the local and the global namespace are the same dict,
    # so popping from globals() unbinds the name exactly like a guarded `del` would.
    globals().pop(_leftover_name, None)
del _leftover_name

# Final garbage collection
gc.collect()
print("Memory cleanup completed.")

In [None]:
%matplotlib inline
# Column names of the case selected by str_day / first element of time_interval / user_profile /
# is_weekday / suffix_in.
# NOTE(review): `mh` and `Tij_dist` are read below, but the processing loop earlier in the notebook
# deletes both at the end of every time interval - confirm they are bound again before running this cell.
cols_flows_case = dict_column_flows[str_day][time_interval[0]][user_profile][is_weekday][suffix_in]
cols_grid_case = dict_column_grid[str_day][time_interval[0]][user_profile][is_weekday][suffix_in]
str_col_n_trips = cols_flows_case["str_col_n_trips"]
str_col_total_flows_grid = cols_grid_case["str_col_total_flows_grid_hierachical_routine"]
str_caption_colormap = cols_flows_case["str_caption_colormap_flows"]

# Keep only the grid cells with a strictly positive total flow and the flows with at least one trip.
mask = mh.grid[str_col_total_flows_grid] > 0
grid = mh.grid[mask]
df_flows = Tij_dist.filter(pl.col(str_col_n_trips) > 0)
print(f" - n grid after dropping NaN total flows: {len(grid)}")

# Init Map: centered on the mean centroid of the retained cells.
center_lat = grid[str_centroid_lat].mean()
center_lon = grid[str_centroid_lon].mean()
fmap = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=10,
    tiles='cartodbpositron',
)

# Lookup from grid index to grid (comune) name, used in the popups.
grid_idx_2_grid_name = dict(zip(grid[str_grid_idx], grid[str_col_comuni_name]))
# Flatten list_indices_all_fluxes_for_colormap into a duplicate-free list of ints for the Polars filter:
# list / tuple / ndarray entries contribute their elements, any other entry counts as a single index,
# and None values are skipped.
flat_indices = list({
    int(member)
    for entry in list_indices_all_fluxes_for_colormap
    for member in (entry if isinstance(entry, (list, tuple, np.ndarray)) else (entry,))
    if member is not None
})

# Flows used to compute the colormap: for incoming flows the destination must be one of the
# selected indices, otherwise the origin must be.
str_col_endpoint_colormap = str_col_destination if is_in_flows else str_col_origin
filtered_flows = df_flows.filter(pl.col(str_col_endpoint_colormap).is_in(flat_indices))

# Colormap (with its min / max) built on the number of trips of the filtered flows.
# NOTE(review): str_colormap is not assigned in the visible part of this cell - it must already
# exist in the notebook namespace.
colormap, min_trips, max_trips = get_colormap_from_df_fluxes_and_col(
    filtered_flows,
    str_col_n_trips,
    caption=str_caption_colormap,
    colormap=str_colormap,
)
# --- Tooltip fields logic ---
# Collect (column, label) pairs for the columns that exist in the grid, then split them into the
# two parallel lists. The total-flows column is considered only when its name is non-empty.
tooltip_pairs = []
if str_col_comuni_name in grid.columns:
    tooltip_pairs.append((str_col_comuni_name, 'City:'))
if str_col_total_flows_grid and str_col_total_flows_grid in grid.columns:
    tooltip_pairs.append((str_col_total_flows_grid, 'Total Flows:'))
tooltip_fields = [field for field, _alias in tooltip_pairs]
tooltip_aliases = [alias for _field, alias in tooltip_pairs]

# Add base grid layer (lightly colored): every cell of the filtered grid gets the same style.
def _style_base_grid(feature):
    """Return the uniform light-gray style; the feature itself is not inspected."""
    return {
        'fillColor': 'lightgray',
        'color': 'gray',
        'weight': 1,
        'fillOpacity': 0.2,
        'opacity': 0.5,
    }


# No tooltip is attached to this layer (tooltip_fields / tooltip_aliases are not passed here).
folium.GeoJson(
    grid,
    name="Grid",
    style_function=_style_base_grid,
).add_to(fmap)
    
# Mapping from grid index to centroid coordinates [lat, lon].
# The keys are the values of the str_grid_idx column - the same identifier used as key of
# grid_idx_2_grid_name and compared against the origin / destination columns of the flows - and
# not the pandas index labels yielded by iterrows(), which need not coincide with them (a label
# found here but missing from the name lookup would raise KeyError when the popup is built).
grid_coords = {
    grid_idx: [lat, lon]
    for grid_idx, lat, lon in zip(grid[str_grid_idx], grid[str_centroid_lat], grid[str_centroid_lon])
}

# Iterate through levels and draw fluxes
# One folium FeatureGroup is created per level so each level can be toggled
# from the layer control added at the end of the cell.
# NOTE(review): grid_coords is keyed by the grid DataFrame index, while
# grid_idx_2_grid_name is keyed by the values of grid[str_grid_idx]; the code
# below looks the same flux indices up in both -- confirm the two coincide.
for level, origin_idx_2_crit_dest_idx in hotspot_2_origin_idx_2_crit_dest_idx.items():
    # Create a feature group for this level
    level_group = folium.FeatureGroup(name=f"Level {level} Fluxes")
    
    if is_in_flows:
        # For incoming flows: idx_dest_flux is destination, idces_orig_fluxe are origins
        for idx_dest_flux, idces_orig_fluxe in origin_idx_2_crit_dest_idx.items():
            # Highlight the destination hotspot
            if idx_dest_flux in grid_coords:
                dest_coords = grid_coords[idx_dest_flux]
                # NOTE(review): the "Index:" line shows the mapped name, and
                # grid_idx_2_grid_name[...] raises KeyError if the key is missing.
                folium.CircleMarker(
                    location=dest_coords,
                    radius=10,
                    color='red',
                    fillColor='darkred',
                    fillOpacity=0.8,
                    popup=folium.Popup(
                        f"<b>Destination Hotspot</b><br>"
                        f"Level: {level}<br>"
                        f"Index: {grid_idx_2_grid_name[idx_dest_flux]}<br>"
                        f"# Regions Incoming flow: {len(idces_orig_fluxe)}"
                    )
                ).add_to(level_group)
            
            # Draw lines from each origin to this destination
            for idx_orig_flux in idces_orig_fluxe:
                # Find the specific flow data
                flow_data = df_flows.filter(
                    (pl.col(str_col_origin) == idx_orig_flux) & 
                    (pl.col(str_col_destination) == idx_dest_flux)
                )
                
                # Pairs without a matching flow row are skipped (no line, no marker)
                if len(flow_data) > 0:
                    # .item() requires exactly one value, i.e. a single row per (origin, destination)
                    n_trips = flow_data[str_col_n_trips].item()
                    
                    # Draw the flux line
                    # draw_flux_line returns the group, which replaces level_group
                    level_group = draw_flux_line(
                        idx_orig_flux,
                        idx_dest_flux, 
                        level, 
                        n_trips,
                        grid_coords,
                        min_trips,
                        max_trips,
                        level_group,
                        is_in_flows,
                        grid_idx_2_grid_name,
                        colormap
                    )
                
                    # Add origin marker
                    if idx_orig_flux in grid_coords:
                        origin_coords = grid_coords[idx_orig_flux]
                        folium.CircleMarker(
                            location=origin_coords,
                            radius=4,
                            color='blue',
                            fillColor='lightblue',
                            fillOpacity=0.6,
                            popup=folium.Popup(
                                f"<b>Origin Point</b><br>"
                                f"Index: {idx_orig_flux}<br>"
                                f"Contributes to Level {level} hotspot"
                            )
                        ).add_to(level_group)
    
    else:
        # For outgoing flows: idx_orig_flux is origin, idces_dest_fluxe are destinations
        for idx_orig_flux, idces_dest_fluxe in origin_idx_2_crit_dest_idx.items():
            # Highlight the origin hotspot
            if idx_orig_flux in grid_coords:
                origin_coords = grid_coords[idx_orig_flux]
                # Unlike the incoming branch, the popup shows the raw index here
                folium.CircleMarker(
                    location=origin_coords,
                    radius=10,
                    color='green',
                    fillColor='darkgreen',
                    fillOpacity=0.8,
                    popup=folium.Popup(
                        f"<b>Origin Hotspot</b><br>"
                        f"Level: {level}<br>"
                        f"Index: {idx_orig_flux}<br>"
                        f"# Regions Outgoing flows to {len(idces_dest_fluxe)} destinations"
                    )
                ).add_to(level_group)
            
            # Draw lines from this origin to each destination
            for idx_dest_flux in idces_dest_fluxe:
                # Find the specific flow data
                flow_data = df_flows.filter(
                    (pl.col(str_col_origin) == idx_orig_flux) & 
                    (pl.col(str_col_destination) == idx_dest_flux)
                )
                
                # Pairs without a matching flow row are skipped (no line, no marker)
                if len(flow_data) > 0:
                    # .item() requires exactly one value, i.e. a single row per (origin, destination)
                    n_trips = flow_data[str_col_n_trips].item()
                    
                    # Draw the flux line
                    # draw_flux_line returns the group, which replaces level_group
                    level_group = draw_flux_line(
                        idx_orig_flux,
                        idx_dest_flux, 
                        level, 
                        n_trips,
                        grid_coords,
                        min_trips,
                        max_trips,
                        level_group,
                        is_in_flows,
                        grid_idx_2_grid_name,
                        colormap
                    )
                
                    # Add destination marker
                    if idx_dest_flux in grid_coords:
                        dest_coords = grid_coords[idx_dest_flux]
                        folium.CircleMarker(
                            location=dest_coords,
                            radius=4,
                            color='orange',
                            fillColor='lightyellow',
                            fillOpacity=0.6,
                            popup=folium.Popup(
                                f"<b>Destination Point</b><br>"
                                f"Index: {idx_dest_flux}<br>"
                                f"Receives from Level {level} hotspot"
                            )
                        ).add_to(level_group)
    
    # Add the level group to the map
    level_group.add_to(fmap)

# Add the colormap legend
colormap.add_to(fmap)

# Add layer control
# Added after all the layers above have been attached to the map
folium.LayerControl().add_to(fmap)
# Last expression of the cell: the notebook renders the map
fmap

# Linking Geography and Network

In [None]:
from network_analysis import build_nk_graph_from_hotspot_dict, plot_nk_graph_geometrical_interactive
# Build the graph (and its edge features) from the hotspot dictionary, the grid
# and the flow table; origin/destination/trip columns are passed as i/j/weight.
G, edges_features = build_nk_graph_from_hotspot_dict(
    hotspot_2_origin_idx_2_crit_dest_idx,
    mh.grid,
    mh.flows,
    str_col_i=str_col_origin,
    str_col_j=str_col_destination,
    str_col_weight=str_col_n_trips,
)

# Interactive plot of the graph, positioned with the grid centroid columns
plot_nk_graph_geometrical_interactive(G, mh.grid, str_centroid_lat, str_centroid_lon)



In [None]:
import networkx as nx
import networkit as nk
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# NetworKit graph -> networkx graph, then its dense weighted adjacency matrix
nxG = nk.nxadapter.nk2nx(G)
A = nx.to_numpy_array(nxG, weight='weight')

# Draw the matrix as a heatmap (cell annotations are switched off)
heatmap_ax = sns.heatmap(A, cmap='viridis', cbar=True, square=True, annot=False, fmt='.1f')
heatmap_ax.set_xlabel("Destination Node")
heatmap_ax.set_ylabel("Origin Node")
heatmap_ax.set_title("Weighted Adjacency Matrix Heatmap")
plt.show()

In [None]:
# Eigenvector centrality
# Instantiate NetworKit's EigenvectorCentrality on G, run it, and display the
# first ten entries of its ranking as the cell output.
ec = nk.centrality.EigenvectorCentrality(G)
ec.run()
ec.ranking()[:10] # the 10 most central nodes

# Fluxes Hierarchy (Plot Hotspots)

In [None]:
# Create a heatmap of the bus trips matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

def plot_heatmap_flows(flows_df, 
                       str_col_origin, 
                       str_col_destination, 
                       str_col_n_trips_bus, 
                       cbar_kws,
                       title,
                       save_path=None):
    """
    Plot an origin x destination heatmap of one flow column and print summary statistics.

    Parameters:
    - flows_df: long-format table with one row per (origin, destination) pair.
      A pandas DataFrame, or any frame exposing ``to_pandas()`` (e.g. Polars).
    - str_col_origin: column whose values become the heatmap rows.
    - str_col_destination: column whose values become the heatmap columns.
    - str_col_n_trips_bus: column holding the value drawn in each cell.
    - cbar_kws: text used as the colorbar label (a plain string, not a dict).
    - title: figure title.
    - save_path: if given, the figure is written there and then closed;
      otherwise it is shown with ``plt.show()``.

    Raises:
    - ValueError: from ``DataFrame.pivot`` when an (origin, destination) pair
      occurs more than once.
    """
    # pandas frames have no to_pandas(); frames from other libraries that do are converted
    if hasattr(flows_df, "to_pandas"):
        flows_df = flows_df.to_pandas()

    # Long format -> origin x destination matrix; missing combinations become 0
    bus_trips_matrix = flows_df.pivot(index=str_col_origin, 
                                      columns=str_col_destination, 
                                      values=str_col_n_trips_bus).fillna(0)

    # Create the heatmap
    fig = plt.figure(figsize=(12, 10))
    sns.heatmap(bus_trips_matrix, 
                annot=True,  # Show values in cells
                fmt='g',     # Format for numbers
                cmap='YlOrRd',  # Color scheme
                cbar_kws={'label': cbar_kws},
                square=True)  # Make cells square

    plt.title(title, 
              fontsize=14, fontweight='bold')
    plt.xlabel('Destination Grid Cell (j)', fontsize=12)
    plt.ylabel('Origin Grid Cell (i)', fontsize=12)
    plt.xticks(rotation=45)
    plt.yticks(rotation=0)
    plt.tight_layout()

    # --- Summary statistics ---
    is_positive = bus_trips_matrix > 0
    n_positive = int(is_positive.sum().sum())

    # Mean over the positive cells themselves (a mean of per-column means would
    # weight sparse columns more than dense ones)
    if n_positive:
        mean_positive = float(bus_trips_matrix.where(is_positive, 0).sum().sum()) / n_positive
    else:
        mean_positive = float("nan")

    # Positive cells relative to n*(n-1)/2, n being the number of rows.
    # NOTE(review): the numerator counts both directions and the diagonal, so
    # the ratio can exceed 1 -- confirm this is the intended normalisation.
    n_nodes = len(bus_trips_matrix)
    n_possible = n_nodes * (n_nodes - 1) / 2
    fraction_complete = n_positive / n_possible if n_possible else float("nan")

    print(f"Matrix shape: {bus_trips_matrix.shape}")
    print(f"Total non-zero connections: {n_positive}")
    print(f"Maximum number of common routes: {bus_trips_matrix.max().max()}")
    print(f"Average number of common routes (non-zero): {mean_positive:.2f}")
    print("Fraction 2 complete graph: ", fraction_complete)

    if save_path:
        fig.savefig(save_path)
        print(f"Heatmap saved to {save_path}")
        # Release the figure so repeated saves do not accumulate open figures
        plt.close(fig)
    else:
        plt.show()
# Heatmap 1: values taken from the str_col_n_trips_bus column
cbar_kws = 'Number of Common Bus Routes'
title = f'Bus Route Intersections Matrix\n{user_profile} - {str_t} to {str_t1}'
plot_heatmap_flows(
    flows_df=mh.flows,
    str_col_origin=str_col_origin,
    str_col_destination=str_col_destination,
    str_col_n_trips_bus=str_col_n_trips_bus,
    cbar_kws=cbar_kws,
    title=title,
    save_path=None,
)

# Heatmap 2: same matrix layout, values taken from the str_col_n_trips column
cbar_kws = f'Number trips {user_profile}'
title = f'Number trips \n{user_profile} - {str_t} to {str_t1}'
plot_heatmap_flows(
    flows_df=mh.flows,
    str_col_origin=str_col_origin,
    str_col_destination=str_col_destination,
    str_col_n_trips_bus=str_col_n_trips,
    cbar_kws=cbar_kws,
    title=title,
    save_path=None,
)


In [None]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns
import numpy as np
import pandas as pd
import polars as pl
from matplotlib.colors import LogNorm
from pathlib import Path
import os
import json

def create_daily_od_visualizations_with_animations(df_od, IndexVodafone2UserProfile, base_dir_output, 
                                                  str_period_id_presenze="PERIOD_ID", top_n=10):
    """
    Create and save static plots, animations and data files for each day in the dataset.

    For every (day, hour) combination a 4x4 origin/destination visitor-class
    matrix and a bar chart of the overall top O-D pairs are saved; for every
    day with more than one hour, a matrix animation and a bar animation are
    generated as well. The non-zero values are also written to Parquet files
    and a ``metadata.json`` summary is saved.

    Parameters:
    - df_od: DataFrame with OD data (pandas input is converted to Polars). It is
      read through the columns ``DEPARTURE_HOUR``, ``O_VISITOR_CLASS_ID``,
      ``D_VISITOR_CLASS_ID``, ``TRIPS`` and ``str_period_id_presenze``.
    - IndexVodafone2UserProfile: Dictionary mapping indices to user profiles
    - base_dir_output: Base directory to save outputs
    - str_period_id_presenze: Column name for period/date information
    - top_n: Number of top O-D pairs to show in bar charts

    Returns:
    - (all_matrix_data, all_bar_data, metadata): the two lists of record dicts
      that were written to Parquet and the metadata dict written to JSON.
    """
    # The matrix plots and animations are laid out for visitor classes 1..4
    n_classes = 4
    group_cols = ["O_VISITOR_CLASS_ID", "D_VISITOR_CLASS_ID"]

    # Convert to Polars if it's pandas
    if isinstance(df_od, pd.DataFrame):
        df_od = pl.from_pandas(df_od)

    # Day label of every row: first element returned by extract_date_info
    df_od_with_dates = df_od.with_columns([
        pl.col(str_period_id_presenze).map_elements(
            lambda x: extract_date_info(x)[0], 
            return_dtype=pl.Utf8
        ).alias("str_day")
    ])

    # Get unique days and hours
    unique_days = sorted(df_od_with_dates["str_day"].unique().to_list())
    unique_hours = sorted(df_od_with_dates["DEPARTURE_HOUR"].unique().to_list())

    print(f"Processing {len(unique_days)} days and {len(unique_hours)} hours")
    print(f"Days: {unique_days}")
    print(f"Hours: {unique_hours}")

    # Create output directories
    Path(base_dir_output).mkdir(parents=True, exist_ok=True)
    matrix_dir = os.path.join(base_dir_output, "matrix_plots")
    bar_dir = os.path.join(base_dir_output, "bar_plots")
    animation_dir = os.path.join(base_dir_output, "animations")
    data_dir = os.path.join(base_dir_output, "data")

    for dir_path in [matrix_dir, bar_dir, animation_dir, data_dir]:
        Path(dir_path).mkdir(parents=True, exist_ok=True)

    # Get visitor classes (excluding AGGREGATED)
    visitor_classes = [idx for idx in IndexVodafone2UserProfile.keys() if idx <= 4]
    x_labels = [f"D_{IndexVodafone2UserProfile[idx]}" for idx in visitor_classes]
    y_labels = [f"O_{IndexVodafone2UserProfile[idx]}" for idx in visitor_classes]

    # Overall top O-D pairs (over all days and hours) define the bar-chart slots
    overall_aggregated = df_od_with_dates.group_by(group_cols).agg([
        pl.col("TRIPS").sum().alias("total_trips")
    ]).sort("total_trips", descending=True)

    top_pair_labels = []
    top_pair_keys = []
    for o_raw, d_raw in overall_aggregated.head(top_n).select(group_cols).iter_rows():
        o_class = int(o_raw)
        d_class = int(d_raw)
        # Pairs involving a class without a profile name are left out
        if o_class in IndexVodafone2UserProfile and d_class in IndexVodafone2UserProfile:
            label = f"O_{IndexVodafone2UserProfile[o_class]} → D_{IndexVodafone2UserProfile[d_class]}"
            top_pair_labels.append(label)
            top_pair_keys.append((o_class, d_class))

    # (origin, destination) -> bar slot, built once instead of list.index per row
    pair_position = {pair: pos for pos, pair in enumerate(top_pair_keys)}

    # Store all data for saving
    all_matrix_data = []
    all_bar_data = []
    # Animations actually generated (none for a day with a single time point)
    n_animations = 0

    # Process each day
    for day in unique_days:
        print(f"Processing day: {day}")

        day_data = df_od_with_dates.filter(pl.col("str_day") == day)

        if day_data.height == 0:
            print(f"No data for day {day}, skipping...")
            continue

        # Prepare data structures for this day
        daily_matrices = []
        daily_bar_data = []
        max_trips_matrix = 0
        max_trips_bar = 0

        # Process each hour for this day
        for hour in unique_hours:
            print(f"  Processing hour: {hour}")

            # Filter data for this day and hour
            day_hour_data = day_data.filter(pl.col("DEPARTURE_HOUR") == hour)

            # Hours without data keep all-zero structures so every day has one
            # frame per entry of unique_hours
            matrix = np.zeros((n_classes, n_classes))
            trips_array = np.zeros(len(top_pair_keys))

            if day_hour_data.height > 0:
                # Sum trips per (origin class, destination class)
                aggregated = day_hour_data.group_by(group_cols).agg([
                    pl.col("TRIPS").sum().alias("total_trips")
                ])

                # Single pass feeding both the matrix and the bar data
                for o_raw, d_raw, total_trips in aggregated.select(
                        group_cols + ["total_trips"]).iter_rows():
                    o_class = int(o_raw)
                    d_class = int(d_raw)

                    # Matrix uses 0-based positions; only classes 1-4 are kept
                    row_idx = o_class - 1
                    col_idx = d_class - 1
                    if 0 <= row_idx < n_classes and 0 <= col_idx < n_classes:
                        matrix[row_idx, col_idx] = total_trips

                    slot = pair_position.get((o_class, d_class))
                    if slot is not None:
                        trips_array[slot] = total_trips

            # Store data for animations
            daily_matrices.append(matrix)
            daily_bar_data.append(trips_array)
            max_trips_matrix = max(max_trips_matrix, matrix.max())
            max_trips_bar = max(max_trips_bar, trips_array.max())

            # === SAVE STATIC PLOTS ===
            create_and_save_matrix_plot(matrix, day, hour, x_labels, y_labels, 
                                      matrix_dir, IndexVodafone2UserProfile)

            create_and_save_bar_plot(trips_array, day, hour, top_pair_labels, 
                                    bar_dir, top_n)

            # === STORE DATA FOR PARQUET FILES ===
            # Store matrix data
            for i in range(n_classes):
                for j in range(n_classes):
                    if matrix[i, j] > 0:  # Only store non-zero values
                        all_matrix_data.append({
                            'day': day,
                            'hour': hour,
                            'origin_class': i + 1,  # Convert back to 1-based indexing
                            'destination_class': j + 1,
                            'origin_class_name': IndexVodafone2UserProfile[i + 1],
                            'destination_class_name': IndexVodafone2UserProfile[j + 1],
                            'trips': int(matrix[i, j])
                        })

            # Store bar data
            for idx, (o_class, d_class) in enumerate(top_pair_keys):
                if trips_array[idx] > 0:  # Only store non-zero values
                    all_bar_data.append({
                        'day': day,
                        'hour': hour,
                        'origin_class': o_class,
                        'destination_class': d_class,
                        'origin_class_name': IndexVodafone2UserProfile[o_class],
                        'destination_class_name': IndexVodafone2UserProfile[d_class],
                        'trips': int(trips_array[idx]),
                        'pair_label': top_pair_labels[idx]
                    })

        # === CREATE ANIMATIONS FOR THIS DAY ===
        if len(daily_matrices) > 1:  # Only create animation if we have multiple time points
            print(f"  Creating animations for day: {day}")

            # Create matrix animation
            create_matrix_animation(daily_matrices, day, unique_hours, x_labels, y_labels, 
                                  animation_dir, IndexVodafone2UserProfile, max_trips_matrix)

            # Create bar animation
            create_bar_animation(daily_bar_data, day, unique_hours, top_pair_labels, 
                               animation_dir, top_n, max_trips_bar)

            n_animations += 2  # matrix + bar

    # === SAVE DATA TO PARQUET FILES ===
    if all_matrix_data:
        matrix_df = pl.DataFrame(all_matrix_data)
        matrix_df.write_parquet(os.path.join(data_dir, "od_matrix_data_daily_hourly.parquet"))
        print(f"Saved matrix data: {len(all_matrix_data)} records")

    if all_bar_data:
        bar_df = pl.DataFrame(all_bar_data)
        bar_df.write_parquet(os.path.join(data_dir, "od_top_pairs_data_daily_hourly.parquet"))
        print(f"Saved bar data: {len(all_bar_data)} records")

    # === SAVE METADATA ===
    metadata = {
        'unique_days': unique_days,
        'unique_hours': unique_hours,
        'visitor_classes': list(IndexVodafone2UserProfile.items()),
        'top_pairs': [(k, v) for k, v in zip(top_pair_keys, top_pair_labels)],
        'total_matrix_plots': len(unique_days) * len(unique_hours),
        'total_bar_plots': len(unique_days) * len(unique_hours),
        'total_animations': n_animations,  # 2 per day that had more than one time point
        'processing_date': pd.Timestamp.now().isoformat()
    }

    with open(os.path.join(data_dir, "metadata.json"), "w") as f:
        json.dump(metadata, f, indent=2)

    print(f"\nProcessing complete!")
    print(f"Matrix plots saved to: {matrix_dir}")
    print(f"Bar plots saved to: {bar_dir}")
    print(f"Animations saved to: {animation_dir}")
    print(f"Data files saved to: {data_dir}")

    return all_matrix_data, all_bar_data, metadata

def create_matrix_animation(daily_matrices, day, unique_hours, x_labels, y_labels, 
                           animation_dir, IndexVodafone2UserProfile, max_trips):
    """
    Animate the 4x4 visitor-class matrices of one day and save the result.

    One frame is produced per entry of ``daily_matrices``; frame ``k`` is
    titled with ``unique_hours[k]``. The animation is written to
    ``animation_dir`` as GIF (pillow writer) and MP4 (ffmpeg writer); a failure
    of either writer is reported with a message and does not stop the function.
    The colour scale is fixed to ``[0, max_trips]`` (``[0, 1]`` when
    ``max_trips`` is not positive). Returns the ``FuncAnimation`` object.
    """
    fig, ax = plt.subplots(figsize=(12, 10))

    # Fixed colour scale shared by all frames
    color_ceiling = max_trips if max_trips > 0 else 1
    im = ax.imshow(daily_matrices[0], cmap='YlOrRd', aspect='auto',
                   vmin=0, vmax=color_ceiling)

    # Colorbar
    cbar = plt.colorbar(im, ax=ax)
    cbar.set_label('Total Trips', rotation=270, labelpad=20)

    # Major ticks carry the class labels
    ax.set_xticks(range(4))
    ax.set_yticks(range(4))
    ax.set_xticklabels(x_labels, rotation=45)
    ax.set_yticklabels(y_labels)
    ax.set_xlabel('Destination Visitor Class')
    ax.set_ylabel('Origin Visitor Class')

    # Minor ticks sit on the cell borders and carry the grid lines
    cell_edges = np.arange(-0.5, 4, 1)
    ax.set_xticks(cell_edges, minor=True)
    ax.set_yticks(cell_edges, minor=True)
    ax.grid(which='minor', color='gray', linestyle='-', linewidth=0.5, alpha=0.3)

    # One (initially empty) annotation per cell, indexed as text_objects[row][col]
    text_objects = [
        [
            ax.text(col, row, '', ha='center', va='center',
                    color='black', fontweight='bold', fontsize=8)
            for col in range(4)
        ]
        for row in range(4)
    ]

    title = ax.set_title('')

    def _cell_label(value):
        """Text for one cell: K/M notation for large values, empty unless positive."""
        if not value > 0:
            return ''
        if value >= 1000000:
            return f'{value/1000000:.1f}M'
        if value >= 1000:
            return f'{value/1000:.1f}K'
        return f'{int(value)}'

    def animate(frame):
        current_hour = unique_hours[frame]
        current_matrix = daily_matrices[frame]

        # Swap in the data of this frame
        im.set_array(current_matrix)

        # Refresh every cell annotation
        flat_texts = []
        for row, row_texts in enumerate(text_objects):
            for col, cell_text in enumerate(row_texts):
                cell_text.set_text(_cell_label(current_matrix[row, col]))
                flat_texts.append(cell_text)

        title.set_text(f'Daily Trips by Visitor Class - {day}, Hour: {current_hour}:00')

        # Artists redrawn by the animation
        return [im] + flat_texts + [title]

    # Create animation
    anim = animation.FuncAnimation(fig, animate, frames=len(daily_matrices),
                                   interval=1000, blit=True, repeat=True)

    plt.tight_layout()

    # Output files
    gif_path = os.path.join(animation_dir, f"matrix_animation_{day}.gif")
    mp4_path = os.path.join(animation_dir, f"matrix_animation_{day}.mp4")

    # Each format is attempted independently (best effort)
    try:
        anim.save(gif_path, writer='pillow', fps=1)
        print(f"  Saved matrix animation (GIF): {gif_path}")
    except Exception as e:
        print(f"  Could not save GIF: {e}")

    try:
        anim.save(mp4_path, writer='ffmpeg', fps=1)
        print(f"  Saved matrix animation (MP4): {mp4_path}")
    except Exception as e:
        print(f"  Could not save MP4: {e}")

    plt.close()
    return anim

def create_bar_animation(daily_bar_data, day, unique_hours, top_pair_labels, 
                        animation_dir, top_n, max_trips):
    """
    Animate the top O-D pair bar charts of one day and save the result.

    Parameters:
    - daily_bar_data: sequence of arrays, one per frame, each holding one value
      per entry of ``top_pair_labels``.
    - day: label used in the title and in the output file names.
    - unique_hours: frame ``k`` is titled with ``unique_hours[k]``.
    - top_pair_labels: x tick labels, one per bar.
    - animation_dir: directory receiving ``bars_animation_{day}.gif`` / ``.mp4``.
    - top_n: accepted for interface compatibility; not used here.
    - max_trips: value used to scale the y axis, the bar colours and the label
      offset; a non-positive value falls back to a unit axis and gray bars.

    Returns:
    - the ``FuncAnimation`` object. A failure of either writer (pillow for GIF,
      ffmpeg for MP4) is reported with a message and does not stop the function.
    """
    fig, ax = plt.subplots(figsize=(14, 8))

    has_scale = max_trips > 0
    # Vertical gap between the top of a bar and its value label
    label_offset = max_trips * 0.02 if has_scale else 0.02

    # Create initial bar plot
    x_pos = np.arange(len(top_pair_labels))
    bars = ax.bar(x_pos, daily_bar_data[0], alpha=0.7)

    # Customize the plot
    ax.set_xlabel('Top Origin → Destination Visitor Class Pairs', fontsize=12)
    ax.set_ylabel('Total Trips', fontsize=12)
    ax.set_ylim(0, max_trips * 1.2 if has_scale else 1)
    ax.set_xticks(x_pos)
    ax.set_xticklabels(top_pair_labels, rotation=45, ha='right')
    ax.grid(axis='y', alpha=0.3)

    # One (initially empty) value label per bar, placed just above the bar top
    value_texts = []
    for bar in bars:
        text = ax.text(bar.get_x() + bar.get_width()/2., 
                      bar.get_height() + label_offset,
                      '', ha='center', va='bottom', 
                      fontsize=9, fontweight='bold')
        value_texts.append(text)

    title = ax.set_title('')

    def animate(frame):
        hour = unique_hours[frame]
        data = daily_bar_data[frame]

        # Update bar heights and colors
        for bar, value in zip(bars, data):
            bar.set_height(value)

            # Color bars based on value (gradient from light to dark);
            # the colormap saturates at 70% of max_trips
            if has_scale:
                intensity = min(1.0, value / (max_trips * 0.7))
                bar.set_color(plt.cm.YlOrRd(intensity))
            else:
                bar.set_color('lightgray')

        # Update value labels
        for bar, text, value in zip(bars, value_texts, data):
            if value > 0:
                if value >= 1000000:
                    label = f'{value/1000000:.1f}M'
                elif value >= 1000:
                    label = f'{value/1000:.0f}K'
                else:
                    label = f'{int(value)}'
                text.set_text(label)
                text.set_position((bar.get_x() + bar.get_width()/2., 
                                 bar.get_height() + label_offset))
            else:
                text.set_text('')

        # Update title
        title.set_text(f'Top {len(top_pair_labels)} O-D Pairs - {day}, Hour: {hour}:00')

        # Return as list (fix for animation)
        return list(bars) + value_texts + [title]

    # Create animation
    anim = animation.FuncAnimation(fig, animate, frames=len(daily_bar_data), 
                                 interval=1200, blit=True, repeat=True)

    plt.tight_layout()

    # Save animation
    gif_path = os.path.join(animation_dir, f"bars_animation_{day}.gif")
    mp4_path = os.path.join(animation_dir, f"bars_animation_{day}.mp4")

    # Each format is attempted independently (best effort)
    try:
        anim.save(gif_path, writer='pillow', fps=0.8)
        print(f"  Saved bar animation (GIF): {gif_path}")
    except Exception as e:
        print(f"  Could not save GIF: {e}")

    try:
        anim.save(mp4_path, writer='ffmpeg', fps=0.8)
        print(f"  Saved bar animation (MP4): {mp4_path}")
    except Exception as e:
        print(f"  Could not save MP4: {e}")

    # Close this figure explicitly rather than whichever one is current
    plt.close(fig)
    return anim

# Static (single-frame) plot helpers, called once per day/hour combination
def create_and_save_matrix_plot(matrix, day, hour, x_labels, y_labels, 
                               output_dir, IndexVodafone2UserProfile):
    """
    Draw one 4x4 visitor-class matrix as an annotated heatmap and save it as PNG.

    The colour scale runs from 0 to the matrix maximum (to 1 when the maximum
    is not positive); positive cells are annotated, using K/M notation for
    large values. The file is written to ``output_dir`` as
    ``matrix_{day}_hour_{hour:02d}.png`` and its path is returned.
    """
    fig, ax = plt.subplots(figsize=(10, 8))

    # Heatmap with a per-plot colour scale
    peak = matrix.max()
    im = ax.imshow(matrix, cmap='YlOrRd', aspect='auto',
                   vmin=0, vmax=peak if peak > 0 else 1)

    # Colorbar
    cbar = plt.colorbar(im, ax=ax)
    cbar.set_label('Total Trips', rotation=270, labelpad=20)

    # Major ticks carry the class labels
    ax.set_xticks(range(4))
    ax.set_yticks(range(4))
    ax.set_xticklabels(x_labels, rotation=45)
    ax.set_yticklabels(y_labels)
    ax.set_xlabel('Destination Visitor Class')
    ax.set_ylabel('Origin Visitor Class')

    # Minor ticks sit on the cell borders and carry the grid lines
    cell_edges = np.arange(-0.5, 4, 1)
    ax.set_xticks(cell_edges, minor=True)
    ax.set_yticks(cell_edges, minor=True)
    ax.grid(which='minor', color='gray', linestyle='-', linewidth=0.5, alpha=0.3)

    # Annotate the positive cells, row by row
    for row, col in np.ndindex(4, 4):
        cell_value = matrix[row, col]
        if not cell_value > 0:
            continue

        # K/M notation for large numbers
        if cell_value >= 1000000:
            cell_label = f'{cell_value/1000000:.1f}M'
        elif cell_value >= 1000:
            cell_label = f'{cell_value/1000:.1f}K'
        else:
            cell_label = f'{int(cell_value)}'

        ax.text(col, row, cell_label, ha='center', va='center',
                color='black', fontweight='bold', fontsize=8)

    ax.set_title(f'Daily Trips by Visitor Class - {day}, Hour: {hour}:00',
                 fontsize=12, fontweight='bold')

    plt.tight_layout()

    # Write the PNG and release the figure
    filepath = os.path.join(output_dir, f"matrix_{day}_hour_{hour:02d}.png")
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    plt.close()

    return filepath

def create_and_save_bar_plot(trips_array, day, hour, top_pair_labels, 
                           output_dir, top_n):
    """
    Draw one bar chart of the top O-D pairs and save it as PNG.

    Parameters:
    - trips_array: array with one value per entry of ``top_pair_labels``
      (may be empty).
    - day, hour: used in the title and in the file name; ``hour`` is formatted
      with ``:02d``.
    - top_pair_labels: x tick labels, one per bar.
    - output_dir: directory receiving ``bars_{day}_hour_{hour:02d}.png``.
    - top_n: accepted for interface compatibility; not used here.

    Returns:
    - path of the written PNG file.
    """
    fig, ax = plt.subplots(figsize=(14, 8))

    try:
        # Create bar plot
        x_pos = np.arange(len(top_pair_labels))

        # Scale reference for axis, colours and label offset. max() raises on an
        # empty array, and an all-zero array would give a zero-height axis, so
        # both cases fall back to 1.
        peak = trips_array.max() if len(trips_array) > 0 else 0
        max_trips = peak if peak > 0 else 1

        bars = ax.bar(x_pos, trips_array, alpha=0.7)

        # Color bars based on value (gradient from light to dark);
        # the colormap saturates at 70% of max_trips
        for bar, value in zip(bars, trips_array):
            intensity = min(1.0, value / (max_trips * 0.7))
            bar.set_color(plt.cm.YlOrRd(intensity))

        # Customize the plot
        ax.set_xlabel('Top Origin → Destination Visitor Class Pairs', fontsize=12)
        ax.set_ylabel('Total Trips', fontsize=12)
        ax.set_ylim(0, max_trips * 1.2)
        ax.set_xticks(x_pos)
        ax.set_xticklabels(top_pair_labels, rotation=45, ha='right')
        ax.grid(axis='y', alpha=0.3)

        # Add value labels on the positive bars
        for bar, value in zip(bars, trips_array):
            if value > 0:
                if value >= 1000000:
                    label = f'{value/1000000:.1f}M'
                elif value >= 1000:
                    label = f'{value/1000:.0f}K'
                else:
                    label = f'{int(value)}'

                ax.text(bar.get_x() + bar.get_width()/2., 
                       bar.get_height() + max_trips * 0.02,
                       label, ha='center', va='bottom', 
                       fontsize=9, fontweight='bold')

        # Set title
        ax.set_title(f'Top {len(top_pair_labels)} O-D Pairs - {day}, Hour: {hour}:00',
                    fontsize=12, fontweight='bold')

        plt.tight_layout()

        # Save plot
        filename = f"bars_{day}_hour_{hour:02d}.png"
        filepath = os.path.join(output_dir, filename)
        fig.savefig(filepath, dpi=300, bbox_inches='tight')
    finally:
        # Always release this figure, even when plotting or saving fails
        plt.close(fig)

    return filepath

def _pivot_trips_by(matrix_df, time_col, value_col):
    """Sum ``trips`` per (time_col, origin_class, destination_class) and pivot to O-D rows x time columns."""
    totals = matrix_df.group_by([time_col, 'origin_class', 'destination_class']).agg([
        pl.col('trips').sum().alias(value_col)
    ])
    return totals.to_pandas().pivot_table(
        values=value_col,
        index=['origin_class', 'destination_class'],
        columns=time_col,
        fill_value=0
    )


def _save_od_heatmap(pivot_data, figsize, cbar_label, title, xlabel,
                     filepath, xtick_rotation=None):
    """Draw an annotated YlOrRd heatmap of ``pivot_data`` and write it to ``filepath``."""
    fig, ax = plt.subplots(figsize=figsize)
    try:
        # Draw on the axes created above rather than on pyplot's implicit
        # "current" axes.
        sns.heatmap(pivot_data, annot=True, fmt='g', cmap='YlOrRd',
                    cbar_kws={'label': cbar_label}, ax=ax)
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.set_xlabel(xlabel, fontsize=12)
        ax.set_ylabel('Origin-Destination Class Pairs', fontsize=12)
        if xtick_rotation is not None:
            plt.setp(ax.get_xticklabels(), rotation=xtick_rotation)
        fig.tight_layout()
        fig.savefig(filepath, dpi=300, bbox_inches='tight')
    finally:
        # Close the figure even if drawing or saving raised.
        plt.close(fig)


def create_summary_visualizations(base_dir_output, all_matrix_data, all_bar_data):
    """Write cross-day and cross-hour summary heatmaps of O-D trips.

    Creates ``<base_dir_output>/summary_plots`` (including parents) and, when
    ``all_matrix_data`` is non-empty, saves two PNGs into it:

    * ``daily_totals_heatmap.png``: trips summed per day and O-D class pair.
    * ``hourly_patterns_heatmap.png``: trips summed per hour and O-D class pair.

    Args:
        base_dir_output: Directory under which ``summary_plots`` is created.
        all_matrix_data: Data accepted by ``pl.DataFrame`` that yields the
            columns ``day``, ``hour``, ``origin_class``, ``destination_class``
            and ``trips``. When empty, no plot is produced.
        all_bar_data: Not used by this function; kept so existing callers
            keep working.

    Returns:
        None. The output directory is reported with ``print``.
    """
    summary_dir = os.path.join(base_dir_output, "summary_plots")
    Path(summary_dir).mkdir(parents=True, exist_ok=True)

    if all_matrix_data:
        matrix_df = pl.DataFrame(all_matrix_data)

        # === DAILY TOTALS HEATMAP ===
        _save_od_heatmap(
            _pivot_trips_by(matrix_df, 'day', 'daily_trips'),
            figsize=(20, 12),
            cbar_label='Total Daily Trips',
            title='Daily Trips by O-D Pairs Across All Days',
            xlabel='Day',
            filepath=os.path.join(summary_dir, "daily_totals_heatmap.png"),
            xtick_rotation=45,
        )

        # === HOURLY PATTERNS ===
        _save_od_heatmap(
            _pivot_trips_by(matrix_df, 'hour', 'hourly_trips'),
            figsize=(16, 10),
            cbar_label='Total Hourly Trips',
            title='Hourly Trip Patterns by O-D Pairs',
            xlabel='Hour of Day',
            filepath=os.path.join(summary_dir, "hourly_patterns_heatmap.png"),
        )

    print(f"Summary visualizations saved to: {summary_dir}")

# === MAIN EXECUTION FUNCTION ===
def run_complete_od_analysis_with_animations(df_od, IndexVodafone2UserProfile, base_dir_output_save, 
                                            str_period_id_presenze="PERIOD_ID", top_n=10):
    """Run the per-day O-D visualization step, then the summary step.

    Delegates to ``create_daily_od_visualizations_with_animations`` and hands
    the first two of its three results to ``create_summary_visualizations``.
    A progress message is printed before and after.

    Args:
        df_od: Forwarded unchanged to the daily step.
        IndexVodafone2UserProfile: Forwarded unchanged to the daily step.
        base_dir_output_save: Output directory passed to both steps.
        str_period_id_presenze: Forwarded unchanged to the daily step.
        top_n: Forwarded unchanged to the daily step.

    Returns:
        The ``(all_matrix_data, all_bar_data, metadata)`` triple produced by
        the daily step.
    """
    print("Starting comprehensive O-D visualization analysis with animations...")

    # Stage 1: per-day outputs
    daily_results = create_daily_od_visualizations_with_animations(
        df_od=df_od,
        IndexVodafone2UserProfile=IndexVodafone2UserProfile,
        base_dir_output=base_dir_output_save,
        str_period_id_presenze=str_period_id_presenze,
        top_n=top_n,
    )
    matrix_records, bar_records, run_metadata = daily_results

    # Stage 2: summaries built from what stage 1 collected
    create_summary_visualizations(base_dir_output_save, matrix_records, bar_records)

    print("Complete analysis with animations finished!")
    return matrix_records, bar_records, run_metadata

# === USAGE EXAMPLE ===
# Everything produced by this run is written beneath the configured output directory.
base_dir_output_save = os.path.join(
    config[str_dir_output],
    "complete_od_analysis_with_animations",
)

# Execute the whole pipeline; top_n=12 shows the top 12 O-D pairs in bar charts.
all_matrix_data, all_bar_data, metadata = run_complete_od_analysis_with_animations(
    df_od,
    IndexVodafone2UserProfile,
    base_dir_output_save,
    str_period_id_presenze=str_period_id_presenze,
    top_n=12,
)

# Report completion and where the results were written.
print(
    "Complete O-D analysis with animations completed successfully!",
    f"Results saved in: {base_dir_output_save}",
    sep="\n",
)