# Urban (mal)adaptation index

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
from matplotlib.lines import Line2D
import seaborn as sns
from tqdm import tqdm
from dateutil.relativedelta import relativedelta
from utils import *

import warnings
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)

matplotlib.rcParams['figure.dpi'] = 70

In [None]:
# File formats in which each figure is written by the savefig loop further down
figure_formats = ['svg', 'eps', 'pdf', 'png']

## Load data

In [None]:
# Load district shapefile (read as a module-level global by process_scenario,
# which passes it to the residence/infection district assignment helpers)
districts_shapefile = gpd.read_file('../data/processed/cbs/wijk_buurt_kaart/districts.json')

# Maximum number of experiment runs to load per scenario
n_runs = 10

# Function to process scenario data
def _daily_counts_by_group(results, day, group_col, complete_dates):
    """Sum infections per day for each value of `group_col`.

    The dates are reindexed onto `complete_dates` (dates without any record
    are filled with 0) and the result is returned as a Series indexed by
    (group value, date), sorted by index.
    """
    counts = results.groupby([group_col, day])['infection'].sum()
    counts = counts.unstack(level=0)
    counts = counts.reindex(complete_dates, fill_value=0)
    return counts.stack().swaplevel(0, 1).sort_index()


def _daily_counts_by_groups(results, day, group_cols, complete_dates):
    """Sum infections per day for each combination of `group_cols`.

    Dates are moved to the columns, reindexed onto `complete_dates` (missing
    dates filled with 0) and stacked back as the innermost index level.
    """
    counts = results.groupby([*group_cols, day])['infection'].sum()
    # The date is the last grouping key, i.e. level number len(group_cols)
    counts = counts.unstack(level=len(group_cols))
    counts = counts.reindex(columns=complete_dates, fill_value=0)
    return counts.stack()


def _concat_runs(per_run, experiment_names):
    """Place the per-experiment results side by side, one column per experiment."""
    combined = pd.concat(list(per_run.values()), axis=1)
    combined.columns = experiment_names
    return combined


def process_scenario(scenario_name, n_runs):
    """Process infection data for a given scenario.

    Parameters
    ----------
    scenario_name : str
        Name of the sub-directory of ``../results`` that is searched for
        ``infectedPersons.csv`` files.
    n_runs : int
        Maximum number of experiment files to load.

    Returns
    -------
    dict
        ``'all_infections'`` maps each experiment name to its enriched
        GeoDataFrame; the remaining keys hold DataFrames of daily infection
        counts with one column per experiment, broken down by (nothing),
        resident/visitor, district, district x resident/visitor, and
        district x resident/visitor x location type.

    Raises
    ------
    ValueError
        If no experiment files are available for the scenario.
    """
    # List all files corresponding to experimental setup
    file_paths = find_files(f'../results/{scenario_name}', 'infectedPersons.csv')
    print(f"\n{scenario_name.upper()} - Number of experiments: {len(file_paths)}")

    # Keep at most n_runs experiments; the experiment name is the parent folder
    file_paths = file_paths[:n_runs]
    experiment_names = [os.path.basename(os.path.dirname(file_path))
                        for file_path in file_paths]

    if not experiment_names:
        # Fail with a clear message instead of "min() arg is an empty sequence"
        raise ValueError(
            f"No 'infectedPersons.csv' files to load for scenario '{scenario_name}'")

    # Each file is read and enriched exactly once; the enriched frames are
    # returned to the caller anyway, so keeping them costs no extra memory.
    all_infections = {}
    start_end_dates = {}

    print(f'Loading {scenario_name} experiment results...')
    for experiment_name, file_path in tqdm(zip(experiment_names, file_paths),
                                           total=len(experiment_names)):
        # Load experiment results
        results = pd.read_csv(file_path)

        # Convert to GeoDataFrame with infection location as geometry
        results = gpd.GeoDataFrame(results, geometry=gpd.points_from_xy(
            results['infectLocationLon'], results['infectLocationLat']))

        # Add date_time column: simulation hours are read as hours since the
        # Unix epoch and shifted forward by 50 years and 2 months.
        # NOTE(review): `datetime` and `time` are not imported explicitly in
        # this file; presumably they arrive via `from utils import *` - confirm.
        results['date_time'] = results['Time(h)'].apply(lambda x: relativedelta(
            years=50, months=2) + datetime.datetime(*time.gmtime(x * 3600)[:6]))

        # Record the date span before the district helpers touch the frame
        start_end_dates[experiment_name] = (
            results['date_time'].min().date(),
            results['date_time'].max().date(),
        )

        # Add residence and infection districts to the data
        results = results.pipe(assign_residence_district, districts_shapefile)\
                         .pipe(assign_infection_district, districts_shapefile)

        # Add dummy variable indicating infection
        results['infection'] = 1

        # Infections inside the district of residence are 'Resident',
        # everything else is 'Visitor'
        results['infection_type'] = 'Visitor'
        mask = results['infection_district_name'] == results['residence_district_name']
        results.loc[mask, 'infection_type'] = 'Resident'

        all_infections[experiment_name] = results

    # Find the smallest start date and largest end date over all experiments
    min_start = min(start for start, _ in start_end_dates.values())
    max_end = max(end for _, end in start_end_dates.values())
    print(f"Smallest start date: {min_start}")
    print(f"Largest end date: {max_end}")

    # Common daily date axis shared by every experiment
    complete_dates = pd.date_range(start=min_start, end=max_end, freq='d')

    # Store aggregated infections in dictionaries keyed by experiment name
    infections_by_day = {}
    district_infections_by_day = {}
    resident_visitor_infections_by_day = {}
    district_resident_visitor_infections_by_day = {}
    district_resident_visitor_infections_by_day_location = {}

    print(f'Processing {scenario_name} infections...')
    for experiment_name, results in tqdm(all_infections.items(),
                                         total=len(all_infections)):
        day = results['date_time'].dt.date

        # Infections by day
        daily = results.groupby(day)['infection'].sum()
        infections_by_day[experiment_name] = daily.reindex(complete_dates, fill_value=0)

        # Infections by day, residents vs. visitors
        resident_visitor_infections_by_day[experiment_name] = _daily_counts_by_group(
            results, day, 'infection_type', complete_dates)

        # Infections by day per infection district
        district_infections_by_day[experiment_name] = _daily_counts_by_group(
            results, day, 'infection_district_name', complete_dates)

        # Infections by day per district and infection type
        district_resident_visitor_infections_by_day[experiment_name] = _daily_counts_by_groups(
            results, day, ['infection_district_name', 'infection_type'], complete_dates)

        # Infections by day per district, infection type and location type
        district_resident_visitor_infections_by_day_location[experiment_name] = _daily_counts_by_groups(
            results, day,
            ['infection_district_name', 'infection_type', 'infectLocationType'],
            complete_dates)

    return {
        'all_infections': all_infections,
        'infections_by_day': _concat_runs(infections_by_day, experiment_names),
        'resident_visitor_infections_by_day': _concat_runs(
            resident_visitor_infections_by_day, experiment_names),
        'district_infections_by_day': _concat_runs(
            district_infections_by_day, experiment_names),
        'district_resident_visitor_infections_by_day': _concat_runs(
            district_resident_visitor_infections_by_day, experiment_names),
        'district_resident_visitor_infections_by_day_location': _concat_runs(
            district_resident_visitor_infections_by_day_location, experiment_names),
    }

# Run the pipeline once per scenario; the 'lockdown' results directory
# provides the maladaptation case
baseline_results = process_scenario('baseline', n_runs)
maladaptation_results = process_scenario('lockdown', n_runs)

# Keys of the dictionary returned by process_scenario, in unpacking order
_result_keys = (
    'all_infections',
    'infections_by_day',
    'resident_visitor_infections_by_day',
    'district_infections_by_day',
    'district_resident_visitor_infections_by_day',
    'district_resident_visitor_infections_by_day_location',
)

# Copies of the baseline results
(
    all_infections_baseline,
    infections_by_day_baseline,
    resident_visitor_infections_by_day_baseline,
    district_infections_by_day_baseline,
    district_resident_visitor_infections_by_day_baseline,
    district_resident_visitor_infections_by_day_location_baseline,
) = (baseline_results[key].copy() for key in _result_keys)

# Copies of the maladaptation results
(
    all_infections_maladaptation,
    infections_by_day_maladaptation,
    resident_visitor_infections_by_day_maladaptation,
    district_infections_by_day_maladaptation,
    district_resident_visitor_infections_by_day_maladaptation,
    district_resident_visitor_infections_by_day_location_maladaptation,
) = (maladaptation_results[key].copy() for key in _result_keys)

In [None]:
# Line colors used by plot_rank: one for the district labelled "Central",
# the other for every other focus district
central_color = '#FF5C00'
outer_color = '#00CAB1'

def district_daily_totals(df: pd.DataFrame,
                           start: str | None = None, end: str | None = None,
                           agg: str = "mean") -> pd.DataFrame:
    """
    Returns date×district totals aggregated over runs.
    agg: 'mean' (default) or 'median'
    """
    df.index = df.index.set_levels(pd.to_datetime(df.index.levels[1]), level=1)
    df = df.sort_index()

    # Optional window
    if start or end:
        df = df.loc[pd.IndexSlice[:, slice(pd.to_datetime(start) if start else None,
                                             pd.to_datetime(end) if end else None)], :]

    # Aggregate over runs per (district,date)
    if agg == "mean":
        totals = df.mean(axis=1, skipna=True)
    elif agg == "median":
        totals = df.median(axis=1, skipna=True)
    else:
        raise ValueError("agg must be 'mean', 'median', or 'sum'.")

    totals = totals.unstack(level=0).sort_index()  # date × district
    return totals.fillna(0.0)

def shares_from_totals(totals: pd.DataFrame) -> pd.DataFrame:
    """Convert each row of totals into shares of that row's sum.

    Rows whose sum is zero yield shares of 0.0 rather than a division error.
    """
    daily_sum = totals.sum(axis=1)
    # A NaN denominator on zero-sum rows makes the division yield NaN,
    # which is then turned into 0.0
    safe_denominator = daily_sum.mask(daily_sum == 0)
    return totals.div(safe_denominator, axis=0).fillna(0.0)

# ---------- 2) Ranks and rank changes over time ----------
def rank_change_over_time(df_baseline: pd.DataFrame,
                          df_policy: pd.DataFrame,
                          use_shares: bool = False,
                          start: str | None = None,
                          end: str | None = None):
    """
    Compare daily district rankings between a baseline and a policy scenario.

    Both inputs are reduced with district_daily_totals (optionally limited to
    the start/end window) and restricted to the dates and districts they have
    in common. With use_shares=True the totals are row-normalized with
    shares_from_totals before ranking.

    Returns a tuple:
      dates, values_b (date×district), values_p, ranks_b, ranks_p,
      delta_rank (date×district), delta_rank_cum
    where delta_rank = ranks_b - ranks_p and delta_rank_cum is the expanding
    (running) mean of delta_rank over time.
    """
    baseline_totals = district_daily_totals(df_baseline, start, end)
    policy_totals = district_daily_totals(df_policy, start, end)

    # Inner-join first on dates (axis 0), then on districts (axis 1)
    for axis in (0, 1):
        baseline_totals, policy_totals = baseline_totals.align(
            policy_totals, join="inner", axis=axis)

    if use_shares:
        values_b = shares_from_totals(baseline_totals)
        values_p = shares_from_totals(policy_totals)
    else:
        values_b, values_p = baseline_totals, policy_totals

    # Rank 1 is the largest value of the day; ties all receive the best rank
    rank_options = dict(axis=1, ascending=False, method='min')
    ranks_b = values_b.rank(**rank_options)
    ranks_p = values_p.rank(**rank_options)

    # Positive: the district sits closer to the top under the policy
    delta_rank = ranks_b - ranks_p
    running_mean = delta_rank.expanding(min_periods=1).mean()

    return (values_b.index, values_b, values_p,
            ranks_b, ranks_p, delta_rank, running_mean)

def plot_rank(
    dates, delta_rank, delta_rank_cum,
    focus_districts: list[str],
    district_labels: dict[str, str],
):
    """
    Plot the rank change (Δrank = r_baseline − r_policy) of selected districts.

    For every district in focus_districts a dashed line shows the daily
    values from delta_rank and a solid line the values from delta_rank_cum.
    The end of the solid line is annotated "Adaptation" for the district
    labelled "Central" and "Maladaptation" for any other district.
    district_labels maps district names to display labels; unmapped names
    are shown as-is. The figure is created but neither returned nor saved.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6, 4))

    for district in focus_districts:
        label = district_labels.get(district, district)
        is_central = label == "Central"
        color = central_color if is_central else outer_color
        cumulative = delta_rank_cum[district]

        ax.plot(dates, delta_rank[district], lw=1.2, ls="--", alpha=0.9,
                color=color, label=f"{label} Δrank (daily)")
        ax.plot(dates, cumulative, lw=2.2, alpha=1.0,
                color=color, label=f"{label} Δrank (cum)")

        # Tag the last point of the cumulative line, offset slightly to the right
        ax.annotate("Adaptation" if is_central else "Maladaptation",
                    xy=(dates[-1], cumulative.iloc[-1]),
                    xytext=(10, 0), textcoords='offset points',
                    fontsize=9, color=color, va='center')

    # Zero reference line: no rank change
    ax.axhline(0, ls="--", lw=1, color="black", alpha=0.6)
    ax.set_xlabel("Simulation time (date)")
    ax.set_ylabel(r"$\Delta r$ = $r_{\mathrm{No\text{-}response}} - r_{\mathrm{Hard\ lockdown}}$")

    # One tick per week, formatted as abbreviated month and day
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %d"))
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha="right")
    ax.spines[["top", "right"]].set_visible(False)
    ax.legend(frameon=False, ncol=1)

    fig.tight_layout()

# Districts highlighted in the figure
district1 = "Wijk 28 Centrum"     # Central
district2 = "Wijk 42 Ypenburg"    # Outer residential
focus = [district1, district2]

# Display labels shown in the legend instead of the district names
district_labels = {
    district1: "Central",
    district2: "Outer residential",
}

# Daily ranks and rank changes over the full simulation period (no window)
dates, sb, sp, rb, rp, dr, drcum = rank_change_over_time(
    district_infections_by_day_baseline,
    district_infections_by_day_maladaptation,
    start=None,
    end=None,
)

plot_rank(dates, dr, drcum, focus, district_labels)

# Write the figure once per configured output format
for fmt in figure_formats:
    plt.savefig(f"../figures/fig7.{fmt}", format=fmt, dpi=300, bbox_inches='tight')

# Per-district summary, ordered by the final cumulative rank change.
# NOTE(review): Initial_rank comes from the baseline ranks and Final_rank from
# the policy ranks - confirm that mixing the two scenarios is intended.
summary_columns = {
    "Final_UAI_rank": drcum.iloc[-1],
    "Initial_rank": rb.iloc[0],
    "Final_rank": rp.iloc[-1],
}
summary_df = pd.DataFrame(summary_columns).sort_values("Final_UAI_rank", ascending=False)