In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Reference timestamp of the simulation; the `time_in_minutes` offsets of the
# simulation output are added to it to obtain calendar dates.
start_date = "2020-03-02"

# Maps the manhole IDs used in the simulation output to the short location
# labels used in the analysis. The trailing comments give the site name.
manhole_names = {
    "MUC560": "N_Ua",    # Wintersteinstr. (Hasenbergl)
    "MUC348": "N_Ub",    # Schmidbartlanger
    "MUC012": "N_Uc",    # Am Langwieder Bach
    "MUC112": "C_U",     # Botanischer Garten / Menzinger Str.

    "MUC060": "S_Ua",    # Gräfelfinger Str. / Waldwiesenstr.
    "MUC612": "S_Ub",    # new measuring site 1
    "MUC616": "S_M1",    # new measuring site 4
    "MUC600": "S_M2",    # Leopoldstr.

    "MUC608": "S_M3",    # Gyßlinger Becken
    "MUC362": "S_M4",    # Schenkendorfstr.
    "MUC614": "E_U",     # new measuring site 2
    "MUC494": "E_M",     # Savitstr.

    "MUC562": "N_D",     # WWT Gut Marienhof
    "MUC434": "SCE_D1",  # inflow Gut Großlappen
    "MUC596": "SCE_D2",  # WWT Gut Großlappen
    "MUC586": "Overall",
}

In [None]:
import scipy.stats as stats

In [None]:
# Scenario whose simulation output is used to fit the scaling factor.
# Together these select the file
#   preprocessed_data/<experiment_series>/substances/<decay_setting>_<precipitation_setting>_output.csv
decay_setting = "decay"            # the scaling cell below also uses "no_decay"
precipitation_setting = "Rain"     # the scaling cell below also uses "noRain"
experiment_series = "pop8"

In [None]:
# Load the simulated substance output of the selected scenario together with
# the measured concentrations it is calibrated against.
result_path = f"preprocessed_data/{experiment_series}"

df_simulations = pd.read_csv(f"{result_path}/substances/{decay_setting}_{precipitation_setting}_output.csv")
# Only the COV19 rows are used for the fit. The explicit .copy() makes the
# filtered frame independent of the one returned by read_csv, so the column
# assignments below do not write into a slice (SettingWithCopyWarning).
df_simulations = df_simulations.loc[df_simulations["variable"] == "COV19"].copy()
# Translate manhole IDs into location labels; IDs without an entry in
# manhole_names are kept unchanged.
df_simulations["Location"] = df_simulations["manhole"].map(lambda manhole: manhole_names.get(manhole, manhole))
# time_in_minutes is an offset relative to start_date.
df_simulations["Date"] = pd.to_datetime(start_date) + pd.to_timedelta(df_simulations["time_in_minutes"], unit="min")

# The measurement file sits one directory above the experiment series folder.
df_measurements = pd.read_csv(f"{result_path}/../concentration_measurements.csv")

# Disabled: would restrict the simulations to locations that have measurements.
# df_simulations = df_simulations.loc[df_simulations["Location"].isin(df_measurements["Location"].unique())]

In [None]:
# Add a parsed datetime copy of the (string) Date column, used later as the
# merge key, and rename "Value" to lower case so that it shares its name with
# the simulation column and receives the "_meas" suffix on merge.
df_measurements = (
    df_measurements
    .assign(sampling_timepoint=pd.to_datetime(df_measurements["Date"]))
    .rename(columns={"Value": "value"})
)

In [None]:
# Manual correction according to feedback provided by Jasmin: on 2020-04-28
# every site except Savitstr. is set to 37.5.
# NOTE(review): 37.5 is half of the 75 cut-off applied in the next cell, so
# these rows are dropped there - presumably a "below detection limit"
# placeholder; confirm.
is_corrected_day = df_measurements["Date"] == "2020-04-28"
is_not_savitstr = df_measurements["Location_Clear"] != "Savitstr."
df_measurements.loc[is_corrected_day & is_not_savitstr, "value"] = 37.5

In [None]:
# Keep only measurements strictly above 75; rows with a missing value are
# dropped as well because the comparison is False for NaN.
above_threshold = df_measurements["value"].gt(75)
df_measurements = df_measurements[above_threshold]

In [None]:
# Display the retained measurements ordered by Date. The result is not
# assigned, so df_measurements itself keeps its original row order.
df_measurements.sort_values(by="Date")

In [None]:
# Keep only simulation rows whose timestamp falls in the hours 9, 10 or 11
# (09:00-11:59) - presumably the window in which samples were taken; confirm.
# The hour is held in a local Series: assigning `df_simulations.Hour = ...`
# does not create a column in pandas (it only sets an attribute and warns).
simulation_hour = df_simulations["Date"].dt.hour
# .copy() so that the Date assignment below does not write into a slice.
df_simulations = df_simulations.loc[simulation_hour.between(9, 11)].copy()
# Truncate the timestamps to midnight so they can be matched against the
# measurement dates.
# NOTE: this cell is not idempotent - running it a second time filters on
# hour 0 and removes every row.
df_simulations["Date"] = df_simulations["Date"].dt.normalize()

In [None]:
# Inspect the simulation frame after it was restricted to the 09:00-11:59 window.
df_simulations

In [None]:
# Pair simulated values with measurements taken on the same calendar day.
# Columns present in both frames get the suffixes "_sim" / "_meas".
df_res = pd.merge(df_simulations, df_measurements, how="left", left_on=["Date"], right_on=["sampling_timepoint"], suffixes=("_sim", "_meas"))
# The join key is the date only, so every simulated location is paired with
# every location measured that day. Keep only pairs that refer to the same
# location; otherwise the scaling factor is fitted on cross-location pairs.
# Filtering after the merge (rather than adding Location to the join keys)
# keeps both Location_sim and Location_meas available for later cells.
# Assumes both frames use the same location labels - verify against the data.
df_res = df_res.loc[df_res["Location_sim"] == df_res["Location_meas"]]

In [None]:
# Drop simulation rows without a matching measurement (the left merge leaves
# value_meas as NaN for them) and keep only the columns needed for the fit.
has_measurement = df_res["value_meas"].notna()
fit_columns = ["Location_sim", "Location_meas", "simulation_id", "time_in_minutes", "value_meas", "value_sim"]
df_res = df_res.loc[has_measurement, fit_columns]

In [None]:
# Remove pairs whose simulated value is exactly zero: the likelihood below
# takes log(sim), which is not finite at zero.
nonzero_simulation = df_res["value_sim"].ne(0)
df_res = df_res[nonzero_simulation]

In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# Plain NumPy arrays of the paired values; element i of `meas` and `sim`
# come from the same row of df_res.
meas = df_res["value_meas"].to_numpy()
sim = df_res["value_sim"].to_numpy()

def neg_log_likelihood(params, meas, sim):
    """
    Negative log-likelihood (up to an additive constant) of the model

        meas_i ~ LogNormal(log(k * sim_i), sigma^2)

    Parameters
    ----------
    params : sequence of two floats
        ``[ln_k, ln_sigma]`` - both parameters are passed in log space, so
        k and sigma are positive for any real input.
    meas, sim : array-like
        Paired measured and simulated values; both must be strictly
        positive because their logarithm is taken.

    Returns
    -------
    float
        ``0.5 * sum((resid / sigma)**2 + 2 * ln_sigma)`` with
        ``resid = log(meas) - (ln_k + log(sim))``.
    """
    log_k, log_sigma = params
    # Residuals in log space. The -ln(meas) term of the log-normal density is
    # left out because it depends on neither k nor sigma.
    log_residuals = np.log(meas) - (log_k + np.log(sim))
    standardised_sq = (log_residuals / np.exp(log_sigma)) ** 2
    return 0.5 * np.sum(standardised_sq + 2 * log_sigma)

# Starting point: the closed-form maximum-likelihood estimates of the model,
# i.e. the mean of log(meas / sim) for ln_k and the log of its population
# standard deviation (ddof=0) for ln_sigma. (Previously ln_k started at a
# hard-coded 1.0 although the comment announced the log-ratio estimate.)
log_ratio = np.log(meas / sim)
if log_ratio.size < 2:
    # With no pairs the estimates are NaN, with a single pair the standard
    # deviation is 0 and ln_sigma becomes -inf; both would end up as garbage
    # in the scaled output files.
    raise ValueError(
        f"Need at least two measurement/simulation pairs to fit the scaling factor, got {log_ratio.size}."
    )
initial_ln_k = log_ratio.mean()
initial_ln_sigma = np.log(log_ratio.std(ddof=0))
x0 = np.array([initial_ln_k, initial_ln_sigma])

# Both parameters are optimised in log space, so k > 0 and sigma > 0 hold by
# construction and neither parameter needs an actual bound.
res = minimize(neg_log_likelihood, x0,
               args=(meas, sim),
               bounds=[(None, None), (None, None)])
if not res.success:
    # Do not continue with (and write to disk) a factor from a failed fit.
    raise RuntimeError(f"Scaling-factor fit did not converge: {res.message}")

ln_k_opt, ln_sigma_opt = res.x
k_opt = np.exp(ln_k_opt)
sigma_opt = np.exp(ln_sigma_opt)

print(f"  Estimated scaling k = {k_opt:.6f}")
print(f"Estimated log‐space σ = {sigma_opt:.6f}")


## Apply scaling

In [None]:
def _write_scaled_output(result_path, setting, factor):
    """Scale one simulation output file and store it next to the original.

    Reads ``{result_path}/substances/{setting}_output.csv``, multiplies its
    ``value`` column by ``factor`` and writes the result to
    ``{result_path}/substances/{setting}_output_scaled.csv`` (without index).

    Returns the scaled DataFrame.
    """
    scaled = pd.read_csv(f"{result_path}/substances/{setting}_output.csv")
    # NOTE(review): every row is scaled, whatever its `variable`, although
    # k_opt was fitted on the COV19 rows only - confirm this is intended.
    scaled["value"] = scaled["value"] * factor
    scaled.to_csv(f"{result_path}/substances/{setting}_output_scaled.csv", index=False)
    return scaled


# Apply the fitted factor to all three scenarios of the pop8 series ...
result_path = "preprocessed_data/pop8"
for setting in ["decay_Rain", "no_decay_noRain", "no_decay_Rain"]:
    df_simulations = _write_scaled_output(result_path, setting, k_opt)

# ... and to the decay_Rain scenario of the pop8_local series.
# result_path, setting and df_simulations are assigned at cell level exactly
# as before, so later cells see the same state.
result_path = "preprocessed_data/pop8_local"
setting = "decay_Rain"
df_simulations = _write_scaled_output(result_path, setting, k_opt)

In [None]:

# Apply the fitted factor k_opt to the decay_Rain scenario of the
# pop8_local57 series and store the result next to the unscaled file.
result_path = "preprocessed_data/pop8_local57"
setting = "decay_Rain"
df_simulations = (
    pd.read_csv(f"{result_path}/substances/{setting}_output.csv")
    .assign(value=lambda frame: frame["value"] * k_opt)
)
df_simulations.to_csv(f"{result_path}/substances/{setting}_output_scaled.csv", index=False)