In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import pandas as pd
from scipy.optimize import minimize
from scipy.integrate import odeint
from tqdm import tqdm
from scipy.sparse import csr_matrix
from joblib import Parallel, delayed
from scipy.optimize import basinhopping
import sys
from scipy.optimize import differential_evolution
from scipy.optimize import dual_annealing

In [3]:
with open("Proper London Station Borough Proportions Dict.pkl", 'rb') as f:
    props_dict = pickle.load(f)

unique_boroughs = set(value[0] for value in props_dict.values())
num_unique_boroughs = len(unique_boroughs)
print(num_unique_boroughs)

25


In [3]:
def expand_array(short_array):
    long_array = np.zeros(63001)  # Create a zero array of the desired size

    # Assign values at the correct positions
    long_array[np.arange(251) * (251 + 1)] = short_array
    return long_array



# Load dictionary
with open("Proper_London_Patch_Populations_1_5km_fixed.pkl", "rb") as file:
    pop_dict = pickle.load(file)

# Extract values in order
short_pop_vector = np.array(list(pop_dict.values()))

pop_vector = expand_array(short_pop_vector)
    
with open("London Borough List.pkl", 'rb') as f:
    borough_list = pickle.load(f)
    
with open("London Station List.pkl", "rb") as file:
    station_list = pickle.load(file)
    
with open("London Station Borough List.pkl", 'rb') as f:
    station_borough_list = pickle.load(f)
    
with open("Proper London Station Borough Proportions Dict.pkl", 'rb') as f:
    props_dict = pickle.load(f)
    
covid_df = pd.read_csv("phe_cases_london_boroughs_fixed.csv")

borough_populations = pd.read_csv("london population 1801 to 2021_cleaned.csv")

# with open("Final Commuter Matrix Buenos Aires.pkl", "rb") as file:
    # commuter_array = pickle.load(file)
################################################################################################
################################################################################################
################################################################################################
################################################################################################
    
def get_timeseries(station_name,covid_df,props_dict):
    
    current_borough = props_dict[station_name][0]
    
    # Filter data for the given area_name "Lambeth" and within the maximum date range
    filtered_df = covid_df[
        (covid_df['date'] <= "2020-05-22") & 
        (covid_df['area_name'] == current_borough)
    ]

    # Find the earliest date where total_cases is nonzero
    # earliest_nonzero_date = filtered_df.loc[filtered_df['total_cases'] > 0, 'date'].min()
    earliest_nonzero_date = "2020-05-07"

    # Filter again using the adjusted start date
    filtered_df = filtered_df[
        (filtered_df['date'] >= earliest_nonzero_date) & 
        (filtered_df['date'] <= "2020-05-07")
    ]
    
    borough_timeseries = np.array(list(filtered_df['total_cases']))
    
    station_timeseries = props_dict[station_name][1]*borough_timeseries
    

    
    
    return station_timeseries



def get_timeseries_with_five(station_name, covid_df, props_dict):
    current_borough = props_dict[station_name][0]

    # Define the start and end dates
    end_date = "2020-03-14"
    earliest_nonzero_date = "2020-03-07"
    five_days_before = "2020-03-04"  # 5 days before the earliest nonzero date

    # Filter data for the given borough within the maximum date range
    filtered_df = covid_df[
        (covid_df['date'] >= earliest_nonzero_date) & 
        (covid_df['date'] <= end_date) & 
        (covid_df['area_name'] == current_borough)
    ]
    
    borough_timeseries = np.array(filtered_df['total_cases'])
    station_timeseries = props_dict[station_name][1] * borough_timeseries

    # Get new cases in the 5 days before the earliest nonzero date
    previous_df = covid_df[
        (covid_df['date'] >= five_days_before) & 
        (covid_df['date'] < earliest_nonzero_date) & 
        (covid_df['area_name'] == current_borough)
    ]
    
    total_new_cases_prev_5_days = props_dict[station_name][1]*previous_df['new_cases'].sum()

    return station_timeseries, total_new_cases_prev_5_days


################################################################################################
################################################################################################
################################################################################################
################################################################################################

all_station_timeseries = []
all_new_cases_prev_5_days = []
for i in range(len(station_list)):
    timeseries = get_timeseries_with_five(station_list[i],covid_df,props_dict)
    all_station_timeseries.append(timeseries[0])
    all_new_cases_prev_5_days.append(timeseries[1])
    
    
all_station_timeseries = np.array(all_station_timeseries)
day_range = all_station_timeseries[0].shape[0]
print(day_range)

def expand_timeseries(short_timeseries,timesteps=day_range):
    
    transposed = short_timeseries.T
    
    long_timeseries = np.zeros((timesteps,63001))
    for i in range(timesteps):
        long_timeseries[i] = expand_array(transposed[i])
        
    long_timeseries = np.array(long_timeseries)
    
    return long_timeseries

working_timeseries = expand_timeseries(all_station_timeseries)
short_pop_vector = np.array(list(pop_dict.values()))
pop_vector = expand_array(short_pop_vector)
day_list = np.arange(day_range)
data = np.column_stack((day_list[:, None], working_timeseries))


current_cases = np.array(all_new_cases_prev_5_days)
cum_cases = data[0][1:]
# Define main control panel functions

# with open("London new Nj.pkl", "rb") as file:
    # Nj = pickle.load(file)
    
with open("London Home Patch Indices.pkl", "rb") as file:
    home_patches = pickle.load(file)
    
# Load files
with open("Sparse London P Matrix.pkl", "rb") as file:
    sparse_p = pickle.load(file)

p_matrix = sparse_p.toarray()
Nj = pop_vector @ p_matrix

with open("London Unique Patches.pkl", 'rb') as f:
    valid_patches = pickle.load(f)
    
with open("London Valid Patch Indices.pkl", 'rb') as f:
    valid_patch_indices = pickle.load(f)


np.set_printoptions(suppress=True)

travel_patches = []
for i in valid_patch_indices:
    if i not in home_patches:
        travel_patches.append(i)


8


In [18]:
# OLD
# Define function to process solution
def process_solution(solution,timesteps):
    # Grouping by station subpopulation
    S_sol = solution[:63001]
    I_sol = solution[63001:126002]
    I_sol_total = solution[126002:]

    return [S_sol,I_sol,I_sol_total]



def run_model_London(params):
    S0,I0,I_total0,beta0,gamma,timesteps = params
    
    beta = expand_concise_betas(home_patches,travel_patches,beta0)
    # beta = combine_betas(beta0)
    # beta = expand_detailed_beta(beta0)
    
    # Ensure no dividing by zero
    Nj[Nj == 0] = 1
    
    # Set up initial y0 vector
    y0 = np.concatenate((S0,I0,I_total0))
    
    # beta_p_matrix = (beta*p_matrix.T).T
    # beta_p_matrix = beta[:, None] * p_matrix 
    beta_p_matrix = sparse_p.multiply(beta[:, None])
    
    t = np.linspace(0,timesteps-1,num=timesteps)
    
    
    
    def sir(y,timestep):
        S = y[:63001]
        I = y[63001:126002]
        I_total = y[126002:]

        dSdt = np.zeros_like(S)
        dIdt = np.zeros_like(I)
        dI_totaldt = np.zeros_like(I_total)

        Ij = p_matrix.T @ I
        
        
        method = "vector"
        
        if method == "vector":

            # Compute Ij_div_Nj efficiently
            Ij_div_Nj = Ij / Nj  # Shape (63001,)
        
            # Vectorized computation of infection terms using sparse matrix multiplication
            infection_terms = beta_p_matrix.multiply(S[:, None])  # (63001, 63001)
            infection_terms = infection_terms @ Ij_div_Nj  # (63001,)
        
            # Update derivatives
            dSdt -= infection_terms
            dIdt += infection_terms
            

        else:
            leg_infections = np.zeros(len(valid_patch_indices))
            # Essentially, for all i, all j, dSdt[i] += -beta[j]*p_matrix[i][j]*S[i]*Ij[j]/Nj[j]
            for i in valid_patch_indices:
                for j in valid_patch_indices:
                    dSdt[i] += -beta[j]*p_matrix[i][j]*S[i]*Ij[j]/Nj[j]
                    leg_infections[j] += -beta[j]*p_matrix[i][j]*S[i]*Ij[j]/Nj[j]
            dIdt -= dSdt
    
        dI_totaldt += dIdt

        # Add the gamma terms
        dIdt -= gamma * I
        # dSdt += gamma * I

        # Concatenate results
        dx = np.concatenate((dSdt, dIdt, dI_totaldt))

        return dx
    
    y_log = np.zeros((len(t)+1,189003))
    y_log[0] = y0.copy()

    for step in range(len(t)):
        results = sir(y_log[step],step)
        y_log[step+1] = y_log[step] + results
    
    solution = process_solution(y_log.T,timesteps)
    
    return solution
    
    
def expand_timeseries(short_timeseries):
    
    transposed = short_timeseries.T
    
    long_timeseries = np.zeros((day_range,63001))
    for i in range(day_range):
        long_timeseries[i] = expand_array(transposed[i])
        
    long_timeseries = np.array(long_timeseries)
    
    return long_timeseries

def expand_array(short_array):
    long_array = np.zeros(63001)  # Create a zero array of the desired size

    # Assign values at the correct positions
    long_array[np.arange(251) * (251 + 1)] = short_array
    return long_array



def compress_timeseries(long_timeseries,timesteps=day_range):
    compressed = np.zeros((timesteps, 251))  # Adjusted to match the original short array shape
    
    for i in range(timesteps):
        compressed[i] = compress_array(long_timeseries[i])
        
    return compressed.T  # Transpose back to match original input shape

def compress_array(long_array):
    return long_array[np.arange(251) * (251 + 1)]  # Extract values from the expanded positions


def expand_betas(short_betas,long_length):
    
    long_betas = np.zeros(long_length)
    index = 0
    for i in valid_patch_indices:
        long_betas[i] = short_betas[index]
        index += 1
    
    return long_betas



In [20]:
import numpy as np
import pickle
from scipy.optimize import minimize
from scipy.sparse import csr_matrix
from scipy.optimize import differential_evolution

def expand_concise_betas(home_patches,travel_patches,concise_beta):
    long_beta = np.zeros(63001)
    for i in range(251):
        long_beta[home_patches[i]] = concise_beta[i]
    long_beta[travel_patches] = concise_beta[-1]
    
    return long_beta

# Global variable to track the best beta and error
best_beta = None
best_error = float('inf')
with open("8 Day Distributed Updated Formalized London Beta.pkl", "rb") as file:
    best_beta_yet_0 = pickle.load(file)

# best_beta_yet_0 = np.random.uniform(0.1, 0.9, size=252)
print(best_beta_yet_0.shape)
    
def combine_betas(travel_betas):
    combined_beta = np.zeros(63001)
    combined_beta[travel_patches] = travel_betas
    combined_beta[home_patches] = shortened_best_beta[:-1]
    return combined_beta

# best_beta_yet_0 = np.ones(586)*0.41933532
# best_beta_yet_0 = make_detailed_beta(best_beta_yet_0)
    
def error_function(beta, data, pop_dict):
    global best_beta, best_error
    
    short_pop_vector = np.array(list(pop_dict.values()))
    pop_vector = np.zeros(63001)
    pop_vector[np.arange(251) * (251 + 1)] = short_pop_vector

    I0 = expand_array(current_cases)
    S0 = pop_vector - I0
    I_total_0 = cum_cases
    
    gamma = 1/5

    params = [S0, I0, I_total_0, beta, gamma, int(data[-1, 0])]
    y = run_model_London(params)[2]

    I_sum_model = compress_timeseries(y.T)
    raw_real_data = compress_timeseries(data[:, 1:])  

    normalizer = compress_array(pop_vector)[:, np.newaxis]  # Ensure correct shape

    # Apply normalization to both real data and model output
    real_data = raw_real_data / normalizer
    model_data = I_sum_model / normalizer
    # real_data = raw_real_data
    # model_data = I_sum_model

    # Ensure shapes match before computing error
    if real_data.shape != model_data.shape:
        raise ValueError(f"Shape mismatch: real_data {real_data.shape} vs model_data {model_data.shape}")

    # Exclude index 143 from error calculation
    error_matrix = (model_data - real_data) ** 2
    # error_matrix[143] = 0  # Exclude index 143

    squared_error = np.sum(error_matrix)
    # squared_error = np.sum((model_data - real_data) ** 2)
    # Regularization with Elastic Net
    l1_penalty = 1e-5 * (1 + np.std(beta))
    l2_penalty = 1e-5 * (1 + np.mean(beta)**2)

    # l1_penalty = 1e-4 * np.sum(np.abs(beta - np.mean(beta)))  # L1 penalty
    # l2_penalty = 1e-4 * np.sum((beta - np.mean(beta)) ** 2)  # L2 penalty
    total_error = squared_error + l1_penalty + l2_penalty
    
    # Save best beta
    if total_error < best_error:
        best_error = total_error
        best_beta = beta.copy()
        
        with open("8 Day Distributed Updated Formalized London Beta.pkl", "wb") as f:
            pickle.dump(best_beta, f)
        print(f"New best beta found. Error: {best_error:.12f}")

    return total_error


# Optimized fitting function
def sir_simulation_fit_class(full_timeseries, pop_dict):
    working_timeseries = expand_timeseries(full_timeseries)
    
    day_list = np.arange(day_range)
    data = np.column_stack((day_list[:, None], working_timeseries))

    
    initial_guess = best_beta_yet_0.copy()
    beta_bounds = [(0.1, 0.9)] * 252

    result = minimize(error_function, initial_guess, args=(data, pop_dict), method='Powell', bounds=beta_bounds)

    print('Estimated beta:', result.x)
    print('Final minimized error:', result.fun)

    # Save final optimized beta
    with open("8 Day Distributed Updated Final Formalized London Beta.pkl", "wb") as f:
        pickle.dump(result.x, f)

    return result.x

# Run fitting
sir_simulation_fit_class(all_station_timeseries, pop_dict)


(252,)
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097781
New best beta found. Error: 0.000025097780
New best beta found. Error: 0.000025097780
New best beta found. Error: 0.000025097780
New best beta found. Error: 0.000025097780
New best beta found. Error: 0.000025097780
New best beta found. Error: 0.000025097780
New best beta found. Error: 0.000025097780
New best beta found. Error: 0.000025097780
New best beta found. Error: 0.000025097779
New best beta found. Error: 0.000025097779
New best beta found. Error: 0.000025097779
New best beta found. Error: 0.000025097779
New 

New best beta found. Error: 0.000025097733
New best beta found. Error: 0.000025097733
New best beta found. Error: 0.000025097733
New best beta found. Error: 0.000025097733
New best beta found. Error: 0.000025097733
New best beta found. Error: 0.000025097732
New best beta found. Error: 0.000025097732
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097722
New best beta found. Error: 0.000025097721
New best beta found. Error: 0.000025097721
New best be

New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097704
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best beta found. Error: 0.000025097703
New best be

array([0.55142297, 0.59011634, 0.52701153, 0.57979314, 0.58705295,
       0.58795187, 0.55289585, 0.58169712, 0.60397374, 0.6175986 ,
       0.5845326 , 0.60897624, 0.54132858, 0.5233441 , 0.58667189,
       0.60909123, 0.54070521, 0.59737073, 0.65862766, 0.55131999,
       0.58862502, 0.51378869, 0.59803472, 0.63170134, 0.54802019,
       0.57854905, 0.53028056, 0.53674065, 0.64855697, 0.52310398,
       0.54416588, 0.57914195, 0.58649361, 0.65394254, 0.56340519,
       0.5361923 , 0.58558861, 0.57922775, 0.59260442, 0.59938963,
       0.58637112, 0.52274305, 0.54685816, 0.62526365, 0.61591957,
       0.62484595, 0.54905127, 0.54920012, 0.60833407, 0.59579206,
       0.52865189, 0.5397532 , 0.54066299, 0.58317697, 0.55679684,
       0.54422591, 0.67232731, 0.58716305, 0.55254169, 0.54525009,
       0.62126248, 0.53225115, 0.5456023 , 0.61081496, 0.60789808,
       0.63471462, 0.51444755, 0.58525974, 0.57276924, 0.56629218,
       0.52292895, 0.56116253, 0.54678695, 0.58209732, 0.58448