# Dissertation Code - B151410

## Part 2 - Finding Transition Probabilities

This section of the code derives the transition probabilities for the microsimulation model from the synthetic data generated in the previous section. As with the previous section, the code is published on GitHub and can be found at https://github.com/rohankumarPfE/B151410_Dissertation

This analysis is summarized and critically appraised in more detail in Section 4.2 - 'Spatial Microsimulation Model'. The results are reported in Section 6.2 - 'Microsimulation'.

In [243]:
#importing relevant packages
import pandas as pd
import numpy as np

# Load the synthetic dataset that was generated in the previous part of the analysis.
# NOTE(review): this is an absolute, machine-specific Windows path; it has to be
# edited before the notebook can run on another machine.
data = pd.read_csv("C:\\Users\\s1969938\\Downloads\\Synthetic Data.csv")

In [244]:
# initial parameters: the state labels, how many there are, and the number of rows
states = set(('susceptible', 'exposed', 'infected', 'recovered'))
n_states = len(states)
n_individuals = data.shape[0]

# making each column into a NumPy array that can be called later - easier to deal with
def dataframe_to_array(data):
    """Return a dict that maps every column name of *data* to a NumPy array of that column."""
    return {column: np.array(data[column]) for column in data.columns}
# Snapshot every column of the DataFrame as a NumPy array.
# NOTE(review): this runs before the retyping loop further down, so these
# arrays hold the column values as they were read from the CSV.
data_dict = dataframe_to_array(data)

# Columns that contain both string and numeric values; the retyping loop
# below coerces each of them to integers with convert_to_numeric().
columns_to_retype = ['HrWkUS', 'LeisNum1', 'LeisNum2', 'LeisNum5', 'LeisNum6', 'LeisNum7', 'LeisNum8']

def convert_to_numeric(value):
    """Coerce *value* to an ``int``, mapping anything that cannot be converted to 0.

    Args:
        value: A single cell value (string, number, ``None`` ...).

    Returns:
        ``int(value)`` when that conversion succeeds, otherwise ``0``.
    """
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN; TypeError: None and other
        # non-numeric objects; OverflowError: +/- infinity.
        return 0
    
# Replace each mixed-type column of the DataFrame with its integer-coerced version.
for column_name in columns_to_retype:
    retyped_column = data[column_name].apply(convert_to_numeric)
    data[column_name] = retyped_column

In [245]:
# Baseline rate of COVID-19 for an individual with no extra risk factors;
# the behavioural multipliers computed below are scaled by this value.
covid_rate = 0.01

# creating behavioural probability function by processing the dataframe
def prob_multiplier_work(data_dict):
    """Return the combined work-pattern multiplier for every individual.

    Four per-person factors are multiplied together, in this order:

    * ``Wrking`` - 0 for "No" or "Item not applicable", otherwise 1.
    * ``Stat``   - 0.5 for "Self-employed", otherwise 1.
    * ``dsic``   - 1.7 / 1.6 / 1.6 for the three sectors listed below, otherwise 1.
    * ``HrWkUS`` - ``abs(hours - 40) / 40``, or 1 at exactly 40 hours.

    These multipliers (which can also be interpreted as odds ratios) are
    sourced from literature and are listed in Appendix A.

    Args:
        data_dict: Mapping from column name to a sequence of values with the
            keys ``Wrking``, ``Stat``, ``dsic`` and ``HrWkUS``. A dict of
            arrays or a DataFrame both work. The columns are read from this
            argument by name rather than from a module-level DataFrame by
            position.

    Returns:
        A list with one multiplier per individual, in input order.

    Raises:
        KeyError: If one of the four columns is missing.
    """

    def hours_worked(raw_value):
        # Same rule the file uses when retyping mixed columns: values that
        # cannot be read as an integer count as 0 hours. Doing it here means
        # the function also accepts the raw, not-yet-retyped column.
        try:
            return int(raw_value)
        except (ValueError, TypeError, OverflowError):
            return 0

    not_working = ("No", "Item not applicable")
    sector_multipliers = {
        "Public admin, education and health": 1.7,
        "Distribution, hotels and restaurants": 1.6,
        "Other services": 1.6,
    }

    total_work_multiplier = []
    rows = zip(
        data_dict["Wrking"],
        data_dict["Stat"],
        data_dict["dsic"],
        data_dict["HrWkUS"],
    )
    for wrking, stat, dsic, raw_hours in rows:
        wrking_multiplier = 0 if wrking in not_working else 1
        stat_multiplier = 0.5 if stat == "Self-employed" else 1
        dsic_multiplier = sector_multipliers.get(dsic, 1)

        # NOTE(review): this factor is 1 at exactly 40 hours but close to 0
        # at 39 or 41 hours; the formula is kept as published - confirm that
        # the discontinuity is intended.
        hours = hours_worked(raw_hours)
        hours_multiplier = 1 if hours == 40 else abs(hours - 40) / 40

        total_work_multiplier.append(
            wrking_multiplier * stat_multiplier * dsic_multiplier * hours_multiplier
        )

    return total_work_multiplier

# second probability multiplier function - this one refers to household characteristics
def prob_multiplier_household(data_dict):
    """Return the household-size multiplier for every individual.

    Args:
        data_dict: Mapping from column name to a sequence of values; only
            the ``DVHsize`` entry is read. The column is taken from this
            argument rather than from a module-level DataFrame.

    Returns:
        A list with 1 where ``DVHsize`` equals 1 and 0.88 otherwise, in
        input order.

    Raises:
        KeyError: If ``DVHsize`` is missing.
    """
    return [1 if size == 1 else 0.88 for size in data_dict['DVHsize']]

# third probability multiplier function - this one refers to leisure behavior patterns
def prob_multiplier_leisure(data_dict):
    """Return the combined leisure-activity multiplier for every individual.

    For each of the six activity columns the factor is 1 when the count is 0.
    Otherwise it is ``1.09 * count / 4`` for ``LeisNum1``, ``LeisNum2``,
    ``LeisNum5`` and ``LeisNum6``, ``1.28 * count`` for ``LeisNum7`` and
    ``1.27 * count`` for ``LeisNum8``. The six factors are multiplied together.

    Args:
        data_dict: Mapping from column name to a sequence of values with the
            six ``LeisNum*`` keys listed above. The columns are read from this
            argument by name rather than from a module-level DataFrame.

    Returns:
        A list with one multiplier per individual, in input order.

    Raises:
        KeyError: If one of the six columns is missing.
    """

    def visit_count(raw_value):
        # Same rule the file uses when retyping mixed columns: values that
        # cannot be read as an integer count as 0.
        try:
            return int(raw_value)
        except (ValueError, TypeError, OverflowError):
            return 0

    # (column, weight, divisor), in the order the factors are multiplied:
    # cinema, theatre, cultural, sport, eating, entertaining.
    activities = (
        ("LeisNum1", 1.09, 4),
        ("LeisNum2", 1.09, 4),
        ("LeisNum5", 1.09, 4),
        ("LeisNum6", 1.09, 4),
        ("LeisNum7", 1.28, 1),
        ("LeisNum8", 1.27, 1),
    )
    columns = [data_dict[name] for name, _, _ in activities]

    total_leisure = []
    for row in zip(*columns):
        combined = 1
        for raw_value, (_, weight, divisor) in zip(row, activities):
            count = visit_count(raw_value)
            # NOTE(review): a count of 0 gives 1 while a count of 1 gives
            # 1.09 / 4 for the first four activities; formula kept as
            # published - confirm that this is intended.
            if count != 0:
                combined *= weight * count / divisor
        total_leisure.append(combined)

    return total_leisure

# final behavioural multiplier - physical activities
def prob_multiplier_physical(data_dict):
    """Return the combined physical-activity multiplier for every individual.

    The result is the product of two factors: 1.06 when ``KeepFit`` is "Yes"
    (otherwise 1) and 1.36 when ``TeamGame`` is "Yes" (otherwise 1).

    These multipliers (which can also be interpreted as odds ratios) are
    sourced from literature and are listed in Appendix A.

    Args:
        data_dict: Mapping from column name to a sequence of values with the
            keys ``KeepFit`` and ``TeamGame``. The columns are read from this
            argument by name rather than from a module-level DataFrame.

    Returns:
        A list with one multiplier per individual, in input order.

    Raises:
        KeyError: If one of the two columns is missing.
    """
    return [
        (1.06 if keep_fit == "Yes" else 1) * (1.36 if team_game == "Yes" else 1)
        for keep_fit, team_game in zip(data_dict["KeepFit"], data_dict["TeamGame"])
    ]

# Evaluate the four behavioural multipliers as NumPy arrays, one entry per individual.
work = np.array(prob_multiplier_work(data_dict))
household = np.array(prob_multiplier_household(data_dict))
leisure = np.array(prob_multiplier_leisure(data_dict))
physical = np.array(prob_multiplier_physical(data_dict))

# Per-individual COVID-19 likelihood: the four multipliers are summed and then
# scaled by the baseline rate. Iterating over the arrays themselves (instead of
# a hard-coded range(0, 1000)) covers every individual whatever the dataset size.
covid_likelihood = [
    (work_m + household_m + leisure_m + physical_m) * covid_rate
    for work_m, household_m, leisure_m, physical_m in zip(work, household, leisure, physical)
]

All of the previous multipliers relate to behavioural characteristics from the synthetic version of the UK Time Use Survey, which means that they only affect the incidence of COVID-19, not its severity or mortality. The next block of code considers the impact of pre-existing health issues on the COVID-19 experience, i.e. whether patients are symptomatic or asymptomatic, and whether they will recover. The remaining work uses a baseline of 40.50% of patients having asymptomatic COVID-19; this figure comes directly from the existing medical literature.


In [246]:
# Baseline percentage of COVID-19 patients that are asymptomatic (40.50%).
asymptomatic_rate = 40.5
# Baseline mortality rate.
# NOTE(review): presumably a percentage like asymptomatic_rate - confirm the units.
mortality_rate = 1

# Multiplier for each pre-existing condition. initial_multi() looks these up
# by DataFrame column name, so the keys must match the column names exactly.
comorbidities = {
    'Cancer': 1.8,
    'Chronic kidney disease': 2,
    'Chronic liver disease': 2.1,
    'Chronic lung disease': 4.38,
    'Cystic fibrosis': 2.1,
    'Dementia': 2.1,
    'Diabetes': 2.34,
    'Disability': 2.1,
    'Heart conditions': 2.1,
    'HIV': 2.1,
    'Mental health': 1.5,
    'Obesity': 1.5,
    'Pregnancy': 1.2,
    'Sickle cell disease': 2.1,
    'Smoking': 1.98,
    'Solid organ or blood stem cell transplant': 2.1,
    'Stroke': 2.1,
    'Substance use disorder': 2.1
}

def initial_multi(data, comorbidities):
    """List the comorbidity multiplier of every condition flagged for every row.

    Args:
        data: DataFrame whose columns from position 16 onwards are scanned;
            a cell equal to 1 marks the condition as present.
        comorbidities: Mapping from column name to multiplier.

    Returns:
        A list of ``[row index, multiplier]`` pairs, one per flagged
        condition, in row order and then column order. A row with several
        conditions appears several times; a row with none does not appear.
    """
    # Only columns with a known multiplier can contribute. Looking the others
    # up would put None into the result and break the later multiplication.
    condition_columns = [
        column for column in data.columns[16:] if column in comorbidities
    ]

    multiplier = []
    for index, row in data.iterrows():
        for column in condition_columns:
            if row[column] == 1:
                multiplier.append([index, comorbidities[column]])

    return multiplier

def duplicates(data, multiplier):
    """Collapse repeated row indices by multiplying their multipliers together.

    Args:
        data: Unused; kept so that existing calls keep working. The earlier
            pass over this argument iterated the DataFrame's column names and
            indexed their characters, which never matched an integer row
            index and raised IndexError for a one-character column name.
        multiplier: List of ``[row index, multiplier]`` pairs, possibly with
            the same row index more than once. It is not modified.

    Returns:
        A new list of ``[row index, combined multiplier]`` pairs with one
        entry per distinct row index, in order of first appearance.
    """
    combined = {}
    for item in multiplier:
        key, factor = item[0], item[1]
        if key in combined:
            combined[key] *= factor
        else:
            combined[key] = factor

    return [[key, value] for key, value in combined.items()]

def fill(lst, n=1000):
    """Add a neutral ``[index, 1]`` entry for every index missing from *lst*.

    Args:
        lst: List of ``[index, multiplier]`` pairs. It is not modified.
        n: Number of individuals; indices ``0 .. n - 1`` are guaranteed to be
            present in the result. Defaults to 1000, so index 999 is now
            covered as well.

    Returns:
        A new list holding the original pairs plus the added ``[index, 1]``
        pairs, sorted by index.
    """
    present_numbers = {sublist[0] for sublist in lst}

    # Individuals without an entry get a multiplier of 1.
    new_sublists = [[num, 1] for num in range(n) if num not in present_numbers]

    return sorted(lst + new_sublists, key=lambda x: x[0])

# Combined comorbidity multiplier per individual, as [row index, multiplier]
# pairs sorted by row index.
multiplier = fill(duplicates(data, initial_multi(data, comorbidities)))

# Scale the baseline rates by each individual's comorbidity multiplier.
# The rates are used as they are: truncating them with int() turned the 59.5%
# symptomatic baseline (100 - 40.5) into 59 and would drop the fractional part
# of any non-integer mortality rate.
symptomatic_rate = 100 - asymptomatic_rate
symptomatic = [entry[1] * symptomatic_rate for entry in multiplier]
mortality = [entry[1] * mortality_rate for entry in multiplier]