In [1]:
import pandas as pd
import numpy as np

In [2]:
initPrev = pd.read_csv("../data/initialPrevalence.csv") # read the necessary data files
flow_rates = pd.read_csv("../data/flow_rates.csv")
flow_apc = pd.read_csv("../data/flow_apc.csv")

In [3]:
flow_apc = flow_apc[flow_rates.columns] # make sure the columns are in the same order

In [4]:
prevMale = initPrev[initPrev['sex'] == 'male'].reset_index(drop=True) # filter only the male data from the datasets
ratesMale = flow_rates[flow_rates['sex'] == 'male'].reset_index(drop=True)
apcMale = flow_apc[flow_apc['sex'] == 'male'].reset_index(drop=True)

modelCols = prevMale.columns # get the columns of the dataframes

# select data from X where the sex column in X has the value male

In [5]:
rates = np.array(ratesMale.iloc[:, 2:]) # convert the data to numpy arrays
apc = np.array(apcMale.iloc[:, 2:])

In [6]:
np.shape(rates), np.shape(apc) # check the shapes of the arrays

((110, 21), (110, 21))

In [7]:
t = 2023

In [8]:
flows = ['n_s', 's_rs', 'n_v', 'v_rv', 'rs_dead', 'rv_dead',
       's_dead', 'sv_dead', 'v_dead', 'n_dead', 'v_sv', 's_sv', 'sv_s',
       'sv_vrs', 'sv_rs', 'v_s', 's_vrs', 'vrs_sv', 'vrs_s', 'vrs_rv',
       'vrs_dead']

x = set(flow.split('_')[0] for flow in flows)
states = set(flow.split('_')[1] for flow in flows)

In [9]:
r = rates * (1 + apc)**(t - 2021)
r = pd.DataFrame(r, columns=flows)

In [10]:
flowsLookup = {}

for i in x:
    flowsLookup[i] = [flow for flow in flows if flow.split('_')[0] == i]

In [11]:
flowsLookup

{'v': ['v_rv', 'v_dead', 'v_sv', 'v_s'],
 'rv': ['rv_dead'],
 's': ['s_rs', 's_dead', 's_sv', 's_vrs'],
 'n': ['n_s', 'n_v', 'n_dead'],
 'rs': ['rs_dead'],
 'sv': ['sv_dead', 'sv_s', 'sv_vrs', 'sv_rs'],
 'vrs': ['vrs_sv', 'vrs_s', 'vrs_rv', 'vrs_dead']}

In [12]:
for statex in x:
    r[f"f_{statex}"] = r[flowsLookup[statex]].sum(axis=1)

In [13]:
for flow in flows:
        statex = flow.split('_')[0]
        statey = flow.split('_')[1]
        r[f"p_{statex}_{statey}"] = (1 - np.exp( - r[f"f_{statex}"]/4)) * r[f"{statex}_{statey}"]/r[f"f_{statex}"]

In [14]:
ps = [col for col in r.columns if col.startswith('p_')]

In [15]:
print(ps)

['p_n_s', 'p_s_rs', 'p_n_v', 'p_v_rv', 'p_rs_dead', 'p_rv_dead', 'p_s_dead', 'p_sv_dead', 'p_v_dead', 'p_n_dead', 'p_v_sv', 'p_s_sv', 'p_sv_s', 'p_sv_vrs', 'p_sv_rs', 'p_v_s', 'p_s_vrs', 'p_vrs_sv', 'p_vrs_s', 'p_vrs_rv', 'p_vrs_dead']


In [16]:
for state in x:
    r[f"p_from_{state}"] = r[[col for col in ps if col.endswith(f"_{state}")]].sum(axis=1)

In [17]:
for state in states:
    if state != 'dead':
        prevMale[f"out_{state}"] = r[f"p_from_{state}"] * prevMale[state]
    else:
        prevMale[f"out_{state}"] = 0
        
    prevMale[f"in_{state}"] = 0
    for statex in x:
        if statex != state:
            if f"p_{statex}_{state}" in r.columns:
                prevMale[f"in_{state}"] += r[f"p_{statex}_{state}"] * prevMale[statex]

    prevMale[state] += prevMale[f"in_{state}"] - prevMale[f"out_{state}"]

prevMale = prevMale[modelCols]

In [18]:
prevMale["smoking"] = (prevMale["s"] + prevMale["sv"])/(1 - prevMale["dead"])
prevMale["vaping"] = (prevMale["v"] + prevMale["sv"] + prevMale["vrs"])/(1 - prevMale["dead"])

In [19]:
output = prevMale[["age", "sex", "smoking", "vaping"]]

In [20]:
output

Unnamed: 0,age,sex,smoking,vaping
0,0,male,0.000000,0.000000
1,1,male,0.000000,0.000000
2,2,male,0.000000,0.000000
3,3,male,0.000000,0.000000
4,4,male,0.000000,0.000000
...,...,...,...,...
105,105,male,0.010995,0.000101
106,106,male,0.010666,0.000157
107,107,male,0.010338,0.000142
108,108,male,0.010021,0.000145
