In [2]:
import os
import warnings
from glob import glob

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

Look at a single outcome: the cigarette smoking rate in the general population in 2031 under ban option 1.

In [3]:
# Root folder of one uncertainty-analysis run; each policy option has its own
# "option_<k>" sub-folder underneath "outputs".
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2023-03-29_14-16-04-931491"
output_dir = os.path.join(base_dir, "outputs")
outputs_dirs = [
    os.path.join(output_dir, option_name)
    for option_name in (f"option_{k}" for k in range(6))
]

In [4]:
# Outcome grid indexed as [mortality-parameter set, initial population, ban-parameter set].
# Entries for which no output file is found keep their initial 0.0, which would
# bias the summary statistics; a warning is issued below when that happens.
UA_data = np.zeros((25,25,100))

for opt in [1]:
    outputs = outputs_dirs[opt]
    # Tracks which grid entries were actually populated from a file.
    filled = np.zeros(UA_data.shape, dtype=bool)

    for path in sorted(glob(outputs + "/*.npy")):
        fname = os.path.basename(path)
        # The three run indices sit at fixed character offsets in the file name.
        mort = int(fname[5:7])
        pop = int(fname[12:14])
        ban = int(fname[25:27])

        arr = np.load(path)
        # NOTE(review): index 16 is taken to be the year 2031 (per the notebook
        # text); if index 0 is 2016 this would be 2032 -- confirm against the model.
        arr = arr[16] # get the years we are interested in
        # Collapse every axis except the last one, so the same code works no matter
        # how many demographic axes the file carries.
        arr = np.sum(arr, axis=tuple(range(arr.ndim - 1))) # dont need demographics
        arr = arr[:-1] # don't need dead people
        # Entries 2 and 3 are presumably the two cigarette-smoking states
        # (menthol / non-menthol) -- TODO confirm against the model's state order.
        UA_data[mort, pop, ban] = (arr[2] + arr[3] ) / np.sum(arr)
        filled[mort, pop, ban] = True

    n_missing = int(filled.size - np.count_nonzero(filled))
    if n_missing:
        warnings.warn(
            f"{n_missing} of {filled.size} runs have no output file in {outputs}; "
            "their UA_data entries are left at 0 and will bias the statistics"
        )


In [8]:
# Overall moments of the outcome grid first, then the spread along each parameter
# axis: the std is taken along one axis and averaged over the remaining two.
overall_mean = np.mean(UA_data)
overall_std = np.std(UA_data)

print("Outcome: cigarette smoking rate in 2031, base ban scenario")
print("mean:", overall_mean)
print("std dev:", overall_std)
for axis_label, axis_idx in (
    ("std dev accross mortality params", 0),
    ("std dev accross initial pops    ", 1),
    ("std dev accross ban params      ", 2),
):
    print(axis_label, np.mean(np.std(UA_data, axis=axis_idx)))

Outcome: cigarette smoking rate in 2031, base ban scenario
mean: 0.061123933299768674
std dev: 0.0014057088188369185
std dev accross mortality params 0.001334809645910865
std dev accross initial pops     0.0013370173139520588
std dev accross ban params       0.0013936917498340367


Looking at variance due to sampling randomness

In [6]:
output_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data_4-25-2023_longleaf_partial/option_0"

# Outcome grid indexed as [mortality-parameter set, initial population, ban-parameter set].
# Entries for which no output file is found keep their initial 0.0, which would
# bias the summary statistics; a warning is issued below when that happens.
UA_data = np.zeros((25,25,100))
# Tracks which grid entries were actually populated from a file.
filled = np.zeros(UA_data.shape, dtype=bool)

for path in sorted(glob(output_dir + "/*.npy")):
    fname = os.path.basename(path)
    # The three run indices sit at fixed character offsets in the file name.
    mort = int(fname[5:7])
    pop = int(fname[12:14])
    ban = int(fname[25:27])

    arr = np.load(path)
    # NOTE(review): index 16 is taken to be the year 2031 (per the notebook
    # text); if index 0 is 2016 this would be 2032 -- confirm against the model.
    arr = arr[16] # get the years we are interested in
    # Collapse every axis except the last one, so the same code works no matter
    # how many demographic axes the file carries.
    arr = np.sum(arr, axis=tuple(range(arr.ndim - 1))) # dont need demographics
    arr = arr[:-1] # don't need dead people
    # Entries 2 and 3 are presumably the two cigarette-smoking states
    # (menthol / non-menthol) -- TODO confirm against the model's state order.
    UA_data[mort, pop, ban] = (arr[2] + arr[3] ) / np.sum(arr)
    filled[mort, pop, ban] = True

n_missing = int(filled.size - np.count_nonzero(filled))
if n_missing:
    warnings.warn(
        f"{n_missing} of {filled.size} runs have no output file in {output_dir}; "
        "their UA_data entries are left at 0 and will bias the statistics"
    )

In [7]:

# Same summary as for the ban scenario, but for the status-quo (SQ) runs: overall
# moments, then the mean std along each parameter axis.
spread_over_mortality = np.std(UA_data, axis=0).mean()
spread_over_init_pops = np.std(UA_data, axis=1).mean()
spread_over_ban = np.std(UA_data, axis=2).mean()

print("Outcome: cigarette smoking rate in 2031, SQ scenario")
print("mean:", UA_data.mean())
print("std dev:", UA_data.std())
print("std dev accross mortality params", spread_over_mortality)
print("std dev accross initial pops    ", spread_over_init_pops)
# The message below is printed as a caveat for the ban-parameter line that follows.
print("*SQ scenario has no change due to ban params*")
print("std dev accross ban params      ", spread_over_ban)

Outcome: cigarette smoking rate in 2031, SQ scenario
mean: 0.09019083698926202
std dev: 0.0015686805736826843
std dev accross mortality params 0.001518239498948974
std dev accross initial pops     0.001520077738738574
*SQ scenario has no change due to ban params*
std dev accross ban params       0.001554227025282794
