In [35]:
import pandas as pd
import numpy as np
import os
from glob import glob

from tabulate import tabulate
printtab = lambda x : print(tabulate(x, headers='firstrow'))

Need to get the prevalences of the smoking groups in the years 2016 (start), 2021, 2026, 2031, 2051


With 95% confidence intervals

In [36]:
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2022-11-29_12-24-05-273292"

outputs = os.path.join(base_dir, "outputs_numpy")

In [37]:
collection_list = []

# for each arr, store a 2D array in the list
# axis = 0 are the groups: menthol, nonmenthol, ecig/dual, former, never (3, 4, 5, 2, 1)
# axis = 1 are the years 2016, 2021, 2026, 2031, 2051
for f in sorted(glob(outputs + "/*.npy")):
    arr = np.load(f)
    arr = arr[[0, 5, 10, 15, 35]] # get the years we are interested in
    arr = np.sum(arr, axis=(1,2)) # dont care about demographics
    arr = arr[:,:-1] # don't need dead people
    sums = np.sum(arr, axis=1) # total count for each year
    arr = arr / sums[:,np.newaxis] # get proportions
    arr = arr.T # transpose so we have (smoking groups, years) as axes
    arr = arr[[2,3,4,1,0]] # re-order the smoking groups
    collection_list.append(arr)

# arr = collection_list[0]

# header = ["", "2016", "2021", "2026", "2031", "2051"]
# r1 = ["menthol"] + list(np.around(arr[0], decimals=3))
# r2 = ["nonmenthol"] + list(np.around(arr[1], decimals=3))
# r3 = ["ecig/dual"] + list(np.around(arr[2], decimals=3))
# r4 = ["former"] + list(np.around(arr[3], decimals=3))
# r5 = ["nonsmoker"] + list(np.around(arr[4], decimals=3))

# tab = [header, r1, r2, r3, r4, r5]
# printtab(tab)

    


In [38]:
# analyze collection_list and get 95% confidence intervals
collection_list = np.array(collection_list)

mean_results = np.zeros_like(collection_list[0])
upper_bound = np.zeros_like(collection_list[0])
lower_bound = np.zeros_like(collection_list[0])

for i in range(collection_list.shape[1]):
    for j in range(collection_list.shape[2]):
        mean = np.mean(collection_list[:,i,j])
        upper = np.percentile(collection_list[:,i,j], 97.5)
        lower = np.percentile(collection_list[:,i,j], 2.5)

        mean_results[i,j] = mean
        upper_bound[i,j] = upper
        lower_bound[i,j] = lower

# do a table

mean_results = np.around(mean_results * 100, decimals=1)
upper_bound = np.around(upper_bound * 100, decimals=1)
lower_bound = np.around(lower_bound * 100, decimals=1)

header = ["", "2016", "2021", "2026", "2031", "% Change", "2051"]
r1 = ["menthol"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[0], upper_bound[0], lower_bound[0]
)]
r2 = ["nonmenthol"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[1], upper_bound[1], lower_bound[1]
)]
r3 = ["ecig/dual"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[2], upper_bound[2], lower_bound[2]
)]
r4 = ["former"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[3], upper_bound[3], lower_bound[3]
)]
r5 = ["nonsmoker"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[4], upper_bound[4], lower_bound[4]
)]

# put in % changes
rows = [r1, r2, r3, r4, r5]
for i, r in enumerate(rows):
    change = -(1 - mean_results[i, 4] / mean_results[i,1])
    upper = -(1 - upper_bound[i,4] / lower_bound[i,1])
    lower = -(1 - lower_bound[i,4] / upper_bound[i,1])

    change = np.around(change * 100, decimals=1)
    upper = np.around(upper * 100, decimals=1)
    lower = np.around(lower * 100, decimals=1)

    rows[i] = rows[i][:5] + [f"{change} ({lower}, {upper})"] + [rows[i][-1]]

tab = [header] + rows
print("Smoking Rates, Ban Scenario #1, with 95% Confidence Intervals")
printtab(tab)


Smoking Rates, Ban Scenario #1, with 95% Confidence Intervals
            2016                 2021                 2026                 2031                 % Change                 2051
----------  -------------------  -------------------  -------------------  -------------------  -----------------------  -------------------
menthol     5.8%, (5.7, 5.8)     5.8%, (5.6, 6.0)     0.1%, (0.1, 0.1)     0.1%, (0.1, 0.1)     -100.0 (-100.0, -100.0)  0.0%, (0.0, 0.0)
nonmenthol  9.4%, (9.3, 9.4)     5.9%, (5.6, 6.1)     6.1%, (5.9, 6.4)     5.2%, (4.9, 5.4)     -27.1 (-34.4, -19.6)     4.3%, (4.0, 4.5)
ecig/dual   3.7%, (3.6, 3.7)     1.8%, (1.7, 2.0)     1.9%, (1.8, 2.0)     1.6%, (1.5, 1.7)     -66.7 (-75.0, -58.8)     0.6%, (0.5, 0.7)
former      20.2%, (20.1, 20.4)  28.1%, (27.6, 28.5)  35.4%, (34.9, 35.9)  38.3%, (37.8, 38.8)  74.4 (68.4, 83.0)        49.0%, (48.0, 50.5)
nonsmoker   60.9%, (60.8, 61.1)  58.5%, (58.1, 58.8)  56.5%, (56.0, 56.9)  54.9%, (54.4, 55.3)  -21.2 (-24.0, -18.9)

Now get mortality results!

Want to get mortality in the years 2016, 2021, 2026, 2031, 2051

with percent change between 2031 and 2016

for full population, non-Hispanic Black, poverty, not poverty

In [39]:

collection_list = []

# for each arr, store a 2D array in the list
# axis = 0 are the groups: full, black, pov, not pov
# axis = 1 are the years 2016, 2021, 2026, 2031, 2051
for f in sorted(glob(outputs + "/*.npy")):
    arr = np.load(f)

    total_pop = np.sum(arr[-1])
    total_black = np.sum(arr[-1,1,:,:])
    total_pov = np.sum(arr[-1,:,1,:])
    total_nonpov = np.sum(arr[-1,:,0,:])

    arr = arr[[0, 5, 10, 15, 35]] # get the years we are interested in
    arr = arr [:, :, :, 5] # only care about dead people

    arr = np.concatenate([
        (np.sum(arr, axis=(1,2)) / total_pop)[:, np.newaxis], # full pop
        (np.sum(arr[:,1,:], axis=1) / total_black)[:, np.newaxis], # black
        (np.sum(arr[:,:,1], axis=1) / total_pov)[:, np.newaxis], # pov
        (np.sum(arr[:,:,0], axis=1) / total_nonpov)[:, np.newaxis], # not pov
    ], axis=1)

    arr = arr.T

    collection_list.append(arr)

In [40]:

# analyze collection_list and get 95% confidence intervals
collection_list = np.array(collection_list)

mean_results = np.zeros_like(collection_list[0])
upper_bound = np.zeros_like(collection_list[0])
lower_bound = np.zeros_like(collection_list[0])

for i in range(collection_list.shape[1]):
    for j in range(collection_list.shape[2]):
        mean = np.mean(collection_list[:,i,j])
        upper = np.percentile(collection_list[:,i,j], 97.5)
        lower = np.percentile(collection_list[:,i,j], 2.5)

        mean_results[i,j] = mean
        upper_bound[i,j] = upper
        lower_bound[i,j] = lower

# do a table

mean_results = np.around(mean_results * 100, decimals=1)
upper_bound = np.around(upper_bound * 100, decimals=1)
lower_bound = np.around(lower_bound * 100, decimals=1)

header = ["", "2016", "2021", "2026", "2031", "2051"]
r1 = ["full pop"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[0], upper_bound[0], lower_bound[0]
)]
r2 = ["black NH"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[1], upper_bound[1], lower_bound[1]
)]
r3 = ["poverty"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[2], upper_bound[2], lower_bound[2]
)]
r4 = ["not poverty"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
    mean_results[3], upper_bound[3], lower_bound[3]
)]

rows = [r1, r2, r3, r4]

tab = [header] + rows
print("Mortality, Ban Scenario #1, with 95% Confidence Intervals")
printtab(tab)

Mortality, Ban Scenario #1, with 95% Confidence Intervals
             2016              2021              2026              2031              2051
-----------  ----------------  ----------------  ----------------  ----------------  -------------------
full pop     0.5%, (0.4, 0.6)  1.9%, (1.7, 2.1)  3.8%, (3.4, 4.1)  6.2%, (5.7, 6.7)  28.1%, (26.0, 29.4)
black NH     0.5%, (0.3, 0.8)  1.9%, (1.4, 2.5)  3.7%, (3.0, 4.4)  6.0%, (5.1, 7.0)  27.2%, (24.8, 29.2)
poverty      0.5%, (0.3, 0.7)  1.9%, (1.5, 2.2)  3.6%, (3.1, 4.1)  5.9%, (5.2, 6.6)  26.0%, (23.7, 27.7)
not poverty  0.5%, (0.4, 0.7)  1.9%, (1.7, 2.2)  3.8%, (3.4, 4.2)  6.3%, (5.7, 6.9)  28.7%, (26.6, 30.0)
