In [15]:
import pandas as pd
import numpy as np
import os
from glob import glob

from tabulate import tabulate
printtab = lambda x : print(tabulate(x, headers='firstrow'))

Need to get the prevalences of the smoking groups in the years 2016 (start), 2021, 2026, 2031, 2051


With 95% confidence intervals

In [16]:
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2022-12-16_15-55-24-478281"
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2023-03-29_14-16-04-931491"
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2023-03-29_14-16-04-931491_updatedSep2023"
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2023-10-11_23-47-30-192455"
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2023-10-24_21-00-37-093241"
# base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2024-10-23_21-15-19-372466"
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2024-11-11_19-02-43-329056"
base_dir = "/Users/nick/Documents/Gillings_work/uncertainty_analysis_data/uncertainty_analysis_2024-12-11_07-52-16-945389"
output_dir = os.path.join(base_dir, "outputs")
outputs_dirs = [os.path.join(output_dir, f"option_{i}") for i in range(6)]

In [31]:
start_year = 2016
years = [2016,2024,2025,2026,2027,2030,2035,2055]
year_inds = list(np.array(years) - start_year)

[0, 8, 9, 10, 11, 14, 19, 39]


In [17]:
collection_list_options = []
SQ_2031_rates = [] # status quo 2031 rates in separate list for comparison

for opt in range(6):
    outputs = outputs_dirs[opt]
    if not os.path.exists(outputs):
        continue
    collection_list = []

    # for each arr, store a 2D array in the list
    # axis = 0 are the groups: menthol, nonmenthol, smoker, ecig/dual, former, never (3, 4, 3+4, 5, 2, 1)
    # axis = 1 are the years 2016, 2024, 2029, 2034, 2054
    for i,f in enumerate(sorted(glob(outputs + "/*.npy"))):
        arr = np.load(f)
        arr = arr[:,:,:,0,:] # age-restrict 18-64
        arr = arr[year_inds] # get the years we are interested in
        arr = np.sum(arr, axis=(1,2)) # dont care about demographics
        arr = arr[:,:-1] # don't need dead people
        sums = np.sum(arr, axis=1) # total count for each year
        arr = arr / sums[:,np.newaxis] # get proportions
        arr = arr.T # transpose so we have (smoking groups, years) as axes
        arr = np.concatenate([ # want to add the smokers together too
            arr[0:4],
            (arr[2] + arr[3])[np.newaxis, :],
            arr[4][np.newaxis, :],
        ], axis=0)
        arr = arr[[2,3,4,5,1,0]] # re-order the smoking groups
        # add change 2016-2031 column and change from SQ column
        if opt == 0:
            SQ_2031_rates.append(arr[:,3]) # store the 2031 rates since we are looking at SQ now
            arr = np.concatenate([
                arr[:,:4], # 2016, 2024, 2029, 2034
                (arr[:,3] - arr[:,0])[:,np.newaxis], # change 2031 - 2016
                np.zeros((len(arr), 1)), # change from SQ (zeros)
                arr[:,4][:,np.newaxis], #2054
            ],axis=1)
        else:
            arr = np.concatenate([
                arr[:,:4], # 2016, 2024, 2029, 2034
                (arr[:,3] - arr[:,0])[:,np.newaxis], # change 2034 - 2016
                ((arr[:,3] - SQ_2031_rates[i]) / SQ_2031_rates[i])[:,np.newaxis], # change from SQ
                arr[:,4][:,np.newaxis], #2054
            ],axis=1)
        collection_list.append(arr)

    collection_list_options.append(collection_list)


        


In [18]:
status_quo_prevalences = []
for opt in range(6):
    # analyze collection_list and get 95% confidence intervals
    if opt >= len(collection_list_options):
        continue
    collection_list = np.array(collection_list_options[opt])

    mean_results = np.zeros_like(collection_list[0])
    upper_bound = np.zeros_like(collection_list[0])
    lower_bound = np.zeros_like(collection_list[0])

    for i in range(collection_list.shape[1]):
        for j in range(collection_list.shape[2]):
            mean = np.mean(collection_list[:,i,j])
            upper = np.percentile(collection_list[:,i,j], 97.5)
            lower = np.percentile(collection_list[:,i,j], 2.5)

            mean_results[i,j] = mean
            upper_bound[i,j] = upper
            lower_bound[i,j] = lower

    # do a table

    mean_results = np.around(mean_results * 100, decimals=3)
    upper_bound = np.around(upper_bound * 100, decimals=3)
    lower_bound = np.around(lower_bound * 100, decimals=3)

    # change_in_prevalence = np.around(change_in_prevalence * 100, decimals=1)

    header = ["", "2016", "2024", "2029", "2034", "Change 2016-2034", "% Change from SQ 2034", "2054"]
    r1 = ["menthol"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[0], upper_bound[0], lower_bound[0]
    )]
    r2 = ["nonmenthol"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[1], upper_bound[1], lower_bound[1]
    )]
    r3 = ["menthol+nonmenthol"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[2], upper_bound[2], lower_bound[2]
    )]
    r4 = ["ecig/dual"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[3], upper_bound[3], lower_bound[3]
    )]
    r5 = ["former"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[4], upper_bound[4], lower_bound[4]
    )]
    r6 = ["nonsmoker"] + [f"{mean}%, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[5], upper_bound[5], lower_bound[5]
    )]

    rows = [r1, r2, r3, r4, r5, r6]

    tab = [header] + rows
    print(" ")
    print(" ")
    print(f"Smoking Rates, Ban Scenario #{opt}, with 95% Confidence Intervals")
    if opt == 0: print("** Status Quo Scenario **")
    printtab(tab)


 
 
Smoking Rates, Ban Scenario #0, with 95% Confidence Intervals
** Status Quo Scenario **
                    2016                       2024                       2029                       2034                       Change 2016-2034           % Change from SQ 2034    2054
------------------  -------------------------  -------------------------  -------------------------  -------------------------  -------------------------  -----------------------  -------------------------
menthol             5.741%, (5.733, 5.751)     5.749%, (5.499, 5.91)      5.349%, (5.238, 5.517)     5.157%, (4.957, 5.39)      -0.584%, (-0.787, -0.35)   0.0%, (0.0, 0.0)         4.652%, (4.53, 4.747)
nonmenthol          9.391%, (9.381, 9.399)     4.943%, (4.692, 5.152)     4.38%, (4.183, 4.611)      4.188%, (4.026, 4.351)     -5.203%, (-5.367, -5.04)   0.0%, (0.0, 0.0)         3.878%, (3.746, 4.073)
menthol+nonmenthol  15.132%, (15.132, 15.132)  10.692%, (10.394, 10.967)  9.728%, (9.493, 10.024)    9.344%, (9.

# Now get the same table in an easy CSV format

In [19]:
print("CSV values to be read into software")
print("All values in this table are percentages")
print("There are 5 ban scenarios each with a table here")
print("In column titles, M = mean, LB = lower bound, UB = upper bound (95% confidence intervals)")
print("SQ = Status Quo scenario")
print("All numbers are rounded to 5 decimal places which should be more than we want for the final table")
status_quo_prevalences = []
for opt in range(6):
    # analyze collection_list and get 95% confidence intervals
    if opt >= len(collection_list_options):
        continue
    collection_list = np.array(collection_list_options[opt])

    mean_results = np.zeros_like(collection_list[0])
    upper_bound = np.zeros_like(collection_list[0])
    lower_bound = np.zeros_like(collection_list[0])

    for i in range(collection_list.shape[1]):
        for j in range(collection_list.shape[2]):
            mean = np.mean(collection_list[:,i,j])
            upper = np.percentile(collection_list[:,i,j], 97.5)
            lower = np.percentile(collection_list[:,i,j], 2.5)

            mean_results[i,j] = mean
            upper_bound[i,j] = upper
            lower_bound[i,j] = lower


    # do a table

    mean_results = np.around(mean_results * 100, decimals=5)
    upper_bound = np.around(upper_bound * 100, decimals=5)
    lower_bound = np.around(lower_bound * 100, decimals=5)

    # change_in_prevalence = np.around(change_in_prevalence * 100, decimals=5)

    header = ["2016", "2024", "2026", "2034", "%Change2016-2034", "%ChangeFromSQ2034", "2054"]
    new_header = ["group,"]
    for e in header:
        new_header.append(e + "M,")
        new_header.append(e + "LB,")
        new_header.append(e + "UB,") 
    
    r1 = ["menthol,"]
    r2 = ["nonmenthol,"]
    r3 = ["menthol+nonmenthol,"]
    r4 = ["ecig/dual,"]
    r5 = ["former,"]
    r6 = ["neversmoker,"]

    rows = [r1, r2, r3, r4, r5, r6]
    
    
    for i, r in enumerate(rows):
        for m, ub, lb in zip(mean_results[i], upper_bound[i], lower_bound[i]):
            rows[i] += [f"{m},", f"{lb},", f"{ub},"]

    tab = [new_header] + rows
    print(" ")
    print(" ")
    print(f"Smoking Rates, Ban Scenario #{opt}, with 95% Confidence Intervals")
    if opt == 0: print("** Status Quo Scenario **")
    print(tabulate(tab))

CSV values to be read into software
All values in this table are percentages
There are 5 ban scenarios each with a table here
In column titles, M = mean, LB = lower bound, UB = upper bound (95% confidence intervals)
SQ = Status Quo scenario
All numbers are rounded to 5 decimal places which should be more than we want for the final table
 
 
Smoking Rates, Ban Scenario #0, with 95% Confidence Intervals
** Status Quo Scenario **
-------------------  ---------  ---------  ---------  ---------  ---------  ---------  ---------  ---------  ---------  ---------  ---------  ---------  ------------------  -------------------  -------------------  -------------------  --------------------  --------------------  ---------  ---------  ---------
group,               2016M,     2016LB,    2016UB,    2024M,     2024LB,    2024UB,    2026M,     2026LB,    2026UB,    2034M,     2034LB,    2034UB,    %Change2016-2034M,  %Change2016-2034LB,  %Change2016-2034UB,  %ChangeFromSQ2034M,  %ChangeFromSQ2034LB, 

# Now get mortality results!

Want to get mortality in the years 2016, 2021, 2026, 2031, 2051

with percent change between 2031 and 2016

for full population, non-Hispanic Black, poverty, not poverty

In [20]:

collection_list_options = []
total_pops_opt_group = []
SQ_mortality_2021_2031 = []

for opt in range(6):
    outputs = outputs_dirs[opt]
    collection_list = []
    this_total_pops = []

    # for each arr, store a 2D array in the list
    # axis = 0 are the groups: full, black, pov, not pov
    # axis = 1 are the years 2016, 2024, 2029, 2034, 2054
    # original dimensions are (year, black, pov, smoking group)
    # now, original dimensions are (year, black, pov, smoking group) # after we decided to add 65+ year olds separately
    for i,f in enumerate(sorted(glob(outputs + "/*.npy"))):
        arr = np.load(f)
        # arr = arr.sum(axis=3)
        arr = arr[:,:,:,0,:]

        # set the number of dead people in the first year (2016) to zero
        # and adjust all mortality afterward accordingly
        arr[:,:,:,5] -= arr[0,:,:,5].reshape((-1,2,2))

        total_pop = np.sum(arr[-1])
        total_black = np.sum(arr[-1,1,:,:])
        total_pov = np.sum(arr[-1,:,1,:])
        total_nonpov = np.sum(arr[-1,:,0,:])

        this_total_pops.append([total_pop, total_black, total_pov, total_nonpov])

        arr = arr[[0, 5, 10, 15, 35]] # get the years we are interested in
        arr = arr [:, :, :, 5] # only care about dead people

        arr = np.concatenate([
            (np.sum(arr, axis=(1,2)))[:, np.newaxis], # full pop
            (np.sum(arr[:,1,:], axis=1))[:, np.newaxis], # black
            (np.sum(arr[:,:,1], axis=1))[:, np.newaxis], # pov
            (np.sum(arr[:,:,0], axis=1))[:, np.newaxis], # not pov
        ], axis=1)

        arr = arr.T # now the dims are group, year

        # add the change from SQ column
        if opt == 0:
            SQ_mortality_2021_2031.append(arr[:,4] - arr[:,1])
            arr = np.concatenate([
                arr,
                np.zeros((len(arr), 1))
            ], axis=1)
        else:
            arr = np.concatenate([
                arr,
                ((arr[:,4] - arr[:,1]) - SQ_mortality_2021_2031[i])[:, np.newaxis],
            ], axis=1)
        
        # print(arr.shape)
        # assert False

        collection_list.append(arr)
    
    collection_list_options.append(collection_list)

    this_total_pops = np.array(this_total_pops)
    total_pops_opt_group.append(np.mean(this_total_pops, axis=0))

total_pops_opt_group = np.array(total_pops_opt_group)

In [21]:
print(total_pops_opt_group)
print(total_pops_opt_group.shape)

backup = np.copy(total_pops_opt_group)

[[2.24781040e+08 2.79957202e+07 2.49118533e+07 1.99869187e+08]
 [2.23843161e+08 2.76410032e+07 2.45856425e+07 1.99257518e+08]
 [2.23933053e+08 2.76779224e+07 2.45128134e+07 1.99420239e+08]
 [2.23917061e+08 2.76894108e+07 2.45544320e+07 1.99362629e+08]
 [2.23818189e+08 2.76759045e+07 2.45239955e+07 1.99294194e+08]
 [2.23719028e+08 2.76741944e+07 2.45556325e+07 1.99163395e+08]]
(6, 4)


In [22]:
SQ_2051_mort = None
for opt in range(6):
    
    # analyze collection_list and get 95% confidence intervals
    collection_list = np.array(collection_list_options[opt])

    mean_results = np.zeros_like(collection_list[0])
    upper_bound = np.zeros_like(collection_list[0])
    lower_bound = np.zeros_like(collection_list[0])

    for i in range(collection_list.shape[1]):
        for j in range(collection_list.shape[2]):
            mean = np.mean(collection_list[:,i,j])
            upper = np.percentile(collection_list[:,i,j], 97.5)
            lower = np.percentile(collection_list[:,i,j], 2.5)

            mean_results[i,j] = mean
            upper_bound[i,j] = upper
            lower_bound[i,j] = lower

    # do a table

    mean_results = np.around(mean_results / 100000, decimals=1)
    upper_bound = np.around(upper_bound / 100000, decimals=1)
    lower_bound = np.around(lower_bound / 100000, decimals=1)
    # change_cummort = np.around(change_cummort / 100000, decimals=1)
    total_pops = np.around(total_pops_opt_group[opt] / 100000, decimals=1)

    header = ["", "2016", "2024", "2029", "2034", "2054", "Change from SQ 2024-2054", "total living"]
    r1 = ["full pop"] + [f"{mean}, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[0], upper_bound[0], lower_bound[0]
    )]
    r2 = ["black NH"] + [f"{mean}, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[1], upper_bound[1], lower_bound[1]
    )]
    r3 = ["poverty"] + [f"{mean}, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[2], upper_bound[2], lower_bound[2]
    )]
    r4 = ["not poverty"] + [f"{mean}, ({lower_bound}, {upper_bound})" for mean, upper_bound, lower_bound in zip(
        mean_results[3], upper_bound[3], lower_bound[3]
    )]

    rows = [r1, r2, r3, r4]

    # put in total pop
    for i, r in enumerate(rows):
        rows[i] += [f"{total_pops[i]}"]

    tab = [header] + rows
    print("")
    print("")
    print(f"Cumulative Mortality (units of 100,000), Ban Scenario #{opt}, with 95% Confidence Intervals")
    if opt == 0: print("** Status Quo Scenario **")
    printtab(tab)



Cumulative Mortality (units of 100,000), Ban Scenario #0, with 95% Confidence Intervals
** Status Quo Scenario **
             2016             2024                2029                2034                2054                   Change from SQ 2024-2054      total living
-----------  ---------------  ------------------  ------------------  ------------------  ---------------------  --------------------------  --------------
full pop     0.0, (0.0, 0.0)  29.8, (26.3, 32.1)  59.7, (55.4, 64.1)  88.3, (82.1, 94.2)  201.2, (194.1, 208.6)  0.0, (0.0, 0.0)                     2247.8
black NH     0.0, (0.0, 0.0)  3.6, (2.9, 4.8)     7.3, (5.9, 8.9)     10.7, (9.4, 12.0)   24.5, (22.1, 26.7)     0.0, (0.0, 0.0)                      280
poverty      0.0, (0.0, 0.0)  6.7, (5.6, 8.3)     13.5, (11.9, 15.9)  20.6, (18.7, 23.0)  51.6, (46.9, 55.7)     0.0, (0.0, 0.0)                      249.1
not poverty  0.0, (0.0, 0.0)  23.1, (19.7, 25.5)  46.2, (40.7, 50.3)  67.6, (61.7, 73.0)  149.6, (143.4, 1

# Mortality Data in CSV form

In [23]:
print("CSV values to be read into software")
print("All values in this table are absolute counts")
print("There are 5 ban scenarios each with a table here")
print("In column titles, M = mean, LB = lower bound, UB = upper bound (95% confidence intervals)")
print("SQ = Status Quo scenario")
print("All numbers are rounded to 10 decimal places which should be more than we want for the final table")
SQ_2051_mort = None
for opt in range(6):
    
    # analyze collection_list and get 95% confidence intervals
    collection_list = np.array(collection_list_options[opt])

    mean_results = np.zeros_like(collection_list[0])
    upper_bound = np.zeros_like(collection_list[0])
    lower_bound = np.zeros_like(collection_list[0])

    for i in range(collection_list.shape[1]):
        for j in range(collection_list.shape[2]):
            mean = np.mean(collection_list[:,i,j])
            upper = np.percentile(collection_list[:,i,j], 97.5)
            lower = np.percentile(collection_list[:,i,j], 2.5)

            mean_results[i,j] = mean
            upper_bound[i,j] = upper
            lower_bound[i,j] = lower

    # # compute reduction in cumulative mortality
    # change_cummort = None
    # if opt == 0:
    #     SQ_2051_mort = np.concatenate([
    #         mean_results[:,-1][:,np.newaxis],
    #         upper_bound[:,-1][:,np.newaxis],
    #         lower_bound[:,-1][:,np.newaxis],
    #     ], axis=1)
    #     change_cummort = np.zeros_like(SQ_2051_mort)
    # else:
    #     change_cummort = np.concatenate([
    #         (mean_results[:,-1] - SQ_2051_mort[:,0])[:,np.newaxis],
    #         (upper_bound[:,-1] - SQ_2051_mort[:,2])[:,np.newaxis],
    #         (lower_bound[:,-1] - SQ_2051_mort[:,1])[:,np.newaxis],
    #     ], axis=1)

    # do a table

    mean_results = np.around(mean_results / 100000, decimals=10)
    upper_bound = np.around(upper_bound / 100000, decimals=10)
    lower_bound = np.around(lower_bound / 100000, decimals=10)
    # change_cummort = np.around(change_cummort / 100000, decimals=10)
    total_pops = np.around(total_pops_opt_group[opt] / 100000, decimals=10)

    header = ["2016", "2024", "2029", "2034", "2054", "ChangeFromSQ2024-2054"]
    new_header = ["group,"]
    for e in header:
        new_header.append(e + "M,")
        new_header.append(e + "LB,")
        new_header.append(e + "UB,") 
    new_header += ["totalLiving"]

    r1 = ["fullPop,"]
    r2 = ["BlackNH,"]
    r3 = ["Poverty,"]
    r4 = ["NotPoverty,"]

    rows = [r1, r2, r3, r4]

    for i, r in enumerate(rows):
        for m, ub, lb in zip(mean_results[i], upper_bound[i], lower_bound[i]):
            rows[i] += [f"{m},", f"{lb},", f"{ub},", ]

    # put in total pop
    for i, r in enumerate(rows):
        # rows[i] += [f"{change_cummort[i,0]},", f"{change_cummort[i,2]},",f"{change_cummort[i,1]},"]
        rows[i] += [f"{total_pops[i]},"]

    tab = [new_header] + rows
    print("")
    print("")
    print(f"Cumulative Mortality (units of 100,000), Ban Scenario #{opt}, with 95% Confidence Intervals")
    if opt == 0: print("** Status Quo Scenario **")
    print(tabulate(tab))

CSV values to be read into software
All values in this table are absolute counts
There are 5 ban scenarios each with a table here
In column titles, M = mean, LB = lower bound, UB = upper bound (95% confidence intervals)
SQ = Status Quo scenario
All numbers are rounded to 10 decimal places which should be more than we want for the final table


Cumulative Mortality (units of 100,000), Ban Scenario #0, with 95% Confidence Intervals
** Status Quo Scenario **
-----------  ------  -------  -------  --------------  --------------  --------------  --------------  --------------  --------------  --------------  --------------  --------------  ---------------  ---------------  ---------------  -----------------------  ------------------------  ------------------------  ----------------
group,       2016M,  2016LB,  2016UB,  2024M,          2024LB,         2024UB,         2029M,          2029LB,         2029UB,         2034M,          2034LB,         2034UB,         2054M,           2054LB,     

In [24]:
"""
For the paper


"""

'\nFor the paper\n\n\n'

# Validation (Status Quo only)

BRFSS validation: 2016, 2017, 2021

prevalence of never smoker, former smoker, smoker, ecig/dual

for total pop, male, female, black

In [25]:
# status quo
outputs = outputs_dirs[0]
collection_list = []

# for each arr, store a 2D array in the list
for f in sorted(glob(outputs + "/*.npy")):
    arr = np.load(f)
    arr = arr[[0, 1, 5,]] # get the years we are interested in
    arr = arr[:,:,:,:-1] # don't need dead people
    sums = np.sum(arr, axis=(1,2,3)) # total count for each year
    arr = arr / sums[:, np.newaxis, np.newaxis, np.newaxis] # get proportions
    collection_list.append(arr)

collection_list = np.array(collection_list)

In [26]:
# sample, year, black, pov, state
print(collection_list.shape)

(27, 3, 2, 2, 1, 6)


In [27]:
assert False
# as of 4/30/2024 the below cell is failing

AssertionError: 

In [None]:
USpop = collection_list[:,:,:,:,:].sum(axis=(2,3))
Bpop = collection_list[:,:,1,:,:].sum(axis=2)
print(USpop.shape)
print(USpop.shape)

# combine smokers

USpop[:,:,2] += USpop[:,:,3]
Bpop[:,:,2] += Bpop[:,:,3]

USpop = USpop[:,:,[0,1,2,4]]
Bpop = Bpop[:,:,[0,1,2,4]]

print(USpop.shape)
print(USpop.shape)

(125, 3, 1, 6)
(125, 3, 1, 6)


IndexError: index 2 is out of bounds for axis 2 with size 1

In [None]:
# black population denominator should not be whole population

if np.sum(Bpop, axis=2)[0,0] != 1:
    Bpop /= np.sum(Bpop, axis=2)[:,:,np.newaxis]

print(np.sum(Bpop, axis=2)[:5])

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
for pop, name in zip([USpop, Bpop], ["US Population", "Black NH Population"]):

    mean_results = np.zeros_like(pop[0])
    upper_bound = np.zeros_like(pop[0])
    lower_bound = np.zeros_like(pop[0])

    for i in range(pop.shape[1]):
        for j in range(pop.shape[2]):
            mean = np.mean(pop[:,i,j])
            upper = np.percentile(pop[:,i,j], 97.5)
            lower = np.percentile(pop[:,i,j], 2.5)

            mean_results[i,j] = mean
            upper_bound[i,j] = upper
            lower_bound[i,j] = lower
    
    # make a table

    mean_results = np.around(mean_results * 100, decimals=1)
    upper_bound = np.around(upper_bound * 100, decimals=1)
    lower_bound = np.around(lower_bound * 100, decimals=1)

    header = ["", "2016", "2017", "2021"]

    r1 = ["Never Smoker"]
    r2 = ["Former Smoker"]
    r3 = ["Cigarette Smoker"]
    r4 = ["Ecig/Dual"]

    rows = [r1, r2, r3, r4]

    for i, r in enumerate(rows):
        for m, ub, lb in zip(mean_results[:,i], upper_bound[:,i], lower_bound[:,i]):
            rows[i] += [f"{m}%, ({lb}, {ub})"]
    
    tab = [header] + rows
    print(" ")
    print(" ")
    print(f"Smoking Prevalences, Status Quo Scenario, {name}")
    printtab(tab)

    

IndexError: index 1 is out of bounds for axis 1 with size 1

# same but for CSV readability

In [None]:
print("CSV values to be read into software")
print("All values in this table are percentages")
print("Status Quo scenario only, to be compared to BRFSS for validation")
print("In column titles, M = mean, LB = lower bound, UB = upper bound (95% confidence intervals)")
print("All numbers are rounded to 5 decimal places which should be more than we want for the final table")
for pop, name in zip([USpop, Bpop], ["US Population", "Black NH Population"]):

    mean_results = np.zeros_like(pop[0])
    upper_bound = np.zeros_like(pop[0])
    lower_bound = np.zeros_like(pop[0])

    for i in range(pop.shape[1]):
        for j in range(pop.shape[2]):
            mean = np.mean(pop[:,i,j])
            upper = np.percentile(pop[:,i,j], 97.5)
            lower = np.percentile(pop[:,i,j], 2.5)

            mean_results[i,j] = mean
            upper_bound[i,j] = upper
            lower_bound[i,j] = lower
    
    # make a table

    mean_results = np.around(mean_results * 100, decimals=5)
    upper_bound = np.around(upper_bound * 100, decimals=5)
    lower_bound = np.around(lower_bound * 100, decimals=5)

    header = ["2016", "2017", "2021"]
    new_header = ["group,"]
    for e in header:
        new_header.append(e + "M,")
        new_header.append(e + "LB,")
        new_header.append(e + "UB,") 

    r1 = ["NeverSmoker,"]
    r2 = ["FormerSmoker,"]
    r3 = ["CigaretteSmoker,"]
    r4 = ["Ecig/Dual,"]

    rows = [r1, r2, r3, r4]

    for i, r in enumerate(rows):
        for m, ub, lb in zip(mean_results[:,i], upper_bound[:,i], lower_bound[:,i]):
            rows[i] += [f"{m},", f"{lb},", f"{ub},"]
    
    tab = [new_header] + rows
    print(" ")
    print(" ")
    print(f"Smoking Prevalences, Status Quo Scenario, {name}")
    print(tabulate(tab))

CSV values to be read into software
All values in this table are percentages
Status Quo scenario only, to be compared to BRFSS for validation
In column titles, M = mean, LB = lower bound, UB = upper bound (95% confidence intervals)
All numbers are rounded to 5 decimal places which should be more than we want for the final table


IndexError: index 1 is out of bounds for axis 1 with size 1

# NSDUH Validation

2016-2020

menthol cig use among smokers

US pop, black NH, In poverty


In [None]:
# status quo
outputs = outputs_dirs[0]
collection_list = []

# for each arr, store a 2D array in the list
for f in sorted(glob(outputs + "/*.npy")):
    arr = np.load(f)
    arr = arr[[0, 1, 2, 3, 4]] # get the years we are interested in
    arr = arr[:,:,:,:-1] # don't need dead people
    collection_list.append(arr)

collection_list = np.array(collection_list)

In [None]:
collection_list.shape

(125, 5, 2, 2, 1, 6)

In [None]:
USprops = collection_list[:,:,:,:,2].sum(axis=(2,3)) / collection_list[:,:,:,:,2:4].sum(axis=(2,3,4))
print(USprops.shape)
Bprops = collection_list[:,:,1,:,2].sum(axis=2) / collection_list[:,:,1,:,2:4].sum(axis=(2,3))
print(Bprops.shape)
Pprops = collection_list[:,:,:,1,2].sum(axis=2) / collection_list[:,:,:,1,2:4].sum(axis=(2,3))
print(Pprops.shape)


IndexError: index 2 is out of bounds for axis 4 with size 1

In [None]:
all_props = np.concatenate([
    USprops[:,np.newaxis,:],
    Bprops[:,np.newaxis,:],
    Pprops[:,np.newaxis,:],
], axis=1)
print(all_props.shape)

NameError: name 'USprops' is not defined

In [None]:
mean_results = np.zeros_like(all_props[0])
upper_bound = np.zeros_like(all_props[0])
lower_bound = np.zeros_like(all_props[0])

for i in range(all_props.shape[1]):
    for j in range(all_props.shape[2]):
        mean = np.mean(all_props[:,i,j])
        upper = np.percentile(all_props[:,i,j], 97.5)
        lower = np.percentile(all_props[:,i,j], 2.5)

        mean_results[i,j] = mean
        upper_bound[i,j] = upper
        lower_bound[i,j] = lower

# make a table

mean_results = np.around(mean_results * 100, decimals=1)
upper_bound = np.around(upper_bound * 100, decimals=1)
lower_bound = np.around(lower_bound * 100, decimals=1)

header = ["", "2016", "2017", "2018", "2019", "2020"]

r1 = ["US Population"]
r2 = ["Black NH"]
r3 = ["Poverty"]

rows = [r1, r2, r3,]

for i, r in enumerate(rows):
    for m, ub, lb in zip(mean_results[i], upper_bound[i], lower_bound[i]):
        rows[i] += [f"{m}%, ({lb} {ub})"]

tab = [header] + rows
print(" ")
print(" ")
print(f"Prevalence of Menthol Cigarette Smoking among Cigarette Smokers, Status Quo Scenario")
printtab(tab)

NameError: name 'all_props' is not defined

In [None]:
print("CSV values to be read into software")
print("All values in this table are percentages")
print("Status Quo scenario only, to be compared to NSDUH for validation")
print("In column titles, M = mean, LB = lower bound, UB = upper bound (95% confidence intervals)")
print("All numbers are rounded to 5 decimal places which should be more than we want for the final table")
mean_results = np.zeros_like(all_props[0])
upper_bound = np.zeros_like(all_props[0])
lower_bound = np.zeros_like(all_props[0])

for i in range(all_props.shape[1]):
    for j in range(all_props.shape[2]):
        mean = np.mean(all_props[:,i,j])
        upper = np.percentile(all_props[:,i,j], 97.5)
        lower = np.percentile(all_props[:,i,j], 2.5)

        mean_results[i,j] = mean
        upper_bound[i,j] = upper
        lower_bound[i,j] = lower

# make a table

mean_results = np.around(mean_results * 100, decimals=5)
upper_bound = np.around(upper_bound * 100, decimals=5)
lower_bound = np.around(lower_bound * 100, decimals=5)

header = ["2016", "2017", "2018", "2019", "2020"]
new_header = ["group,"]
for e in header:
    new_header.append(e + "M,")
    new_header.append(e + "LB,")
    new_header.append(e + "UB,") 

r1 = ["USPopulation,"]
r2 = ["BlackPopulation,"]
r3 = ["PovPopulation,"]

rows = [r1, r2, r3,]

for i, r in enumerate(rows):
    for m, ub, lb in zip(mean_results[i], upper_bound[i], lower_bound[i]):
        rows[i] += [f"{m},", f"{lb},", f"{ub},"]

tab = [new_header] + rows
print(" ")
print(" ")
print(f"Prevalence of Menthol Cigarette Smoking among Cigarette Smokers, Status Quo Scenario")
print(tabulate(tab))

CSV values to be read into software
All values in this table are percentages
Status Quo scenario only, to be compared to NSDUH for validation
In column titles, M = mean, LB = lower bound, UB = upper bound (95% confidence intervals)
All numbers are rounded to 5 decimal places which should be more than we want for the final table


NameError: name 'all_props' is not defined