In [14]:
"""
We have realized that we need to have a post-ban cohort of 18 year olds to add to the simulation 
Before, we were adding cohorts of 18 year olds that were smoking at pre-ban levels. 

The new cohort will be determined by copying the old cohort and modifying it:
Anybody who is a current menthol smoker will have their current state changed:
60% ecig
20% nonmenthol smoker
20% never smokers

Then anybody who was a previous menthol smoker will have their state changed according to the same rule
"""

'\nWe have realized that we need to have a post-bad cohort of 18 yearolds to add to the simulation \nBefore, we were adding cohorts of 18 year olds that were smoking at pre-ban levels. \n\nThe new cohort will be determined by copying the old cohort and modifying it:\nAnybody who is a current menthol smoker will have their current state changed:\n60% ecig\n20% nonmenthol smoker\n20% never smokers\n\nThen anybody who was a previous menthol smoker will have their state changed according to the same rule\n'

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from simulation import Simulation

In [16]:
# Load the fresh 18-year-old population profiles, keyed by year, as NumPy arrays.
cohorts_dir = os.path.join("..", "..", "corrected_18yo_cohorts")
wave_files = {
    2015: "Wave 2 fresh population profile.xlsx",
    2016: "Wave 3 fresh population profile.xlsx",
    2017: "Wave 4 fresh population profile.xlsx",
}
cohorts_18_dict = {
    year: pd.read_excel(os.path.join(cohorts_dir, filename)).to_numpy()
    for year, filename in wave_files.items()
}

In [17]:
"""
current column indexing:

0 sex	
1 black	
2 state_3	
3 state_4	
4 initiation_age_grp	
5 poverty	
6 weighted	
7 count

We have 6 states in this simulation:
    1 -> never smoker
    2 -> former smoker
    3 -> menthol smoker
    4 -> nonmenthol smoker
    5 -> ecig
    6 -> dead
"""
# Use the 2017 cohort as the template that the following cells copy and modify
cohort_arr = cohorts_18_dict[2017]

In [18]:
# Pass through and change the current smoking state of all the menthol smokers.
#
# Each row whose current state (column 3, "state_4") is menthol smoker (3) is
# removed and replaced by three rows that split its weight (column 6) and
# count (column 7):
#   60% -> ecig (5)
#   20% -> nonmenthol smoker (4)
#   20% -> never smoker (1), with the initiation age group (column 4) reset to 0
# Untouched rows keep their original order; the replacement rows are appended
# after them, in the order the menthol rows appeared.

current_is_menthol = cohort_arr[:, 3] == 3

# Start with the rows that are not current menthol smokers, then collect one
# (3, n_columns) block of replacement rows per menthol row.
current_pass_blocks = [cohort_arr[~current_is_menthol]]
for menthol_row in cohort_arr[current_is_menthol]:
    overall_weight = menthol_row[6]  # total weight of the row
    overall_count = menthol_row[7]  # number of individuals represented in the row
    new_rows = np.tile(menthol_row, (3, 1))

    # 60% ecig
    new_rows[0, 3] = 5
    new_rows[0, 6] = overall_weight * 0.6
    new_rows[0, 7] = overall_count * 0.6

    # 20% non-menthol smoker (state 4; this was previously written as 1, which
    # turned this share into never smokers instead)
    new_rows[1, 3] = 4
    new_rows[1, 6] = overall_weight * 0.2
    new_rows[1, 7] = overall_count * 0.2

    # 20% never smoker
    new_rows[2, 3] = 1
    new_rows[2, 4] = 0  # initiation age
    new_rows[2, 6] = overall_weight * 0.2
    new_rows[2, 7] = overall_count * 0.2

    current_pass_blocks.append(new_rows)

# Boolean-mask indexing and np.tile both return copies, so cohort_arr itself
# is left unmodified.
new_cohort_arr = np.concatenate(current_pass_blocks, axis=0)


In [19]:
# Shape check after the current-state pass (each menthol row was replaced by three rows)
print(new_cohort_arr.shape)

(63, 8)


In [20]:

# Pass through and change the previous smoking state of all the menthol smokers.
#
# Each row whose previous state (column 2, "state_3") is menthol smoker (3) is
# removed and replaced by three rows that split its weight (column 6) and
# count (column 7):
#   60% -> ecig (5)
#   20% -> nonmenthol smoker (4)
#   20% -> never smoker (1)
# The initiation age group is not touched in this pass. Untouched rows keep
# their order; the replacement rows are appended after them, in the order the
# menthol rows appeared.

previous_is_menthol = new_cohort_arr[:, 2] == 3

# Start with the rows whose previous state is not menthol, then collect one
# (3, n_columns) block of replacement rows per menthol row.
previous_pass_blocks = [new_cohort_arr[~previous_is_menthol]]
for menthol_row in new_cohort_arr[previous_is_menthol]:
    overall_weight = menthol_row[6]  # total weight of the row
    overall_count = menthol_row[7]  # number of individuals represented in the row
    new_rows = np.tile(menthol_row, (3, 1))

    # 60% ecig
    new_rows[0, 2] = 5
    new_rows[0, 6] = overall_weight * 0.6
    new_rows[0, 7] = overall_count * 0.6

    # 20% non-menthol smoker (state 4; this was previously written as 1, which
    # produced a second never-smoker row instead)
    new_rows[1, 2] = 4
    new_rows[1, 6] = overall_weight * 0.2
    new_rows[1, 7] = overall_count * 0.2

    # 20% never smoker
    new_rows[2, 2] = 1
    new_rows[2, 6] = overall_weight * 0.2
    new_rows[2, 7] = overall_count * 0.2

    previous_pass_blocks.append(new_rows)

# Both the mask and the menthol rows were taken from the old array before this
# rebinding, so the result does not depend on evaluation order.
new_cohort_arr = np.concatenate(previous_pass_blocks, axis=0)


In [21]:
# Shape check after the previous-state pass (each remaining menthol row was replaced by three rows)
print(new_cohort_arr.shape)

(97, 8)


In [22]:

# Wrap the modified cohort array in a DataFrame, restoring the column names
# in the same order as the array's column indexing.
column_names = [
    "sex",
    "black",
    "state_3",
    "state_4",
    "initiation_age_grp",
    "poverty",
    "weighted",
    "count",
]
df = pd.DataFrame(new_cohort_arr, columns=column_names)

# Write the post-ban profile to an Excel file, without the index column.
savepath = os.path.join("..", "..", "corrected_18yo_cohorts", "postban population profile.xlsx")
df.to_excel(savepath, index=False)