In [None]:
# (Re)load IPython's autoreload extension and select mode 2, so that edits to
# imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

import numpy as np
import pylab as plt
import pynhanes

## User-defined dictionary of combined variables

Normally, data analysis does not require all variables from each NHANES category. 

`pynhanes` expects the user to provide a manually created dictionary of the needed variables, mapping each human-readable name to the corresponding combination of variable codes. This can be done in either of two ways:

a) Hardcoded explicitly (see next cell), or

b) Provided in a .json file (see https://github.com/timpyrkov/pynhanes/blob/master/scripts/nhanes_variables.json)



In [None]:
""" Provide selected variables combinations explicitly: """
# Each key is a human-readable field name; each value is the list of NHANES
# variable codes that are combined into that field.
variables = {
    # Demographics and survey
    "Age": ["RIDAGEYR"],
    "Gender": ["RIAGENDR"],
    "Survey": ["SDDSRVYR"],
    # Mortality follow-up
    "Mortality event": ["MORTSTAT"],
    "Mortality tte": ["PERMTH_INT"],
    # Self-reported general health
    "Health general": ["HSD010"],
    # Smoking questionnaire: several codes are combined per field
    "Smoking status": [
        "SMQ020",
        "SMQ120",
        "SMQ150",
    ],
    "Smoking regularly": [
        "SMD030",
        "SMD130",
        "SMD160",
    ],
    "Smoking now": [
        "SMQ040",
        "SMQ140",
        "SMQ170",
    ],
}

# Alternatively, read the mapping from a .json file, for example:
# variables = pynhanes.load_variables("./CSV/nhanes_variables.json")

## Load userdata

Load data from default places:

- `nhanes_userdata.csv` and `nhanes_codebook.csv` in subfolder `./CSV`

- `nhanes_counts.npz` and `nhanes_triax.npz` in subfolder `./NPZ`


In [None]:
# Build the codebook from the user-defined `variables` mapping.
codebook = pynhanes.CodeBook(variables=variables)
# No arguments are passed, so the loader uses its defaults; per the notebook
# text these are the ./CSV and ./NPZ subfolders.
# NOTE(review): confirm the default paths against pynhanes.NhanesLoader.
nhanes = pynhanes.NhanesLoader()

In [None]:
# Show the public names exposed by the package. This replaces an unfinished
# `pynhanes.` expression, which is a syntax error and stops "Run All".
[name for name in dir(pynhanes) if not name.startswith("_")]

## Plot fraction of smokers/nonsmokers along lifespan

The corresponding data field was named `Smoking status` in the `variables` dictionary above.

In [None]:
# Fetch the "Age" and "Smoking status" fields from the loaded user data.
age = nhanes.userdata("Age")
status = nhanes.userdata("Smoking status")
# Codebook entry for the same field, passed to the plot as `labels`.
# NOTE(review): presumably maps category codes to readable labels — confirm
# against pynhanes.CodeBook.
labels = codebook.dict["Smoking status"]

# Create the figure first; plot_age_fraction receives no figure/axes argument,
# so it presumably draws on the current pyplot figure — TODO confirm.
plt.figure(figsize=(8,4), facecolor="white")
pynhanes.plot_age_fraction(status, age, labels=labels)
plt.xlabel("Age")
plt.ylabel("Population fraction")
plt.show()