In [None]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import warnings
# Install a global filter that hides pandas PerformanceWarning in every later cell.
# NOTE(review): presumably these warnings come from pynhanes while it assembles
# wide DataFrames -- confirm before relying on this filter.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import pynhanes

## User-defined dictionary of combined variables

Normally, data analysis does not require all variables from each NHANES category. 

`pynhanes` expects the user to provide a manually created dictionary of the needed variables, mapping human-readable names to the corresponding combinations of variable codes. This can be done in either of two ways:

a) Hardcoded explicitly (see next cell), or

b) Provided in a .json file (see https://github.com/timpyrkov/pynhanes/blob/master/scripts/nhanes_variables.json)

* To use mortality events and time-to-event variables and carry out survival analysis: download the Linked Mortality Files manually from the FTP site https://www.cdc.gov/nchs/data-linkage/mortality-public.htm to the folder where you store the .XPT files.

In [None]:
""" Provide selected variables combinations explicitly: """
# Human-readable variable names mapped to the NHANES variable codes that are
# combined under each name (a name may list several codes).
variables = {
    # Participant and survey descriptors
    "Age": ["RIDAGEYR"],
    "Gender": ["RIAGENDR"],
    "Survey": ["SDDSRVYR"],
    # Mortality variables are disabled here; they need the Linked Mortality
    # Files to be downloaded manually first.
    # "Mortality event": ["MORTSTAT"],
    # "Mortality tte": ["PERMTH_INT"],
    # Body measures and general health
    "BMI (kg/m2)": ["BMXBMI"],
    "Health general": ["HSD010"],
    # Smoking items, three codes per variable
    "Smoking status": ["SMQ020", "SMQ120", "SMQ150"],
    "Smoking regularly": ["SMD030", "SMD130", "SMD160"],
    "Smoking now": ["SMQ040", "SMQ140", "SMQ170"],
}

# Alternative: read the mapping from a .json file instead, for example:
# variables = pynhanes.loader.load_variables("./CSV/nhanes_variables.json")

## Load Codebook and find missing .XPT files

In [None]:
""" 1. Load Codebook """
# Build the CodeBook from the CSV and keep only its `.data` attribute, which
# is the object handed to the pynhanes.userdata helpers.
path = "./CSV/nhanes_codebook.csv"
codebook = pynhanes.CodeBook(path).data

# 2. Print the XPT files that still need to be loaded.
# Each XPT can be downloaded from the NHANES website with the wgetxpt.py
# script, e.g. to download the DEMO category run the commands
#   > mkdir XPT
#   > python ./wgetxpt.py DEMO --out XPT
xpts = pynhanes.userdata.list_xpts_missing(variables, codebook, folder="XPT")
if len(xpts) == 0:
    print("OK")
else:
    print("ERROR! Use wgetxpt.py to download:", xpts)

## Load and parse .XPT files

In [None]:
""" 1. Load and combine XPT """
# The loaded XPT data is passed straight into step 2, which converts it to a
# pandas DataFrame with multiindex columns.
data = pynhanes.userdata.processing(
    variables,
    codebook,
    pynhanes.userdata.load_data(variables, codebook, folder="XPT"),
)

# Write a semicolon-separated copy to disk, then show the frame as cell output.
data.to_csv("./CSV/nhanes_userdata.csv", sep=";")
data