In [1]:
import pandas as pd
import numpy as np

from NHANES_data_API import NHANESDataAPI

In [2]:
# Smoke test: create the API client and show the data categories it reports.
nhanes_api = NHANESDataAPI()
categories = nhanes_api.list_data_categories()
# A single print call; sep="\n" keeps the heading and the list on separate lines.
print("Available NHANES data categories:", categories, sep="\n")


Available NHANES data categories:
['demographics', 'dietary', 'examination', 'laboratory', 'questionnaire', 'limitedaccess']


In [9]:
# Query the 'demographics' variables over the '1999-2005' range; the call prints each
# cycle it resolves together with the variable-table columns (see recorded output).
# Judging by the result names, the first two values are presumably the variables
# present in every cycle vs. the remaining ones — confirm against NHANESDataAPI.
common_variables, uncommon_variables, variable_cycles_dict = nhanes_api.common_variables('demographics', '1999-2005')


Valid Cycle: 1999-2000
Variable Table Columns: Index(['Variable Name', 'Variable Description', 'Data File Name',
       'Data File Description', 'Years'],
      dtype='object')
Valid Cycle: 2001-2002
Variable Table Columns: Index(['Variable Name', 'Variable Description', 'Data File Name',
       'Data File Description', 'Years'],
      dtype='object')
Valid Cycle: 2003-2004
Variable Table Columns: Index(['Variable Name', 'Variable Description', 'Data File Name',
       'Data File Description', 'Years'],
      dtype='object')
Valid Cycle: 2005-2006
Variable Table Columns: Index(['Variable Name', 'Variable Description', 'Data File Name',
       'Data File Description', 'Years'],
      dtype='object')


In [10]:
# Inspect the third result: in the recorded output it maps each variable name to the
# list of cycles in which that variable was found.
variable_cycles_dict

{'DMAETHN': ['1999-2000'],
 'DMARACE': ['1999-2000'],
 'DMDBORN': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDCITZN': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDEDUC': ['1999-2000', '2001-2002', '2003-2004'],
 'DMDEDUC2': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDEDUC3': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDHHSIZ': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDHRAGE': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDHRBRN': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDHREDU': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDHRGND': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDHRMAR': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDHSEDU': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDMARTL': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDSCHOL': ['1999-2000', '2001-2002', '2003-2004', '2005-2006'],
 'DMDYRSUS': ['1999-200

In [3]:
# Fetch the variable table for the first listed category ('demographics' in the
# category listing printed earlier) and display it.
# NOTE(review): _retrieve_variable_table is underscore-prefixed (private by
# convention) — prefer a public accessor if NHANESDataAPI provides one.
variable_table = nhanes_api._retrieve_variable_table(categories[0])
variable_table

Unnamed: 0,Variable Name,Variable Description,Data File Name,Data File Description,Years
0,AIALANG,Language of the MEC ACASI Interview Instrument,DEMO_D,Demographic Variables & Sample Weights,2005-2006
1,DMDBORN,In what country {were you/was SP} born?,DEMO_D,Demographic Variables & Sample Weights,2005-2006
2,DMDCITZN,{Are you/Is SP} a citizen of the United States...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
3,DMDEDUC2,(SP Interview Version) What is the highest gra...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
4,DMDEDUC3,(SP Interview Version) What is the highest gra...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
...,...,...,...,...,...
537,SIAINTRP,Was an interpreter used to conduct the Sample ...,DEMO_J,Demographic Variables & Sample Weights,2017-2018
538,SIALANG,Language of the Sample Person Interview Instru...,DEMO_J,Demographic Variables & Sample Weights,2017-2018
539,SIAPROXY,Was a Proxy respondent used in conducting the ...,DEMO_J,Demographic Variables & Sample Weights,2017-2018
540,WTINT2YR,Full sample 2 year interview weight.,DEMO_J,Demographic Variables & Sample Weights,2017-2018


In [4]:
# List the data files available for the first category; judging by the recorded
# output these are data file descriptions, not XPT file names.
nhanes_api.list_file_names(categories[0])

['Demographic Variables & Sample Weights']

In [5]:
# Map each cycle to the data file name of one dietary-supplement file.
# The lookup needs the dietary variable table: the `variable_table` built earlier in
# this notebook comes from categories[0] ('demographics'), whose only data file is
# 'Demographic Variables & Sample Weights', so reusing it here only worked through
# leftover kernel state and would not survive Restart & Run All.
# NOTE(review): assumes this file is listed under the 'dietary' category — confirm
# with nhanes_api.list_file_names('dietary').
dietary_variable_table = nhanes_api._retrieve_variable_table('dietary')
nhanes_api.retrieve_cycle_data_file_name_mapping(
    variable_table=dietary_variable_table,
    file_name='Dietary Supplement Use 30-Day - Individual Dietary Supplements',
)

{'2013-2014': 'DSQIDS_H', '2015-2016': 'DSQIDS_I', '2017-2018': 'DSQIDS_J'}

In [5]:

def get_data_filename(data_category, cycle_year, data_file_description, variable_table=None):
    """
    Get the data file name for a specific cycle year and data file description.

    Args:
    data_category (str): The NHANES data category whose variable table is searched
        (e.g. 'demographics', 'dietary'). Only used when variable_table is omitted.
    cycle_year (str): The year or cycle for which data is requested; compared for
        exact equality against the table's 'Years' column (e.g. '1999-2000').
    data_file_description (str): The data file description; compared for exact
        equality against the table's 'Data File Description' column.
    variable_table (pandas.DataFrame, optional): An already-retrieved variable table
        to search. When omitted, the table for data_category is fetched through the
        notebook-level nhanes_api client.

    Returns:
    str or None: The 'Data File Name' of the first matching row, or None when no
    row matches.
    """
    if variable_table is None:
        variable_table = nhanes_api._retrieve_variable_table(data_category)

    # Nothing to search (also covers a frame that has no columns at all).
    if variable_table.empty:
        return None

    # One vectorized comparison instead of a per-row Python loop.
    is_match = (
        (variable_table['Years'] == cycle_year)
        & (variable_table['Data File Description'] == data_file_description)
    )
    matching_names = variable_table.loc[is_match, 'Data File Name']

    if matching_names.empty:
        return None

    # Positional access: works whatever the index labels are, unlike column[i].
    return matching_names.iloc[0]

In [6]:
# Spot-check the helper: look up the data file name for one file description in the
# second listed category ('dietary' in the category listing) for the 1999-2000 cycle.
get_data_filename(data_category=categories[1], cycle_year= "1999-2000", data_file_description="Dietary Interview - Individual Foods")

'DRXIFF'

In [9]:
#get data 
def retrieve_data(data_category, cycle, filename):
    """
    Download one NHANES data file and return it as a DataFrame.

    Args:
    data_category (str): The NHANES data category the file belongs to.
    cycle (str): The cycle, e.g. '1999-2000'; used both for the file-name lookup
        and as a path segment of the download URL.
    filename (str): The data file description (not the XPT file name); it is
        resolved to a file name through get_data_filename.

    Returns:
    pandas.DataFrame: The file contents as parsed by pandas.read_sas.

    Raises:
    ValueError: If no data file matches the category, cycle and description.
    """
    data_file_name = get_data_filename(
        data_category=data_category,
        cycle_year=cycle,
        data_file_description=filename,
    )

    # Without this guard a failed lookup would request ".../None.XPT" and surface
    # as an opaque download error instead of pointing at the bad arguments.
    if data_file_name is None:
        raise ValueError(
            f"No data file found for category {data_category!r}, cycle {cycle!r} "
            f"and description {filename!r}"
        )

    # NOTE(review): the URL pattern is hard-coded — confirm the CDC site still
    # serves files under this path.
    data_url = f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle}/{data_file_name}.XPT"
    return pd.read_sas(data_url)

In [10]:
# Download the 1999-2000 "Dietary Interview - Individual Foods" file and display it.
# NOTE(review): this fetches the file from the CDC server on every run and shows the
# whole frame (over 127k rows in the recorded output) — consider keeping the result
# in a named variable and displaying .head() instead.
retrieve_data(data_category=categories[1], cycle= "1999-2000", filename="Dietary Interview - Individual Foods")

Unnamed: 0,SEQN,WTDRD1,WTDR4YR,DRXILINE,DRDDRSTS,DRDINTMD,DRDDAY,DRALANG,DRXCCMNM,DRDCCMTY,...,DRXIM181,DRXIM201,DRXIM221,DRXIP182,DRXIP183,DRXIP184,DRXIP204,DRXIP205,DRXIP225,DRXIP226
0,1.0,14809.893854,6066.128663,1.0,1.0,1.0,4.0,1.0,1.0,5.000000e+00,...,4.000000e-02,5.397605e-79,5.397605e-79,1.000000e-01,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
1,1.0,14809.893854,6066.128663,2.0,1.0,1.0,4.0,1.0,1.0,5.000000e+00,...,1.180000e+00,5.397605e-79,5.397605e-79,1.000000e-01,7.000000e-02,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
2,1.0,14809.893854,6066.128663,3.0,1.0,1.0,4.0,1.0,,5.397605e-79,...,2.400000e-01,5.397605e-79,5.397605e-79,3.300000e-01,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
3,1.0,14809.893854,6066.128663,4.0,1.0,1.0,4.0,1.0,,5.397605e-79,...,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
4,1.0,14809.893854,6066.128663,5.0,1.0,1.0,4.0,1.0,2.0,2.000000e+01,...,9.000000e-02,5.397605e-79,5.397605e-79,2.200000e-01,3.000000e-02,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127835,9965.0,21564.978106,12163.017290,18.0,1.0,1.0,5.0,1.0,,5.397605e-79,...,2.200000e-01,8.000000e-02,2.000000e-02,1.000000e-02,1.000000e-02,2.000000e-02,3.000000e-02,1.400000e-01,1.000000e-02,1.400000e-01
127836,9965.0,21564.978106,12163.017290,19.0,1.0,1.0,5.0,1.0,5.0,8.000000e+00,...,9.100000e+00,1.000000e-02,5.397605e-79,4.710000e+00,3.500000e-01,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
127837,9965.0,21564.978106,12163.017290,20.0,1.0,1.0,5.0,1.0,5.0,8.000000e+00,...,1.460000e+00,5.397605e-79,5.397605e-79,1.200000e-01,8.000000e-02,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
127838,9965.0,21564.978106,12163.017290,21.0,1.0,1.0,5.0,1.0,,5.397605e-79,...,1.100000e+00,5.397605e-79,5.397605e-79,1.000000e-01,6.000000e-02,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79


In [4]:
# Expand a year range into the two-year cycles it covers; in the recorded output the
# end year 2015 falls inside '2015-2016', which is included in the result.
nhanes_api.check_cycle("2005-2015")

['2005-2006', '2007-2008', '2009-2010', '2011-2012', '2013-2014', '2015-2016']