In [1]:
import pandas as pd
import numpy as np

from NHANES_data_API import NHANESDataAPI

In [2]:
# Create the API client and show the NHANES data categories it reports.
# `nhanes_api` and `categories` are reused by the cells further down.
nhanes_api = NHANESDataAPI()
categories = nhanes_api.list_data_categories()
print("Available NHANES data categories:", categories, sep="\n")


Available NHANES data categories:
['demographics', 'dietary', 'examination', 'laboratory', 'questionnaire', 'limitedaccess']


In [3]:
# Fetch the variable table for categories[1] ('dietary' in the category list
# printed by the previous cell) and display it.
# NOTE(review): `_retrieve_variable_table` is underscore-prefixed, i.e. private
# by naming convention — confirm whether the API offers a public equivalent.
variable_table = nhanes_api._retrieve_variable_table(categories[1])
variable_table

Unnamed: 0,Variable Name,Variable Description,Data File Name,Data File Description,Years
0,DRALANG,The SP/Proxy spoke mostly:,DRXIFF,Dietary Interview - Individual Foods,1999-2000
1,DRD020,What time did you begin to eat/drink the meal/...,DRXIFF,Dietary Interview - Individual Foods,1999-2000
2,DRD030,Coded meal name,DRXIFF,Dietary Interview - Individual Foods,1999-2000
3,DRD040,List of places: Where did you eat this meal/food?,DRXIFF,Dietary Interview - Individual Foods,1999-2000
4,DRDCCMTY,Combination food type,DRXIFF,Dietary Interview - Individual Foods,1999-2000
...,...,...,...,...,...
6263,DSQIVD,Vitamin D (D2 + D3) (mcg),DSQIDS_J,Dietary Supplement Use 30-Day - Individual Die...,2017-2018
6264,DSQIVK,Vitamin K (mcg),DSQIDS_J,Dietary Supplement Use 30-Day - Individual Die...,2017-2018
6265,DSQIZINC,Zinc (mg),DSQIDS_J,Dietary Supplement Use 30-Day - Individual Die...,2017-2018
6266,RXQ215A,"Did you take {PRODUCT NAME} as an antacid, as ...",DSQIDS_J,Dietary Supplement Use 30-Day - Individual Die...,2017-2018


In [4]:
# List the file names the API reports for the same category (categories[1]).
# The returned strings match the 'Data File Description' values shown in the
# variable table above, not the short 'Data File Name' codes.
nhanes_api.list_file_names(categories[1])

['Dietary Interview - Individual Foods',
 'Dietary Interview - Total Nutrient Intakes',
 'Dietary Supplement Use 30-Day - File 1, Supplement Counts',
 'Food Frequency Questionnaire - Raw Questionnaire Responses',
 'Food Frequency Questionnaire - Output from DietCalc Software',
 'Dietary Interview Technical Support File - Food Codes',
 "Dietary Supplement Use 30-Day - File 2, Participant's Use of Supplements",
 'Dietary Interview - Total Nutrient Intakes, First Day',
 'Dietary Interview - Total Nutrient Intakes, Second Day',
 'Dietary Interview - Individual Foods, Second Day',
 'Dietary Supplement Use 24-Hour - Total Dietary Supplements, Second Day',
 'Dietary Screener Questionnaire',
 'Dietary Interview - Individual Foods, First Day',
 'Dietary Supplement Use 24-Hour - Individual Dietary Supplements, First Day',
 'Dietary Supplement Use 24-Hour - Individual Dietary Supplements, Second Day',
 'Dietary Supplement Use 30 Day - Individual Dietary Supplements',
 'Dietary Supplement Use 30-D

In [5]:
# For a single file description, look up which data file name is used in each
# cycle; the result is a dict keyed by cycle (e.g. '2013-2014' -> 'DSQIDS_H').
nhanes_api.retrieve_cycle_data_file_name_mapping(variable_table=variable_table, file_name='Dietary Supplement Use 30-Day - Individual Dietary Supplements')

{'2013-2014': 'DSQIDS_H', '2015-2016': 'DSQIDS_I', '2017-2018': 'DSQIDS_J'}

In [5]:

def get_data_filename(data_category, cycle_year, data_file_description, variable_table=None):
    """
    Get the data file name for a specific cycle year and data file description.

    Args:
    data_category (str): The NHANES data category whose variable table is
        searched (one of the values from ``nhanes_api.list_data_categories()``).
        Only used when ``variable_table`` is not supplied.
    cycle_year (str): The year or cycle for which data is requested.
    data_file_description (str): The data file description.
    variable_table (pandas.DataFrame, optional): An already-retrieved variable
        table with 'Years', 'Data File Description' and 'Data File Name'
        columns. When omitted, the table is fetched with
        ``nhanes_api._retrieve_variable_table(data_category)``.

    Returns:
    str or None: The data file name of the first row matching both the cycle
    and the description, or None when no row matches.
    """
    if variable_table is None:
        variable_table = nhanes_api._retrieve_variable_table(data_category)

    # Vectorized comparison instead of a Python loop over row labels: this is
    # independent of the table's index (the label-based lookup
    # `table[col][i]` fails on any index other than 0..n-1) and avoids
    # thousands of per-row Series lookups.
    matches = (
        (variable_table['Years'] == cycle_year)
        & (variable_table['Data File Description'] == data_file_description)
    )
    matching_names = variable_table.loc[matches, 'Data File Name']

    if matching_names.empty:
        return None

    # Every variable of a file repeats the same file name, so the first match
    # is returned (same result as the original first-hit loop).
    return matching_names.iloc[0]

In [6]:
# Spot-check the lookup: the 1999-2000 "Dietary Interview - Individual Foods"
# file should resolve to 'DRXIFF', as listed in the variable table above.
get_data_filename(data_category=categories[1], cycle_year= "1999-2000", data_file_description="Dietary Interview - Individual Foods")

'DRXIFF'

In [9]:
#get data 
def retrieve_data(data_category, cycle, filename):
    """
    Download one NHANES data file from the CDC site and load it as a DataFrame.

    Args:
    data_category (str): The NHANES data category used to look up the file.
    cycle (str): The survey cycle (e.g. "1999-2000"); used both for the lookup
        and as the path segment of the download URL.
    filename (str): The data file description (e.g.
        "Dietary Interview - Individual Foods"), not the short file code.

    Returns:
    pandas.DataFrame: The contents of the XPT file as parsed by ``pd.read_sas``.

    Raises:
    ValueError: If no data file name is found for the given category, cycle
        and description.
    """
    data_file_name = get_data_filename(data_category=data_category, cycle_year=cycle, data_file_description=filename)

    # A failed lookup returns None; without this guard the request would go to
    # ".../None.XPT" and fail with an unrelated-looking download error.
    if data_file_name is None:
        raise ValueError(
            f"No data file found for category {data_category!r}, "
            f"cycle {cycle!r} and description {filename!r}"
        )

    url = f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle}/{data_file_name}.XPT"
    return pd.read_sas(url)

In [10]:
# Download and display the 1999-2000 "Dietary Interview - Individual Foods" file.
# NOTE(review): the many 5.397605e-79 values in the output look like an
# encoding of zero in the XPT file rather than real measurements — confirm
# before using these columns numerically.
retrieve_data(data_category=categories[1], cycle= "1999-2000", filename="Dietary Interview - Individual Foods")

Unnamed: 0,SEQN,WTDRD1,WTDR4YR,DRXILINE,DRDDRSTS,DRDINTMD,DRDDAY,DRALANG,DRXCCMNM,DRDCCMTY,...,DRXIM181,DRXIM201,DRXIM221,DRXIP182,DRXIP183,DRXIP184,DRXIP204,DRXIP205,DRXIP225,DRXIP226
0,1.0,14809.893854,6066.128663,1.0,1.0,1.0,4.0,1.0,1.0,5.000000e+00,...,4.000000e-02,5.397605e-79,5.397605e-79,1.000000e-01,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
1,1.0,14809.893854,6066.128663,2.0,1.0,1.0,4.0,1.0,1.0,5.000000e+00,...,1.180000e+00,5.397605e-79,5.397605e-79,1.000000e-01,7.000000e-02,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
2,1.0,14809.893854,6066.128663,3.0,1.0,1.0,4.0,1.0,,5.397605e-79,...,2.400000e-01,5.397605e-79,5.397605e-79,3.300000e-01,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
3,1.0,14809.893854,6066.128663,4.0,1.0,1.0,4.0,1.0,,5.397605e-79,...,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
4,1.0,14809.893854,6066.128663,5.0,1.0,1.0,4.0,1.0,2.0,2.000000e+01,...,9.000000e-02,5.397605e-79,5.397605e-79,2.200000e-01,3.000000e-02,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127835,9965.0,21564.978106,12163.017290,18.0,1.0,1.0,5.0,1.0,,5.397605e-79,...,2.200000e-01,8.000000e-02,2.000000e-02,1.000000e-02,1.000000e-02,2.000000e-02,3.000000e-02,1.400000e-01,1.000000e-02,1.400000e-01
127836,9965.0,21564.978106,12163.017290,19.0,1.0,1.0,5.0,1.0,5.0,8.000000e+00,...,9.100000e+00,1.000000e-02,5.397605e-79,4.710000e+00,3.500000e-01,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
127837,9965.0,21564.978106,12163.017290,20.0,1.0,1.0,5.0,1.0,5.0,8.000000e+00,...,1.460000e+00,5.397605e-79,5.397605e-79,1.200000e-01,8.000000e-02,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79
127838,9965.0,21564.978106,12163.017290,21.0,1.0,1.0,5.0,1.0,,5.397605e-79,...,1.100000e+00,5.397605e-79,5.397605e-79,1.000000e-01,6.000000e-02,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79,5.397605e-79


In [None]:
# Example direct download URL: https://wwwn.cdc.gov/Nchs/Nhanes/1999-2000/DRXIFF.XPT