In [19]:
import pandas as pd

# NHANES datasets
# Search Variables: https://wwwn.cdc.gov/nchs/nhanes/search/default.aspx
# Questionnaire Data: https://wwwn.cdc.gov/Nchs/Nhanes/Search/DataPage.aspx?Component=Questionnaire&Cycle=2013-2014

# Demographic Data Documentation: https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2013/DataFiles/DEMO_H.htm
# RIAGENDR - Gender (1: M, 2: F, .: Missing)
# RIDAGEYR - Age in years at screening (0-79: Range, 80: 80 years of age and over, .: missing)
# RIDEXPRG - Pregnancy status at exam (1: Pregnant, 2: Not pregnant, 3: Not sure, .: missing)

# Dietary Interview - Total Nutrient Intakes, First Day (DR1TOT_H) - Data Documentation:
# https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2013/DataFiles/DR1TOT_H.htm

# DR1TIRON
# Variable Description - Iron (mg)
# Data File Name - Dietary Interview - Total Nutrient Intakes, First Day (DR1TOT_H)
# Values: 0.13 to 130.76 - Range of Values, . - Missing

# DR1TTFAT
# Variable Description - Total fat (gm)
# Data File Name - Dietary Interview - Total Nutrient Intakes, First Day (DR1TOT_H)
# Values: 0 to 548.38 - Range of Values, . - Missing

# DR1TFIBE
# Variable Description - Dietary fiber (gm)
# Data File Name - Dietary Interview - Total Nutrient Intakes, First Day (DR1TOT_H)
# Values: 0 to 136.3 - Range of Values, . - Missing


# Standard Biochemistry Profile (BIOPRO_H) - Data Documentation:
# https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2013/DataFiles/BIOPRO_H.htm

# LBDSIRSI
# Variable Description - Iron, refrigerated serum (umol/L)
# Data File Name - Standard Biochemistry Profile (BIOPRO_H)
# Values: 0.9 to 99.8 - Range of Values, . - Missing

# LBXSIR
# Variable Description - Iron, refrigerated serum (ug/dL)
# Data File Name - Standard Biochemistry Profile (BIOPRO_H)
# Values: 5 to 557 - Range of Values, . - Missing


# Medical Conditions (MCQ_H) - Data Documentation:
# https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/2013/DataFiles/MCQ_H.htm#MCQ053

# MCQ053
# SAS Label - Taking treatment for anemia/past 3 mos
# Variable Description - During the past 3 months, {have you/has SP} been on treatment for anemia (a-nee-me-a), sometimes called "tired blood" or "low blood"? [Include diet, iron pills, iron shots, transfusions as treatment.]
# Data File Name - Medical Conditions (MCQ_H)
# Values: 1: Yes, 2: No, 7: Refused, 9: Don't know, .: Missing

# Raw NHANES column codes to keep from each CSV. "SEQN" appears in every list
# because it is the key the tables are joined on below.
demographic_variables = ["SEQN", "RIAGENDR", "RIDAGEYR", "RIDEXPRG"]
diet_variables = ["SEQN", "DR1TIRON", "DR1TTFAT", "DR1TFIBE"]
labs_variables = ["SEQN", "LBDSIRSI", "LBXSIR"]
questionnaire_variables = ["SEQN", "MCQ053"]

# Raw code -> readable column name, one mapping per source table.
demographic_cols = {"RIAGENDR": "Gender", "RIDAGEYR": "Age", "RIDEXPRG": "Pregnant"}
diet_cols = {
    "DR1TIRON": "Iron_intake_mg",
    "DR1TTFAT": "Total_Fat_intake_g",
    "DR1TFIBE": "Fiber_intake_g",
}
labs_cols = {"LBDSIRSI": "Iron_serum_umol/L", "LBXSIR": "Iron_serum_ug/dL"}
questionnaire_cols = {"MCQ053": "Treatment_for_anemia"}


def _load_table(csv_path, wanted, renames):
    """Read *csv_path*, keeping only columns named in *wanted*, then rename them.

    The column filter is a callable rather than a plain list, so any column
    not listed in *wanted* is simply skipped while parsing.
    """
    frame = pd.read_csv(csv_path, usecols=lambda name: name in wanted)
    return frame.rename(columns=renames)


demographic = _load_table("NHANES/demographic.csv", demographic_variables, demographic_cols)
diet = _load_table("NHANES/diet.csv", diet_variables, diet_cols)
labs = _load_table("NHANES/labs.csv", labs_variables, labs_cols)
questionnaire = _load_table("NHANES/questionnaire.csv", questionnaire_variables, questionnaire_cols)

# Left-join every other table onto the demographic rows, one after another,
# so each demographic record is kept even when a later table has no match.
df = demographic
for _table in (diet, labs, questionnaire):
    df = df.merge(_table, on="SEQN", how="left")

df

Unnamed: 0,SEQN,Gender,Age,Pregnant,Fiber_intake_g,Total_Fat_intake_g,Iron_intake_mg,Iron_serum_ug/dL,Iron_serum_umol/L,Treatment_for_anemia
0,73557,1,69,,10.8,52.81,8.41,58.0,10.4,2.0
1,73558,1,54,,16.7,124.29,26.88,79.0,14.1,2.0
2,73559,1,72,,9.9,65.97,17.57,98.0,17.6,2.0
3,73560,1,9,,10.6,58.27,14.19,,,2.0
4,73561,2,73,,12.3,55.36,17.72,91.0,16.3,2.0
...,...,...,...,...,...,...,...,...,...,...
10170,83727,1,26,,30.4,193.51,47.01,73.0,13.1,2.0
10171,83728,2,2,,9.3,52.39,6.62,,,1.0
10172,83729,2,42,2.0,25.7,110.30,15.06,49.0,8.8,2.0
10173,83730,1,7,,,,,,,2.0
