In [1]:
# Let's import some relevant libraries
import pandas as pd
import requests
from bs4 import BeautifulSoup


import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Labels of the two-year survey cycles we are going to collect data for,
# from "1999-2000" through "2017-2018" (ten cycles).
# Built in one expression so no loop counters are left behind in the notebook namespace.
cycle_list = [f"{begin_year}-{begin_year + 1}" for begin_year in range(1999, 2019, 2)]

In [4]:
# The function below reads the variable-list table found at a URL into a DataFrame restricted to the survey cycles of interest
def get_variable_df(url, cycle_list = cycle_list):
  """
  Read the NHANES variable-list table at `url` and keep only the cycles of interest.

  Args:
  url (str): Address of a variable-list page; handed to pandas.read_html().
  cycle_list (list of str): Cycle labels such as "2005-2006" to keep.
      Defaults to the notebook-level cycle_list.

  Returns:
  pandas.DataFrame: The first table on the page with a "Years" column
  ("<Begin Year>-<EndYear>") added, the "Begin Year", "EndYear", "Component"
  and "Use Constraints" columns removed, only the rows whose "Years" value is
  in cycle_list, and a fresh 0..n-1 index.
  """
  raw_table = pd.read_html(url)[0]  # the table of interest is the first one on the page

  # Build the cycle label for the whole column at once. The previous per-row
  # chained assignment (df["Years"][i] = ...) raised SettingWithCopyWarning and
  # is not guaranteed to write back into the frame.
  years = raw_table["Begin Year"].astype(str) + "-" + raw_table["EndYear"].astype(str)

  return (
    raw_table
    .assign(Years=years)
    .drop(columns=["Begin Year", "EndYear", "Component", "Use Constraints"])
    .loc[lambda table: table["Years"].isin(cycle_list)]
    .reset_index(drop=True)
  )

In [5]:
# Data categories accepted as the "Component" query parameter of the variable-list page.
# We start with the demographics data for a PoC / MVP before adapting this into
# functions and methods for re-usability.
data_cat_list = [
    "demographics",
    "dietary",
    "examination",
    "laboratory",
    "questionnaire",
    "limitedaccess"
]

# One URL template instead of six copy-pasted f-strings.
VARIABLE_LIST_URL = "https://wwwn.cdc.gov/nchs/nhanes/search/variablelist.aspx?Component={category}"

# Download the variable table of every category once, keyed by category name.
variable_tables = {
    category: get_variable_df(VARIABLE_LIST_URL.format(category=category))
    for category in data_cat_list
}

# Keep the per-category names the rest of the notebook refers to.
demographics_variable_table = variable_tables["demographics"]
dietary_variable_table = variable_tables["dietary"]
examination_variable_table = variable_tables["examination"]
laboratory_variable_table = variable_tables["laboratory"]
questionnaire_variable_table = variable_tables["questionnaire"]
limitedaccess_variable_table = variable_tables["limitedaccess"]

In [20]:
# Preview the first ten rows of the demographics variable table.
demographics_variable_table.head(10)

Unnamed: 0,Variable Name,Variable Description,Data File Name,Data File Description,Years
0,AIALANG,Language of the MEC ACASI Interview Instrument,DEMO_D,Demographic Variables & Sample Weights,2005-2006
1,DMDBORN,In what country {were you/was SP} born?,DEMO_D,Demographic Variables & Sample Weights,2005-2006
2,DMDCITZN,{Are you/Is SP} a citizen of the United States...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
3,DMDEDUC2,(SP Interview Version) What is the highest gra...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
4,DMDEDUC3,(SP Interview Version) What is the highest gra...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
5,DMDFMSIZ,Total number of people in the Family,DEMO_D,Demographic Variables & Sample Weights,2005-2006
6,DMDHHSIZ,Total number of people in the Household,DEMO_D,Demographic Variables & Sample Weights,2005-2006
7,DMDHRAGE,Age in years of the household reference person...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
8,DMDHRBRN,In what country {were you/was NON-SP Head} born?,DEMO_D,Demographic Variables & Sample Weights,2005-2006
9,DMDHREDU,What is the highest grade or level of school {...,DEMO_D,Demographic Variables & Sample Weights,2005-2006


In [19]:
# Now we need some functions that:
# 1. given a 'cycle year' and a 'data file description', return a dictionary of {Variable Name: Variable Description}
# 2. for each unique "Data File Description", return a dictionary of {Data File Name: cycle_years}
# 3. given the "cycle_year" and the "data_file_description", return the "data_filename"
# 4. given a list of cycles, return a list and a dictionary: the list of variables common to all the cycles, and a dictionary of {variable: [the cycles the variable is in]}

# Function 1: Extract variable names and descriptions for a specific year and data file description
def get_variable_dict(variable_table, cycle_year, data_file_description):
    """
    Map variable names to their descriptions for one cycle and one data file.

    Args:
    variable_table (pandas.DataFrame or iterable of dict): Variable information with
        'Years', 'Data File Description', 'Variable Name' and 'Variable Description' fields.
    cycle_year (str): Cycle label to match against 'Years', e.g. "2005-2006".
    data_file_description (str): Value to match against 'Data File Description'.

    Returns:
    dict: {Variable Name: Variable Description} for the matching rows; empty when nothing matches.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn it
    # into row dictionaries first; any other iterable of mappings is used as-is.
    if isinstance(variable_table, pd.DataFrame):
        rows = variable_table.to_dict("records")
    else:
        rows = variable_table

    return {
        row['Variable Name']: row['Variable Description']
        for row in rows
        if row['Years'] == cycle_year and row['Data File Description'] == data_file_description
    }

# Function 2: Extract data file names and corresponding cycle years for each unique data file description
def get_data_file_dict(variable_table):
    """
    Group data file names, and the cycles they belong to, by data file description.

    Args:
    variable_table (pandas.DataFrame or iterable of dict): Variable information with
        'Data File Description', 'Data File Name' and 'Years' fields.

    Returns:
    dict: {Data File Description: {Data File Name: [cycle_years]}}, each cycle listed
    once per file in first-seen order.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn it
    # into row dictionaries first; any other iterable of mappings is used as-is.
    if isinstance(variable_table, pd.DataFrame):
        rows = variable_table.to_dict("records")
    else:
        rows = variable_table

    data_file_dict = {}
    for row in rows:
        files = data_file_dict.setdefault(row['Data File Description'], {})
        cycles = files.setdefault(row['Data File Name'], [])
        # Several rows can share the same file and cycle (one row per variable),
        # so record each cycle only once.
        if row['Years'] not in cycles:
            cycles.append(row['Years'])
    return data_file_dict

# Function 3: Get the data file name for a specific cycle year and data file description
def get_data_filename(variable_table, cycle_year, data_file_description):
    """
    Look up the data file name for one cycle and one data file description.

    Args:
    variable_table (pandas.DataFrame or iterable of dict): Variable information with
        'Years', 'Data File Description' and 'Data File Name' fields.
    cycle_year (str): Cycle label to match against 'Years', e.g. "2005-2006".
    data_file_description (str): Value to match against 'Data File Description'.

    Returns:
    str or None: 'Data File Name' of the first matching row, or None when no row matches.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn it
    # into row dictionaries first; any other iterable of mappings is used as-is.
    if isinstance(variable_table, pd.DataFrame):
        rows = variable_table.to_dict("records")
    else:
        rows = variable_table

    matches = (
        row['Data File Name']
        for row in rows
        if row['Years'] == cycle_year and row['Data File Description'] == data_file_description
    )
    return next(matches, None)

# Function 4: Find common variables across multiple cycles and create a dictionary with variable-cycles mapping
def common_variables_and_cycles(variable_table, cycles):
    """
    Find the variables shared by every requested cycle and map each variable to its cycles.

    Args:
    variable_table (pandas.DataFrame or iterable of dict): Variable information with
        'Years' and 'Variable Name' fields.
    cycles (list of str): Cycle labels to compare.

    Returns:
    list: Sorted names of the variables present in all of `cycles` (empty when `cycles` is empty).
    dict: {variable: [cycles]} listing, for every variable seen, the requested cycles it occurs in.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn it
    # into row dictionaries; other iterables are materialised because the rows
    # are scanned once per cycle.
    if isinstance(variable_table, pd.DataFrame):
        rows = variable_table.to_dict("records")
    else:
        rows = list(variable_table)

    common_variables = None
    variable_cycles_dict = {}

    for cycle in cycles:
        # dict.fromkeys drops repeated names (a variable may appear in several
        # data files of one cycle) while keeping first-seen order.
        variables = list(dict.fromkeys(row['Variable Name'] for row in rows if row['Years'] == cycle))

        if common_variables is None:
            common_variables = set(variables)
        else:
            common_variables.intersection_update(variables)

        for variable in variables:
            seen_cycles = variable_cycles_dict.setdefault(variable, [])
            if cycle not in seen_cycles:
                seen_cycles.append(cycle)

    # `common_variables` is still None when `cycles` is empty; return an empty
    # list rather than failing on list(None). Sorting makes the order repeatable.
    return sorted(common_variables or ()), variable_cycles_dict




In [17]:
def check_in_between_cycle(start_year, end_year, cycle_list):
    """
    Collect the cycles running from the one containing start_year to the one containing end_year.

    Args:
    start_year (str): Year that opens the range, e.g. "2005".
    end_year (str): Year that closes the range, e.g. "2010".
    cycle_list (list of str): Available "<begin>-<end>" cycle labels, ordered from
        earliest to latest (the walk below relies on that order).

    Returns:
    list: The cycles covering the range, without repeats. Empty when the range cannot be
    resolved (start year not found before the end year, or end year not found at all).
    """
    start_year, end_year = str(start_year), str(end_year)
    selected_cycles = []
    collecting = False

    for cycle in cycle_list:
        # Compare whole years rather than substrings of the label.
        years_in_cycle = str(cycle).split('-')
        if start_year in years_in_cycle:
            collecting = True
        if collecting and cycle not in selected_cycles:
            selected_cycles.append(cycle)
        if end_year in years_in_cycle:
            return selected_cycles

    # The end year was never reached: always hand back a list so callers can
    # safely take len() of the result (the loop used to fall through to None).
    return []
    

def check_cycle(input_cycle, cycle_list):
    """
    Resolve a cycle label or a year range into the list of survey cycles it covers.

    Args:
    input_cycle (str): Either a label present in cycle_list (e.g. "2005-2006") or a
        "<start year>-<end year>" range spanning several cycles (e.g. "2005-2010").
    cycle_list (list of str): Available cycle labels.

    Returns:
    list: [input_cycle] when it is itself a known cycle; otherwise whatever
    check_in_between_cycle resolves for the start and end years, or an empty list
    when it resolves nothing.
    """
    if input_cycle in cycle_list:
        return [input_cycle]

    year_parts = input_cycle.split('-')
    start_year = year_parts[0]
    end_year = year_parts[1]

    # Return the resolved range. It used to be printed only, so callers such as
    # get_data received None and failed on len().
    the_cyclelist = check_in_between_cycle(start_year, end_year, cycle_list)
    return the_cyclelist if the_cyclelist is not None else []

In [18]:
# Sanity check: a label that is already in cycle_list comes back as a one-element list.
check_cycle('2005-2006', cycle_list=cycle_list)

['2005-2006']

In [None]:
# We download data with the pandas function 'read_sas', since the downloadable files are SAS files;
# the first try-out is one cycle of the demographics data, cycle '2005-2006'.
# get_data --> given the data category, the cycle year (or year range) and the data file description, return the data.

def get_data(data_category, cycle_year, data_file_description, cycle_list=cycle_list):
    """
    Download the data file(s) matching a data file description for one or more cycles.

    Args:
    data_category (str): Category whose variable table is searched, e.g. "demographics".
    cycle_year (str): A cycle label ("2005-2006") or a year range ("2005-2010").
    data_file_description (str): Value of 'Data File Description' identifying the file.
    cycle_list (list of str): Available cycle labels. Defaults to the notebook-level cycle_list.

    Returns:
    pandas.DataFrame: The data of the single cycle, or the rows of all requested
    cycles stacked on top of each other.

    Raises:
    ValueError: If cycle_year resolves to no cycle, or a cycle has no file with that description.
    """
    # check the cycle (guard against None as well as an empty list)
    list_of_cycles = check_cycle(cycle_year, cycle_list=cycle_list) or []
    if not list_of_cycles:
        raise ValueError(f"could not resolve {cycle_year!r} to any survey cycle")

    # The file name lookup needs the variable table of the requested category.
    # It is passed on as row dictionaries, which is what get_data_filename iterates over.
    variable_rows = get_variable_df(
        f"https://wwwn.cdc.gov/nchs/nhanes/search/variablelist.aspx?Component={data_category}",
        cycle_list=list_of_cycles,
    ).to_dict("records")

    cycle_frames = []
    for cycle in list_of_cycles:
        data_file_name = get_data_filename(variable_rows, cycle, data_file_description)
        if data_file_name is None:
            raise ValueError(f"no data file described as {data_file_description!r} in cycle {cycle}")
        # read_sas infers the format from the file extension, so state it explicitly.
        # NOTE(review): the variable table lists bare names such as DEMO_D; the ".XPT"
        # suffix and this URL layout are assumed — confirm against the CDC site.
        cycle_frames.append(
            pd.read_sas(f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle}/{data_file_name}.XPT", format="xport")
        )

    if len(cycle_frames) == 1:
        return cycle_frames[0]
    # Stack the cycles row-wise; DataFrame.join on an empty frame cannot accumulate them.
    return pd.concat(cycle_frames, ignore_index=True)



In [None]:
# We first initialize the variable tables from the get-go, so that we have a variable table for every data category.


In [30]:
# Display the survey-cycle labels built at the top of the notebook.
cycle_list

['1999-2000',
 '2001-2002',
 '2003-2004',
 '2005-2006',
 '2007-2008',
 '2009-2010',
 '2011-2012',
 '2013-2014',
 '2015-2016',
 '2017-2018']

In [None]:
# Now we need some functions that:
# 1. given a 'cycle year' and a 'data file description', return a dictionary of {Variable Name: Variable Description}
# 2. for each unique "Data File Description", return a dictionary of {Data File Name: cycle_years}
# 3. given the "cycle_year" and the "data_file_description", return the "data_filename"
# 4. given a list of cycles, return a list and a dictionary: the list of variables common to all the cycles, and a dictionary of {variable: [the cycles the variable is in]}

# Function 1: Extract variable names and descriptions for a specific year and data file description
def get_variable_dict(variable_table, cycle_year, data_file_description):
    """
    Map variable names to their descriptions for one cycle and one data file.

    Args:
    variable_table (pandas.DataFrame or iterable of dict): Variable information with
        'Years', 'Data File Description', 'Variable Name' and 'Variable Description' fields.
    cycle_year (str): Cycle label to match against 'Years', e.g. "2005-2006".
    data_file_description (str): Value to match against 'Data File Description'.

    Returns:
    dict: {Variable Name: Variable Description} for the matching rows; empty when nothing matches.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn it
    # into row dictionaries first; any other iterable of mappings is used as-is.
    if isinstance(variable_table, pd.DataFrame):
        rows = variable_table.to_dict("records")
    else:
        rows = variable_table

    return {
        row['Variable Name']: row['Variable Description']
        for row in rows
        if row['Years'] == cycle_year and row['Data File Description'] == data_file_description
    }

# Function 2: Extract data file names and corresponding cycle years for each unique data file description
def get_data_file_dict(variable_table):
    """
    Group data file names, and the cycles they belong to, by data file description.

    Args:
    variable_table (pandas.DataFrame or iterable of dict): Variable information with
        'Data File Description', 'Data File Name' and 'Years' fields.

    Returns:
    dict: {Data File Description: {Data File Name: [cycle_years]}}, each cycle listed
    once per file in first-seen order.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn it
    # into row dictionaries first; any other iterable of mappings is used as-is.
    if isinstance(variable_table, pd.DataFrame):
        rows = variable_table.to_dict("records")
    else:
        rows = variable_table

    data_file_dict = {}
    for row in rows:
        files = data_file_dict.setdefault(row['Data File Description'], {})
        cycles = files.setdefault(row['Data File Name'], [])
        # Several rows can share the same file and cycle (one row per variable),
        # so record each cycle only once.
        if row['Years'] not in cycles:
            cycles.append(row['Years'])
    return data_file_dict

# Function 3: Get the data file name for a specific cycle year and data file description
def get_data_filename(variable_table, cycle_year, data_file_description):
    """
    Look up the data file name for one cycle and one data file description.

    Args:
    variable_table (pandas.DataFrame or iterable of dict): Variable information with
        'Years', 'Data File Description' and 'Data File Name' fields.
    cycle_year (str): Cycle label to match against 'Years', e.g. "2005-2006".
    data_file_description (str): Value to match against 'Data File Description'.

    Returns:
    str or None: 'Data File Name' of the first matching row, or None when no row matches.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn it
    # into row dictionaries first; any other iterable of mappings is used as-is.
    if isinstance(variable_table, pd.DataFrame):
        rows = variable_table.to_dict("records")
    else:
        rows = variable_table

    matches = (
        row['Data File Name']
        for row in rows
        if row['Years'] == cycle_year and row['Data File Description'] == data_file_description
    )
    return next(matches, None)

# Function 4: Find common variables across multiple cycles and create a dictionary with variable-cycles mapping
def common_variables_and_cycles(variable_table, cycles):
    """
    Find the variables shared by every requested cycle and map each variable to its cycles.

    Args:
    variable_table (pandas.DataFrame or iterable of dict): Variable information with
        'Years' and 'Variable Name' fields.
    cycles (list of str): Cycle labels to compare.

    Returns:
    list: Sorted names of the variables present in all of `cycles` (empty when `cycles` is empty).
    dict: {variable: [cycles]} listing, for every variable seen, the requested cycles it occurs in.
    """
    # Iterating a DataFrame yields its column labels, not its rows, so turn it
    # into row dictionaries; other iterables are materialised because the rows
    # are scanned once per cycle.
    if isinstance(variable_table, pd.DataFrame):
        rows = variable_table.to_dict("records")
    else:
        rows = list(variable_table)

    common_variables = None
    variable_cycles_dict = {}

    for cycle in cycles:
        # dict.fromkeys drops repeated names (a variable may appear in several
        # data files of one cycle) while keeping first-seen order.
        variables = list(dict.fromkeys(row['Variable Name'] for row in rows if row['Years'] == cycle))

        if common_variables is None:
            common_variables = set(variables)
        else:
            common_variables.intersection_update(variables)

        for variable in variables:
            seen_cycles = variable_cycles_dict.setdefault(variable, [])
            if cycle not in seen_cycles:
                seen_cycles.append(cycle)

    # `common_variables` is still None when `cycles` is empty; return an empty
    # list rather than failing on list(None). Sorting makes the order repeatable.
    return sorted(common_variables or ()), variable_cycles_dict


def check_in_between_cycle(start_year, end_year, cycle_list):
    """
    Collect the cycles running from the one containing start_year to the one containing end_year.

    Args:
    start_year (str): Year that opens the range, e.g. "2005".
    end_year (str): Year that closes the range, e.g. "2010".
    cycle_list (list of str): Available "<begin>-<end>" cycle labels, ordered from
        earliest to latest (the walk below relies on that order).

    Returns:
    list: The cycles covering the range, without repeats. Empty when the range cannot be
    resolved (start year not found before the end year, or end year not found at all).
    """
    start_year, end_year = str(start_year), str(end_year)
    selected_cycles = []
    collecting = False

    for cycle in cycle_list:
        # Compare whole years rather than substrings of the label.
        years_in_cycle = str(cycle).split('-')
        if start_year in years_in_cycle:
            collecting = True
        if collecting and cycle not in selected_cycles:
            selected_cycles.append(cycle)
        if end_year in years_in_cycle:
            return selected_cycles

    # The end year was never reached: always hand back a list so callers can
    # safely take len() of the result (the loop used to fall through to None).
    return []
    

def check_cycle(input_cycle, cycle_list):
    """
    Resolve a cycle label or a year range into the list of survey cycles it covers.

    Args:
    input_cycle (str): Either a label present in cycle_list (e.g. "2005-2006") or a
        "<start year>-<end year>" range spanning several cycles (e.g. "2005-2010").
    cycle_list (list of str): Available cycle labels.

    Returns:
    list: [input_cycle] when it is itself a known cycle; otherwise whatever
    check_in_between_cycle resolves for the start and end years, or an empty list
    when it resolves nothing.
    """
    if input_cycle in cycle_list:
        return [input_cycle]

    year_parts = input_cycle.split('-')
    start_year = year_parts[0]
    end_year = year_parts[1]

    # Return the resolved range. It used to be printed only, so callers such as
    # get_data received None and failed on len().
    the_cyclelist = check_in_between_cycle(start_year, end_year, cycle_list)
    return the_cyclelist if the_cyclelist is not None else []



# We download data with the pandas function 'read_sas', since the downloadable files are SAS files;
# the first try-out is one cycle of the demographics data, cycle '2005-2006'.
# get_data --> given the data category, the cycle year (or year range) and the data file description, return the data.

def get_data(data_category, cycle_year, data_file_description, cycle_list=cycle_list):
    """
    Download the data file(s) matching a data file description for one or more cycles.

    Args:
    data_category (str): Category whose variable table is searched, e.g. "demographics".
    cycle_year (str): A cycle label ("2005-2006") or a year range ("2005-2010").
    data_file_description (str): Value of 'Data File Description' identifying the file.
    cycle_list (list of str): Available cycle labels. Defaults to the notebook-level cycle_list.

    Returns:
    pandas.DataFrame: The data of the single cycle, or the rows of all requested
    cycles stacked on top of each other.

    Raises:
    ValueError: If cycle_year resolves to no cycle, or a cycle has no file with that description.
    """
    # check the cycle (guard against None as well as an empty list)
    list_of_cycles = check_cycle(cycle_year, cycle_list=cycle_list) or []
    if not list_of_cycles:
        raise ValueError(f"could not resolve {cycle_year!r} to any survey cycle")

    # The file name lookup needs the variable table of the requested category.
    # It is passed on as row dictionaries, which is what get_data_filename iterates over.
    variable_rows = get_variable_df(
        f"https://wwwn.cdc.gov/nchs/nhanes/search/variablelist.aspx?Component={data_category}",
        cycle_list=list_of_cycles,
    ).to_dict("records")

    cycle_frames = []
    for cycle in list_of_cycles:
        data_file_name = get_data_filename(variable_rows, cycle, data_file_description)
        if data_file_name is None:
            raise ValueError(f"no data file described as {data_file_description!r} in cycle {cycle}")
        # read_sas infers the format from the file extension, so state it explicitly.
        # NOTE(review): the variable table lists bare names such as DEMO_D; the ".XPT"
        # suffix and this URL layout are assumed — confirm against the CDC site.
        cycle_frames.append(
            pd.read_sas(f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle}/{data_file_name}.XPT", format="xport")
        )

    if len(cycle_frames) == 1:
        return cycle_frames[0]
    # Stack the cycles row-wise; DataFrame.join on an empty frame cannot accumulate them.
    return pd.concat(cycle_frames, ignore_index=True)



In [None]:
def get_data(self, cycle_year, data_category, data_file_description, include_uncommon=False):
    """
    Get data for a specific cycle year (or year range) and data file description.

    Args:
    self: Object providing check_cycle(cycle_year) and
        get_data_filename(cycle, data_file_description).
    cycle_year (str): The cycle label or year range for which data is requested.
    data_category (str): The data category. Not used by the body; kept for the call signature.
    data_file_description (str): The data file description.
    include_uncommon (bool): When several cycles are combined, keep columns that are
        missing from some cycles (filled with NaN) instead of dropping them (default is False).

    Returns:
    pd.DataFrame: The single cycle's data, or the rows of all cycles stacked on top of each other.

    Raises:
    ValueError: If cycle_year resolves to no cycle, or a cycle has no file with that description.
    """
    list_of_cycles = self.check_cycle(cycle_year)
    if not list_of_cycles:
        raise ValueError(f"could not resolve {cycle_year!r} to any survey cycle")

    cycle_frames = []
    for cycle in list_of_cycles:
        data_file_name = self.get_data_filename(cycle, data_file_description)
        if data_file_name is None:
            raise ValueError(f"no data file described as {data_file_description!r} in cycle {cycle}")
        # read_sas infers the format from the file extension, so state it explicitly.
        # NOTE(review): the ".XPT" suffix and this URL layout are assumed — confirm against the CDC site.
        cycle_frames.append(
            pd.read_sas(f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle}/{data_file_name}.XPT", format="xport")
        )

    if len(cycle_frames) == 1:
        return cycle_frames[0]

    # Stack the cycles row-wise. join="inner" keeps only the columns every cycle
    # has, join="outer" keeps all of them. Selecting columns from the variable
    # table instead would raise KeyError for variables that live in other files.
    return pd.concat(
        cycle_frames,
        join="outer" if include_uncommon else "inner",
        ignore_index=True,
    )


In [None]:
import pandas as pd

class NHANESDataAPI:
    """
    Query an NHANES variable table and download the data files it describes.

    The lookup methods read the fields 'Variable Name', 'Variable Description',
    'Data File Name', 'Data File Description' and 'Years' from each row of
    `variable_table`.
    """

    def __init__(self, data_dir="data/", variable_table=None):
        """
        Initialize the NHANES Data API.

        Args:
        data_dir (str): Directory where data will be stored.
        variable_table (pandas.DataFrame or iterable of dict, optional): Variable
            information. May also be assigned to `self.variable_table` later; it
            must be set before any lookup method is called.
        """
        self.data_dir = data_dir
        self.variable_table = variable_table

    def _rows(self):
        """Return the variable table as a list of row dictionaries."""
        if self.variable_table is None:
            raise ValueError("variable_table has not been loaded")
        # Iterating a DataFrame yields its column labels, not its rows.
        if isinstance(self.variable_table, pd.DataFrame):
            return self.variable_table.to_dict("records")
        return list(self.variable_table)

    def list_variables(self, cycle_year, data_file_description):
        """
        List variables for a specific cycle year and data file description.

        Args:
        cycle_year (str): The year or cycle for which data is requested.
        data_file_description (str): The data file description.

        Returns:
        dict: A dictionary of {Variable Name: Variable Description}.
        """
        return {
            row['Variable Name']: row['Variable Description']
            for row in self._rows()
            if row['Years'] == cycle_year and row['Data File Description'] == data_file_description
        }

    def list_data_files(self):
        """
        List data files and their corresponding cycle years for each unique data file description.

        Returns:
        dict: A dictionary of {Data File Description: {Data File Name: [cycle_years]}},
        each cycle listed once per file.
        """
        data_file_dict = {}
        for row in self._rows():
            files = data_file_dict.setdefault(row['Data File Description'], {})
            cycles = files.setdefault(row['Data File Name'], [])
            # Several rows can share the same file and cycle; record each cycle once.
            if row['Years'] not in cycles:
                cycles.append(row['Years'])
        return data_file_dict

    def get_data_filename(self, cycle_year, data_file_description):
        """
        Get the data file name for a specific cycle year and data file description.

        Args:
        cycle_year (str): The year or cycle for which data is requested.
        data_file_description (str): The data file description.

        Returns:
        str or None: The data file name of the first matching row, or None when no row matches.
        """
        matches = (
            row['Data File Name']
            for row in self._rows()
            if row['Years'] == cycle_year and row['Data File Description'] == data_file_description
        )
        return next(matches, None)

    def common_variables(self, cycle_years):
        """
        Find common variables across multiple cycle years and create a dictionary with variable-cycles mapping.

        Args:
        cycle_years (list of str): List of cycle years.

        Returns:
        list: Sorted list of the variables present in every requested cycle (empty for no cycles).
        dict: A dictionary of {variable: [cycles]}.
        """
        rows = self._rows()
        common_variables = None
        variable_cycles_dict = {}

        for cycle in cycle_years:
            # dict.fromkeys drops repeated names while keeping first-seen order.
            variables = list(dict.fromkeys(row['Variable Name'] for row in rows if row['Years'] == cycle))
            if common_variables is None:
                common_variables = set(variables)
            else:
                common_variables.intersection_update(variables)

            for variable in variables:
                seen_cycles = variable_cycles_dict.setdefault(variable, [])
                if cycle not in seen_cycles:
                    seen_cycles.append(cycle)

        # Still None when cycle_years is empty; avoid failing on list(None).
        return sorted(common_variables or ()), variable_cycles_dict

    def check_cycle(self, input_cycle):
        """
        Check the validity of a cycle and return valid cycle(s) based on input.

        Args:
        input_cycle (str): The input cycle label ("2005-2006") or year range ("2005-2010").

        Returns:
        list: List of valid cycle(s) based on input; empty when the range cannot be resolved.
        """
        # The table repeats a cycle on many rows, so reduce it to unique labels.
        # "YYYY-YYYY" labels sort chronologically as plain strings, which is the
        # order the range walk needs.
        cycle_list = sorted({row['Years'] for row in self._rows()})
        if input_cycle in cycle_list:
            return [input_cycle]

        year_parts = input_cycle.split('-')
        return self.check_in_between_cycle(year_parts[0], year_parts[1], cycle_list)

    def check_in_between_cycle(self, start_year, end_year, cycle_list):
        """
        Check for valid cycles within a range.

        Args:
        start_year (str): The start year of the range.
        end_year (str): The end year of the range.
        cycle_list (list): List of available cycle years, ordered from earliest to latest.

        Returns:
        list: List of valid cycle(s) within the range, without repeats; empty when the
        start year is not found before the end year or the end year is not found at all.
        """
        start_year, end_year = str(start_year), str(end_year)
        selected_cycles = []
        collecting = False
        for cycle in cycle_list:
            # Compare whole years rather than substrings of the label.
            years_in_cycle = str(cycle).split('-')
            if start_year in years_in_cycle:
                collecting = True
            if collecting and cycle not in selected_cycles:
                selected_cycles.append(cycle)
            if end_year in years_in_cycle:
                return selected_cycles
        # End year never reached: return a list, not an implicit None.
        return []

    def get_data(self, cycle_year, data_category, data_file_description, include_uncommon=False):
        """
        Get data for a specific cycle year (or year range) and data file description.

        Args:
        cycle_year (str): The cycle label or year range for which data is requested.
        data_category (str): The data category. Not used by the body; kept for the call signature.
        data_file_description (str): The data file description.
        include_uncommon (bool): When several cycles are combined, keep columns that are
            missing from some cycles (filled with NaN) instead of dropping them (default is False).

        Returns:
        pd.DataFrame: The single cycle's data, or the rows of all cycles stacked on top of each other.

        Raises:
        ValueError: If cycle_year resolves to no cycle, or a cycle has no file with that description.
        """
        list_of_cycles = self.check_cycle(cycle_year)
        if not list_of_cycles:
            raise ValueError(f"could not resolve {cycle_year!r} to any survey cycle")

        cycle_frames = []
        for cycle in list_of_cycles:
            data_file_name = self.get_data_filename(cycle, data_file_description)
            if data_file_name is None:
                raise ValueError(f"no data file described as {data_file_description!r} in cycle {cycle}")
            # read_sas infers the format from the file extension, so state it explicitly.
            # NOTE(review): the ".XPT" suffix and this URL layout are assumed — confirm against the CDC site.
            cycle_frames.append(
                pd.read_sas(f"https://wwwn.cdc.gov/Nchs/Nhanes/{cycle}/{data_file_name}.XPT", format="xport")
            )

        if len(cycle_frames) == 1:
            return cycle_frames[0]

        # Stack the cycles row-wise. join="inner" keeps only the columns every
        # cycle has, join="outer" keeps all of them.
        return pd.concat(
            cycle_frames,
            join="outer" if include_uncommon else "inner",
            ignore_index=True,
        )

    def join_data(self, cycle_year, data_category1, data_file_name1, data_category2, data_file_name2,
                  on="SEQN", how="inner"):
        """
        Join data from two different data files in the same cycle year.

        Args:
        cycle_year (str): The year or cycle for which data is requested.
        data_category1 (str): The data category of the first data file.
        data_file_name1 (str): Identifier of the first data file.
        data_category2 (str): The data category of the second data file.
        data_file_name2 (str): Identifier of the second data file.
        on (str): Column both files are matched on.
        how (str): Merge type passed to DataFrame.merge (default "inner").

        Returns:
        pd.DataFrame: A pandas DataFrame containing the joined data, or None if either
        data frame is missing.
        """
        # NOTE(review): data_file_name1/2 are forwarded as get_data's
        # data_file_description argument, so callers must pass descriptions,
        # not file names — confirm the intended meaning.
        data_frame1 = self.get_data(cycle_year, data_category1, data_file_name1)
        data_frame2 = self.get_data(cycle_year, data_category2, data_file_name2)

        # Check if data frames exist
        if data_frame1 is None or data_frame2 is None:
            return None

        # NOTE(review): "SEQN" is assumed to be the respondent identifier shared
        # by the data files — confirm, or pass `on` explicitly.
        return data_frame1.merge(data_frame2, on=on, how=how)


In [1]:
# Import the API class from the NHANES_data_API module.
from NHANES_data_API import NHANESDataAPI

# NOTE(review): 'demographics' is passed as the first positional argument; presumably the
# module's constructor takes a data category there — confirm against NHANES_data_API.
api = NHANESDataAPI('demographics')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Years"][i] = f"{x}-{y}"
  df["Years"][i] = f"{x}-{y}"


In [2]:
# Show the variable table the API returns for the 'demographics' category.
api.get_variable_table('demographics')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Years"][i] = f"{x}-{y}"
  df["Years"][i] = f"{x}-{y}"


Unnamed: 0,Variable Name,Variable Description,Data File Name,Data File Description,Years
0,AIALANG,Language of the MEC ACASI Interview Instrument,DEMO_D,Demographic Variables & Sample Weights,2005-2006
1,DMDBORN,In what country {were you/was SP} born?,DEMO_D,Demographic Variables & Sample Weights,2005-2006
2,DMDCITZN,{Are you/Is SP} a citizen of the United States...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
3,DMDEDUC2,(SP Interview Version) What is the highest gra...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
4,DMDEDUC3,(SP Interview Version) What is the highest gra...,DEMO_D,Demographic Variables & Sample Weights,2005-2006
...,...,...,...,...,...
537,SIAINTRP,Was an interpreter used to conduct the Sample ...,DEMO_J,Demographic Variables and Sample Weights,2017-2018
538,SIALANG,Language of the Sample Person Interview Instru...,DEMO_J,Demographic Variables and Sample Weights,2017-2018
539,SIAPROXY,Was a Proxy respondent used in conducting the ...,DEMO_J,Demographic Variables and Sample Weights,2017-2018
540,WTINT2YR,Full sample 2 year interview weight.,DEMO_J,Demographic Variables and Sample Weights,2017-2018


In [3]:
# Show the data file descriptions known to the API.
# NOTE(review): presumably these are the distinct 'Data File Description' values of the loaded table — confirm against NHANES_data_API.
api.list_data_file_descriptions()

array(['Demographic Variables & Sample Weights',
       'Demographic Variables and Sample Weights'], dtype=object)