### This script helps you pull data directly from PHE Fingertips

In [None]:
# Install the Fingertips package for python (YOU ONLY NEED TO DO THIS ONCE)
# Delete this cell or comment out the code below (e.g. #...) after installing, so the package is not re-installed every time the notebook is run

#!pip install fingertips_py

In [None]:
"""Import Fingertips and python's main preprocessing package pandas, as well as the datetime package.
Documentation for the Fingertips package can be found here: https://fingertips-py.readthedocs.io/en/latest/#indices-and-tables
"""

import re
from datetime import datetime
from pathlib import Path

import fingertips_py as ftp
import pandas as pd

In [None]:
"""Save all Fingertips indicators as a lookup df."""

# Download the metadata for every Fingertips indicator into a pandas dataframe
indicators = ftp.metadata.get_metadata_for_all_indicators()

# Several columns hold one dictionary per row; copy the single value we need out of each one.
# Each entry below is (new column, source column, dictionary key).
# Order matters: "UnitLabel" has to be read from "Unit" before "Unit" is overwritten with its own "Value".
dictionary_fields = [
    ("IndicatorName", "Descriptive", "Name"),
    ("Source", "Descriptive", "DataSource"),
    ("YearType", "YearType", "Name"),
    ("ValueType", "ValueType", "Name"),
    ("UnitLabel", "Unit", "Label"),
    ("Unit", "Unit", "Value"),
]
for new_column, source_column, key in dictionary_fields:
    # key=key binds the current key to the lambda (avoids the late-binding closure pitfall)
    indicators[new_column] = indicators[source_column].apply(lambda cell, key=key: cell[key])

# Keep a lowercase copy of the names so searches can be case-insensitive
indicators["IndicatorNameLower"] = indicators["IndicatorName"].str.lower()

# Narrow the dataframe down to the columns that are useful as a lookup
columns_to_keep = [
    "IID",
    "IndicatorName",
    "IndicatorNameLower",
    "Source",
    "YearType",
    "ValueType",
    "UnitLabel",
    "Unit",
]
indicators = indicators[columns_to_keep]

# Show the first few rows
indicators.head()

In [None]:
"""Create a function to find indicators based on a simple word search. Nothing will be returned at the end of this cell."""

def find_indicator(text_to_find, just_names=True):
    """Find Fingertips indicators whose name contains a user-inputted search term.

    The search is a case-insensitive, literal substring match against the
    'IndicatorNameLower' column of the module-level 'indicators' dataframe.
    Rows whose name is missing are never matched.

    Parameters
    ----------
    text_to_find: str
        search term inputted by the user
    just_names: bool
        if True, returns the full indicator names that match the search term, else returns the subsetted 'indicators' df

    Returns
    -------
    numpy.ndarray, pandas.DataFrame or None
        the unique matching indicator names (just_names=True), a copy of the matching rows of
        'indicators' (just_names=False), or None (after printing a warning) when nothing matched
    """
    # Flag every row whose lowercase name contains the lowercase search term.
    # regex=False treats the term as plain text (so characters such as '(' or '+' are safe) and
    # na=False makes rows with a missing name count as "no match" instead of raising an error.
    is_match = indicators["IndicatorNameLower"].str.contains(text_to_find.lower(), regex=False, na=False)

    # If no indicators matched, alert the user and ask them to try again
    if not is_match.any():
        print("\n****\nNo indicators matched your text. Please re-run the cell with and use a different search term.\n****\n")
        return None

    # Take a copy of the matching rows so callers cannot accidentally modify the main lookup df
    inds = indicators.loc[is_match].copy()

    # If the just_names argument is True, return only the unique indicator names
    if just_names:
        inds = inds.IndicatorName.unique()

    # Returns either a subsetted dataframe from the 'indicators' dataframe or an array of full indicator names
    return inds

In [None]:
"""Allow a user to input a search term to search for matching indicators."""

# Show a text box and keep whatever search term the user types in
indicator_word_search = input("Please type a phrase/word/partial word to search for indicators on Fingertips: ")

# Look up every indicator name containing that term, using find_indicator from the cell above
indicator_search_results = find_indicator(indicator_word_search, just_names=True)

# find_indicator returns None (after printing its own warning) when nothing matched,
# so only list the names when there is something to show
if indicator_search_results is not None:
    print("\nIndicator names that match your word search:\n")
    # One indicator name per line
    print(*indicator_search_results, sep="\n")

In [None]:
"""Allow the user to select all or specific indicators."""

# Ask whether the user wants every indicator listed by the search above, or only some of them
select_all_or_specific = input(
    "Would you like to select all matching indicators above or list specific indicators? "
    "Please answer 'All' or 'Specific': "
)

# Indicator names chosen one by one by the user are collected here (stays empty if they select all)
selected_indicators = list()

def save_specific_list_of_indicators():
    """Prompt the user for indicator names, one at a time, until they have finished.

    Every accepted name is appended to the module-level 'selected_indicators' list.
    A name is accepted only if it exactly matches (case-sensitive) a value in the
    'IndicatorName' column of the module-level 'indicators' dataframe; otherwise a
    warning is printed and the user is asked for a name again.

    After each accepted name the user is asked whether to add another one. Answering
    'yes' (in any letter case) repeats the prompt; any other answer ends the function.

    Returns
    -------
    None
    """
    # selected_indicators is a module-level list that this function extends in place
    global selected_indicators

    # Set the instruction text for the user input box - text will vary whether it is the first or 2+ indicator
    first_indicator_question = "\nPlease paste the full indicator name that you would like to include " \
                               "(you will have an opportunity to add more than one): "
    subsequent_indicator_question = "Please paste the full indicator name that you would like to include: "

    # All valid indicator names, computed once rather than on every attempt
    valid_names = set(indicators.IndicatorName.unique())

    # A loop (rather than recursion) means a mistyped name simply re-prompts: the user is asked
    # "add another?" exactly once per accepted indicator, however many failed attempts came before it
    while True:
        # If the user hasn't selected their first indicator yet, use the first question, else the subsequent one
        if len(selected_indicators) == 0:
            indicator = input(first_indicator_question)
        else:
            indicator = input(subsequent_indicator_question)

        # Warn the user and ask again if the name is not a Fingertips indicator name
        if indicator not in valid_names:
            print("\n****\nIndicator name not found. Please make sure you paste the whole indicator name " \
                  "(note it is case sensitive).\n****")
            continue

        selected_indicators.append(indicator)

        # Ask the user if they want to add another indicator; anything other than 'yes' finishes
        add_another = input("\nWould you like to add another indicator? Please answer with 'Yes' or 'No': ")
        if add_another.lower() != "yes":
            return None
    
# If the user wants to select all indicators from the output of the search cell...
# (strip() tolerates accidental spaces around the answer)
if select_all_or_specific.strip().lower() == "all":
    # The search result is None when no indicator matched the search term. Passing None to isin()
    # raises a TypeError, so warn the user and fall back to an empty selection instead.
    if indicator_search_results is None:
        print("\n****\nThere are no matching indicators to select. Please re-run the search cell "
              "with a different search term.\n****")
        names_to_select = []
    else:
        names_to_select = indicator_search_results
    # Make a subset of the Fingertips indicators df that matches the users' selected indicators
    indicator_selection = indicators.loc[indicators.IndicatorName.isin(names_to_select)]
# Otherwise (any answer other than 'all'), call the function above, which repeats until the user has
# finished selecting their indicators, then take the matching subset of the Fingertips indicators df
else:
    save_specific_list_of_indicators()
    indicator_selection = indicators.loc[indicators.IndicatorName.isin(selected_indicators)]

# Return metadata for the selected indicators
print("\n*****\n\nInformation about your selected indicators is below:")
indicator_selection

In [None]:
"""Pull data for chosen indicators for selected geographies / area types. 
NOTE that large amounts of data means this cell can take a while to run."""

# Save a data frame that matches Fingertips indicators with their available geographies / area types
# (the code below reads its 'IndicatorId', 'GeographicalArea' and 'AreaTypeId' columns)
geographies = ftp.retrieve_data.get_all_areas_for_all_indicators()

# Create an empty dataframe so the output of each selected indicator can be saved in the same table
# NOTE: re-running this cell resets final_output, discarding any data pulled previously
final_output = pd.DataFrame()

def pull_data_for_geography(available_geos):
    """Ask the user to choose geographies for one indicator and pull the matching data from Fingertips.

    The user is prompted for a geography name. If the name is not in 'available_geos' a warning
    is printed and the prompt repeats. Otherwise the data for that indicator and geography is
    downloaded and, when it contains at least one row, added to the module-level 'final_output'
    dataframe. When more than one geography is available the user is then asked whether to pick
    another one; answering 'yes' (in any letter case) repeats the whole process.

    Parameters
    ----------
    available_geos: pandas DataFrame
        the geographies available for a single indicator; must contain the columns
        'IndicatorId', 'GeographicalArea' and 'AreaTypeId'

    Returns
    -------
    None
        results are accumulated in the module-level 'final_output' dataframe
    """
    # final_output is rebound below, so it has to be declared global
    global final_output

    # The geography names the user is allowed to choose from
    geo_names = available_geos["GeographicalArea"].unique()

    # A loop (rather than recursion) keeps the call stack flat however many times the user retries
    while True:
        # Ask the user to input a geography from the list provided
        chosen_geo = input("\nPlease select your geography of choice by pasting one geography name from above: ")

        # If the selected geography doesn't exist in the data, warn the user and ask again
        if chosen_geo not in geo_names:
            print("\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print("Geography not found. Please make sure you paste the whole name.")
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
            continue

        # Retrieve the indicator and geography IDs
        ind_ref = available_geos.IndicatorId.unique()[0]
        geo_ref = available_geos.loc[available_geos.GeographicalArea == chosen_geo, "AreaTypeId"].values[0]

        geography_subset = ftp.retrieve_data.get_data_by_indicator_ids(ind_ref, geo_ref)

        # Check whether there is data for the selected geography (sometimes there is no actual data on Fingertips)
        if geography_subset.shape[0] > 0:
            # DataFrame.append was removed in pandas 2.0, so combine the tables with pd.concat.
            # Empty tables (e.g. final_output before the first pull) are left out of the concat.
            frames = [frame for frame in (final_output, geography_subset) if not frame.empty]
            final_output = pd.concat(frames, ignore_index=True)
        else:
            print("\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print("It appears as if there is no data on Fingertips for this geography.")
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")

        # With only one geography available there is nothing else to choose, so finish here
        if available_geos["GeographicalArea"].nunique() <= 1:
            return None

        # Ask the user if they want to select another geography - if so, go round the loop again
        choose_another = input("Would you like to select another geography? ")
        if choose_another.lower() != "yes":
            return None

# Iterate through each chosen indicator and tell the user which geographies are available
for ind_ref in indicator_selection.IID.unique():

    # Print the indicator name
    ind_name = indicator_selection.loc[indicator_selection.IID == ind_ref, "IndicatorName"].unique()[0]
    print("\n***Indicator name: {}***".format(ind_name))
    print("\nData for this indicator is available at the following geographies (England will be included at all geographies):")

    # Select matching geographies
    geo_ind_subset = geographies.loc[geographies.IndicatorId == ind_ref]
    available_geo_names = geo_ind_subset.GeographicalArea.unique()

    # Check that there are geographies for the data (in a small number of cases, the indicator has no data)
    if len(available_geo_names) == 0:
        print("\n***There is no data for this indicator.***\n")
        continue

    # Print the geographies one at a time
    for geo in available_geo_names:
        print("- {}".format(geo))

    # Allow the user to state whether they want to select all or specific geographies
    select_geo = input("\nFor this indicator, would you like to save data at all geography levels or specific geographies?" \
                       " Please answer with 'All' or 'Specific': ")

    # Pull data from Fingertips at either all geographies or selected geographies
    # If the user selects all geographies, pull this data from Fingertips
    if select_geo.lower() == "all":
        ind_by_geo = ftp.retrieve_data.get_data_for_indicator_at_all_available_geographies(ind_ref)
        if ind_by_geo.shape[0] == 0:
            print("\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print("It appears as if there is no data on Fingertips for this geography.")
            print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
        else:
            # DataFrame.append was removed in pandas 2.0, so combine the tables with pd.concat.
            # Empty tables (e.g. final_output before the first pull) are left out of the concat.
            frames = [frame for frame in (final_output, ind_by_geo) if not frame.empty]
            final_output = pd.concat(frames, ignore_index=True)

    # Otherwise (any answer other than 'all') call the function above so the user can pick specific geographies
    else:
        pull_data_for_geography(geo_ind_subset)

# Preview the output (includes all selected indicators at selected geographies)
final_output.head()

In [None]:
"""Save results: one csv per indicator (includes all geographies for that indicator)."""

# Ask the user whether they would like to save results as a csv
save_results = input("Would you like to save results as a csv? " \
                     "(If you have selected more than one indicator, this will save one csv per indicator.)")

# Only an answer of 'yes' (in any letter case) saves anything
if save_results.lower() == "yes":
    if final_output.empty:
        # Nothing was pulled above, so there is no 'Indicator Name' column to split the output by
        print("\n****\nThere is no data to save. Please pull data for at least one indicator first.\n****")
    else:
        # Pull today's month and year once and use them in every file name (for version control)
        today = datetime.now()
        month_year = "_{0}_{1}".format(today.month, today.year)

        # Folder the csvs are written to (replace with your local file path); create it if it does not exist yet
        output_folder = Path("Fingertips outputs")
        output_folder.mkdir(parents=True, exist_ok=True)

        # Iterate through each indicator, subset the final output to that indicator and create a bespoke file name + save
        for indicator in final_output["Indicator Name"].unique():
            indicator_df = final_output.loc[final_output["Indicator Name"] == indicator]

            # Lowercase, underscores instead of spaces, at most 50 characters of the indicator name
            filename = indicator.replace(" ", "_").lower()[:50]
            # Drop commas plus every character that is not allowed in a file name on Windows;
            # this includes '/' which would otherwise be read as a sub-folder in the path
            filename = re.sub(r'[\\/:*?"<>|,]', "", filename) + month_year
            # NOTE(review): two indicators sharing the same first 50 characters would still
            # overwrite each other's file - confirm whether that can happen in practice

            indicator_df.to_csv(output_folder / "{}.csv".format(filename), index=False)


### Some DIY Fingertips functions for quick reference

- ftp.metadata.get_area_types_for_profile(profile_name) #Pull the area type IDs for a given profile
- ftp.retrieve_data.get_data_for_indicator_at_all_available_geographies(ind_ref) #Pull all data for an indicator
- ftp.retrieve_data.get_data_by_indicator_ids(ind_ref, geo_ref) #Pull data for a specific indicator and geography
- ftp.metadata.get_area_types_as_dict() #Presents a dictionary of area type IDs and names
- ftp.retrieve_data.get_all_areas_for_all_indicators() #Get all geography ID codes for all indicators
- ftp.metadata.get_area_type_ids_for_profile(profile_id) #Get geography ID codes for a specific profile on Fingertips