In [None]:
import requests
import pandas as pd
from io import StringIO

# Function to process and save the data
def process_and_save_data(file_url, file_name, quantiles):
    """Download one forecast CSV and write one wide-format CSV per quantile.

    For every requested quantile level, the rows whose ``output_type`` is
    ``'quantile'`` and whose ``output_type_id`` equals that level are pivoted
    so that each location is a row and each ``target_end_date`` is a column
    holding ``value``. Each result is written to the current working
    directory as ``<file_name without extension>_<quantile>.csv``.

    Args:
        file_url: URL of the raw CSV file to download.
        file_name: Name of the source file; the text before its last dot is
            used as the prefix of every output file name.
        quantiles: Iterable of numeric quantile levels, e.g. ``[0.5, 0.025]``.

    Returns:
        None. Problems are reported with ``print`` rather than raised, so a
        caller looping over many files can carry on with the next one.
    """
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(file_url, timeout=30)

        if response.status_code != 200:
            print(f"Failed to download {file_name}")
            return

        # Read location codes as text: values such as "01" would otherwise be
        # type-inferred as integers and lose their leading zero.
        data = pd.read_csv(StringIO(response.text), dtype={'location': str})

        # Compare quantile levels numerically. When every output_type_id in a
        # file is numeric, pandas parses the column as float and a comparison
        # against str(quantile) never matches; textual variants such as
        # "0.500" would be missed as well. Non-numeric ids become NaN and
        # therefore never match.
        levels = pd.to_numeric(data['output_type_id'], errors='coerce')
        is_quantile_row = data['output_type'] == 'quantile'

        output_stem = file_name.rsplit('.', 1)[0]

        for quantile in quantiles:
            # Edit the conditions/column names here to change which rows are kept.
            matches = is_quantile_row & ((levels - float(quantile)).abs() < 1e-9)
            filtered_data = data[matches]

            if filtered_data.empty:
                # Nothing to pivot; say so instead of writing an empty file.
                print(f"No rows for quantile {quantile} in {file_name}")
                continue

            # One row per location, one column per target_end_date.
            pivoted_data = filtered_data.pivot_table(
                index=['location'],
                columns='target_end_date',
                values='value',
                aggfunc='first',
            ).reset_index()

            # Flatten column labels in case the pivot produced tuple labels.
            pivoted_data.columns = [
                col[1] if isinstance(col, tuple) else col
                for col in pivoted_data.columns
            ]

            # Add an 'id' column with sequential numbers starting at 1.
            pivoted_data.insert(0, 'id', range(1, len(pivoted_data) + 1))

            new_filename = f"{output_stem}_{quantile}.csv"
            pivoted_data.to_csv(new_filename, index=False)
    except Exception as e:
        # Best-effort by design: report the failure and let the caller continue.
        print(f"Error processing {file_name}: {e}")

# Main script
def main(model="SGroup-RandomForest", quantiles=(0.5, 0.025, 0.975), timeout=30):
    """Convert every CSV of one model in the FluSight forecast hub.

    Lists the model's directory through the GitHub contents API, then hands
    each ``.csv`` file to ``process_and_save_data``, which writes one output
    file per quantile.

    Args:
        model: Directory name under ``model-output`` in the
            ``cdcepi/FluSight-forecast-hub`` repository.
        quantiles: Quantile levels to extract from each file.
        timeout: Seconds to wait for the file-list request before giving up.

    Returns:
        None. If the file list cannot be fetched, a message is printed and
        nothing is processed.
    """
    # GitHub API endpoint that lists the files of the model directory.
    api_url = f"https://api.github.com/repos/cdcepi/FluSight-forecast-hub/contents/model-output/{model}"

    # Base URL for raw content download.
    base_url = f"https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/main/model-output/{model}/"

    # A timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(api_url, timeout=timeout)
    if response.status_code != 200:
        print("Failed to fetch file list from GitHub")
        return

    files = response.json()
    if not isinstance(files, list):
        # The code below needs a list of entries; anything else is unusable.
        print("Failed to fetch file list from GitHub")
        return

    file_names = [
        entry['name']
        for entry in files
        if isinstance(entry, dict) and entry.get('name', '').endswith('.csv')
    ]

    # Output files are named <source file stem>_<quantile>.csv.
    for file_name in file_names:
        file_url = f"{base_url}{file_name}"
        process_and_save_data(file_url, file_name, list(quantiles))

# Run the pipeline only when this code is executed directly (as a script or in
# a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


In [2]:
# Inspect one converted file: the 0.025-quantile output written for
# 2023-10-14-SGroup-RandomForest.csv.
# Location codes are read as text so that codes such as "01" keep their
# leading zero instead of being type-inferred as integers.
pd.read_csv("2023-10-14-SGroup-RandomForest_0.025.csv", dtype={"location": str})

Unnamed: 0,id,location,2023-10-07,2023-10-14,2023-10-21,2023-10-28,2023-11-04
0,1,01,2.3,0.0,0.0,0.0,0.0
1,2,02,0.0,0.0,0.0,0.0,0.0
2,3,04,0.0,0.0,0.0,0.0,0.0
3,4,05,0.0,1.4,0.7,0.0,0.0
4,5,06,0.0,0.0,0.0,0.0,0.0
5,6,08,0.0,0.0,0.0,0.0,0.0
6,7,09,0.0,0.0,0.0,0.0,0.0
7,8,10,0.0,0.0,0.0,0.0,0.0
8,9,11,0.0,0.0,0.0,0.0,0.0
9,10,12,110.2,97.0,38.7,0.0,15.5


In [None]:
# Process truth data
# Important: this cell must be executed; it writes truth_data.csv.
# It downloads the target hospital-admissions file and reshapes it to one row
# per location and one column per date.
file_url = "https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/main/target-data/target-hospital-admissions.csv"

# A timeout keeps a stalled connection from hanging the notebook.
response = requests.get(file_url, timeout=30)

# Stop here on an HTTP error instead of trying to parse an error page as CSV.
response.raise_for_status()

# Read the content of the file into a pandas DataFrame. Location codes are
# read as text so that codes such as "01" keep their leading zero.
data = pd.read_csv(StringIO(response.text), dtype={'location': str})

# The truth data needs no filtering; the name is kept so that any other cell
# referring to `filtered_data` keeps working.
filtered_data = data

# Pivot the data: one row per location, one column per date.
pivoted_data = filtered_data.pivot_table(
    index=['location'], columns='date', values='value', aggfunc='first'
).reset_index()

# Flatten column labels in case the pivot produced tuple labels.
pivoted_data.columns = [
    col[1] if isinstance(col, tuple) else col for col in pivoted_data.columns
]

# Add an 'id' column with sequential numbers starting at 1.
pivoted_data.insert(0, 'id', range(1, len(pivoted_data) + 1))

# Output file name. The previous f"truth_{quantile}.csv" referenced a name
# that does not exist at this scope and was never used for saving.
new_filename = "truth_data.csv"

pivoted_data.to_csv(new_filename, index=False)