<a href="https://colab.research.google.com/github/suprabhathk/FoundationalModels_TimeSeries_Epidemics/blob/main/Chronos_ILI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## Installing chronos related dependencies
!pip install git+https://github.com/amazon-science/chronos-forecasting.git

Collecting git+https://github.com/amazon-science/chronos-forecasting.git
  Cloning https://github.com/amazon-science/chronos-forecasting.git to /tmp/pip-req-build-g4uoavde
  Running command git clone --filter=blob:none --quiet https://github.com/amazon-science/chronos-forecasting.git /tmp/pip-req-build-g4uoavde
  Resolved https://github.com/amazon-science/chronos-forecasting.git to commit 94e20ea7e510ac4d665492b8bed8836a5143f16e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting accelerate<1,>=0.32 (from chronos-forecasting==1.5.0)
  Downloading accelerate-0.34.2-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->chronos-forecasting==1.5.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->chronos

In [None]:
import pandas as pd
import torch
import numpy as np
from chronos import ChronosPipeline

In [None]:
# Sentinelles ILI series for France (1984-2025)

# Dropbox sharing link to the CSV; substitute your own link here if the file moves
url_ili = 'https://www.dropbox.com/scl/fi/bn8q34o3f0jki4pp8cwkq/sentinelle_ILI_France_1984_2025.csv?rlkey=h57lvdkvvmpoifwprbl6nc4l2&st=m1r7lprx&dl=1'

# Let pandas fetch and parse the CSV straight from the link
sentinelle_ili_france_1984_2025 = pd.read_csv(filepath_or_buffer=url_ili)


In [None]:
####################################### ILI ########################################
# Build the frame used for forecasting in a single chain, leaving the raw frame untouched:
#   1. discard the stray 'Unnamed: 0' column,
#   2. rename date/inc to timestamp/target,
#   3. tag every row with item_id 'ILI'.
ili_chronos = (
    sentinelle_ili_france_1984_2025
    .drop(columns='Unnamed: 0')
    .rename(columns={'date': 'timestamp', 'inc': 'target'})
    .assign(item_id='ILI')
)

# Preview
ili_chronos.head()

Unnamed: 0,timestamp,target,item_id
0,1984-10-29,68422.0,ILI
1,1984-11-05,135223.0,ILI
2,1984-11-12,87330.0,ILI
3,1984-11-19,72029.0,ILI
4,1984-11-26,78620.0,ILI


In [None]:
# Inputs.csv: one row per forecasting scenario

# Dropbox sharing link to the scenario file; substitute your own link here if the file moves
url_input_csv = 'https://www.dropbox.com/scl/fi/f59qxf16fdyexxwelry17/Inputs.csv?rlkey=z1ke2dm733puv20d3crdxrfad&st=af9d89bd&dl=1'

# The scenario file is semicolon-delimited
input_csv = pd.read_csv(url_input_csv, delimiter=';')

# Print the leading rows to confirm the import worked
print(input_csv.head(5))

  Data         Scenario dstart_season_test dstart_test   dend_test
0  ILI  3_seasons_01_10         2016-07-18  2016-10-03  2019-09-02
1  ILI  3_seasons_01_11         2016-07-18  2016-11-07  2019-09-02
2  ILI  3_seasons_01_12         2016-07-18  2016-12-05  2019-09-02
3  ILI  3_seasons_15_12         2016-07-18  2016-12-19  2019-09-02
4  ILI  4_weeks_M4_2016         2016-07-18  2016-12-19  2017-01-09


In [None]:
def generate_chronos_forecasts(ili_chronos, input_csv, pipeline=None, max_prediction_length=48):
    """
    Generate forecasts for every ILI scenario using a Chronos pipeline.

    For each scenario row, observations with ``timestamp <= dstart_test`` form
    the model context and observations with
    ``dstart_test < timestamp <= dend_test`` form the evaluation window. The
    window is truncated to ``max_prediction_length`` steps. The 10%, 50% and
    90% quantiles across the sample paths returned by the pipeline are
    reported next to the observed values.

    Parameters:
    ili_chronos (pd.DataFrame): Preprocessed ILI data with 'timestamp' and
        'target' columns. It is not modified; timestamps are parsed with
        ``pd.to_datetime`` on an internal copy, which is also sorted
        chronologically.
    input_csv (pd.DataFrame): Scenario table with 'Data', 'Scenario',
        'dstart_test' and 'dend_test' columns. Only rows where
        ``Data == 'ILI'`` are used.
    pipeline (optional): Object exposing ``predict(context, prediction_length)``
        whose result, indexed at ``[0]``, is a (num_samples, prediction_length)
        tensor. When None, "amazon/chronos-t5-small" is loaded on CUDA, MPS or
        CPU (first one available). Pass a pipeline to reuse an already loaded
        model across calls.
    max_prediction_length (int or None): Cap on the forecast horizon per
        scenario. None disables the cap.

    Returns:
    pd.DataFrame: Combined forecasts for all scenarios with columns
        Data, Scenario, date, inc, y, y_lo, y_up. The frame is empty (same
        columns) when no scenario yields a forecast.

    Raises:
    ValueError: If ``max_prediction_length`` is given and is smaller than 1.
    """
    output_columns = ['Data', 'Scenario', 'date', 'inc', 'y', 'y_lo', 'y_up']

    if max_prediction_length is not None and max_prediction_length < 1:
        raise ValueError("max_prediction_length must be at least 1 or None")

    # Initialize Chronos pipeline only when the caller did not supply one
    if pipeline is None:
        if torch.cuda.is_available():
            device = "cuda"
        elif torch.backends.mps.is_available():
            device = "mps"
        else:
            device = "cpu"
        pipeline = ChronosPipeline.from_pretrained(
            "amazon/chronos-t5-small",
            device_map=torch.device(device),
            torch_dtype=torch.bfloat16,
        )

    # Filter for ILI data only
    scenarios = input_csv[input_csv['Data'] == 'ILI']

    # Loop-invariant preparation: work on a private copy, compare real dates
    # rather than strings, and make sure the context is in chronological order.
    ili_data = ili_chronos.copy()
    ili_data['timestamp'] = pd.to_datetime(ili_data['timestamp'])
    ili_data = ili_data.sort_values('timestamp', kind='stable')

    forecast_dfs = []

    for _, row in scenarios.iterrows():
        scenario = row['Scenario']
        split_date = pd.to_datetime(row['dstart_test'])
        max_date = pd.to_datetime(row['dend_test'])
        print(f"Processing scenario: {scenario}")

        # Context: everything up to and including the split date
        train_data = ili_data[ili_data['timestamp'] <= split_date]

        # Evaluation window: strictly after the split date, up to the end date,
        # truncated to the maximum horizon (a None cap keeps the whole window)
        in_window = (ili_data['timestamp'] > split_date) & (ili_data['timestamp'] <= max_date)
        test_data = ili_data[in_window].iloc[:max_prediction_length]
        prediction_length = len(test_data)

        # Nothing to condition on or nothing to predict: skip instead of
        # handing the model an empty context or a zero-length horizon.
        if train_data.empty or prediction_length == 0:
            print(f"Skipping scenario: {scenario} (empty training or test window)")
            continue

        # Convert to tensor and generate forecast
        context = torch.tensor(train_data['target'].values)
        forecast = pipeline.predict(context, prediction_length)

        # Quantiles across sample paths for the single input series
        low, median, high = np.quantile(forecast[0].numpy(), [0.1, 0.5, 0.9], axis=0)

        # Plain arrays throughout so every column aligns positionally
        forecast_df = pd.DataFrame({
            'Data': 'ILI',
            'Scenario': scenario,
            'date': test_data['timestamp'].to_numpy(),
            'inc': test_data['target'].to_numpy(),
            'y': median,
            'y_lo': low,
            'y_up': high
        })

        forecast_dfs.append(forecast_df)
        print(f"Completed scenario: {scenario}")

    # pd.concat raises on an empty list, so return an empty frame with the same columns
    if not forecast_dfs:
        return pd.DataFrame(columns=output_columns)

    # Combine all forecast DataFrames
    combined_forecasts = pd.concat(forecast_dfs, ignore_index=True)
    return combined_forecasts

In [None]:
# Run every ILI scenario through Chronos and collect the combined forecast table
forecasts = generate_chronos_forecasts(
    ili_chronos=ili_chronos,
    input_csv=input_csv,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/185M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

Processing scenario: 3_seasons_01_10
Completed scenario: 3_seasons_01_10
Processing scenario: 3_seasons_01_11
Completed scenario: 3_seasons_01_11
Processing scenario: 3_seasons_01_12
Completed scenario: 3_seasons_01_12
Processing scenario: 3_seasons_15_12
Completed scenario: 3_seasons_15_12
Processing scenario: 4_weeks_M4_2016
Completed scenario: 4_weeks_M4_2016
Processing scenario: 4_weeks_M3_2016
Completed scenario: 4_weeks_M3_2016
Processing scenario: 4_weeks_M2_2016
Completed scenario: 4_weeks_M2_2016
Processing scenario: 4_weeks_M1_2016
Completed scenario: 4_weeks_M1_2016
Processing scenario: 4_weeks_0_2016
Completed scenario: 4_weeks_0_2016
Processing scenario: 4_weeks_P1_2016
Completed scenario: 4_weeks_P1_2016
Processing scenario: 4_weeks_P2_2016
Completed scenario: 4_weeks_P2_2016
Processing scenario: 4_weeks_P3_2016
Completed scenario: 4_weeks_P3_2016
Processing scenario: 4_weeks_P4_2016
Completed scenario: 4_weeks_P4_2016
Processing scenario: 4_weeks_M4_2017
Completed scenar

In [None]:
# Display the combined forecast table for all scenarios
forecasts

Unnamed: 0,Data,Scenario,date,inc,y,y_lo,y_up
0,ILI,3_seasons_01_10,2016-10-10,13909.0,14156.894531,9583.128857,18817.781250
1,ILI,3_seasons_01_10,2016-10-17,12920.0,13939.095703,9844.486914,18774.218945
2,ILI,3_seasons_01_10,2016-10-24,9378.0,15028.088379,11238.396289,21474.920313
3,ILI,3_seasons_01_10,2016-10-31,15145.0,17206.072266,12588.746875,21082.883203
4,ILI,3_seasons_01_10,2016-11-07,15957.0,17423.871094,12937.224316,20952.203320
...,...,...,...,...,...,...,...
1162,ILI,pic_M1_2018,2019-08-05,2337.0,2376.305908,1584.204102,4039.719214
1163,ILI,pic_M1_2018,2019-08-12,1592.0,2574.331299,1544.599023,4396.165137
1164,ILI,pic_M1_2018,2019-08-19,1593.0,3168.407715,1980.255005,5663.528125
1165,ILI,pic_M1_2018,2019-08-26,1672.0,3564.458252,1940.649915,7564.571875


In [None]:
# Split the forecasts in two around the scenario name '4_weeks_P4_2018'.
# NOTE: the split is a plain string comparison on 'Scenario', so it depends on
# the lexicographic ordering of the scenario names.
last_ili_scenario = '4_weeks_P4_2018'
scenario_names = forecasts['Scenario']

# Scenarios sorting after the threshold
forecasts_pic = forecasts[scenario_names.gt(last_ili_scenario)]

# Scenarios sorting up to and including the threshold
forecasts_ILI = forecasts[scenario_names.le(last_ili_scenario)]




In [None]:
# Display the rows whose Scenario sorts after '4_weeks_P4_2018'
forecasts_pic

Unnamed: 0,Data,Scenario,date,inc,y,y_lo,y_up
309,ILI,pic_01_10_2016,2016-10-10,13909.0,15681.483398,10628.561816,16552.675781
310,ILI,pic_01_10_2016,2016-10-17,12920.0,15681.483398,7797.182080,21387.800000
311,ILI,pic_01_10_2016,2016-10-24,9378.0,17206.072266,10018.725586,21257.121875
312,ILI,pic_01_10_2016,2016-10-31,15145.0,15899.281738,10236.523926,23696.463281
313,ILI,pic_01_10_2016,2016-11-07,15957.0,14810.290039,11717.553516,21649.158008
...,...,...,...,...,...,...,...
1162,ILI,pic_M1_2018,2019-08-05,2337.0,2376.305908,1584.204102,4039.719214
1163,ILI,pic_M1_2018,2019-08-12,1592.0,2574.331299,1544.599023,4396.165137
1164,ILI,pic_M1_2018,2019-08-19,1593.0,3168.407715,1980.255005,5663.528125
1165,ILI,pic_M1_2018,2019-08-26,1672.0,3564.458252,1940.649915,7564.571875


In [None]:
# Display the rows whose Scenario sorts up to and including '4_weeks_P4_2018'
forecasts_ILI

Unnamed: 0,Data,Scenario,date,inc,y,y_lo,y_up
0,ILI,3_seasons_01_10,2016-10-10,13909.0,14156.894531,9583.128857,18817.781250
1,ILI,3_seasons_01_10,2016-10-17,12920.0,13939.095703,9844.486914,18774.218945
2,ILI,3_seasons_01_10,2016-10-24,9378.0,15028.088379,11238.396289,21474.920313
3,ILI,3_seasons_01_10,2016-10-31,15145.0,17206.072266,12588.746875,21082.883203
4,ILI,3_seasons_01_10,2016-11-07,15957.0,17423.871094,12937.224316,20952.203320
...,...,...,...,...,...,...,...
304,ILI,4_weeks_P4_2018,2019-04-29,3166.0,3889.623779,2415.661572,8639.058594
305,ILI,4_weeks_P4_2018,2019-05-06,1385.0,3480.189819,1228.302856,7779.247559
306,ILI,4_weeks_P4_2018,2019-05-13,3221.0,2456.604980,1187.359454,5773.020557
307,ILI,4_weeks_P4_2018,2019-05-20,2891.0,2866.039062,1228.302856,5363.586475


In [None]:
# Write forecasts_ILI to a CSV file, leaving the row index out of the file
forecasts_ILI.to_csv(path_or_buf='forecast_ILI.csv', index=False)


In [None]:
# For each Scenario in forecasts_pic, keep the single row where the median forecast 'y' is largest.

# Row label of the maximum 'y' within each scenario
peak_row_labels = forecasts_pic.groupby('Scenario')['y'].idxmax()

# Select those rows and discard the 'inc' column
pic_ili = forecasts_pic.loc[peak_row_labels].drop(columns='inc')

# Report how many rows were kept
print("Count of pic_ili dataframe:", len(pic_ili))

# Show the result
pic_ili


Count of pic_ili dataframe: 24


Unnamed: 0,Data,Scenario,date,y,y_lo,y_up
328,ILI,pic_01_10_2016,2017-02-20,262447.046875,220499.070312,332098.95625
449,ILI,pic_01_10_2017,2018-02-05,247720.328125,205327.385937,262836.19375
584,ILI,pic_01_10_2018,2019-02-11,241041.601562,143987.284375,264338.053125
361,ILI,pic_01_11_2016,2017-02-27,246017.59375,211484.254688,289984.86875
483,ILI,pic_01_11_2017,2018-01-29,236046.976562,168604.9875,260606.046875
626,ILI,pic_01_11_2018,2019-02-11,231088.492188,177529.365625,273134.925
387,ILI,pic_01_12_2016,2017-02-13,346769.890625,251911.284375,404345.975
512,ILI,pic_01_12_2017,2018-01-15,266772.828125,222009.25,291477.034375
665,ILI,pic_01_12_2018,2019-02-18,228693.953125,143817.846875,272821.640625
410,ILI,pic_15_12_2016,2017-01-23,377049.65625,322556.953125,456485.33125


In [None]:
# Give the pic_ili columns their *_pic names and put them in the final order:
# Data, Scenario, inc_pic, inc_pic_lo, inc_pic_up, date_pic

pic_column_names = {
    'date': 'date_pic',
    'y': 'inc_pic',
    'y_lo': 'inc_pic_lo',
    'y_up': 'inc_pic_up',
}
pic_column_order = ['Data', 'Scenario', 'inc_pic', 'inc_pic_lo', 'inc_pic_up', 'date_pic']

pic_ili = pic_ili.rename(columns=pic_column_names)
pic_ili = pic_ili[pic_column_order]

pic_ili


Unnamed: 0,Data,Scenario,inc_pic,inc_pic_lo,inc_pic_up,date_pic
328,ILI,pic_01_10_2016,262447.046875,220499.070312,332098.95625,2017-02-20
449,ILI,pic_01_10_2017,247720.328125,205327.385937,262836.19375,2018-02-05
584,ILI,pic_01_10_2018,241041.601562,143987.284375,264338.053125,2019-02-11
361,ILI,pic_01_11_2016,246017.59375,211484.254688,289984.86875,2017-02-27
483,ILI,pic_01_11_2017,236046.976562,168604.9875,260606.046875,2018-01-29
626,ILI,pic_01_11_2018,231088.492188,177529.365625,273134.925,2019-02-11
387,ILI,pic_01_12_2016,346769.890625,251911.284375,404345.975,2017-02-13
512,ILI,pic_01_12_2017,266772.828125,222009.25,291477.034375,2018-01-15
665,ILI,pic_01_12_2018,228693.953125,143817.846875,272821.640625,2019-02-18
410,ILI,pic_15_12_2016,377049.65625,322556.953125,456485.33125,2017-01-23


In [None]:
# Write pic_ili to a CSV file, leaving the row index out of the file
pic_ili.to_csv(path_or_buf='Chronos_pic_ILI.csv', index=False)


In [None]:
# Download both exported CSV files from the Colab runtime to the local machine

from google.colab import files

for csv_name in ('forecast_ILI.csv', 'Chronos_pic_ILI.csv'):
    files.download(csv_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>