<a href="https://colab.research.google.com/github/madarasw/ML_research_experiments/blob/main/HCPI_USA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install autogluon.timeseries

Collecting autogluon.timeseries
  Downloading autogluon.timeseries-1.4.0-py3-none-any.whl.metadata (12 kB)
Collecting torch<2.8,>=2.2 (from autogluon.timeseries)
  Downloading torch-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting lightning<2.8,>=2.2 (from autogluon.timeseries)
  Downloading lightning-2.5.5-py3-none-any.whl.metadata (39 kB)
Collecting pytorch-lightning (from autogluon.timeseries)
  Downloading pytorch_lightning-2.5.5-py3-none-any.whl.metadata (20 kB)
Collecting transformers<4.50,>=4.38.0 (from transformers[sentencepiece]<4.50,>=4.38.0->autogluon.timeseries)
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting gluonts<0.17,>=0.15.0 (from autogluon.timeseries)
  Downloading gluonts-0.16.2-py3-none-any.whl.metadata (9.8 kB)
Collecting statsforecast<2.0.2,>=1.7.0 (from autogluon.timeseries)
  Downloading st

In [11]:
import pandas as pd
from datetime import datetime
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

def forecast_next_12_months(df_values: list):
    """
    df_values: list of float, representing monthly CPI / inflation values,
               in chronological order, starting Jan 1970.
    Returns: comma-separated 12 mean forecasts (floats) for next 12 months.
    """
    # sanity check
    assert isinstance(df_values, list)
    n = len(df_values)
    if n < 12:
        raise ValueError("Need at least 12 months of history")

    # 1. build a pandas DataFrame with timestamp and value
    # create a monthly date range starting Jan 1970 for n periods
    start = pd.Timestamp("1970-01-01")
    # we assume the data is at month-start frequency
    dates = pd.date_range(start=start, periods=n, freq="MS")
    df = pd.DataFrame({
        "timestamp": dates,
        "value": df_values,
        "item_id": "USA_inflation"
    })

    # 2. convert to TimeSeriesDataFrame
    ts = TimeSeriesDataFrame.from_data_frame(
        df,
        id_column="item_id",
        timestamp_column="timestamp"
    )

    # 3. set up predictor
    pred_len = 12
    predictor = TimeSeriesPredictor(
        prediction_length=pred_len,
        target="value",
        # you can explicitly specify freq = "M" or "MS" if needed
        freq="MS",
        path="autogluon_us_inflation_model",
        eval_metric="MASE",
        verbosity=2
    )

    # 4. fit model
    predictor.fit(
        train_data=ts,
        presets="medium_quality",  # or "fast_training", "high_accuracy"
        time_limit=600  # seconds; adjust higher for more tuning
    )

    # 5. get forecast
    forecast = predictor.predict(ts)  # this returns a TimeSeriesDataFrame with quantiles & mean
    # forecast has columns like "mean", "0.1", "0.9", etc.

    # the “mean” column is the expected value
    # reset index to access easily
    fc = forecast.reset_index()
    # the forecast rows will appear in chronological order for the future 12 months
    mean_vals = fc["mean"].tolist()

    # format output
    out_str = ",".join(f"{v:.6f}" for v in mean_vals)
    return out_str





Beginning AutoGluon training... Time limit = 600s
AutoGluon will save models to '/content/autogluon_us_inflation_model'
AutoGluon Version:  1.4.0
Python Version:     3.12.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Oct  2 10:42:05 UTC 2025
CPU Count:          2
GPU Count:          0
Memory Avail:       11.36 GB / 12.67 GB (89.7%)
Disk Space Avail:   62.19 GB / 107.72 GB (57.7%)
Setting presets to: medium_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MASE,
 'freq': 'MS',
 'hyperparameters': 'light',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 12,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'value',
 'time_limit': 600,
 'verbosity': 2}

Provided train_data has 21 rows, 1 time series. Median time series length is 21 (min=21, max=21). 
	Removing 1 shor

ValueError: At least some time series in train_data must have >= 25 observations. Please provide longer time series as train_data or reduce prediction_length, num_val_windows, or val_step_size.

In [12]:
if __name__ == "__main__":
    # Example usage:
    data = "8.1	8.2	8.2	8.3	8.4	8.4	8.5	8.4	8.5	8.6	8.6	8.7	8.8	8.9	8.9	9.1	9.2	9.3	9.3	9.3	9.3	9.4	9.4	9.5	9.5	9.6	9.6	9.7	9.8	9.8	9.9	9.9	10.0	10.1	10.2	10.2	10.3	10.3	10.4	10.6	10.7	10.7	10.8	10.8	10.9	11.1	11.2	11.3	11.5	11.7	11.8	12.2	12.4	12.5	12.6	12.6	12.8	13.0	13.3	13.5	13.8	14.0	14.3	14.9	15.5	15.8	15.9	16.0	16.2	16.4	16.6	16.8	17.0	17.2	17.3	17.7	17.9	18.0	18.0	18.2	18.5	18.8	19.1	19.3	19.8	20.0	20.2	20.7	20.9	21.1	21.2	21.3	21.4	21.5	21.6	21.7	21.8	21.9	22.1	22.4	22.5	22.7	22.8	22.9	23.0	23.1	23.3	23.5	23.8	24.0	24.2	24.7	24.8	25.3	26.4	26.6	26.8	27.1	27.4	27.6	28.2	28.6	29.0	30.0	30.3	30.6	30.8	30.9	31.1	31.3	31.5	31.7	31.9	32.2	32.7	33.6	33.8	34.0	34.2	34.4	34.6	35.0	35.3	35.5	35.7	35.8	36.1	36.8	37.1	37.2	37.2	37.2	37.2	37.3	37.5	37.5	37.5	37.7	37.7	38.3	38.4	38.5	38.7	38.9	39.1	39.2	39.3	39.5	39.4	39.6	39.7	40.2	40.4	40.5	40.5	40.8	40.9	41.2	41.3	41.3	41.4	41.7	42.1	43.0	43.2	43.3	43.2	43.4	43.3	43.4	43.5	43.6	43.7	43.9	43.9	44.4	44.4	44.4	44.3	44.4	44.6	44.7	45.1	45.2	45.4	45.6	45.7	46.2	46.3	46.3	46.2	46.4	46.5	46.7	46.9	46.9	46.9	47.0	47.2	47.8	48.0	48.2	48.2	48.5	48.7	49.0	49.3	49.3	49.5	49.7	50.0	50.5	50.9	51.0	51.0	51.1	51.5	51.9	52.1	52.2	52.4	52.9	53.1	54.4	55.0	55.2	55.2	55.7	56.2	56.7	56.9	57.0	57.0	57.2	57.5	58.7	59.2	59.5	59.3	59.9	60.1	60.4	60.5	60.7	60.5	60.8	61.1	61.8	62.2	62.2	62.0	62.1	62.3	62.5	62.5	62.6	62.4	62.7	63.0	63.4	63.6	63.6	63.4	63.7	64.0	64.0	63.8	64.0	64.0	64.2	64.4	64.9	65.1	65.1	64.7	65.1	65.2	65.1	65.2	65.5	65.5	65.7	66.0	66.4	66.7	66.8	66.6	66.9	67.2	67.1	67.1	67.5	67.4	67.7	67.9	68.3	68.6	68.7	68.3	68.7	69.0	69.1	69.2	69.4	69.2	69.3	69.4	69.7	70.0	70.1	70.0	70.3	70.5	70.6	70.6	70.7	70.4	70.6	70.8	71.2	71.5	71.4	71.2	71.4	71.7	71.7	71.8	72.0	71.8	71.9	72.3	72.6	72.8	72.7	72.4	72.6	72.8	72.8	72.9	73.1	72.6	72.8	73.0	73.3	73.5	73.6	73.3	73.3	73.8	73.8	74.0	74.0	73.5	73.7	73.9	74.4	74.9	75.0	74.5	74.8	75.0	74.9	74.9	75.0	74.8	75.0	75.2	75.6	75.8	75.8	75.6	75.8	76.0	76.1	76.1	76.3	75.9	76.1	76.4	76.8	76.8	76.7	76.6	76.8	77.0	77.1	77.1	77.3	77.0	77.2	77.3	77.6	77.9	77.9	77.7	77.9	77.9	78.1	78.3	78.6	78.3	78.5	78.8	79.1	79.4	79.4	79.5	79.7	79.9	80.0	80.0	80.3	80.0	80.2	80.4	80.9	81.3	81.5	81.5	81.8	81.9	82.0	82.2	82.6	82.1	82.4	82.8	83.1	83.3	83.5	83.1	83.4	83.5	83.8	84.1	84.5	84.1	84.6	84.9	85.6	86.1	86.6	86.6	87.1	87.5	87.3	87.3	87.1	86.6	87.2	87.3	87.5	87.9	88.1	88.0	88.3	88.3	88.4	88.6	88.9	88.8	89.0	89.4	89.9	90.1	90.2	90.0	90.4	90.4	90.6	90.9	91.7	91.8	92.3	92.6	93.3	93.5	93.5	93.5	93.9	94.5	94.5	94.7	95.0	94.7	95.2	95.4	95.9	95.9	95.6	95.7	96.1	96.4	96.8	97.0	97.3	97.0	97.5	97.8	98.0	98.2	98.0	98.0	98.4	98.7	98.8	98.8	99.2	98.7	99.1	99.3	99.6	99.6	99.8	99.6	99.9	100.0	100.1	99.9	99.9	99.2	99.5	99.6	99.9	100.1	100.1	100.0	100.3	100.2	100.3	100.3	100.4	99.9	100.1	100.4	100.6	100.8	101.0	100.9	101.2	101.5	101.6	101.8	102.2	101.8	102.4	102.7	103.2	103.5	103.5	103.5	104.0	104.3	104.4	104.7	105.0	104.5	104.9	105.1	105.5	105.9	105.9	105.9	106.5	106.6	106.7	106.9	107.1	106.4	106.8	107.0	107.6	107.9	107.9	108.0	108.3	108.4	108.3	108.5	108.5	108.3	108.6	108.6	108.6	108.6	108.8	109.2	108.8	109.2	109.2	109.1	109.4	109.3	109.4	109.7	110.4	111.0	111.4	111.4	112.1	112.4	113.4	114.1	114.7	114.6	115.4	116.5	119.0	119.7	120.5	121.2	121.8	122.3	124.3	124.8	125.3	124.8	126.0	126.8	128.3	129.1	129.4	129.0	129.4	130.1	130.2	130.0	130.5	130.0	130.8	131.6	132.2	132.7	133.0	132.9	133.4	133.5	134.3	134.6	135.1	135.1	135.6	136.1"
    df_vals = list(map(float, data.split()))
    result = forecast_next_12_months(df_vals)
    print(result)

Beginning AutoGluon training... Time limit = 600s
AutoGluon will save models to '/content/autogluon_us_inflation_model'
AutoGluon Version:  1.4.0
Python Version:     3.12.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Oct  2 10:42:05 UTC 2025
CPU Count:          2
GPU Count:          0
Memory Avail:       11.07 GB / 12.67 GB (87.4%)
Disk Space Avail:   62.19 GB / 107.72 GB (57.7%)
Setting presets to: medium_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MASE,
 'freq': 'MS',
 'hyperparameters': 'light',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 12,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'value',
 'time_limit': 600,
 'verbosity': 2}

Provided train_data has 663 rows, 1 time series. Median time series length is 663 (min=663, max=663). 

Provided da

137.032483,137.609527,137.701040,137.338963,137.645981,137.895005,138.525073,138.671172,139.019166,138.712714,139.058755,139.579509
