In [None]:
# ======================================================================
# GPT-OSS forecasting template (no quantization, loads model once)
# ======================================================================

# --- Imports ---
import math
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# --- Global results dict + logger ---
# Shared accumulator: maps a dataset name to its metrics dict. It is filled by
# log_simple_result and is the default `results_dict` of gptoss_prediction.
RESULTS = {}

def log_simple_result(results_dict, dataset_name, horizon, mae, rmse, mape, r2):
    """Store one dataset's rounded metrics in `results_dict`.

    The entry is keyed by `dataset_name` and replaces any earlier entry with
    the same key. MAE, RMSE and MAPE are rounded to 2 decimals, R² to 4; the
    horizon is stored as given. Nothing is returned.
    """
    two_decimal_metrics = {"MAE": mae, "RMSE": rmse, "MAPE (%)": mape}

    summary = {"horizon": horizon}
    for label, value in two_decimal_metrics.items():
        summary[label] = round(value, 2)
    summary["R²"] = round(r2, 4)

    results_dict[dataset_name] = summary

# --- Load GPT-OSS once ---
# Loaded at module level so that every gptoss_prediction call further down
# receives the same `model` / `tokenizer` objects instead of reloading them.
MODEL_ID = "openai/gpt-oss-20b"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# torch_dtype="auto" and device_map="auto" leave the dtype and device placement
# to the library rather than fixing them here.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto", device_map="auto")

# --- Forecasting function ---
def _extract_forecast(decoded_text, prompt_text, horizon):
    """Return the `horizon` numbers that follow the prompt's numbers in `decoded_text`.

    `decoded_text` is the decoded prompt + continuation. The numbers belonging
    to the prompt are skipped by counting how many numbers `prompt_text` itself
    contains, so the forecast is always the first `horizon` generated values,
    even if the model produced more than `horizon` numbers. If fewer were
    produced, the result is padded with the last number seen (0.0 when the text
    contains no number at all).
    """
    # Optional sign and exponent so that negative values and reprs such as
    # "1e-05" are read as one number instead of losing the sign or being split.
    number_pattern = r"-?\d+\.?\d*(?:[eE][-+]?\d+)?"
    n_prompt = len(re.findall(number_pattern, prompt_text))
    values = [float(tok) for tok in re.findall(number_pattern, decoded_text)]

    forecast = values[n_prompt:n_prompt + horizon]
    if len(forecast) < horizon:
        filler = values[-1] if values else 0.0
        forecast.extend([filler] * (horizon - len(forecast)))
    return np.array(forecast, dtype=float)


def gptoss_prediction(
    dataset,
    model,
    tokenizer,
    horizon=24,
    frequency="M",
    dataset_name=None,
    results_dict=RESULTS,
    do_sample=False,
    seed=42,
    title=None,
    ylabel="Value",
    save_path="GPToss",
):
    """Forecast `horizon` steps by letting the language model continue the series.

    The training values are joined into a space-separated prompt, the model
    generates a continuation, and the first `horizon` numbers after the prompt
    are scored against the held-out values.

    Parameters
    ----------
    dataset : callable
        Called as ``dataset(horizon=horizon)``; must return ``(train_df, test_df)``
        with columns ['unique_id', 'ds', 'y'].
    model, tokenizer
        Objects providing ``generate`` / ``decode`` and the tokenizer call used below.
    horizon : int
        Number of steps to forecast and number of test rows requested from `dataset`.
    frequency : str
        Not used by this function; kept so existing calls keep working.
    dataset_name : str or None
        Key under which metrics are logged; defaults to ``dataset.__name__``.
    results_dict : dict
        Receives the metrics. Defaults to the shared module-level RESULTS dict.
    do_sample : bool
        Enables sampling (temperature 1.0, top_p 0.95) instead of greedy decoding.
    seed : int
        Passed to ``torch.manual_seed`` before generation.
    title : str or None
        Plot title; when None a title is built from the dataset name and horizon.
    ylabel : str
        Label of the plot's y-axis.
    save_path : str
        Path handed to ``plt.savefig``. Pass a distinct value per dataset to
        avoid overwriting the figure of a previous run.

    Returns
    -------
    dict
        `results_dict`, after the new entry has been added. A plot is shown and
        saved as a side effect.
    """
    # 1) Load data
    train_df, test_df = dataset(horizon=horizon)
    y_train = train_df["y"].values.astype(float)
    y_test = test_df["y"].values.astype(float)

    # 2) Build prompt from raw training values
    prompt = " ".join(map(str, y_train.tolist()))

    # 3) Generate continuation
    torch.manual_seed(seed)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    gen = model.generate(
        **inputs,
        max_new_tokens=horizon * 4,   # allow several tokens per forecast value
        do_sample=do_sample,
        temperature=1.0 if do_sample else None,
        top_p=0.95 if do_sample else None,
        eos_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(gen[0], skip_special_tokens=True)

    # 4) Take the FIRST `horizon` numbers generated after the prompt, so the
    #    forecast stays aligned with the test period even when the model
    #    produces more values than requested.
    #    NOTE(review): assumes the decoded text reproduces the prompt's numbers
    #    unchanged - confirm for the tokenizer in use.
    y_pred = _extract_forecast(decoded, prompt, horizon)

    # 5) Metrics
    mae = mean_absolute_error(y_test, y_pred)
    rmse = math.sqrt(mean_squared_error(y_test, y_pred))
    with np.errstate(divide="ignore", invalid="ignore"):
        # Zero actuals yield inf (or NaN for 0/0); NaNs are skipped by nanmean.
        ape = np.abs((y_test - y_pred) / y_test) * 100.0
        mape = np.nanmean(ape)
        if np.isnan(mape):
            mape = float("inf")
    ss_res = np.sum((y_test - y_pred) ** 2)
    ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
    r2 = 1 - ss_res / ss_tot if ss_tot != 0 else float("nan")

    # 6) Log results
    name = dataset_name or getattr(dataset, "__name__", "unnamed_dataset")
    log_simple_result(results_dict, name, horizon, mae, rmse, mape, r2)

    # 7) Plot
    import matplotlib.pyplot as plt  # local import keeps the plotting section self-contained

    # Colorblind-friendly palette
    history_color = '#1b9e77'   # teal
    observed_color = '#d95f02'  # muted orange
    forecast_color = '#7570b3'  # muted purple

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(10, 5))

    ax.plot(train_df['ds'], y_train, label="Historical Data", color=history_color, linewidth=2)
    ax.plot(test_df['ds'], y_test, label="Observed Future", color=observed_color, markersize=6, linewidth=2)
    ax.plot(test_df['ds'], y_pred, label="Model Forecast", color=forecast_color, markersize=6, linewidth=2, linestyle='--')

    # The title names the dataset actually plotted instead of a fixed one.
    if title is None:
        title = f"Forecasting {name} Using GPT-OSS (H = {horizon})"
    ax.set_title(title, fontsize=14, fontweight='bold', pad=15)
    ax.set_xlabel("Date", fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)

    ax.legend(fontsize=11, frameon=True, loc='upper left')
    ax.tick_params(axis='x', rotation=30)
    ax.margins(x=0.02)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    return results_dict

In [None]:
# --- Dataset loader (AirPassengers) ---
def load_air_passengers(horizon=24, unique_id="AP1"):
    """Download the AirPassengers CSV and split it into train/test frames.

    The "Month" column is parsed to datetimes and becomes 'ds'; "Passengers"
    becomes 'y'. The final `horizon` rows form the test frame, everything
    before them the train frame. Both frames have the columns
    ['unique_id', 'ds', 'y'] with `unique_id` filled into every row.
    The frame shape and the mean of 'y' are printed along the way.
    """
    source = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
    raw = pd.read_csv(source)

    series = raw.assign(Month=pd.to_datetime(raw["Month"])).rename(
        columns={"Month": "ds", "Passengers": "y"}
    )
    print(series.shape)
    print("Mean: ", series["y"].mean())

    keep = ["unique_id", "ds", "y"]
    history = series.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = series.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# --- Example run ---
# Forecast the last 24 rows of AirPassengers. No dataset_name is passed, so the
# metrics are stored in RESULTS under the loader's function name.
gptoss_prediction(load_air_passengers, model, tokenizer, horizon=24, frequency="M")
print(RESULTS)

In [None]:
def load_air_sunsopts(horizon=24, unique_id="AP1"):
    """Download the Sunspots CSV and split it into train/test frames.

    The 'Unnamed: 0' column is dropped, 'Date' is parsed and renamed to 'ds',
    and 'Monthly Mean Total Sunspot Number' is renamed to 'y'. The final
    `horizon` rows form the test frame, the rest the train frame; both carry
    the columns ['unique_id', 'ds', 'y']. The shape (after dropping the extra
    column) and the mean of 'y' are printed.
    """
    source = "https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Sunspots.csv"
    raw = pd.read_csv(source)
    trimmed = raw.drop(columns=['Unnamed: 0'])
    print(trimmed.shape)

    # Bring the frame to the ds / y naming used by the forecasting function
    series = trimmed.assign(Date=pd.to_datetime(trimmed['Date'])).rename(
        columns={"Date": "ds", "Monthly Mean Total Sunspot Number": "y"}
    )
    print('Mean: ', series['y'].mean())

    keep = ['unique_id', 'ds', 'y']
    history = series.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = series.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 120 rows of the sunspot series; metrics are stored in
# RESULTS under the loader's function name.
gptoss_prediction(load_air_sunsopts, model, tokenizer, horizon=120, frequency="M")

In [None]:
def load_temp(horizon=24, unique_id="AP1"):
    """Download temp.csv, clean it, and split it into train/test frames.

    An 'Unnamed: 2' column is removed when present. Rows whose 'Date' cannot be
    parsed, or whose 'temp' value is not numeric, are discarded. 'Date' becomes
    'ds' and 'temp' becomes 'y'. The final `horizon` rows of the cleaned data
    form the test frame, the rest the train frame; both carry the columns
    ['unique_id', 'ds', 'y']. The mean of 'y' is printed.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/temp.csv")

    # errors='ignore' makes the drop a no-op when the stray column is absent
    frame = raw.drop(columns=['Unnamed: 2'], errors='ignore')

    # Unparseable dates turn into NaT and are filtered out right after
    frame = frame.assign(Date=pd.to_datetime(frame['Date'], errors='coerce'))
    frame = frame[frame['Date'].notna()].rename(columns={"Date": "ds", "temp": "y"})

    # Non-numeric readings turn into NaN and are filtered out as well
    frame = frame.assign(y=pd.to_numeric(frame['y'], errors='coerce'))
    frame = frame[frame['y'].notna()]

    print('Mean:', frame['y'].mean())

    keep = ['unique_id', 'ds', 'y']
    history = frame.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = frame.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 240 rows of the cleaned temp.csv series; metrics are stored
# in RESULTS under the loader's function name.
gptoss_prediction(load_temp, model, tokenizer, horizon=240, frequency="D")

In [None]:
def load_temperature(horizon=24, unique_id="AP1"):
    """Download Rainfall_data.csv and return train/test frames for 'Temperature'.

    'ds' is assembled from the 'Year', 'Month' and 'Day' columns; 'y' is the
    'Temperature' column converted to numbers, with non-numeric rows dropped.
    The final `horizon` rows form the test frame, the rest the train frame;
    both carry the columns ['unique_id', 'ds', 'y']. The mean of 'y' is printed.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Timestamp from the three date-part columns, target from 'Temperature'
    stamped = raw.assign(
        ds=pd.to_datetime(raw[['Year', 'Month', 'Day']]),
        y=pd.to_numeric(raw['Temperature'], errors='coerce'),
    )
    series = stamped[stamped['y'].notna()]

    print('Mean Temperature:', series['y'].mean())

    keep = ['unique_id', 'ds', 'y']
    history = series.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = series.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 24 rows of the 'Temperature' series from Rainfall_data.csv;
# metrics are stored in RESULTS under the loader's function name.
gptoss_prediction(load_temperature, model, tokenizer, horizon=24, frequency="M")

In [None]:
def load_precipitation(horizon=24, unique_id="AP1"):
    """Download Rainfall_data.csv and return train/test frames for 'Precipitation'.

    'ds' is assembled from the 'Year', 'Month' and 'Day' columns; 'y' is the
    'Precipitation' column converted to numbers, with non-numeric rows dropped.
    The final `horizon` rows form the test frame, the rest the train frame;
    both carry the columns ['unique_id', 'ds', 'y']. The mean of 'y' is printed.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Timestamp from the three date-part columns, target from 'Precipitation'
    stamped = raw.assign(
        ds=pd.to_datetime(raw[['Year', 'Month', 'Day']]),
        y=pd.to_numeric(raw['Precipitation'], errors='coerce'),
    )
    series = stamped[stamped['y'].notna()]

    print('Mean Precipitation:', series['y'].mean())

    keep = ['unique_id', 'ds', 'y']
    history = series.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = series.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 24 rows of the 'Precipitation' series from Rainfall_data.csv;
# metrics are stored in RESULTS under the loader's function name.
gptoss_prediction(load_precipitation, model, tokenizer, horizon=24, frequency="M")

In [None]:
def load_humidity(horizon=24, unique_id="AP1"):
    """Download Rainfall_data.csv and return train/test frames for 'Specific Humidity'.

    'ds' is assembled from the 'Year', 'Month' and 'Day' columns; 'y' is the
    'Specific Humidity' column converted to numbers, with non-numeric rows
    dropped. The final `horizon` rows form the test frame, the rest the train
    frame; both carry the columns ['unique_id', 'ds', 'y']. The mean of 'y' is
    printed.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Timestamp from the three date-part columns, target from 'Specific Humidity'
    stamped = raw.assign(
        ds=pd.to_datetime(raw[['Year', 'Month', 'Day']]),
        y=pd.to_numeric(raw['Specific Humidity'], errors='coerce'),
    )
    series = stamped[stamped['y'].notna()]

    print('Mean Specific Humidity:', series['y'].mean())

    keep = ['unique_id', 'ds', 'y']
    history = series.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = series.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 24 rows of the 'Specific Humidity' series from
# Rainfall_data.csv; metrics are stored in RESULTS under the loader's function name.
gptoss_prediction(load_humidity, model, tokenizer, horizon=24, frequency="M")

In [None]:
def load_relative_humidity(horizon=24, unique_id="AP1"):
    """Download Rainfall_data.csv and return train/test frames for 'Relative Humidity'.

    'ds' is assembled from the 'Year', 'Month' and 'Day' columns; 'y' is the
    'Relative Humidity' column converted to numbers, with non-numeric rows
    dropped. The final `horizon` rows form the test frame, the rest the train
    frame; both carry the columns ['unique_id', 'ds', 'y']. The mean of 'y' is
    printed.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/Rainfall_data.csv")

    # Timestamp from the three date-part columns, target from 'Relative Humidity'
    stamped = raw.assign(
        ds=pd.to_datetime(raw[['Year', 'Month', 'Day']]),
        y=pd.to_numeric(raw['Relative Humidity'], errors='coerce'),
    )
    series = stamped[stamped['y'].notna()]

    print('Mean Relative Humidity:', series['y'].mean())

    keep = ['unique_id', 'ds', 'y']
    history = series.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = series.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 24 rows of the 'Relative Humidity' series from
# Rainfall_data.csv; metrics are stored in RESULTS under the loader's function name.
gptoss_prediction(load_relative_humidity, model, tokenizer, horizon=24, frequency="M")

In [None]:
def load_birth(horizon=24, unique_id="AP1"):
    """Download the daily female births CSV, clean it, and split it into train/test.

    An 'Unnamed: 2' column is removed when present. Rows whose 'Date' cannot be
    parsed, or whose 'Births' value is not numeric, are discarded. 'Date'
    becomes 'ds' and 'Births' becomes 'y'. The final `horizon` rows of the
    cleaned data form the test frame, the rest the train frame; both carry the
    columns ['unique_id', 'ds', 'y']. The mean of 'y' and the cleaned frame's
    shape are printed.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-total-female-births.csv")

    # errors='ignore' makes the drop a no-op when the stray column is absent
    frame = raw.drop(columns=['Unnamed: 2'], errors='ignore')

    # Unparseable dates turn into NaT and are filtered out right after
    frame = frame.assign(Date=pd.to_datetime(frame['Date'], errors='coerce'))
    frame = frame[frame['Date'].notna()].rename(columns={"Date": "ds", "Births": "y"})

    # Non-numeric counts turn into NaN and are filtered out as well
    frame = frame.assign(y=pd.to_numeric(frame['y'], errors='coerce'))
    frame = frame[frame['y'].notna()]

    print('Mean:', frame['y'].mean())
    print(frame.shape)

    keep = ['unique_id', 'ds', 'y']
    history = frame.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = frame.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 24 rows of the births series. The source file is the
# "daily-total-female-births" CSV, so the frequency tag is "D" rather than "M".
gptoss_prediction(load_birth, model, tokenizer, horizon=24, frequency="D")

In [None]:
def load_store(horizon=24, unique_id="AP1"):
    """Download store.csv and return train/test frames for store 0, product 0.

    Only rows with store == 0 and product == 0 are kept; those two columns are
    then removed. Rows whose 'Date' cannot be parsed, or whose 'number_sold'
    value is not numeric, are discarded. 'Date' becomes 'ds' and 'number_sold'
    becomes 'y'. The final `horizon` rows of the cleaned data form the test
    frame, the rest the train frame; both carry the columns
    ['unique_id', 'ds', 'y']. The mean of 'y' and the cleaned frame's shape are
    printed.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/store.csv")

    # Restrict to a single store/product pair, then discard the selector columns
    is_target = (raw['store'] == 0) & (raw['product'] == 0)
    frame = raw.loc[is_target].drop(columns=['store', 'product'])

    # Unparseable dates turn into NaT and are filtered out right after
    frame = frame.assign(Date=pd.to_datetime(frame['Date'], errors='coerce'))
    frame = frame[frame['Date'].notna()].rename(columns={"Date": "ds", "number_sold": "y"})

    # Non-numeric sales figures turn into NaN and are filtered out as well
    frame = frame.assign(y=pd.to_numeric(frame['y'], errors='coerce'))
    frame = frame[frame['y'].notna()]

    print('Mean:', frame['y'].mean())
    print(frame.shape)

    keep = ['unique_id', 'ds', 'y']
    history = frame.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = frame.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 24 rows of the store 0 / product 0 sales series; metrics are
# stored in RESULTS under the loader's function name.
# NOTE(review): `frequency` is not read by gptoss_prediction as written; confirm
# that "M" matches the sampling of store.csv before relying on it.
gptoss_prediction(load_store, model, tokenizer, horizon=24, frequency="M")

In [None]:
def load_hospitality(horizon=24, unique_id="AP1"):
    """Download HospitalityEmployees.csv, clean it, and split it into train/test.

    Rows whose 'Date' cannot be parsed, or whose 'Employees' value is not
    numeric, are discarded. 'Date' becomes 'ds' and 'Employees' becomes 'y'.
    The final `horizon` rows of the cleaned data form the test frame, the rest
    the train frame; both carry the columns ['unique_id', 'ds', 'y']. The mean
    of 'y' and the cleaned frame's shape are printed.
    """
    raw = pd.read_csv("https://raw.githubusercontent.com/ahamed14051/time-series-dataset/refs/heads/main/HospitalityEmployees.csv")

    # Unparseable dates turn into NaT and are filtered out right after
    frame = raw.assign(Date=pd.to_datetime(raw['Date'], errors='coerce'))
    frame = frame[frame['Date'].notna()].rename(columns={"Date": "ds", "Employees": "y"})

    # Non-numeric headcounts turn into NaN and are filtered out as well
    frame = frame.assign(y=pd.to_numeric(frame['y'], errors='coerce'))
    frame = frame[frame['y'].notna()]

    print('Mean:', frame['y'].mean())
    print(frame.shape)

    keep = ['unique_id', 'ds', 'y']
    history = frame.iloc[:-horizon].assign(unique_id=unique_id)
    holdout = frame.iloc[-horizon:].assign(unique_id=unique_id)
    return history[keep], holdout[keep]

# Forecast the last 24 rows of the hospitality employees series; metrics are
# stored in RESULTS under the loader's function name.
gptoss_prediction(load_hospitality, model, tokenizer, horizon=24, frequency="M")

In [None]:
# Hard-coded metrics table, one entry per dataset loader (keys are the loader
# function names used in the cells above). Each entry has the same fields that
# log_simple_result writes: horizon, MAE, RMSE, MAPE (%) and R².
# NOTE(review): presumably transcribed from earlier runs rather than read from
# RESULTS - confirm the values are current before publishing the CSV.
GPT_results = {

'load_air_passengers': {'horizon': 24,
  'MAE': 55.58,
  'RMSE': 60.91,
  'MAPE (%)': 12.0,
  'R²': 0.3346},

 'load_air_sunsopts': {'horizon': 120,
  'MAE': 39.16,
  'RMSE': 47.6,
  'MAPE (%)': 708.45,
  'R²': -0.277},

  # No metrics recorded for this dataset: every field except the horizon is an
  # empty string.
  'load_temp': {'horizon': 240,
  'MAE': "",
  'RMSE': "",
  'MAPE (%)': "",
  'R²': ""},

  'load_temperature': {'horizon': 24,
  'MAE': 3.73,
  'RMSE': 4.52,
  'MAPE (%)': 28.77,
  'R²': 0.5822},

 'load_precipitation': {'horizon': 24,
  'MAE': 266.94,
  'RMSE': 463.79,
  'MAPE (%)': 100.0,
  'R²': -0.4954},

 'load_humidity': {'horizon': 24,
  'MAE': 2.55,
  'RMSE': 2.97,
  'MAPE (%)': 18.7,
  'R²': 0.5559},

 'load_relative_humidity': {'horizon': 24,
  'MAE': 28.43,
  'RMSE': 33.3,
  'MAPE (%)': 42.09,
  'R²': -2.3648},

 'load_birth': {'horizon': 24,
  'MAE': 5.75,
  'RMSE': 6.46,
  'MAPE (%)': 13.76,
  'R²': -0.0442},

  'load_store': {'horizon': 24,
  'MAE': 15.21,
  'RMSE': 17.78,
  'MAPE (%)': 1.82,
  'R²': -1.1633},

 'load_hospitality': {'horizon': 24,
  'MAE': 19.56,
  'RMSE': 24.21,
  'MAPE (%)': 0.99,
  'R²': 0.5722}

  }

# The dict of dicts yields one column per dataset; transposing gives one row per
# dataset and one column per metric, which is then written to GPT_results.csv
# with the dataset names as the index column.
df = pd.DataFrame(GPT_results).T
df.to_csv('GPT_results.csv', index=True)