# Simple Chronos Fine-tuning for INTC Data

This notebook:
1. Loads INTC 5M stock data
2. Fine-tunes a Chronos model
3. Saves the model for use in other notebooks

**Output**: Fine-tuned model saved to `../models/chronos_finetuned_INTC_5M/`

In [None]:
import gc
import torch
gc.collect()

import pandas as pd
import numpy as np
import os
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

In [143]:
# Configuration
TICKER = "INTC"
# The pandas frequency string for 5 minutes is '5T'.
TIMEFRAME_FREQ = "5T"
# The identifier for the timeframe in your file names is '5M'.
DATA_TIMEFRAME_ID = "5M"
MODEL_NAME = "amazon/chronos-bolt-base" # Using CPU-compatible model
PREDICTION_LENGTH = 20

# Paths
# Construct the path using the correct file name identifier.
data_path = f"../data/{DATA_TIMEFRAME_ID}/{TICKER}_{DATA_TIMEFRAME_ID}.csv"
model_save_dir = f"../models/chronos_finetuned_{TICKER}_{DATA_TIMEFRAME_ID}"
os.makedirs(model_save_dir, exist_ok=True)

print(f" Ticker: {TICKER}")
print(f" Timeframe (Frequency): {TIMEFRAME_FREQ}")
print(f" Timeframe (File ID): {DATA_TIMEFRAME_ID}")
print(f" Base Model: {MODEL_NAME}")
print(f" Prediction Length: {PREDICTION_LENGTH}")
print(f" Model will be saved to: {model_save_dir}")

 Ticker: INTC
 Timeframe (Frequency): 5T
 Timeframe (File ID): 5M
 Base Model: amazon/chronos-bolt-base
 Prediction Length: 20
 Model will be saved to: ../models/chronos_finetuned_INTC_5M


In [144]:
# Load and prepare data
print(f"📈 Loading data from: {data_path}")

try:
    df = pd.read_csv(data_path)
    print(f"✅ Loaded {len(df)} rows")
    
    # Check data structure
    print(f"   Columns: {df.columns.tolist()}")
    print(f"   Sample datetime values: {df['Datetime'].head().tolist()}")
    
    # Convert datetime - handle timezone issues more carefully
    try:
        # First try simple conversion
        df['Datetime'] = pd.to_datetime(df['Datetime'])
        print(f"✅ Basic datetime conversion successful")
        
        # Check if timezone aware
        if df['Datetime'].dt.tz is not None:
            print(f"   Timezone detected: {df['Datetime'].dt.tz}")
            df['Datetime'] = df['Datetime'].dt.tz_convert('UTC').dt.tz_localize(None)
            print(f"   Converted to UTC and removed timezone")
        else:
            print(f"   No timezone detected")
            
    except Exception as dt_error:
        print(f"   ⚠️ Standard conversion failed, trying UTC: {dt_error}")
        df['Datetime'] = pd.to_datetime(df['Datetime'], utc=True)
        df['Datetime'] = df['Datetime'].dt.tz_localize(None)
        print(f"   UTC conversion successful")
    
    # Sort by datetime and remove duplicates
    df = df.sort_values('Datetime').drop_duplicates(subset=['Datetime']).reset_index(drop=True)
    print(f"✅ After sorting and deduplication: {len(df)} rows")
    
    # Check for data quality
    print(f"✅ Date range: {df['Datetime'].min()} to {df['Datetime'].max()}")
    print(f"   Close price range: ${df['Close'].min():.2f} to ${df['Close'].max():.2f}")
    
    # Check for missing values
    missing_close = df['Close'].isna().sum()
    if missing_close > 0:
        print(f"   ⚠️ Warning: {missing_close} missing Close values - will be dropped")
        df = df.dropna(subset=['Close'])
        print(f"   After dropping missing Close: {len(df)} rows")
    
    # Check time intervals (sample first 10)
    if len(df) > 1:
        time_diffs = df['Datetime'].diff().dt.total_seconds() / 60  # Convert to minutes
        print(f"   Sample time intervals (minutes): {time_diffs.dropna().head(5).tolist()}")
    
except Exception as e:
    print(f"❌ Error loading data: {e}")
    raise

📈 Loading data from: ../data/5M/INTC_5M.csv
✅ Loaded 11089 rows
   Columns: ['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits']
   Sample datetime values: ['2025-01-23 04:00:00-05:00', '2025-01-23 04:05:00-05:00', '2025-01-23 04:10:00-05:00', '2025-01-23 04:15:00-05:00', '2025-01-23 04:20:00-05:00']
✅ Basic datetime conversion successful
   ⚠️ Standard conversion failed, trying UTC: Can only use .dt accessor with datetimelike values
   UTC conversion successful
✅ After sorting and deduplication: 11089 rows
✅ Date range: 2025-01-23 09:00:00 to 2025-04-17 23:55:00
   Close price range: $17.67 to $27.75
   Sample time intervals (minutes): [5.0, 5.0, 5.0, 5.0, 5.0]


In [145]:

try:
    # Create the required format
    ts_df = pd.DataFrame({
        'item_id': TICKER,
        'timestamp': df['Datetime'],
        'target': df['Close']
    })
    
    # Convert to TimeSeriesDataFrame
    tsd = TimeSeriesDataFrame.from_data_frame(
        ts_df, 
        id_column='item_id', 
        timestamp_column='timestamp'
    )
    
    print(f"Successfully created TSD with {len(tsd)} rows.")

    # Convert to a regular frequency. This is the critical step to fix the error.
    # It fills gaps (like weekends/holidays) with NaN values so the index is regular.
    print(f"   Converting to regular frequency '{TIMEFRAME_FREQ}'...")
    tsd = tsd.convert_frequency(freq=TIMEFRAME_FREQ)
    
    if len(tsd) < 100:
        print(f"   ❌ Warning: Very few rows ({len(tsd)}) after resampling - this may not be enough for training")
    else:
        print(f"✅ TimeSeriesDataFrame created with {len(tsd)} rows and regular frequency.")
    
except Exception as e:
    print(f"❌ Error creating TimeSeriesDataFrame: {e}")
    print(f"   DataFrame info:")
    print(f"     Shape: {ts_df.shape}")
    print(f"     Columns: {ts_df.columns.tolist()}")
    print(f"     Dtypes: {ts_df.dtypes}")
    raise

Successfully created TSD with 11089 rows.
   Converting to regular frequency '5T'...
✅ TimeSeriesDataFrame created with 24372 rows and regular frequency.
✅ TimeSeriesDataFrame created with 24372 rows and regular frequency.


In [146]:
# Split data for training (use 90% for training)
split_idx = int(len(tsd) * 0.9)
train_data = tsd.iloc[:split_idx]
val_data = tsd.iloc[split_idx:]

print(f" Data split:")
print(f"   Training: {len(train_data)} rows")
print(f"   Validation: {len(val_data)} rows")
print(f"   Total: {len(tsd)} rows")

 Data split:
   Training: 21934 rows
   Validation: 2438 rows
   Total: 24372 rows


In [147]:
# Fine-tune the Chronos model
print(f"   Base model: {MODEL_NAME}")
print(f"   Training data: {len(train_data)} rows")
print(f"   roughly 5-10 minutes...")

try:
    predictor = TimeSeriesPredictor(
        target='target',
        prediction_length=PREDICTION_LENGTH,
        path=model_save_dir,
        eval_metric='MASE',
        freq=TIMEFRAME_FREQ,
        verbosity=2
    )

    # Fine-tune the model
    predictor.fit(
        train_data,
        hyperparameters={'Chronos': {"model_path": MODEL_NAME}},
        presets='medium_quality',
        time_limit=600  # 10 min
    )

    print("✅ Fine-tuning completed successfully!")

except Exception as e:
    # print(f"Fine-tuning failed: {e}")
    # print("   Troubleshooting info:")
    # print(f"     Train data shape: {train_data.shape}")
    # print(f"     Train data index levels: {train_data.index.names}")
    # print(f"     Sample train data:")
    # if len(train_data) > 0:
    #     print(train_data.head())

    raise

Frequency '5T' stored as '5min'
Beginning AutoGluon training... Time limit = 600s
AutoGluon will save models to 'c:\Users\micha\code\finance\diploma\models\chronos_finetuned_INTC_5M'
Frequency '5T' stored as '5min'
Beginning AutoGluon training... Time limit = 600s
AutoGluon will save models to 'c:\Users\micha\code\finance\diploma\models\chronos_finetuned_INTC_5M'
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          32
GPU Count:          0
Memory Avail:       36.85 GB / 63.10 GB (58.4%)
Disk Space Avail:   128.80 GB / 465.02 GB (27.7%)
Setting presets to: medium_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MASE,
 'freq': '5min',
 'hyperparameters': {'Chronos': {'model_path': 'amazon/chronos-bolt-base'}},
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 20,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'r

   Base model: amazon/chronos-bolt-base
   Training data: 21934 rows
   roughly 5-10 minutes...


	-0.1141       = Validation score (-MASE)
	0.03    s     = Training runtime
	12.82   s     = Validation (prediction) runtime
Not fitting ensemble as only 1 model was trained.
Training complete. Models trained: ['Chronos[amazon__chronos-bolt-base]']
Total runtime: 12.86 s
Best model: Chronos[amazon__chronos-bolt-base]
Best model score: -0.1141
	0.03    s     = Training runtime
	12.82   s     = Validation (prediction) runtime
Not fitting ensemble as only 1 model was trained.
Training complete. Models trained: ['Chronos[amazon__chronos-bolt-base]']
Total runtime: 12.86 s
Best model: Chronos[amazon__chronos-bolt-base]
Best model score: -0.1141


✅ Fine-tuning completed successfully!


In [None]:
# # Save model information
# model_info = {
#     'ticker': TICKER,
#     'timeframe': TIMEFRAME_FREQ,
#     'base_model': MODEL_NAME,
#     'prediction_length': PREDICTION_LENGTH,
#     'training_rows': len(train_data),
#     'model_path': model_save_dir
# }

# # Save as JSON for easy loading
# import json
# info_path = os.path.join(model_save_dir, 'model_info.json')
# with open(info_path, 'w') as f:
#     json.dump(model_info, f, indent=2)

# print("💾 Model saved successfully!")
# print(f"📁 Location: {model_save_dir}")
# print(f"📄 Info file: {info_path}")
# print("🎯 To load this model in another notebook:")
# print(f"   from autogluon.timeseries import TimeSeriesPredictor")
# print(f"   predictor = TimeSeriesPredictor.load('{model_save_dir}')")

💾 Model saved successfully!
📁 Location: ../models/chronos_finetuned_INTC_5M
📄 Info file: ../models/chronos_finetuned_INTC_5M\model_info.json
🎯 To load this model in another notebook:
   from autogluon.timeseries import TimeSeriesPredictor
   predictor = TimeSeriesPredictor.load('../models/chronos_finetuned_INTC_5M')


In [150]:
# Clean up memory
try:
    if 'predictor' in locals():
        del predictor
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    print("🧹 Memory cleaned")
except:
    print("⚠️ Memory cleanup had issues, but continuing...")

print("✅ Fine-tuning notebook completed!")
print(f"🎯 Fine-tuned model ready at: {model_save_dir}")

🧹 Memory cleaned
✅ Fine-tuning notebook completed!
🎯 Fine-tuned model ready at: ../models/chronos_finetuned_INTC_5M
