# TimesFM Model: 5-Minute and 15-Minute Data Predictions

This notebook uses the TimesFM model to generate predictions for stock datasets with 5-minute and 15-minute intervals.

In [40]:
import timesfm
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
import warnings
import numpy as np
import os
# Import configuration and reusable functions
from config_times_fm import TimesFmConfig

In [41]:
# Build the TimesFM forecaster from the shared configuration class.
# Every hyperparameter and the checkpoint repo id are read from TimesFmConfig,
# so model settings are changed there rather than in this notebook.
tfm_hparams = timesfm.TimesFmHparams(
    backend=TimesFmConfig.BACKEND,
    per_core_batch_size=TimesFmConfig.PER_CORE_BATCH_SIZE,
    horizon_len=TimesFmConfig.HORIZON_LEN,
    num_layers=TimesFmConfig.NUM_LAYERS,
    model_dims=TimesFmConfig.MODEL_DIMS,
    context_len=TimesFmConfig.CONTEXT_LEN,
)
tfm_checkpoint = timesfm.TimesFmCheckpoint(
    huggingface_repo_id=TimesFmConfig.CHECKPOINT_REPO,
)
tfm = timesfm.TimesFm(hparams=tfm_hparams, checkpoint=tfm_checkpoint)

Fetching 5 files: 100%|██████████| 5/5 [00:00<?, ?it/s]



In [42]:
# Define reusable plotting function
def plot_forecast(actual_data, forecast_data, title, save_path=None, xlabel='Date', ylabel='Price ($)'):
    """Plot actual versus predicted values on a shared axis and optionally save the figure.

    Args:
        actual_data: Table providing a 'ds' column (x values) and an 'actual' column.
        forecast_data: Table providing a 'ds' column (x values) and a 'forecast' column.
        title: Figure title.
        save_path: Where to write the image (300 dpi). Nothing is written when
            this is None or otherwise falsy.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.

    Returns:
        None. The figure is shown and then closed.
    """
    fig, ax = plt.subplots(figsize=(18, 6))
    try:
        ax.plot(actual_data['ds'], actual_data['actual'], color='green', label='Actual')
        ax.plot(forecast_data['ds'], forecast_data['forecast'], color='red', linestyle='--', label='Predicted')
        ax.set_title(title, fontsize=14)
        ax.set_xlabel(xlabel, fontsize=10)
        ax.set_ylabel(ylabel, fontsize=10)
        ax.tick_params(axis='x', labelrotation=45)
        ax.legend(frameon=True, shadow=True)
        ax.grid(True, linestyle='--', alpha=0.6)
        fig.tight_layout()

        # Save the plot if a path is provided
        if save_path:
            fig.savefig(save_path, dpi=300)

        plt.show()
    finally:
        # This function is called once per backtest window; closing the figure
        # here keeps open figures from accumulating across a long run, even if
        # saving or showing fails.
        plt.close(fig)

In [43]:
# Function to process a single dataset
def process_dataset(file_path, time_interval, context_window=2048, forecast_horizon=128):
    """Run a rolling-window TimesFM backtest on one ticker CSV and save per-window results.

    The CSV must contain 'Datetime' and 'Close' columns. Starting at row 0 and
    stepping by ``forecast_horizon`` rows, each window feeds ``context_window``
    rows to the module-level ``tfm`` model and compares the forecast with the
    ``forecast_horizon`` rows that follow. For every window a PNG plot and a
    CSV (columns: date, actual, forecast) are written to
    ``results/times_<time_interval>_<ticker>/``.

    Args:
        file_path: Path to the CSV; the ticker is the file name text before the
            first underscore.
        time_interval: '5M' or '15M'; any other value falls back to hourly
            frequency ('h') when calling the model.
        context_window: Number of rows given to the model as history.
        forecast_horizon: Number of rows forecast and evaluated per window; also
            the step between consecutive windows.

    Returns:
        None. Returns early (after printing a warning) when the dataset has
        fewer than ``context_window + forecast_horizon`` rows.

    Raises:
        ValueError: If ``context_window`` or ``forecast_horizon`` is not positive.
    """
    if context_window <= 0 or forecast_horizon <= 0:
        raise ValueError('context_window and forecast_horizon must be positive integers')

    # Extract ticker symbol from file name
    ticker = os.path.basename(file_path).split('_')[0]
    print(f'Processing {ticker} dataset with {time_interval} interval...')

    # Load the dataset
    df = pd.read_csv(file_path)
    df['Datetime'] = pd.to_datetime(df['Datetime'])

    # Format dataframe for TimesFM
    input_df = pd.DataFrame({
        'unique_id': [1] * len(df),
        'ds': df['Datetime'].values.astype('datetime64[ns]'),
        'y': df['Close']
    })

    # Last row index at which a full context + horizon window still fits
    max_start = len(input_df) - context_window - forecast_horizon

    if max_start < 0:
        print(f'Warning: {ticker} dataset too small for forecasting with current window sizes')
        return

    # Frequency string passed to TimesFM; it does not change between windows,
    # so resolve it once. Unknown intervals default to hourly.
    freq = {'5M': '5min', '15M': '15min'}.get(time_interval, 'h')

    # Create results directory for this interval and ticker if it doesn't exist
    interval_folder = f'times_{time_interval}_{ticker}'
    ticker_results_dir = os.path.join('results', interval_folder)
    os.makedirs(ticker_results_dir, exist_ok=True)

    # Define the starting points for each backtesting window
    backtest_starts = list(range(0, max_start + 1, forecast_horizon))

    # Loop through each backtesting window
    for idx, start_idx in enumerate(backtest_starts):
        print(f'Processing window {idx+1}/{len(backtest_starts)}...')

        context_end = start_idx + context_window
        context_data = input_df.iloc[start_idx:context_end]

        forecast_df = tfm.forecast_on_df(
            context_data,
            freq=freq,
            value_name='y',
            num_jobs=-1
        )[:forecast_horizon]

        # Align the forecast with the actual data (by row position)
        actual_start = context_end
        actual_end = actual_start + forecast_horizon
        actual_data = input_df.iloc[actual_start:actual_end].reset_index(drop=True)
        forecast_df = forecast_df.reset_index(drop=True)

        # Only keep rows where both actual and forecast exist
        min_len = min(len(actual_data), len(forecast_df))
        actual_data = actual_data.iloc[:min_len]
        forecast_df = forecast_df.iloc[:min_len]

        mae = mean_absolute_error(actual_data['y'], forecast_df['timesfm'])
        mse = mean_squared_error(actual_data['y'], forecast_df['timesfm'])
        rmse = np.sqrt(mse)

        # Plot the forecast for this window
        # Align forecast and actual by timestamp for plotting (in case of missing/non-trading periods)
        # NOTE(review): the metrics above and the CSV below pair rows by position,
        # while this plot keeps only rows whose 'ds' values match exactly. If the
        # forecast timestamps differ from the actual ones (e.g. across gaps), the
        # plot shows fewer points than the MAE/RMSE in its title cover - confirm
        # which alignment is intended.
        merged_plot = pd.merge(
            actual_data[['ds', 'y']],
            forecast_df[['ds', 'timesfm']],
            on='ds',
            how='inner'
        )
        title = f'{ticker} ({time_interval}) - Window {idx+1} Forecast (MAE: {mae:.4f}, RMSE: {rmse:.4f})'
        plot_path = os.path.join(ticker_results_dir, f'{ticker}_window_{idx+1}.png')
        # One renamed frame carries both the 'actual' and 'forecast' columns
        plot_df = merged_plot.rename(columns={'y': 'actual', 'timesfm': 'forecast'})
        plot_forecast(plot_df, plot_df, title, save_path=plot_path)

        # Save the results to CSV (aligned by date, actual, forecast)
        results_df = pd.DataFrame({
            'date': actual_data['ds'],
            'actual': actual_data['y'],
            'forecast': forecast_df['timesfm']
        })
        csv_path = os.path.join(ticker_results_dir, f'{ticker}_window_{idx+1}.csv')
        # Overwrite the CSV file if it exists
        results_df.to_csv(csv_path, index=False)

    print(f'Finished processing {ticker} dataset.')

## Process 5-Minute Data

In [44]:
# Define 5M data directory
data_dir_5m = os.path.join(os.getcwd(), "data", "5M")

# Get list of all CSV files in the 5M directory.
# os.listdir returns entries in arbitrary order, so the paths are sorted to give
# a reproducible processing order. A missing directory is treated as "no files"
# so the message below is shown instead of a FileNotFoundError traceback.
if os.path.isdir(data_dir_5m):
    csv_files_5m = sorted(
        os.path.join(data_dir_5m, f) for f in os.listdir(data_dir_5m) if f.endswith('.csv')
    )
else:
    csv_files_5m = []

if not csv_files_5m:
    print(f"No CSV files found in {data_dir_5m}")
else:
    print(f"Found {len(csv_files_5m)} CSV files in 5M directory:")
    for file in csv_files_5m:
        print(f"- {os.path.basename(file)}")

Found 11 CSV files in 5M directory:
- INTC_5M.csv
- IONQ_5M.csv
- MSTR_5M.csv
- MU_5M.csv
- NVDA_5M.csv
- QBTS_5M.csv
- RGTI_5M.csv
- SMCI_5M.csv
- SRPT_5M.csv
- TSLA_5M.csv
- VKTX_5M.csv


## Process 15-Minute Data

In [46]:
# Define 15M data directory
data_dir_15m = os.path.join(os.getcwd(), "data", "15M")

# Get list of all CSV files in the 15M directory.
# os.listdir returns entries in arbitrary order, so the paths are sorted to give
# a reproducible processing order. A missing directory is treated as "no files"
# so the message below is shown instead of a FileNotFoundError traceback.
if os.path.isdir(data_dir_15m):
    csv_files_15m = sorted(
        os.path.join(data_dir_15m, f) for f in os.listdir(data_dir_15m) if f.endswith('.csv')
    )
else:
    csv_files_15m = []

if not csv_files_15m:
    print(f"No CSV files found in {data_dir_15m}")
else:
    print(f"Found {len(csv_files_15m)} CSV files in 15M directory:")
    for file in csv_files_15m:
        print(f"- {os.path.basename(file)}")

Found 11 CSV files in 15M directory:
- INTC_15M.csv
- IONQ_15M.csv
- MSTR_15M.csv
- MU_15M.csv
- NVDA_15M.csv
- QBTS_15M.csv
- RGTI_15M.csv
- SMCI_15M.csv
- SRPT_15M.csv
- TSLA_15M.csv
- VKTX_15M.csv
