In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt

import torch
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer, Baseline
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer import TFTTrainer
from pytorch_forecasting.data import NaNLabelEncoder

# Step 1: Load dataset
df = pd.read_csv("D:\infosysspringboard projects\project1-1stmilestine\AZURE_BACKEND_TEAM-B\data\processed\cleaned_merged.csv", parse_dates=["date"])

# Step 2: Sort and create time_idx
df = df.sort_values(by=["region", "resource_type", "date"]).reset_index(drop=True)
df["time_idx"] = df.groupby(["region", "resource_type"]).cumcount()

# Step 3: Define necessary columns
max_prediction_length = 7    # 7 days to predict
max_encoder_length = 30      # use past 30 days
target = "usage_cpu"

# Step 4: Rename categorical columns as needed, encode if necessary
# Convert categorical columns properly
df["region"] = df["region"].astype("category")
df["resource_type"] = df["resource_type"].astype("category")

# Convert holiday explicitly to string category to avoid numeric type error
df["holiday"] = df["holiday"].astype(str).astype("category")


# Step 5: Define training dataset for TFT
training_cutoff = df["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    df[df.time_idx <= training_cutoff],
    time_idx="time_idx",
    target=target,
    group_ids=["region", "resource_type"],
    min_encoder_length=max_encoder_length,  # keep encoder length consistent
    max_encoder_length=max_encoder_length,
    min_prediction_length=max_prediction_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["region", "resource_type"],
    time_varying_known_categoricals=["holiday"],
    time_varying_known_reals=["time_idx", "economic_index", "cloud_market_demand", "users_active"],
    time_varying_unknown_reals=[target],
    target_normalizer=GroupNormalizer(
        groups=["region", "resource_type"], transformation="softplus"
    ),  # helps stabilize training
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Step 6: Create validation dataset
validation = TimeSeriesDataSet.from_dataset(training, df, predict=True, stop_randomization=True)

# Step 7: Create dataloaders
batch_size = 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

# Step 8: Define the model
trainer = torch.optim.Adam
early_stop_callback = None  # Can add early stopping for more epochs if needed

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # 7 quantiles by default for quantile regression
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Step 9: Train the model
from pytorch_lightning import Trainer

pl_trainer = Trainer(
    max_epochs=30,
    gpus=1 if torch.cuda.is_available() else 0,
    gradient_clip_val=0.1,
)

pl_trainer.fit(
    tft,
    train_dataloader=train_dataloader,
    val_dataloaders=val_dataloader,
)

# Step 10: Save the model checkpoint
save_path = "tft_cpu_usage_model.ckpt"
pl_trainer.save_checkpoint(save_path)

# Step 11: Load the trained model for inference
best_tft = TemporalFusionTransformer.load_from_checkpoint(save_path, dataset=training)

# Step 12: Make predictions on validation data
raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)

# Step 13: Visualize actual vs predicted for a sample series
def plot_prediction(x, raw_predictions, idx=0):
    import matplotlib.pyplot as plt
    actual = x["decoder_target"][idx].detach().cpu().numpy()
    prediction = raw_predictions['prediction'][idx].detach().cpu().numpy()

    encoder_length = x['encoder_lengths'][idx].item()
    predict_length = prediction.shape[0]

    plt.figure(figsize=(10, 6))
    plt.plot(range(encoder_length), x['encoder_target'][idx].detach().cpu().numpy(), label="History (usage_cpu)")
    plt.plot(range(encoder_length, encoder_length + predict_length), actual, label="Actual (usage_cpu)")
    plt.plot(range(encoder_length, encoder_length + predict_length), prediction, label="Predicted (usage_cpu)")
    plt.legend()
    plt.xlabel("Time index")
    plt.ylabel("CPU Usage")
    plt.title(f"Temporal Fusion Transformer Forecasting Usage CPU (Sample {idx})")
    plt.show()

# Plot for the first sample
plot_prediction(x, raw_predictions, idx=0)


ImportError: cannot import name 'TFTTrainer' from 'pytorch_forecasting.models.temporal_fusion_transformer' (d:\infosysspringboard projects\project1-1stmilestine\azure_analytics\lib\site-packages\pytorch_forecasting\models\temporal_fusion_transformer\__init__.py)

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

import torch
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.callbacks import ModelCheckpoint

# Step 1: Load dataset
print("Loading dataset...")
df = pd.read_csv(r"D:\infosysspringboard projects\project1-1stmilestine\AZURE_BACKEND_TEAM-B\data\processed\cleaned_merged.csv", parse_dates=["date"])

# Step 2: Preprocessing
print("Preprocessing data...")
df = df.sort_values(by=["region", "resource_type", "date"]).reset_index(drop=True)
df["time_idx"] = df.groupby(["region", "resource_type"]).cumcount()

# Ensure target column is float
df["usage_cpu"] = df["usage_cpu"].astype(float)

# Convert categories
df["region"] = df["region"].astype("category")
df["resource_type"] = df["resource_type"].astype("category")
df["holiday"] = df["holiday"].astype(str).astype("category")
df["group_id"] = df["region"].astype(str) + "_" + df["resource_type"].astype(str)

# Define cutoff for training (example: last 7 days as validation)
training_cutoff = df["time_idx"].max() - 7

# Prepare training dataframe and ensure float
train_df = df[df.time_idx <= training_cutoff].copy()
train_df["usage_cpu"] = train_df["usage_cpu"].astype(float)

max_prediction_length = 7
max_encoder_length = 30
target = "usage_cpu"

# Create TimeSeriesDataSet for training
training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target=target,
    group_ids=["group_id"],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["region", "resource_type"],
    time_varying_known_categoricals=["holiday"],
    time_varying_known_reals=["time_idx", "economic_index", "cloud_market_demand", "users_active"],
    time_varying_unknown_reals=[target],
    target_normalizer=GroupNormalizer(groups=["group_id"], transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

print(f"Training dataset created with {len(training)} samples")

# Create validation dataset
validation = TimeSeriesDataSet.from_dataset(
    training,
    df,
    predict=True,
    stop_randomization=True
)

print(f"Validation dataset created with {len(validation)} samples")

# Create dataloaders
batch_size = 64
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 2, num_workers=0)

print(f"Dataloaders created - Train batches: {len(train_dataloader)}, Val batches: {len(val_dataloader)}")

# Callbacks
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, mode="min")
lr_logger = LearningRateMonitor()
checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1)

# Create TFT model from dataset
print("Creating TFT model...")
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=32,
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=16,
    output_size=7,  # quantiles
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
    optimizer="Adam"
)

print(f"Model created with {sum(p.numel() for p in tft.parameters())} parameters")

# Trainer initialization
print("Training model...")
trainer = Trainer(
    max_epochs=50,
    accelerator="auto",
    devices=1 if not torch.cuda.is_available() else "auto",  # fix device param
    gradient_clip_val=0.1,
    callbacks=[lr_logger, early_stop_callback, checkpoint_callback],
    enable_checkpointing=True,
)

# Fit model - pass exact model instance tft
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

# Save best model checkpoint
save_path = checkpoint_callback.best_model_path if checkpoint_callback.best_model_path else "tft_cpu_usage_model.ckpt"
print(f"Best model path: {save_path}")

# Load best model for inference
if os.path.exists(save_path):
    best_tft = TemporalFusionTransformer.load_from_checkpoint(save_path)
else:
    best_tft = tft

print("Best model loaded for inference")

# Make predictions on validation data
print("Making predictions...")
raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
predictions, x = raw_predictions if isinstance(raw_predictions, tuple) else (raw_predictions, next(iter(val_dataloader)))

print("Predictions completed")

# Plotting function omitted for brevity - keep as is

# Metrics function omitted for brevity - keep as is

# Generate visualization and metrics
print("\nGenerating visualization and metrics...")
plot_predictions_comparison(x, predictions)
metrics = calculate_metrics(x, predictions)

# Feature importance
print("\nAnalyzing feature importance...")
interpretation = best_tft.interpret_output(raw_predictions, reduction="sum")

fig, axes = plt.subplots(1, 2, figsize=(15, 6))
if 'encoder_variables' in interpretation:
    encoder_importance = interpretation['encoder_variables']
    axes[0].barh(range(len(encoder_importance)), encoder_importance)
    axes[0].set_title('Encoder Variable Importance')
    axes[0].set_xlabel('Importance Score')
if 'decoder_variables' in interpretation:
    decoder_importance = interpretation['decoder_variables']
    axes[1].barh(range(len(decoder_importance)), decoder_importance)
    axes[1].set_title('Decoder Variable Importance')
    axes[1].set_xlabel('Importance Score')
plt.tight_layout()
plt.savefig('tft_feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n" + "="*60)
print("TFT MODEL TRAINING AND EVALUATION COMPLETED SUCCESSFULLY!")
print("="*60)
print(f"✓ Model saved to: {save_path}")
print(f"✓ Predictions visualization saved to: tft_predictions_comparison.png")
print(f"✓ Feature importance plot saved to: tft_feature_importance.png")
print(f"✓ Training completed with RMSE: {metrics['RMSE']:.2f}")
print("="*60)

# Save predictions CSV
predictions_df = pd.DataFrame({
    'actual': x["decoder_target"].flatten().detach().cpu().numpy(),
    'predicted': predictions.prediction.flatten().detach().cpu().numpy() if hasattr(predictions, 'prediction') else predictions.flatten().detach().cpu().numpy()
})
predictions_df.to_csv('tft_predictions.csv', index=False)
print(f"✓ Predictions saved to: tft_predictions.csv")

print("\nTraining and evaluation pipeline completed!")


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Loading dataset...
Preprocessing data...
Training dataset created with 1068 samples
Validation dataset created with 12 samples
Dataloaders created - Train batches: 16, Val batches: 1
Creating TFT model...
Model created with 81165 parameters
Training model...


TypeError: `model` must be a `LightningModule` or `torch._dynamo.OptimizedModule`, got `TemporalFusionTransformer`

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

import torch
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint

# Step 1: Load dataset - use raw string for Windows path
print("Loading dataset...")
df = pd.read_csv(r"D:\infosysspringboard projects\project1-1stmilestine\AZURE_BACKEND_TEAM-B\data\processed\cleaned_merged.csv", parse_dates=["date"])

# Step 2: Preprocessing
print("Preprocessing data...")
df = df.sort_values(by=["region", "resource_type", "date"]).reset_index(drop=True)
df["time_idx"] = df.groupby(["region", "resource_type"]).cumcount()

# Ensure usage_cpu target is float
df["usage_cpu"] = df["usage_cpu"].astype(float)

# Convert relevant columns to categorical
df["region"] = df["region"].astype("category")
df["resource_type"] = df["resource_type"].astype("category")
df["holiday"] = df["holiday"].astype(str).astype("category")
df["group_id"] = df["region"].astype(str) + "_" + df["resource_type"].astype(str)

# Define cutoff for training (keeping last 7 days for validation)
max_prediction_length = 7
max_encoder_length = 30
target = "usage_cpu"
training_cutoff = df["time_idx"].max() - max_prediction_length

# Training dataframe
train_df = df[df.time_idx <= training_cutoff].copy()

# Step 3: Create TimeSeriesDataSet for training
training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target=target,
    group_ids=["group_id"],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=["region", "resource_type"],
    time_varying_known_categoricals=["holiday"],
    time_varying_known_reals=["time_idx", "economic_index", "cloud_market_demand", "users_active"],
    time_varying_unknown_reals=[target],
    target_normalizer=GroupNormalizer(groups=["group_id"], transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

print(f"Training dataset created with {len(training)} samples")

# Step 4: Create validation dataset using entire data for prediction
validation = TimeSeriesDataSet.from_dataset(
    training,
    df,
    predict=True,
    stop_randomization=True,
)

print(f"Validation dataset created with {len(validation)} samples")

# Step 5: Create dataloaders
batch_size = 64
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 2, num_workers=0)

print(f"Dataloaders created - Train batches: {len(train_dataloader)}, Val batches: {len(val_dataloader)}")

# Step 6: Define callbacks for training
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, mode="min", verbose=True)
lr_logger = LearningRateMonitor()
checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1, filename="best-tft")

# Step 7: Define the TFT model
print("Creating TFT model...")
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=32,
    attention_head_size=4,
    dropout=0.1,
    hidden_continuous_size=16,
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
    optimizer="Adam",
)

print(f"Model created with {sum(p.numel() for p in tft.parameters())} parameters")

# Step 8: Initialize trainer with callbacks and correct device setting
devices = 1 if not torch.cuda.is_available() else "auto"
trainer = Trainer(
    max_epochs=50,
    accelerator="auto",
    devices=devices,
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback, lr_logger, checkpoint_callback],
    enable_checkpointing=True,
)

# Step 9: Train the model
print("Starting training...")
trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

# Step 10: Load the best model for inference
best_model_path = checkpoint_callback.best_model_path
if best_model_path and os.path.exists(best_model_path):
    best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
    print(f"Loaded best model checkpoint: {best_model_path}")
else:
    best_tft = tft
    print("Best model checkpoint not found, using last model.")

# Step 11: Make predictions on validation set
print("Making predictions...")
raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
predictions, x = raw_predictions if isinstance(raw_predictions, tuple) else (raw_predictions, next(iter(val_dataloader)))

print("Prediction completed")

# Step 12: Plot actual vs predicted for multiple samples
def plot_predictions_comparison(x, predictions, idx_list=[0, 1, 2, 3]):
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    axes = axes.flatten()
    for i, idx in enumerate(idx_list[:4]):
        if i >= len(axes):
            break
        ax = axes[i]
        prediction = predictions.prediction[idx].detach().cpu().numpy() if hasattr(predictions, 'prediction') else predictions[idx].detach().cpu().numpy()
        actual = x["decoder_target"][idx].detach().cpu().numpy()
        encoder_target = x["encoder_target"][idx].detach().cpu().numpy()
        encoder_length = x['encoder_lengths'][idx].item()
        history_time = range(encoder_length)
        future_time = range(encoder_length, encoder_length + len(actual))
        ax.plot(history_time, encoder_target, 'b-', label="Historical CPU Usage", linewidth=2)
        ax.plot(future_time, actual, 'g-', label="Actual CPU Usage", linewidth=2, marker='o')
        ax.plot(future_time, prediction, 'r--', label="Predicted CPU Usage", linewidth=2, marker='s')
        ax.set_xlabel("Time Steps")
        ax.set_ylabel("CPU Usage")
        ax.set_title(f"TFT Forecast - Sample {idx}")
        ax.legend()
        ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.savefig('tft_predictions_comparison.png', dpi=300, bbox_inches='tight')
    plt.show()

# Step 13: Calculate evaluation metrics
def calculate_metrics(x, predictions):
    pred_values = predictions.prediction.detach().cpu().numpy() if hasattr(predictions, 'prediction') else predictions.detach().cpu().numpy()
    actual_values = x["decoder_target"].detach().cpu().numpy()
    mae = np.mean(np.abs(pred_values - actual_values))
    mse = np.mean((pred_values - actual_values) ** 2)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((actual_values - pred_values) / actual_values)) * 100
    print("\n" + "="*50)
    print("MODEL EVALUATION METRICS")
    print("="*50)
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"Mean Squared Error (MSE): {mse:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
    print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    print("="*50)
    return {"MAE": mae, "MSE": mse, "RMSE": rmse, "MAPE": mape}

# Step 14: Generate plots and calculate metrics
print("\nGenerating visualization and metrics...")
plot_predictions_comparison(x, predictions)
metrics = calculate_metrics(x, predictions)

# Step 15: Feature importance analysis and visualization
print("\nAnalyzing feature importance...")
interpretation = best_tft.interpret_output(raw_predictions, reduction="sum")

fig, axes = plt.subplots(1, 2, figsize=(15, 6))
if 'encoder_variables' in interpretation:
    encoder_importance = interpretation['encoder_variables']
    axes[0].barh(range(len(encoder_importance)), encoder_importance)
    axes[0].set_title('Encoder Variable Importance')
    axes[0].set_xlabel('Importance Score')
if 'decoder_variables' in interpretation:
    decoder_importance = interpretation['decoder_variables']
    axes[1].barh(range(len(decoder_importance)), decoder_importance)
    axes[1].set_title('Decoder Variable Importance')
    axes[1].set_xlabel('Importance Score')
plt.tight_layout()
plt.savefig('tft_feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n" + "="*60)
print("TFT MODEL TRAINING AND EVALUATION COMPLETED SUCCESSFULLY!")
print("="*60)
print(f"✓ Model saved to: {best_model_path}")
print(f"✓ Predictions visualization saved to: tft_predictions_comparison.png")
print(f"✓ Feature importance plot saved to: tft_feature_importance.png")
print(f"✓ Training completed with RMSE: {metrics['RMSE']:.2f}")
print("="*60)

# Step 16: Save predictions to CSV for further analysis
pred_df = pd.DataFrame({
    'actual': x["decoder_target"].flatten().detach().cpu().numpy(),
    'predicted': predictions.prediction.flatten().detach().cpu().numpy() if hasattr(predictions, 'prediction') else predictions.flatten().detach().cpu().numpy()
})
pred_df.to_csv('tft_predictions.csv', index=False)
print("✓ Predictions saved to: tft_predictions.csv")

print("\nTraining and evaluation pipeline completed!")


Loading dataset...
Preprocessing data...
Training dataset created with 1068 samples
Validation dataset created with 12 samples
Dataloaders created - Train batches: 16, Val batches: 1
Creating TFT model...
Model created with 81165 parameters


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Starting training...


TypeError: `model` must be a `LightningModule` or `torch._dynamo.OptimizedModule`, got `TemporalFusionTransformer`