In [None]:
# Setup and imports
%pip install nixtla pandas numpy matplotlib seaborn python-dotenv

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from dotenv import load_dotenv
from nixtla import NixtlaClient

# Pull settings from a local .env file (if one exists) and look up the TimeGPT key
load_dotenv()
api_key = os.environ.get('TIME_GPT_API_KEY')

# Fail fast: a missing or empty key would only surface later as an opaque API error
if api_key is None or api_key == '':
    raise ValueError("TIME_GPT_API_KEY not found in environment variables.")

# Shared client used by the anomaly-detection cells below
nixtla_client = NixtlaClient(api_key=api_key)
print("✅ TimeGPT client initialized for anomaly detection!")


In [None]:
def generate_energy_data_with_anomalies(days=60, seed=42):
    """Generate synthetic hourly energy prices with labelled, injected anomalies.

    The series is a constant base price plus a daily sine pattern, a weekday
    premium and Gaussian noise. Four kinds of anomalies are then injected:
    a 3-hour price spike (day 10), a 3-hour negative-price window (day 25),
    a 6-hour sustained increase (day 40) and up to five random single-hour
    spikes. Fixed windows that do not fit inside the series are skipped.

    Args:
        days: Number of days to generate; the series has ``days * 24`` points.
        seed: Seed passed to ``np.random.seed`` so repeated calls produce the
            same prices. Note that this resets NumPy's *global* random state.

    Returns:
        A ``(df, anomaly_indices)`` tuple where ``df`` has the columns
        ``ds`` (hourly timestamps), ``y`` (price), ``unique_id`` (constant
        series id) and ``true_anomaly`` (0/1 ground-truth label), and
        ``anomaly_indices`` is the list of unique row positions labelled 1.
    """
    n_points = days * 24

    # Floor the start to the top of the hour so every timestamp is a clean hourly mark
    start_date = (datetime.now() - timedelta(days=days)).replace(
        minute=0, second=0, microsecond=0
    )
    # A Timedelta frequency avoids the 'H' alias, which newer pandas deprecates
    dates = pd.date_range(start=start_date, periods=n_points, freq=pd.Timedelta(hours=1))

    np.random.seed(seed)

    # Base energy price pattern
    base_price = 0.08

    # Use plain NumPy arrays: arithmetic on the pandas Index returned by
    # `.hour` / `.weekday` produces an immutable Index, and the anomaly
    # injection below needs to modify the prices in place.
    hours = dates.hour.to_numpy()
    weekdays = dates.weekday.to_numpy()

    # Daily seasonality (peak during day)
    daily_pattern = 0.03 * np.sin(2 * np.pi * (hours - 6) / 24)

    # Weekly seasonality (higher on weekdays)
    weekly_pattern = 0.015 * (weekdays < 5).astype(float)

    # Normal market noise
    noise = np.random.normal(0, 0.008, n_points)

    # Create base prices
    energy_prices = np.asarray(
        base_price + daily_pattern + weekly_pattern + noise, dtype=float
    )

    # Add intentional anomalies
    anomaly_indices = []

    # 1. Price spike during "grid emergency" (day 10, hours 14-16)
    spike_start = 10 * 24 + 14
    spike_end = 10 * 24 + 17  # exclusive
    if spike_end <= n_points:
        energy_prices[spike_start:spike_end] += 0.15  # Major spike
        anomaly_indices.extend(range(spike_start, spike_end))

    # 2. Negative pricing during oversupply (day 25, hours 2-4)
    negative_start = 25 * 24 + 2
    negative_end = 25 * 24 + 5  # exclusive
    if negative_end <= n_points:
        energy_prices[negative_start:negative_end] = -0.02  # Negative prices
        anomaly_indices.extend(range(negative_start, negative_end))

    # 3. Equipment failure causing sustained high prices (day 40, 6 hours)
    failure_start = 40 * 24 + 8
    failure_end = 40 * 24 + 14  # exclusive
    if failure_end <= n_points:
        energy_prices[failure_start:failure_end] += 0.08  # Sustained high
        anomaly_indices.extend(range(failure_start, failure_end))

    # 4. Random price spikes (up to 5 single-hour spikes), kept 100 points away
    #    from either end of the series when the series is long enough.
    n_random_spikes = 5
    edge_margin = 100
    candidates = np.arange(edge_margin, n_points - edge_margin)
    if len(candidates) < n_random_spikes:
        # Series too short to honour the margin: sample from every position
        # instead of letting np.random.choice raise on an undersized pool.
        candidates = np.arange(n_points)
    n_spikes = min(n_random_spikes, len(candidates))
    if n_spikes > 0:
        random_spikes = np.random.choice(candidates, n_spikes, replace=False)
        for spike_idx in random_spikes:
            energy_prices[spike_idx] += np.random.uniform(0.05, 0.12)
            anomaly_indices.append(int(spike_idx))

    # A random spike may fall inside one of the fixed windows; drop duplicates
    # (keeping first-seen order) so the count matches the labelled rows.
    anomaly_indices = list(dict.fromkeys(anomaly_indices))

    # Create DataFrame
    df = pd.DataFrame({
        'ds': dates,
        'y': energy_prices,
        'unique_id': 'energy_price'
    })

    # Create ground truth anomaly labels
    df['true_anomaly'] = 0
    if anomaly_indices:
        df.iloc[anomaly_indices, df.columns.get_loc('true_anomaly')] = 1

    return df, anomaly_indices

# Build the synthetic dataset together with the positions of the injected anomalies
energy_data, true_anomalies = generate_energy_data_with_anomalies(days=60)

total_points = len(energy_data)
injected_count = len(true_anomalies)

print(f"Generated {total_points} data points")
print(f"Injected {injected_count} known anomalies")
print(f"Anomaly rate: {injected_count/total_points*100:.2f}%")

# Summary of the generated price column
price_series = energy_data['y']
print(f"\nPrice statistics:")
for stat_label, stat_value in (
    ("Mean:", price_series.mean()),
    ("Min: ", price_series.min()),
    ("Max: ", price_series.max()),
):
    print(f"{stat_label} ${stat_value:.4f}/kWh")


In [None]:
# Visualise the whole series and mark the injected (ground-truth) anomalies
fig, ax = plt.subplots(figsize=(16, 8))

# Rows labelled as normal are drawn as a line
normal_data = energy_data[energy_data['true_anomaly'] == 0]
ax.plot(
    normal_data['ds'],
    normal_data['y'],
    color='blue',
    alpha=0.7,
    linewidth=1,
    label='Normal Data',
)

# Rows labelled as anomalous are drawn as markers on top of the line
anomaly_data = energy_data[energy_data['true_anomaly'] == 1]
ax.scatter(
    anomaly_data['ds'],
    anomaly_data['y'],
    color='red',
    s=30,
    alpha=0.8,
    label=f'Known Anomalies ({len(anomaly_data)})',
    zorder=5,
)

ax.set_title('Energy Prices with Injected Anomalies', fontsize=16, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Energy Price ($/kWh)', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

# List the first ten labelled anomalies with their timestamp and price
print("🔍 Known Anomaly Examples:")
anomaly_examples = energy_data[energy_data['true_anomaly'] == 1].head(10)
for example_ts, example_price in zip(anomaly_examples['ds'], anomaly_examples['y']):
    print(f"  {example_ts.strftime('%Y-%m-%d %H:%M')}: ${example_price:.4f}/kWh")


In [None]:
# Hand TimeGPT only the columns it needs; the ground-truth label stays behind
detection_data = energy_data.loc[:, ['ds', 'y', 'unique_id']].copy()

print("🔍 Running TimeGPT anomaly detection...")

try:
    # Ask the service to flag anomalous rows
    anomalies_df = nixtla_client.detect_anomalies(
        df=detection_data, time_col='ds', target_col='y'
    )

    print("✅ Anomaly detection completed successfully!")
    print(f"Data shape: {anomalies_df.shape}")
    result_columns = list(anomalies_df.columns)
    print(f"Columns: {result_columns}")

    if 'anomaly' not in anomalies_df.columns:
        # Unexpected result layout: report what came back instead
        print("⚠️ No 'anomaly' column found in results")
        print("Available columns:", result_columns)
    else:
        # Summarise how many rows were flagged
        detected_anomalies = anomalies_df['anomaly'].sum()
        print(f"\nDetected anomalies: {detected_anomalies}")
        print(f"Detection rate: {detected_anomalies/len(anomalies_df)*100:.2f}%")

        # Preview the first flagged rows, if there are any
        if detected_anomalies > 0:
            print("\n📊 Sample detected anomalies:")
            flagged_rows = anomalies_df['anomaly'] == 1
            detected_samples = anomalies_df[flagged_rows].head()
            print(detected_samples[['ds', 'y', 'anomaly']])

except Exception as e:
    # Keep the notebook running; the comparison cell checks for None
    print(f"❌ Anomaly detection failed: {e}")
    anomalies_df = None


In [None]:
# Compare detection results with ground truth
if anomalies_df is not None and 'anomaly' in anomalies_df.columns:

    # Align detections with the ground truth on the series id and timestamp
    # rather than on the positional index: plain column assignment matches rows
    # by index label, so any difference in row count or order between the input
    # and the detector output would silently shift the labels.
    # NOTE(review): the detector may return fewer rows than it was given
    # (e.g. no scores for the start of the series) — confirm against the API docs.
    merge_keys = [
        key for key in ('unique_id', 'ds')
        if key in anomalies_df.columns and key in energy_data.columns
    ]
    detections = (
        anomalies_df[merge_keys + ['anomaly']]
        .rename(columns={'anomaly': 'detected_anomaly'})
        .copy()
    )
    if 'ds' in detections.columns:
        # Make sure both sides of the join use a datetime dtype
        detections['ds'] = pd.to_datetime(detections['ds'])

    comparison_df = energy_data.merge(detections, on=merge_keys, how='left')
    # 1.0 = flagged, 0.0 = not flagged, NaN = no score returned for this row.
    # NaN rows match neither `== 1` nor `== 0`, so they drop out of every metric.
    comparison_df['detected_anomaly'] = comparison_df['detected_anomaly'].astype(float)

    unscored_rows = int(comparison_df['detected_anomaly'].isna().sum())
    if unscored_rows > 0:
        print(f"ℹ️ {unscored_rows} rows have no anomaly score and are excluded from the metrics")

    # Calculate detection metrics
    true_positives = ((comparison_df['true_anomaly'] == 1) & (comparison_df['detected_anomaly'] == 1)).sum()
    false_positives = ((comparison_df['true_anomaly'] == 0) & (comparison_df['detected_anomaly'] == 1)).sum()
    false_negatives = ((comparison_df['true_anomaly'] == 1) & (comparison_df['detected_anomaly'] == 0)).sum()
    true_negatives = ((comparison_df['true_anomaly'] == 0) & (comparison_df['detected_anomaly'] == 0)).sum()

    # Calculate performance metrics (each falls back to 0 when its denominator is 0)
    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print("📊 TimeGPT Anomaly Detection Performance:")
    print("=" * 50)
    print(f"True Positives:  {true_positives} (correctly detected anomalies)")
    print(f"False Positives: {false_positives} (false alarms)")
    print(f"False Negatives: {false_negatives} (missed anomalies)")
    print(f"True Negatives:  {true_negatives} (correctly identified normal)")
    print(f"\nPrecision: {precision:.3f} (accuracy of anomaly alerts)")
    print(f"Recall:    {recall:.3f} (percentage of anomalies caught)")
    print(f"F1 Score:  {f1_score:.3f} (overall performance)")

    # Visualization comparing detections
    plt.figure(figsize=(16, 10))

    # Plot 1: Full comparison
    plt.subplot(2, 1, 1)

    # Normal data
    normal_mask = (comparison_df['true_anomaly'] == 0) & (comparison_df['detected_anomaly'] == 0)
    plt.plot(comparison_df[normal_mask]['ds'], comparison_df[normal_mask]['y'],
             color='blue', alpha=0.6, linewidth=1, label='Normal Data')

    # True positives (correctly detected)
    tp_mask = (comparison_df['true_anomaly'] == 1) & (comparison_df['detected_anomaly'] == 1)
    if tp_mask.sum() > 0:
        plt.scatter(comparison_df[tp_mask]['ds'], comparison_df[tp_mask]['y'],
                   color='green', s=50, label=f'True Positives ({true_positives})', zorder=5)

    # False positives (false alarms)
    fp_mask = (comparison_df['true_anomaly'] == 0) & (comparison_df['detected_anomaly'] == 1)
    if fp_mask.sum() > 0:
        plt.scatter(comparison_df[fp_mask]['ds'], comparison_df[fp_mask]['y'],
                   color='orange', s=50, label=f'False Positives ({false_positives})', zorder=5)

    # False negatives (missed)
    fn_mask = (comparison_df['true_anomaly'] == 1) & (comparison_df['detected_anomaly'] == 0)
    if fn_mask.sum() > 0:
        plt.scatter(comparison_df[fn_mask]['ds'], comparison_df[fn_mask]['y'],
                   color='red', s=50, marker='x', label=f'Missed Anomalies ({false_negatives})', zorder=5)

    plt.title('TimeGPT Anomaly Detection Results vs Ground Truth', fontsize=14, fontweight='bold')
    plt.ylabel('Energy Price ($/kWh)', fontsize=12)
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.xticks(rotation=45)

    # Plot 2: Focus on anomalous periods
    plt.subplot(2, 1, 2)

    # Show a focused view of anomalous regions
    anomaly_periods = comparison_df[comparison_df['true_anomaly'] == 1]['ds']
    if len(anomaly_periods) > 0:
        # Window from 12 hours before the first known anomaly to 12 hours after the last
        start_focus = anomaly_periods.min() - timedelta(hours=12)
        end_focus = anomaly_periods.max() + timedelta(hours=12)

        focus_data = comparison_df[(comparison_df['ds'] >= start_focus) & (comparison_df['ds'] <= end_focus)]

        # Plot focused data
        plt.plot(focus_data['ds'], focus_data['y'], color='blue', alpha=0.7, linewidth=1)

        # Highlight anomalies in focused view
        focus_tp = focus_data[focus_data['true_anomaly'] == 1]
        focus_detected = focus_data[focus_data['detected_anomaly'] == 1]

        plt.scatter(focus_tp['ds'], focus_tp['y'], color='red', s=60, alpha=0.8, label='Known Anomalies')
        plt.scatter(focus_detected['ds'], focus_detected['y'], color='green', s=40, marker='D',
                   alpha=0.8, label='TimeGPT Detected')

        plt.title('Focused View: Anomalous Periods', fontsize=14, fontweight='bold')
        plt.xlabel('Date', fontsize=12)
        plt.ylabel('Energy Price ($/kWh)', fontsize=12)
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

    # Trading insights based on anomaly detection
    print("\n💡 Trading Insights from Anomaly Detection:")
    print("=" * 50)

    # The insights describe every flagged row (not only the true positives),
    # so they are shown whenever at least one row was flagged.
    detected_anomaly_data = comparison_df[comparison_df['detected_anomaly'] == 1]
    if not detected_anomaly_data.empty:
        # Analyze detected anomalies for trading opportunities
        avg_anomaly_price = detected_anomaly_data['y'].mean()
        normal_price = comparison_df[comparison_df['detected_anomaly'] == 0]['y'].mean()

        print(f"✅ Average price during detected anomalies: ${avg_anomaly_price:.4f}/kWh")
        print(f"📊 Average normal price: ${normal_price:.4f}/kWh")
        print(f"💰 Price difference: ${avg_anomaly_price - normal_price:+.4f}/kWh")

        # Flag a direction only when anomalies deviate by more than 20% from normal
        if avg_anomaly_price > normal_price * 1.2:
            print("🔺 High-price anomalies detected - consider reducing mining operations")
        elif avg_anomaly_price < normal_price * 0.8:
            print("🔻 Low-price anomalies detected - consider increasing mining operations")

        print(f"\n⚡ Recommendation: Monitor anomaly detection in real-time for:")
        print("   • Automatic mining ramp-up during low-price anomalies")
        print("   • Risk management during high-price anomalies")
        print("   • Mean reversion trading opportunities")
    else:
        print("ℹ️ No anomalies were detected, so there are no price insights to report")

else:
    print("❌ Cannot perform comparison - anomaly detection results not available")
