# In [None]:  (notebook cell marker left over from the Jupyter export)
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from math import sin, cos, pi
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')

# Seed both NumPy's and TensorFlow's global random generators with the same
# value so repeated runs start from the same random state.
SEED = 42
for seed_fn in (np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

# 1) Load & preprocess data
plant_df = pd.read_csv('Plant1_filtered.csv')
weather_df = pd.read_csv('Plant1_Weather_filtered.csv')

# NOTE(review): the timestamp format is inferred separately for each file.
# If the two CSVs use different day/month orders, confirm that the parsed
# timestamps line up, otherwise the inner merge below silently drops rows.
plant_df['DATE_TIME'] = pd.to_datetime(plant_df['DATE_TIME'])
weather_df['DATE_TIME'] = pd.to_datetime(weather_df['DATE_TIME'])

# Aggregate plant data - average power over all rows sharing a timestamp.
# AC_POWER is the forecasting target and therefore required (a KeyError is
# raised if it is missing). DC_POWER is optional: it is only aggregated when
# the column is actually present instead of failing on its absence.
optional_power_cols = [col for col in ['DC_POWER'] if col in plant_df.columns]
power_cols = ['AC_POWER'] + optional_power_cols
plant_agg = plant_df.groupby('DATE_TIME')[power_cols].mean().reset_index()

# 2) Merge and prepare dataframe
# Inner join: keep only timestamps present in both the weather and plant data.
df = pd.merge(weather_df, plant_agg, on='DATE_TIME', how='inner')
df.set_index('DATE_TIME', inplace=True)
df = df.sort_index()  # Ensure time-ordered data

# Check for and handle missing values (rows with any NaN are dropped)
print(f"Missing values before handling: {df.isnull().sum().sum()}")
df = df.dropna()
print(f"Missing values after handling: {df.isnull().sum().sum()}")
print(f"Data shape: {df.shape}")

# 3) Advanced feature engineering
# Time-based features (intermediate columns; most are dropped again below)
df['hour'] = df.index.hour + df.index.minute/60.0  # fractional hour of day
df['day_of_year'] = df.index.dayofyear
df['day_of_week'] = df.index.dayofweek
df['month'] = df.index.month
df['is_weekend'] = df.index.dayofweek >= 5

# Multi-frequency Fourier features - capture daily, weekly and yearly patterns.
# Each cycle needs its own phase variable: the hours elapsed since the start
# of that cycle. Feeding hour-of-day (0..24) into the weekly and yearly terms
# would only yield slowly-varying copies of the daily signal and would never
# encode the position within the week or year.
fourier_phases = [
    (24, 'hour', df['hour']),
    (168, 'week', df['day_of_week'] * 24 + df['hour']),
    # 8760 h = 365 days; leap years are off by one day, which is negligible here
    (8760, 'year', (df['day_of_year'] - 1) * 24 + df['hour']),
]
for period, name, elapsed_hours in fourier_phases:
    for harm in [1, 2, 3]:  # Multiple harmonics for better approximation
        angle = 2 * pi * harm * elapsed_hours / period
        df[f'sin_{name}_{harm}'] = np.sin(angle)
        df[f'cos_{name}_{harm}'] = np.cos(angle)

# Basic rolling statistics on key weather variables
# NOTE(review): row-count windows and shifts below assume evenly spaced
# 15-minute rows with no gaps - confirm after the merge/dropna above.
for col in ['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION']:
    if col in df.columns:
        # Rolling mean - smooths out short-term fluctuations
        df[f'{col}_rolling_3h'] = df[col].rolling(window=12).mean()  # 3-hour window (12 * 15min)
        # Rolling standard deviation - captures variability/instability
        df[f'{col}_rolling_var'] = df[col].rolling(window=12).std()  # Variability

# Add lag features for key variables
target_col = 'AC_POWER'
lag_features = ['AC_POWER', 'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION']
lag_hours = [1, 2, 3, 6, 12]  # Multiple lag periods

for col in lag_features:
    if col in df.columns:
        for lag in lag_hours:
            lag_steps = lag * 4  # Assuming 15-minute data (4 steps per hour)
            df[f'{col}_lag_{lag}h'] = df[col].shift(lag_steps)

# Drop the leading rows whose rolling/lag features are NaN
df = df.dropna()

# Convert boolean to int (must happen before the numeric-only selection below,
# otherwise the boolean column would be discarded)
df['is_weekend'] = df['is_weekend'].astype(int)

# Drop non-numeric columns and the raw calendar columns, which are now
# represented by the Fourier features
df = df.select_dtypes(include=[np.number])
df.drop(['hour', 'day_of_year', 'day_of_week', 'month'], axis=1, inplace=True)
print(f"Final data shape: {df.shape}")

# 4) Select features & target
target_col = 'AC_POWER'
feature_cols = [col for col in df.columns if col != target_col]
print(f"Number of features: {len(feature_cols)}")
print(f"Features sample: {feature_cols[:5]}...")

# 5) Scale features - StandardScaler often works better than MinMaxScaler for LSTMs
# Fit the scalers on the leading (training) portion of the time-ordered rows
# only, then apply them to every row. Fitting on the full dataset would leak
# the mean/variance of the held-out period into training.
# Keep this fraction in sync with `test_frac` (0.2) used for the train/test
# split in the training loop.
scaler_train_frac = 0.8
scaler_fit_rows = int(len(df) * scaler_train_frac)

scaler_X = StandardScaler()
scaler_y = StandardScaler()  # StandardScaler for target too
scaler_X.fit(df[feature_cols].iloc[:scaler_fit_rows])
scaler_y.fit(df[[target_col]].iloc[:scaler_fit_rows])
X_all = scaler_X.transform(df[feature_cols])
y_all = scaler_y.transform(df[[target_col]])

# 6) Sliding-window sequence generator
def create_sequences(X, y, time_steps, horizon, stride=1):
    """Cut aligned feature/target arrays into supervised sliding windows.

    Window ``k`` starts at row ``i = k * stride``; its input is
    ``X[i : i + time_steps]`` and its target is the ``horizon`` rows of ``y``
    that immediately follow, flattened to one dimension.

    Args:
        X: Array-like of features, first axis is time.
        y: Array-like of targets with the same length as ``X``.
        time_steps: Number of rows in each input window (>= 1).
        horizon: Number of future rows in each target (>= 1).
        stride: Step between consecutive window starts (>= 1). ``stride=1``
            yields the maximum number of windows; larger values yield fewer.

    Returns:
        Tuple ``(Xs, ys)`` where ``Xs`` has shape
        ``(n_windows, time_steps, *X.shape[1:])`` and ``ys`` has shape
        ``(n_windows, horizon * prod(y.shape[1:]))``. When the input is too
        short for a single window, both arrays are empty but keep those
        trailing dimensions.

    Raises:
        ValueError: If ``time_steps``, ``horizon`` or ``stride`` is < 1, or
            if ``X`` and ``y`` differ in length.
    """
    if time_steps < 1 or horizon < 1 or stride < 1:
        raise ValueError("time_steps, horizon and stride must all be >= 1")

    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )

    # Number of target values per window once each target slice is flattened
    target_width = horizon * int(np.prod(y.shape[1:]))

    starts = range(0, len(X) - time_steps - horizon + 1, stride)
    if len(starts) == 0:
        # Keep the trailing dimensions so callers can still read e.g. Xs.shape[2]
        empty_X = np.empty((0, time_steps) + X.shape[1:], dtype=X.dtype)
        empty_y = np.empty((0, target_width), dtype=y.dtype)
        return empty_X, empty_y

    Xs = np.stack([X[i:i + time_steps] for i in starts])
    ys = np.stack(
        [y[i + time_steps:i + time_steps + horizon].reshape(-1) for i in starts]
    )
    return Xs, ys

# 7) Enhanced model architecture
def build_enhanced_model(input_shape, horizon):
    """Assemble and compile the stacked bidirectional-LSTM forecaster.

    The network is three Bidirectional(LSTM) stages with 256, 128 and 64
    units, each followed by BatchNormalization and Dropout(0.2), and a final
    linear Dense layer with ``horizon`` outputs. It is compiled with Adam
    (learning rate 0.001) and an MSE loss.

    Args:
        input_shape: Shape of one input sample, passed to the first layer.
        horizon: Number of output units of the final Dense layer.

    Returns:
        The compiled Sequential model.
    """
    lstm_units = (256, 128, 64)
    final_stage = len(lstm_units) - 1

    stack = []
    for depth, units in enumerate(lstm_units):
        # Every stage except the last hands its full sequence to the next LSTM
        recurrent = LSTM(units, activation='tanh',
                         return_sequences=(depth != final_stage))
        if depth == 0:
            # Only the first layer carries the input shape
            wrapped = Bidirectional(recurrent, input_shape=input_shape)
        else:
            wrapped = Bidirectional(recurrent)
        stack.extend([wrapped, BatchNormalization(), Dropout(0.2)])

    # Output layer: one linear unit per forecast step
    stack.append(Dense(horizon, activation='linear'))

    network = Sequential(stack)
    network.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return network

# 8) Training parameters
time_steps = 96  # 24 hours of history per input window (15-min intervals: 4 * 24 = 96)
horizons = [4, 20, 96, 288]  # 1h, 5h, 24h, 72h (4 steps per hour)
test_frac = 0.2  # Trailing fraction of the windows held out for testing
# Step between consecutive window starts. Windows still overlap heavily, but
# stride=2 produces roughly half as many samples as stride=1.
stride = 2
results = {}  # horizon (in steps) -> model, history, test arrays and metrics


def inverse_transform(y_scaled):
    """Undo the target scaling with ``scaler_y`` while keeping the array shape."""
    flat = y_scaled.reshape(-1, 1)
    inverted = scaler_y.inverse_transform(flat)
    return inverted.reshape(y_scaled.shape)


def _regression_metrics(y_true, y_pred, prefix):
    """Return MAE, RMSE and R² under the keys ``{prefix}_mae/_rmse/_r2``."""
    return {
        f'{prefix}_mae': mean_absolute_error(y_true, y_pred),
        f'{prefix}_rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
        f'{prefix}_r2': r2_score(y_true, y_pred),
    }


for h in horizons:
    print(f"\n\n{'='*80}")
    print(f"TRAINING MODEL FOR HORIZON = {h} STEPS ({h/4} HOURS)")
    print(f"{'='*80}")

    # Cut the scaled series into (input window, future target) pairs
    X, y = create_sequences(X_all, y_all, time_steps, h, stride=stride)
    print(f"Total sequences: {len(X)}")

    # Train-test split (time-ordered, no shuffling: the last windows are the test set)
    split = int(len(X)*(1-test_frac))
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]
    print(f"Shapes: X_train={X_train.shape}, y_train={y_train.shape}, X_test={X_test.shape}, y_test={y_test.shape}")

    # Build and train model
    model = build_enhanced_model((time_steps, X.shape[2]), h)

    # Callbacks for better training
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=15,  # More patience
        restore_best_weights=True,
        verbose=1
    )

    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,  # More aggressive LR reduction
        patience=5,
        min_lr=1e-6,
        verbose=1
    )

    # Fit model; the epoch count is an upper bound, early stopping ends training
    history = model.fit(
        X_train, y_train,
        validation_split=0.15,  # Keras holds out the last 15% of the training samples
        epochs=150,
        batch_size=64,
        callbacks=[early_stop, reduce_lr],
        verbose=2
    )

    # 9) Prediction and evaluation
    # Generate predictions
    train_pred = model.predict(X_train, verbose=0)
    test_pred = model.predict(X_test, verbose=0)

    # Inverse transform back to original AC_POWER units
    y_train_inv = inverse_transform(y_train)
    train_pred_inv = inverse_transform(train_pred)
    y_test_inv = inverse_transform(y_test)
    test_pred_inv = inverse_transform(test_pred)

    # Calculate metrics
    # For first step prediction (column 0 = one step ahead)
    metrics_first = {
        **_regression_metrics(y_train_inv[:, 0], train_pred_inv[:, 0], 'train'),
        **_regression_metrics(y_test_inv[:, 0], test_pred_inv[:, 0], 'test'),
    }

    # For all steps in the horizon (every forecast step pooled together)
    metrics_all = {
        **_regression_metrics(y_train_inv.flatten(), train_pred_inv.flatten(), 'train'),
        **_regression_metrics(y_test_inv.flatten(), test_pred_inv.flatten(), 'test'),
    }

    # Print results
    print(f"\nMETRICS FOR HORIZON = {h} STEPS ({h/4} HOURS):")
    print(f"FIRST STEP PREDICTIONS:")
    print(f"  TRAIN - MAE: {metrics_first['train_mae']:.2f}, RMSE: {metrics_first['train_rmse']:.2f}, R²: {metrics_first['train_r2']:.4f}")
    print(f"  TEST  - MAE: {metrics_first['test_mae']:.2f}, RMSE: {metrics_first['test_rmse']:.2f}, R²: {metrics_first['test_r2']:.4f}")
    print(f"ALL STEPS PREDICTIONS:")
    print(f"  TRAIN - MAE: {metrics_all['train_mae']:.2f}, RMSE: {metrics_all['train_rmse']:.2f}, R²: {metrics_all['train_r2']:.4f}")
    print(f"  TEST  - MAE: {metrics_all['test_mae']:.2f}, RMSE: {metrics_all['test_rmse']:.2f}, R²: {metrics_all['test_r2']:.4f}")

    # Store results
    results[h] = {
        'model': model,
        'history': history.history,
        'y_test': y_test_inv,
        'y_pred': test_pred_inv,
        'metrics_first': metrics_first,
        'metrics_all': metrics_all,
    }

    # 10) Plot training history
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Loss Curves - Horizon {h/4}h')
    plt.xlabel('Epoch')
    plt.ylabel('MSE Loss')
    plt.grid(True)
    plt.legend()

    # 11) Plot actual vs predicted for test set
    plt.subplot(1, 2, 2)
    n_samples = min(200, len(y_test_inv))

    # First step predictions
    plt.plot(y_test_inv[:n_samples, 0], 'b-', label='Actual', alpha=0.7)
    plt.plot(test_pred_inv[:n_samples, 0], 'r-', label='Predicted', alpha=0.7)
    plt.title(f'Actual vs Predicted AC_POWER - Horizon {h/4}h')
    plt.xlabel('Sample Index')
    plt.ylabel('AC_POWER')
    plt.grid(True)
    plt.legend()

    plt.tight_layout()
    plt.show()

    # 12) Plot multistep predictions for a few examples
    plt.figure(figsize=(15, 8))
    plt.subplot(2, 1, 1)

    # Plot full horizon predictions for a single example.
    # Clamp the index so a test set with 50 or fewer windows does not raise.
    example_idx = min(50, len(y_test_inv) - 1)
    steps = np.arange(h)
    plt.plot(steps, y_test_inv[example_idx], 'b-', label='Actual', linewidth=2)
    plt.plot(steps, test_pred_inv[example_idx], 'r-', label='Predicted', linewidth=2)
    plt.title(f'Multi-step Prediction Example - Horizon {h/4}h')
    plt.xlabel('Steps Ahead')
    plt.ylabel('AC_POWER')
    plt.grid(True)
    plt.legend()

    # Plot several first-step predictions (up to five 40-sample segments,
    # overlaid on a shared 0..39 axis; blue = actual, red = predicted)
    plt.subplot(2, 1, 2)
    for i in range(min(5, n_samples//40)):
        start_idx = i * 40
        end_idx = start_idx + 40
        plt.plot(y_test_inv[start_idx:end_idx, 0], 'b-', alpha=0.5)
        plt.plot(test_pred_inv[start_idx:end_idx, 0], 'r-', alpha=0.5)

    plt.title(f'First-step Predictions - Multiple Segments')
    plt.xlabel('Sample Index')
    plt.ylabel('AC_POWER')
    plt.grid(True)

    plt.tight_layout()
    plt.show()

# 13) Summary of results
# Banner, then the held-out (test) metrics for every trained horizon.
banner = "="*50
print("\n\n" + banner)
print("SUMMARY OF RESULTS FOR ALL HORIZONS")
print(banner)

for h, outcome in results.items():
    first_step_scores = outcome['metrics_first']
    all_step_scores = outcome['metrics_all']

    print(f"\nHorizon = {h} steps ({h/4} hours):")
    report_sections = (
        ("FIRST STEP:", first_step_scores),
        ("ALL STEPS:", all_step_scores),
    )
    for heading, scores in report_sections:
        print(heading)
        print(f"  TEST - MAE: {scores['test_mae']:.2f}, RMSE: {scores['test_rmse']:.2f}, R²: {scores['test_r2']:.4f}")

# `model` is whatever the training loop assigned last, i.e. the network
# trained for the final horizon.
print("\nModel architecture summary:")
model.summary()