<a href="https://colab.research.google.com/github/skashyapsri/ghi-forecasting/blob/main/AST_Latest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler
import requests
from datetime import datetime


def process_nasa_data(data):
    """Extract the hourly GHI readings from a decoded NASA POWER payload.

    Args:
        data: Mapping whose ``['properties']['parameter']['ALLSKY_SFC_SW_DWN']``
            entry maps timestamp strings to readings.

    Returns:
        1-D NumPy array holding one float per 10-character (``YYYYMMDDHH``)
        key, in the mapping's iteration order.

    Raises:
        ValueError: If a 10-character key is not a valid ``YYYYMMDDHH``
            timestamp, or a reading cannot be converted to float.
    """
    hourly = data['properties']['parameter']['ALLSKY_SFC_SW_DWN']
    readings = []
    for stamp, reading in hourly.items():
        # Keys of any other length are not full hourly timestamps; skip them.
        if len(stamp) != 10:
            continue
        readings.append(float(reading))
        # The parsed datetime is not kept, but parsing still rejects
        # malformed timestamps.
        datetime.strptime(stamp, '%Y%m%d%H')
    return np.array(readings)


# NASA POWER API data fetcher
def fetch_power_data(lat, lon, start_date, end_date, timeout=60):
    """Download the hourly GHI series for one point from the NASA POWER API.

    Args:
        lat: Latitude, sent as the ``latitude`` query parameter.
        lon: Longitude, sent as the ``longitude`` query parameter.
        start_date: Value of the ``start`` query parameter (callers in this
            file pass ``YYYYMMDD`` strings).
        end_date: Value of the ``end`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    base_url = "https://power.larc.nasa.gov/api/temporal/hourly/point"
    params = {
        "parameters": "ALLSKY_SFC_SW_DWN",  # GHI parameter
        "community": "RE",
        "longitude": lon,
        "latitude": lat,
        "start": start_date,
        "end": end_date,
        "format": "JSON",
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(base_url, params=params, timeout=timeout)
    # Fail here on an API error instead of handing an error payload to callers
    # that index straight into ['properties'].
    response.raise_for_status()
    return response.json()

# Data preprocessing
class DataPreprocessor:
    """Standardise a univariate series and cut it into supervised windows.

    Attributes are plain values set in ``__init__``:
    ``window_size`` (input steps per sample), ``prediction_hours`` (target
    steps per sample) and ``scaler`` (a ``StandardScaler`` fitted by
    ``normalize_data``).
    """

    def __init__(self, window_size=24, prediction_hours=1):
        self.window_size = window_size
        self.prediction_hours = prediction_hours
        self.scaler = StandardScaler()

    def create_sequences(self, data):
        """Slice ``data`` into (input window, target window) pairs.

        Args:
            data: NumPy array; it is reshaped to a single column first.

        Returns:
            Tuple ``(X, y)`` of arrays. For a non-empty result ``X`` has shape
            ``(n, window_size, 1)`` and ``y`` has shape
            ``(n, prediction_hours, 1)``. When the series is too short for a
            single sample both arrays are empty.
        """
        data = data.reshape(-1, 1)
        # "+ 1" so the window whose target ends on the very last row is kept;
        # without it the final valid sample is silently dropped.
        n_samples = len(data) - self.window_size - self.prediction_hours + 1
        X, y = [], []
        for start in range(n_samples):
            split = start + self.window_size
            X.append(data[start:split])
            y.append(data[split:split + self.prediction_hours])
        return np.array(X), np.array(y)

    def normalize_data(self, data):
        """Fit the scaler on ``data`` and return the standardised column.

        Every call refits ``self.scaler`` on the data it is given.
        """
        data = data.reshape(-1, 1)
        return self.scaler.fit_transform(data)

# AST Model Implementation
class AST(keras.Model):
    """Transformer-encoder regressor over a fixed-length input window.

    Each input step is projected to ``d_model`` features, a fixed positional
    table is added, the result passes through ``n_layers`` encoder layers and
    every step is finally mapped back to a single value.

    NOTE(review): ``pred_len`` is stored but never read by ``call``; the output
    keeps one value per input step. Confirm this matches the target shape used
    during training.
    """

    def __init__(self, seq_len, pred_len, d_model=256, n_heads=4, n_layers=3):
        super().__init__()

        self.seq_len = seq_len
        self.pred_len = pred_len

        # Per-step embedding plus a precomputed positional table.
        self.input_proj = layers.Dense(d_model)
        self.pos_encoding = self._positional_encoding(seq_len, d_model)

        # Encoder stack, assembled first and assigned as one list.
        stack = []
        for _ in range(n_layers):
            stack.append(SparseTransformerLayer(d_model, n_heads))
        self.encoder_layers = stack

        # Maps each d_model-wide step back to one scalar.
        self.output_proj = layers.Dense(1)

    def _positional_encoding(self, seq_len, d_model):
        """Return a ``(1, seq_len, d_model)`` float32 positional table.

        Entry ``(p, j)`` starts as ``p * j / d_model``; even columns are then
        replaced by their sine and odd columns by their cosine.
        """
        step_index = np.arange(seq_len)[:, np.newaxis]
        column_rate = np.arange(d_model)[np.newaxis, :] / d_model
        table = step_index * column_rate

        table[:, 0::2] = np.sin(table[:, 0::2])
        table[:, 1::2] = np.cos(table[:, 1::2])

        return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

    def call(self, inputs, training=False):
        """Run the forward pass and return one value per input step."""
        hidden = self.input_proj(inputs)
        hidden = hidden + self.pos_encoding

        for block in self.encoder_layers:
            hidden = block(hidden)

        return self.output_proj(hidden)

class SparseTransformerLayer(layers.Layer):
    """Post-norm Transformer encoder block: self-attention then feed-forward.

    No attention mask is passed to the attention layer, so despite the class
    name every position attends to every other position.
    """

    def __init__(self, d_model, n_heads):
        super().__init__()

        self.mha = layers.MultiHeadAttention(n_heads, d_model)
        # Position-wise feed-forward network with a 4x wider hidden layer.
        hidden_units = d_model * 4
        self.ffn = keras.Sequential([
            layers.Dense(hidden_units, activation='relu'),
            layers.Dense(d_model)
        ])

        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = layers.Dropout(0.1)
        self.dropout2 = layers.Dropout(0.1)

    def call(self, inputs, training=False):
        """Apply attention and feed-forward sub-layers with residuals."""
        # Self-attention sub-layer: query and value are both the input.
        attended = self.dropout1(self.mha(inputs, inputs), training=training)
        residual = self.layernorm1(inputs + attended)

        # Feed-forward sub-layer.
        transformed = self.dropout2(self.ffn(residual), training=training)
        return self.layernorm2(residual + transformed)

# Training
def train_model(model, train_data, val_data, epochs=100):
    """Train ``model`` with Adam on MSE and print per-epoch mean losses.

    Args:
        model: Callable Keras model accepting ``(x, training=...)``.
        train_data: Iterable of ``(x, y)`` batches used for optimisation.
        val_data: Iterable of ``(x, y)`` batches used only for loss reporting.
        epochs: Number of passes over ``train_data``.

    Returns:
        None. The model is updated in place and progress is printed.
    """
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    loss_fn = tf.keras.losses.MeanSquaredError()

    @tf.function
    def train_step(x, y):
        # Record the forward pass so gradients can be taken w.r.t. the weights.
        with tf.GradientTape() as tape:
            loss = loss_fn(y, model(x, training=True))
        weights = model.trainable_variables
        optimizer.apply_gradients(zip(tape.gradient(loss, weights), weights))
        return loss

    @tf.function
    def val_step(x, y):
        return loss_fn(y, model(x, training=False))

    for epoch in range(epochs):
        # One full training pass, then one validation pass, per epoch.
        train_losses = [float(train_step(xb, yb)) for xb, yb in train_data]
        val_losses = [float(val_step(xb, yb)) for xb, yb in val_data]

        print(f"Epoch {epoch + 1}, "
              f"Train Loss: {np.mean(train_losses):.4f}, "
              f"Val Loss: {np.mean(val_losses):.4f}")

# Main execution
def main():
    """Fetch one year of GHI for New York City, train the AST model, save it.

    NOTE(review): the scaler is fitted on the whole series before the
    train/validation split, so validation statistics influence the
    normalisation — confirm this is acceptable.
    NOTE(review): ``create_sequences`` yields targets with ``prediction_hours``
    steps while ``AST.call`` returns one value per input step; confirm the
    loss in ``train_model`` compares the intended shapes.
    NOTE(review): confirm the installed Keras version supports saving this
    subclassed model to an ``.h5`` file.
    """
    # New York City coordinates
    latitude, longitude = 40.7128, -74.0060
    payload = fetch_power_data(latitude, longitude, "20230101", "20231231")
    ghi_data = process_nasa_data(payload)

    # Standardise, then cut into 24-step inputs with 1-step targets.
    preprocessor = DataPreprocessor(window_size=24, prediction_hours=1)
    X, y = preprocessor.create_sequences(preprocessor.normalize_data(ghi_data))

    # Chronological 80/20 split.
    n_train = int(0.8 * len(X))
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (X[:n_train], y[:n_train])).batch(32)
    val_dataset = tf.data.Dataset.from_tensor_slices(
        (X[n_train:], y[n_train:])).batch(32)

    model = AST(seq_len=24, pred_len=1)
    train_model(model, train_dataset, val_dataset)

    model.save('ast_ghi_model.h5')

In [None]:
import os
# Module-level names only; these two assignments do not touch the process
# environment.
CUDA_LAUNCH_BLOCKING = 1
TORCH_USE_CUDA_DSA = 1
# Environment flags, set before torch is imported below.
os.environ.update({
    'CUDA_LAUNCH_BLOCKING': "1",
    'TORCH_USE_CUDA_DSA': "1",
    'PYTORCH_USE_CUDA_DSA': "1",
})

import requests
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import math

# NASA POWER API data fetcher
def fetch_power_data(lat, lon, start_date, end_date, timeout=60):
    """Download the hourly GHI series for one point from the NASA POWER API.

    Args:
        lat: Latitude, sent as the ``latitude`` query parameter.
        lon: Longitude, sent as the ``longitude`` query parameter.
        start_date: Value of the ``start`` query parameter (callers in this
            file pass ``YYYYMMDD`` strings).
        end_date: Value of the ``end`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    base_url = "https://power.larc.nasa.gov/api/temporal/hourly/point"
    params = {
        "parameters": "ALLSKY_SFC_SW_DWN",  # GHI parameter
        "community": "RE",
        "longitude": lon,
        "latitude": lat,
        "start": start_date,
        "end": end_date,
        "format": "JSON",
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(base_url, params=params, timeout=timeout)
    # Fail here on an API error instead of handing an error payload to callers
    # that index straight into ['properties'].
    response.raise_for_status()
    return response.json()

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset over a 1-D series.

    Item ``idx`` is the pair ``(x, y)`` where ``x`` holds ``seq_length``
    consecutive values starting at ``idx`` and ``y`` holds the
    ``pred_length`` values that follow; both get a trailing feature axis.
    """

    def __init__(self, data, seq_length, pred_length):
        self.data = torch.FloatTensor(data)
        self.seq_length = seq_length
        self.pred_length = pred_length

    def __len__(self):
        # Clamp at zero: a series shorter than one full window would otherwise
        # give a negative length, which makes len() raise ValueError.
        return max(0, len(self.data) - self.seq_length - self.pred_length + 1)

    def __getitem__(self, idx):
        split = idx + self.seq_length
        x = self.data[idx:split].unsqueeze(-1)  # add feature dimension
        y = self.data[split:split + self.pred_length].unsqueeze(-1)
        return x, y

class AlphaEntmax(nn.Module):
    """Sparse normalising transform computed by bisection.

    For ``alpha == 1`` this is ordinary softmax. Otherwise the output along
    ``dim`` is ``clamp(((alpha - 1) * x - tau) / alpha, 0) ** (1 / (alpha - 1))``
    with the threshold ``tau`` chosen so the entries sum to one; entries that
    fall below the threshold become exactly zero. The code path for
    ``alpha != 1`` assumes ``alpha > 1``.
    """

    def __init__(self, alpha=1.5, dim=-1):
        super().__init__()
        self.alpha = alpha
        self.dim = dim

    def forward(self, x):
        return self.entmax(x, self.alpha, dim=self.dim)

    def entmax(self, x, alpha, dim=-1):
        """Return the normalised weights of ``x`` along ``dim``."""
        if alpha == 1:
            return torch.softmax(x, dim=dim)

        # Shift so the largest entry is zero; the result is shift-invariant.
        x_shifted = x - x.max(dim=dim, keepdim=True)[0]
        tau = self._find_tau(x_shifted, alpha, dim)
        p = self._weights(x_shifted, tau, alpha)
        # Bisection stops at a finite tolerance; renormalise so the weights
        # sum to exactly one.
        return p / p.sum(dim=dim, keepdim=True)

    @staticmethod
    def _weights(x, tau, alpha):
        """Evaluate the thresholded power function for a given ``tau``."""
        return torch.clamp(((alpha - 1) * x - tau) / alpha, min=0) ** (1 / (alpha - 1))

    def _find_tau(self, x, alpha, dim, n_iter=20):
        """Bisect for the threshold at which the weights sum to one.

        The bracket is built from the row maximum only:
        at ``tau_upper`` every entry is clamped to zero (sum 0), and at
        ``tau_lower = tau_upper - alpha`` the maximal entry alone already
        contributes 1 (sum >= 1), so the root always lies inside. Deriving the
        lower bound from the row minimum instead fails for near-uniform rows
        and for rows containing ``-inf``.
        """
        tau_upper = x.max(dim=dim, keepdim=True)[0] * (alpha - 1)
        tau_lower = tau_upper - alpha

        for _ in range(n_iter):
            tau = (tau_lower + tau_upper) / 2
            sum_p = self._weights(x, tau, alpha).sum(dim=dim, keepdim=True)
            # Sum too large means the threshold is too low: raise the lower end.
            too_high = (sum_p > 1)
            tau_lower = torch.where(too_high, tau, tau_lower)
            tau_upper = torch.where(too_high, tau_upper, tau)

        return (tau_lower + tau_upper) / 2

class SparseMultiHeadAttention(nn.Module):
    """Multi-head attention whose weights come from ``AlphaEntmax``.

    Inputs are batch-first: ``q``, ``k`` and ``v`` are viewed as
    ``(batch, length, d_model)`` and split into ``num_heads`` heads of
    ``d_model // num_heads`` features each.
    """

    def __init__(self, d_model, num_heads, dropout=0.1, alpha=1.5):
        super().__init__()
        # The head split in forward() uses view(); with a remainder it would
        # either fail or silently regroup features across positions.
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_heads = num_heads
        self.head_dim = d_model // num_heads

        self.q_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.out_linear = nn.Linear(d_model, d_model)

        self.alpha_entmax = AlphaEntmax(alpha=alpha)
        self.dropout = nn.Dropout(dropout)

    def forward(self, q, k, v, mask=None):
        """Attend from ``q`` over ``k``/``v``.

        Args:
            q, k, v: Tensors whose first axis is the batch and whose last axis
                has ``d_model`` features.
            mask: Optional mask; positions where it equals 0 are excluded.
                A 3-D mask gets a head axis inserted at position 1. A 2-D mask
                is expanded to ``(d0, 1, 1, d1)``, i.e. it is read as one row
                per batch element over key positions. The mask is then cropped
                to the score matrix's last two dimensions.

        Returns:
            Tensor of shape ``(batch, query_length, d_model)``.
        """
        batch_size = q.size(0)

        # Project, then reshape to (batch, heads, length, head_dim).
        q = self.q_linear(q).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        k = self.k_linear(k).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        v = self.v_linear(v).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)

        # Scaled dot-product scores: (batch, heads, q_len, k_len).
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.head_dim)

        if mask is not None:
            if mask.dim() == 3:
                mask = mask.unsqueeze(1)
            elif mask.dim() == 2:
                mask = mask.unsqueeze(1).unsqueeze(2)

            # Crop an oversized mask to the score matrix.
            scores_size = scores.size()
            mask = mask[:, :, :scores_size[2], :scores_size[3]]

            scores = scores.masked_fill(mask == 0, float('-inf'))

        # Sparse attention weights, with dropout applied to the weights.
        attn = self.alpha_entmax(scores)
        attn = self.dropout(attn)

        # Weighted sum of values, heads merged back into d_model features.
        output = torch.matmul(attn, v)
        output = output.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)

        return self.out_linear(output)

def create_mask(seq_len):
    """
    Build a causal (lower-triangular) attention mask.
    Args:
        seq_len: Length of the sequence
    Returns:
        mask: Boolean ``(seq_len, seq_len)`` tensor; entry ``[i, j]`` is True
            when ``j <= i`` (attention allowed) and False otherwise
    """
    # Keeping the diagonal and everything below it is the complement of the
    # strict upper triangle.
    return torch.tril(torch.ones(seq_len, seq_len)).bool()

def create_src_mask(src):
    """
    Build an all-ones mask that lets every source position attend everywhere.
    Args:
        src: Source sequence tensor of shape [batch_size, seq_len, ...]
    Returns:
        mask: Tensor of ones with shape [batch_size, seq_len, seq_len] on
            the same device as ``src``
    """
    n_batch, n_steps = src.shape[0], src.shape[1]
    full = torch.ones((n_batch, n_steps, n_steps))
    return full.to(src.device)

def create_tgt_mask(tgt):
    """
    Build a per-batch causal mask for decoder self-attention.
    Args:
        tgt: Target sequence tensor of shape [batch_size, seq_len, ...]
    Returns:
        mask: Boolean tensor of shape [batch_size, seq_len, seq_len] on the
            same device as ``tgt``; True marks positions that may be attended
    """
    n_batch, n_steps = tgt.shape[0], tgt.shape[1]
    # One causal mask, broadcast (not copied) across the batch.
    causal = create_mask(n_steps).unsqueeze(0)
    return causal.expand(n_batch, -1, -1).to(tgt.device)

def train_ast(data, seq_length=168, pred_length=24, batch_size=32, epochs=100,
              d_model=256, num_heads=8, num_layers=3, dropout=0.1):
    """Adversarially train an AST generator against a Discriminator.

    Args:
        data: Series passed to ``TimeSeriesDataset`` (one value per step).
        seq_length: Number of input steps per sample.
        pred_length: Number of target steps per sample; also the
            discriminator's input width.
        batch_size: Batch size for the shuffled DataLoader.
        epochs: Number of passes over the dataset.
        d_model, num_heads, num_layers, dropout: Forwarded to ``AST``.

    Returns:
        Tuple ``(model, discriminator)``. Both carry the same
        ``training_history`` dict with per-batch (``g_loss``, ``d_loss``) and
        per-epoch (``g_loss_avg``, ``d_loss_avg``) loss lists.

    NOTE(review): the generator is called as ``model(x, y)``, i.e. the
    ground-truth targets are fed unshifted as the decoder input while the
    loss also compares the output against ``y`` — confirm this is intended.
    """

    # Create dataset and dataloader
    dataset = TimeSeriesDataset(data, seq_length, pred_length)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Initialize models
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = AST(input_dim=1, output_dim=1, d_model=d_model,
               num_heads=num_heads, num_layers=num_layers, dropout=dropout).to(device)
    discriminator = Discriminator(pred_length).to(device)

    # Initialize optimizers (separate Adam instances for each network)
    g_optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    d_optimizer = torch.optim.Adam(discriminator.parameters(), lr=0.0001)

    # Loss functions: MSE for forecast accuracy, BCE for the adversarial game
    mse_loss = nn.MSELoss()
    bce_loss = nn.BCELoss()

    # Initialize training history
    training_history = {
        'g_loss': [],
        'd_loss': [],
        'g_loss_avg': [],
        'd_loss_avg': []
    }

    print("Training on device:", device)

    for epoch in range(epochs):
        model.train()
        discriminator.train()
        total_g_loss = 0
        total_d_loss = 0

        for batch_idx, (x, y) in enumerate(dataloader):
            # Rebinds the batch_size parameter to the actual size of this
            # batch so the label tensors below match it.
            batch_size = x.size(0)
            x, y = x.to(device), y.to(device)

            # Train discriminator: real targets vs. generator output that is
            # produced without tracking gradients.
            d_optimizer.zero_grad()
            with torch.no_grad():
                fake_seq = model(x, y)
            real_labels = torch.ones(batch_size, 1).to(device)
            fake_labels = torch.zeros(batch_size, 1).to(device)

            d_real = discriminator(y)
            d_fake = discriminator(fake_seq.detach())

            d_real_loss = bce_loss(d_real, real_labels)
            d_fake_loss = bce_loss(d_fake, fake_labels)
            d_loss = d_real_loss + d_fake_loss

            d_loss.backward()
            d_optimizer.step()

            # Train generator: a fresh forward pass (with gradients) scored by
            # the just-updated discriminator.
            g_optimizer.zero_grad()
            fake_seq = model(x, y)
            d_fake = discriminator(fake_seq)

            mse = mse_loss(fake_seq, y)
            adversarial_loss = bce_loss(d_fake, real_labels)
            # Forecast error plus the adversarial term weighted by 0.1.
            g_loss = mse + 0.1 * adversarial_loss

            g_loss.backward()
            g_optimizer.step()

            # Record batch losses
            training_history['g_loss'].append(g_loss.item())
            training_history['d_loss'].append(d_loss.item())

            total_g_loss += g_loss.item()
            total_d_loss += d_loss.item()

        # Calculate and store average losses for the epoch
        avg_g_loss = total_g_loss / len(dataloader)
        avg_d_loss = total_d_loss / len(dataloader)
        training_history['g_loss_avg'].append(avg_g_loss)
        training_history['d_loss_avg'].append(avg_d_loss)

        # Progress is printed every tenth epoch only.
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], G Loss: {avg_g_loss:.4f}, D Loss: {avg_d_loss:.4f}')

    # Attach training history to the model (both networks share one dict)
    model.training_history = training_history
    discriminator.training_history = training_history

    return model, discriminator


class AST(nn.Module):
    """Encoder-decoder Transformer that maps a source window to a target window.

    Inputs are batch-first ``(batch, length, input_dim)`` tensors; they are
    transposed internally because the ``nn.Transformer*Layer`` modules are
    created with their default sequence-first layout.
    """

    def __init__(self, input_dim=1, output_dim=1, d_model=256, num_heads=8, num_layers=3, dropout=0.1):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.d_model = d_model

        # Shared embedding for source and target values.
        self.input_embedding = nn.Linear(input_dim, d_model)

        # Fixed sinusoidal table for up to 5000 positions; a buffer so it
        # moves with the module across devices without being trained.
        self.register_buffer('pos_encoding', self._create_positional_encoding(5000, d_model))

        self.encoder_layers = nn.ModuleList([
            nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dropout=dropout)
            for _ in range(num_layers)
        ])

        self.decoder_layers = nn.ModuleList([
            nn.TransformerDecoderLayer(d_model=d_model, nhead=num_heads, dropout=dropout)
            for _ in range(num_layers)
        ])

        # Output projection
        self.output_linear = nn.Linear(d_model, output_dim)
        self.training_history = None

    def _create_positional_encoding(self, max_seq_len, d_model):
        """Return a ``(max_seq_len, d_model)`` sinusoidal position table."""
        pos_encoding = torch.zeros(max_seq_len, d_model)
        position = torch.arange(0, max_seq_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pos_encoding[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns;
        # trim the cosine block so the assignment shapes match.
        pos_encoding[:, 1::2] = torch.cos(angles)[:, :d_model // 2]
        return pos_encoding

    def forward(self, src, tgt):
        """Run the encoder over ``src`` and the causally masked decoder over ``tgt``.

        Args:
            src: Source tensor ``(batch, src_len, input_dim)``; a 2-D tensor
                gets a leading batch axis added.
            tgt: Decoder input ``(batch, tgt_len, input_dim)``; a 2-D tensor
                gets a leading batch axis added.

        Returns:
            Tensor of shape ``(batch, tgt_len, output_dim)``.
        """
        # Add batch dimension if not present
        if src.dim() == 2:
            src = src.unsqueeze(0)
        if tgt.dim() == 2:
            tgt = tgt.unsqueeze(0)

        # Source positions may all attend to each other; target positions may
        # only look at themselves and earlier positions.
        src_mask = None
        tgt_mask = self._generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)

        # Embedding and positional encoding
        src = self.input_embedding(src)
        tgt = self.input_embedding(tgt)

        src = src + self.pos_encoding[:src.size(1)]
        tgt = tgt + self.pos_encoding[:tgt.size(1)]

        # Transpose for transformer input [seq_len, batch, features]
        src = src.transpose(0, 1)
        tgt = tgt.transpose(0, 1)

        for enc_layer in self.encoder_layers:
            src = enc_layer(src, src_mask=src_mask)

        for dec_layer in self.decoder_layers:
            tgt = dec_layer(tgt, src, tgt_mask=tgt_mask)

        # Transpose back [batch, seq_len, features]
        output = tgt.transpose(0, 1)

        return self.output_linear(output)

    def _generate_square_subsequent_mask(self, sz):
        """Return a ``(sz, sz)`` float mask: 0 on/below the diagonal, -inf above."""
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

class Discriminator(nn.Module):
    """MLP that scores a flattened sequence with a probability in (0, 1).

    The first linear layer expects ``seq_len`` features after flattening.
    """

    def __init__(self, seq_len, hidden_dim=256):
        super().__init__()
        half_dim = hidden_dim // 2
        stack = [
            nn.Linear(seq_len, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, half_dim),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(half_dim, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Flatten everything after the batch axis and return ``(batch, 1)`` scores."""
        flat = x.view(x.size(0), -1)
        return self.model(flat)

class TransformerEncoderLayer(nn.Module):
    """Post-norm encoder block built on ``SparseMultiHeadAttention``.

    A single dropout module is shared by both residual branches.
    """

    def __init__(self, d_model, num_heads, dropout=0.1):
        super().__init__()
        self.self_attn = SparseMultiHeadAttention(d_model, num_heads, dropout)
        # Position-wise feed-forward network with a 4x wider hidden layer.
        hidden = d_model * 4
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, d_model)
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        """Apply self-attention and feed-forward, each with residual + norm."""
        attended = self.self_attn(x, x, x, mask)
        x = self.norm1(x + self.dropout(attended))
        transformed = self.feed_forward(x)
        return self.norm2(x + self.dropout(transformed))

class TransformerDecoderLayer(nn.Module):
    """Post-norm decoder block built on ``SparseMultiHeadAttention``.

    Runs masked self-attention, cross-attention over the encoder output and
    a feed-forward network; one dropout module is shared by all three
    residual branches.
    """

    def __init__(self, d_model, num_heads, dropout=0.1):
        super().__init__()
        self.self_attn = SparseMultiHeadAttention(d_model, num_heads, dropout)
        self.cross_attn = SparseMultiHeadAttention(d_model, num_heads, dropout)
        # Position-wise feed-forward network with a 4x wider hidden layer.
        hidden = d_model * 4
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, d_model)
        )
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_output, src_mask=None, tgt_mask=None):
        """Decode ``x`` against ``enc_output``.

        ``tgt_mask`` is used for the self-attention step and ``src_mask`` for
        the cross-attention step.
        """
        self_attended = self.self_attn(x, x, x, tgt_mask)
        x = self.norm1(x + self.dropout(self_attended))

        cross_attended = self.cross_attn(x, enc_output, enc_output, src_mask)
        x = self.norm2(x + self.dropout(cross_attended))

        transformed = self.feed_forward(x)
        return self.norm3(x + self.dropout(transformed))

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import os

# Cities covered by the analysis. Each value is a (latitude, longitude)
# pair, unpacked in that order wherever CITIES is iterated.
CITIES = dict(
    Delhi=(28.6139, 77.2090),
    Mumbai=(19.0760, 72.8777),
    Chennai=(13.0827, 80.2707),
    Kolkata=(22.5726, 88.3639),
    Bangalore=(12.9716, 77.5946),
)

class MultiCityAnalysis:
    """Fetch, train, evaluate and plot GHI forecasts for every city in CITIES.

    Plots are written to ``<save_dir>/plots`` and metrics to
    ``<save_dir>/model_metrics.csv``.
    """

    def __init__(self, save_dir='results/'):
        self.save_dir = save_dir
        os.makedirs(save_dir, exist_ok=True)
        os.makedirs(f"{save_dir}/plots", exist_ok=True)
        self.metrics = {}

    def fetch_all_city_data(self):
        """Fetch and process data for all cities.

        Returns:
            Dict mapping city name to ``{'raw': list of floats,
            'normalized': NumPy array of z-scores}``.

        NOTE(review): only non-numeric entries are dropped. Verify whether the
        API marks missing hours with a numeric sentinel; such values would
        pass this filter and distort the mean and standard deviation.
        """
        city_data = {}
        for city, (lat, lon) in CITIES.items():
            print(f"Fetching data for {city}...")
            data = fetch_power_data(lat, lon, "20230101", "20231231")
            series = data['properties']['parameter']['ALLSKY_SFC_SW_DWN']
            values = [float(x) for x in series.values() if isinstance(x, (int, float))]
            as_array = np.asarray(values)
            city_data[city] = {
                'raw': values,
                'normalized': (as_array - np.mean(as_array)) / np.std(as_array)
            }
        return city_data

    def train_models(self, city_data, seq_length=168, pred_length=24,
                    batch_size=32, epochs=100):
        """Train one generator/discriminator pair per city.

        Returns:
            Tuple ``(models, training_histories)``. ``models[city]`` holds the
            'generator' and 'discriminator'; ``training_histories[city]``
            holds the per-batch 'generator_loss' and 'discriminator_loss'.
        """
        models = {}
        training_histories = {}

        for city, data in city_data.items():
            print(f"\nTraining model for {city}...")
            model, discriminator = train_ast(
                data['normalized'],
                seq_length=seq_length,
                pred_length=pred_length,
                batch_size=batch_size,
                epochs=epochs
            )
            models[city] = {
                'generator': model,
                'discriminator': discriminator
            }

            # Store training metrics
            training_histories[city] = {
                'generator_loss': model.training_history['g_loss'],
                'discriminator_loss': model.training_history['d_loss']
            }

        return models, training_histories

    @staticmethod
    def _split_test_window(data, seq_length, pred_length):
        """Return (input, truth) taken from the tail of ``data``."""
        needed = seq_length + pred_length
        if len(data) < needed:
            raise ValueError(
                f"need at least {needed} points for evaluation, got {len(data)}"
            )
        window = data[-needed:]
        return window[:seq_length], window[seq_length:needed]

    @staticmethod
    def _forecast(model, input_seq, pred_length=24):
        """Roll the generator forward one step at a time.

        The generator's forward pass takes ``(src, tgt)``, so it cannot be
        called with the input window alone. The decoder is seeded with the
        last observed value and each new prediction is appended to its input.

        NOTE(review): training feeds the unshifted targets as decoder input;
        the seeding convention used here should be reviewed together with it.

        Returns:
            1-D NumPy array with ``pred_length`` predicted values.
        """
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
        src = torch.as_tensor(np.asarray(input_seq, dtype=np.float32))
        src = src.to(device).reshape(1, -1, 1)  # (batch, steps, features)

        was_training = model.training
        model.eval()  # disable dropout so forecasts are deterministic
        try:
            with torch.no_grad():
                tgt = src[:, -1:, :]
                for _ in range(pred_length):
                    step = model(src, tgt)[:, -1:, :]
                    tgt = torch.cat([tgt, step], dim=1)
        finally:
            model.train(was_training)

        # Drop the seed value; keep only the generated steps.
        return tgt[0, 1:, 0].cpu().numpy()

    def evaluate_models(self, models, city_data, seq_length=168, pred_length=24):
        """Evaluate model performance for each city.

        The last ``seq_length + pred_length`` normalized points of each city
        are split into model input and ground truth.

        Returns:
            Dict mapping city name to its 'MAE', 'RMSE' and 'MAPE' (percent).
        """
        metrics = {}

        for city, model_dict in models.items():
            print(f"\nEvaluating model for {city}...")
            input_seq, true_values = self._split_test_window(
                city_data[city]['normalized'], seq_length, pred_length)
            predictions = self._forecast(model_dict['generator'], input_seq, pred_length)

            metrics[city] = {
                'MAE': mean_absolute_error(true_values, predictions),
                'RMSE': np.sqrt(mean_squared_error(true_values, predictions)),
                'MAPE': mean_absolute_percentage_error(true_values, predictions) * 100
            }

        return metrics

    def plot_training_history(self, training_histories):
        """Plot the per-batch generator loss of every city on one figure."""
        plt.figure(figsize=(15, 10))

        for city, history in training_histories.items():
            plt.plot(history['generator_loss'], label=f'{city} - Generator')

        plt.title('Training History Across Cities')
        # The plotted series has one entry per training batch, not per epoch.
        plt.xlabel('Training batch')
        plt.ylabel('Generator Loss')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'{self.save_dir}/plots/training_history.png')
        plt.close()

    def plot_prediction_comparison(self, models, city_data, seq_length=168, pred_length=24):
        """Plot actual vs. predicted values of the final window for each city."""
        fig, axes = plt.subplots(3, 2, figsize=(20, 25))
        axes = axes.ravel()

        for idx, (city, model_dict) in enumerate(models.items()):
            # The grid has six panels; any further cities are not drawn.
            if idx >= len(axes):
                break

            input_seq, true_values = self._split_test_window(
                city_data[city]['normalized'], seq_length, pred_length)
            predictions = self._forecast(model_dict['generator'], input_seq, pred_length)

            hours = np.arange(pred_length)
            axes[idx].plot(hours, true_values, 'b-', label='Actual')
            axes[idx].plot(hours, predictions, 'r--', label='Predicted')
            axes[idx].set_title(f'{city} - {pred_length}h Forecast')
            axes[idx].set_xlabel('Hour')
            axes[idx].set_ylabel('Normalized GHI')
            axes[idx].legend()
            axes[idx].grid(True)

        plt.tight_layout()
        plt.savefig(f'{self.save_dir}/plots/prediction_comparison.png')
        plt.close()

    def plot_metrics_comparison(self, metrics):
        """Plot performance metrics comparison as a grouped bar chart."""
        metrics_df = pd.DataFrame(metrics).T

        # Draw into an explicit axes: DataFrame.plot would otherwise open its
        # own figure, leaving an empty, never-closed one behind.
        fig, ax = plt.subplots(figsize=(15, 8))
        metrics_df.plot(kind='bar', ax=ax)
        ax.set_title('Performance Metrics Across Cities')
        ax.set_xlabel('City')
        ax.set_ylabel('Value')
        ax.legend(title='Metric')
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(f'{self.save_dir}/plots/metrics_comparison.png')
        plt.close(fig)

    def save_results(self, metrics):
        """Save numerical results to CSV (one row per city)."""
        metrics_df = pd.DataFrame(metrics).T
        metrics_df.to_csv(f'{self.save_dir}/model_metrics.csv')

In [None]:
class GHIExploratoryAnalysis:
    """Exploratory plots and summary statistics for per-city GHI series.

    Every figure and CSV is written into ``save_dir``.

    NOTE(review): axis labels and printed summaries state kW/m²; confirm this
    matches the unit of the hourly series returned by the API.
    """

    def __init__(self, save_dir='results/eda/'):
        self.save_dir = save_dir
        os.makedirs(save_dir, exist_ok=True)

    def run_complete_eda(self, city_data):
        """Run complete exploratory data analysis.

        Args:
            city_data: Dict mapping city name to a dict with 'raw' and
                'normalized' sequences.

        Returns:
            Tuple ``(dfs, summary_stats)``: one hourly-indexed DataFrame per
            city and the ``describe()`` output of its 'GHI' column.
        """
        print("\nPerforming Exploratory Data Analysis...")

        # Validate data
        for city, data in city_data.items():
            print(f"\nValidating data for {city}:")
            print(f"Number of raw data points: {len(data['raw'])}")
            print(f"Number of normalized data points: {len(data['normalized'])}")

            # Check for missing or invalid values
            raw_missing = np.sum(np.isnan(data['raw']))
            norm_missing = np.sum(np.isnan(data['normalized']))
            if raw_missing > 0 or norm_missing > 0:
                print(f"Warning: Found {raw_missing} missing raw values and {norm_missing} missing normalized values")

        # Create DataFrames for each city with datetime index
        dfs = {}
        for city, data in city_data.items():
            n_points = len(data['raw'])
            # The index is synthesised: consecutive hours from 2023-01-01,
            # one per raw data point.
            dates = pd.date_range(start='2023-01-01', periods=n_points, freq='h')

            dfs[city] = pd.DataFrame({
                'GHI': data['raw'][:n_points],
                'GHI_normalized': data['normalized'][:n_points]
            }, index=dates[:n_points])

        # Generate all EDA plots
        self.plot_daily_patterns(dfs)
        self.plot_monthly_patterns(dfs)
        self.plot_seasonal_patterns(dfs)
        self.plot_city_distributions(dfs)
        self.plot_correlation_heatmap(dfs)
        self.plot_data_statistics(dfs)

        # Print summary statistics
        print("\nSummary Statistics for Each City:")
        summary_stats = {}
        for city, df in dfs.items():
            print(f"\n{city}:")
            stats = df['GHI'].describe()
            print(stats)
            summary_stats[city] = stats

            # Additional metrics
            print(f"\nDaily max average: {df.groupby(df.index.date)['GHI'].max().mean():.2f}")
            print(f"Daily duration of non-zero GHI: {(df['GHI'] > 0).groupby(df.index.date).sum().mean():.1f} hours")

        return dfs, summary_stats

    def plot_daily_patterns(self, dfs):
        """Plot the mean GHI per hour of day, one line per city."""
        plt.figure(figsize=(15, 8))

        for city, df in dfs.items():
            daily_pattern = df.groupby(df.index.hour)['GHI'].mean()
            plt.plot(daily_pattern.index, daily_pattern.values, label=city, marker='o')

        plt.title('Average Daily GHI Patterns Across Cities')
        plt.xlabel('Hour of Day')
        plt.ylabel('Average GHI (kW/m²)')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'{self.save_dir}/daily_patterns.png')
        plt.close()

    def plot_monthly_patterns(self, dfs):
        """Plot monthly mean GHI with standard-deviation error bars per city."""
        fig, axes = plt.subplots(2, 3, figsize=(20, 12))
        axes = axes.ravel()

        for idx, (city, df) in enumerate(dfs.items()):
            # The grid has six panels; any further cities are not drawn.
            if idx >= len(axes):
                break

            monthly_avg = df.groupby(df.index.month)['GHI'].agg(['mean', 'std'])
            monthly_avg['mean'].plot(yerr=monthly_avg['std'],
                                   capsize=5, marker='o', ax=axes[idx])
            axes[idx].set_title(f'{city} - Monthly GHI Pattern')
            axes[idx].set_xlabel('Month')
            axes[idx].set_ylabel('GHI (kW/m²)')
            axes[idx].grid(True)

        plt.tight_layout()
        plt.savefig(f'{self.save_dir}/monthly_patterns.png')
        plt.close()

    def plot_seasonal_patterns(self, dfs):
        """Plot seasonal patterns using boxplots.

        Side effect: a 'Season' column is added to every DataFrame in ``dfs``.
        Months 1-3 are labelled Winter, 4-6 Spring, 7-9 Summer, 10-12 Fall.

        NOTE(review): each (city, season) pair contributes a single mean
        value, so every box is drawn from one point — confirm this is the
        intended figure.
        """
        plt.figure(figsize=(15, 8))

        seasons = ['Winter', 'Spring', 'Summer', 'Fall']
        seasonal_data = []
        for city, df in dfs.items():
            df['Season'] = pd.cut(df.index.month,
                                bins=[0,3,6,9,12],
                                labels=seasons)
            for season in seasons:
                seasonal_data.append({
                    'City': city,
                    'Season': season,
                    'GHI': df[df['Season'] == season]['GHI'].mean()
                })

        seasonal_df = pd.DataFrame(seasonal_data)
        sns.boxplot(x='Season', y='GHI', hue='City', data=seasonal_df)

        plt.title('Seasonal GHI Patterns Across Cities')
        plt.ylabel('Average GHI (kW/m²)')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(f'{self.save_dir}/seasonal_patterns.png')
        plt.close()

    def plot_city_distributions(self, dfs):
        """Plot a GHI histogram with KDE and a statistics box for each city."""
        fig, axes = plt.subplots(2, 3, figsize=(20, 12))
        axes = axes.ravel()

        for idx, (city, df) in enumerate(dfs.items()):
            if idx >= len(axes):
                break

            sns.histplot(data=df, x='GHI', kde=True, ax=axes[idx])
            axes[idx].set_title(f'{city} - GHI Distribution')
            axes[idx].set_xlabel('GHI (kW/m²)')

            # Add distribution statistics
            stats_text = f'Mean: {df["GHI"].mean():.2f}\n'
            stats_text += f'Std: {df["GHI"].std():.2f}\n'
            stats_text += f'Skew: {df["GHI"].skew():.2f}'
            axes[idx].text(0.95, 0.95, stats_text,
                         transform=axes[idx].transAxes,
                         verticalalignment='top',
                         horizontalalignment='right',
                         bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

        plt.tight_layout()
        plt.savefig(f'{self.save_dir}/ghi_distributions.png')
        plt.close()

    def plot_correlation_heatmap(self, dfs):
        """Plot the correlation matrix of the cities' GHI series."""
        # One column per city, aligned on the shared hourly index.
        combined_df = pd.DataFrame({city: df['GHI'] for city, df in dfs.items()})

        plt.figure(figsize=(10, 8))
        sns.heatmap(combined_df.corr(), annot=True, cmap='coolwarm', center=0)
        plt.title('Inter-city GHI Correlation Heatmap')
        plt.tight_layout()
        plt.savefig(f'{self.save_dir}/correlation_heatmap.png')
        plt.close()

    def plot_data_statistics(self, dfs):
        """Plot key statistics for each city and save them to CSV.

        Returns:
            DataFrame with one row per city and the columns Mean, Std, Max,
            Min, Skewness and Kurtosis.
        """
        stats = {}
        for city, df in dfs.items():
            ghi = df['GHI']
            stats[city] = {
                'Mean': ghi.mean(),
                'Std': ghi.std(),
                'Max': ghi.max(),
                'Min': ghi.min(),
                'Skewness': ghi.skew(),
                'Kurtosis': ghi.kurtosis()
            }

        stats_df = pd.DataFrame(stats).T

        # One bar chart per statistic.
        fig, axes = plt.subplots(2, 3, figsize=(20, 12))
        axes = axes.ravel()

        for idx, column in enumerate(stats_df.columns):
            if idx >= len(axes):
                break

            stats_df[column].plot(kind='bar', ax=axes[idx])
            axes[idx].set_title(f'{column} by City')
            axes[idx].set_ylabel(column)
            # Rotate this panel's own labels; plt.xticks would only affect
            # whichever axes happens to be current.
            axes[idx].tick_params(axis='x', labelrotation=45)

        plt.tight_layout()
        plt.savefig(f'{self.save_dir}/data_statistics.png')
        plt.close()

        # Save statistics to CSV
        stats_df.to_csv(f'{self.save_dir}/data_statistics.csv')

        return stats_df

def extend_main_with_eda():
    """Fetch city data, run the exploratory analysis on it and train models.

    The steps are: fetch data through ``MultiCityAnalysis``, run
    ``GHIExploratoryAnalysis.run_complete_eda``, print per-city daily
    aggregates, then train the models on the fetched data.

    Returns:
        tuple: ``(city_data, dfs, summary_stats, models, training_histories)``.

    Raises:
        Exception: Any error raised by a step is reported on stdout and then
            re-raised unchanged.
    """
    try:
        # Initialize analyzers
        analyzer = MultiCityAnalysis()
        eda = GHIExploratoryAnalysis()

        # Fetch data
        city_data = analyzer.fetch_all_city_data()

        # Perform EDA
        print("\nPerforming Exploratory Data Analysis...")
        dfs, summary_stats = eda.run_complete_eda(city_data)

        # Additional analysis
        print("\nCross-city Analysis:")
        for city in city_data:
            # Group the series by calendar day once and reuse the grouping.
            daily_ghi = dfs[city]['GHI'].groupby(dfs[city].index.date)
            daily_max = daily_ghi.max()
            daily_sum = daily_ghi.sum()
            print(f"\n{city}:")
            # The preprocessing in this file caps GHI at the solar constant in
            # W/m², so the values are W/m² rather than kW/m²; summing hourly
            # W/m² readings over a day yields Wh/m².
            print(f"Average daily maximum GHI: {daily_max.mean():.2f} W/m²")
            print(f"Average daily total GHI: {daily_sum.mean():.2f} Wh/m²/day")

        # Train models with the processed data
        models, training_histories = analyzer.train_models(city_data)

        return city_data, dfs, summary_stats, models, training_histories

    except Exception as e:
        print(f"\nError during analysis: {str(e)}")
        raise

In [None]:
class GHIDataPreprocessor:
    """Clean and standardize GHI series, one city at a time.

    :meth:`preprocess_data` runs the whole pipeline: fill missing values,
    clamp physically impossible readings, smooth abrupt sample-to-sample
    jumps and finally z-score the cleaned series.
    """

    # Upper bound applied to GHI readings: the solar constant (~1361 W/m²).
    MAX_GHI = 1361

    def __init__(self, start_date='2023-01-01', freq='h'):
        """Configure the synthetic time index attached to every series.

        Args:
            start_date: Timestamp assigned to the first sample of each series.
            freq: pandas frequency alias for the spacing between samples.
        """
        self.start_date = start_date
        self.freq = freq

    def preprocess_data(self, city_data):
        """Comprehensive data preprocessing pipeline.

        Args:
            city_data: Mapping of city name to a dict whose ``'raw'`` entry
                holds that city's GHI samples.

        Returns:
            dict: City name mapped to ``{'raw': cleaned values,
            'normalized': z-scored cleaned values, 'df': cleaned DataFrame}``.
        """
        print("\nPerforming data preprocessing...")
        processed_data = {}

        for city, data in city_data.items():
            print(f"\nProcessing {city} data:")

            # Only the sample values are read from the input, so a regular
            # datetime index is synthesized from the configured start/freq.
            dates = pd.date_range(
                start=self.start_date, periods=len(data['raw']), freq=self.freq
            )
            df = pd.DataFrame({'GHI': data['raw']}, index=dates)

            # 1. Handle missing values
            df = self.handle_missing_values(df)

            # 2. Remove physically impossible values
            df = self.remove_impossible_values(df)

            # 3. Apply temporal consistency check
            df = self.ensure_temporal_consistency(df)

            # 4. Normalize data
            normalized_data = self.normalize_data(df['GHI'].values)

            # Store processed data
            processed_data[city] = {
                'raw': df['GHI'].values,
                'normalized': normalized_data,
                'df': df,
            }

            # Print preprocessing summary
            self.print_preprocessing_summary(df, city)

        return processed_data

    def handle_missing_values(self, df):
        """Fill NaNs in ``df['GHI']`` in three escalating passes.

        The frame is modified in place and also returned. Nothing is printed
        or changed when there are no missing values.

        Args:
            df: DataFrame with a ``'GHI'`` column and a DatetimeIndex.

        Returns:
            pandas.DataFrame: The same frame with as many NaNs filled as the
            three passes allow.
        """
        initial_missing = df['GHI'].isna().sum()
        if initial_missing == 0:
            return df

        print(f"Found {initial_missing} missing values")

        # Pass 1: linear interpolation. ``limit=3`` fills at most three
        # consecutive NaNs, so only the first three samples of a longer gap
        # are filled here.
        df['GHI'] = df['GHI'].interpolate(method='linear', limit=3)

        # Pass 2: cubic spline for what is left, at most six consecutive NaNs.
        if df['GHI'].isna().sum() > 0:
            df['GHI'] = df['GHI'].interpolate(method='spline', order=3, limit=6)

        # Pass 3: anything still missing takes the median of its hour of day.
        if df['GHI'].isna().sum() > 0:
            df = self.pattern_based_filling(df)

        final_missing = df['GHI'].isna().sum()
        print(f"Filled {initial_missing - final_missing} missing values")

        return df

    def remove_impossible_values(self, df):
        """Clamp ``df['GHI']`` to the range ``[0, MAX_GHI]``.

        The frame is modified in place and also returned; NaNs are left
        untouched.

        NOTE(review): if the data source encodes missing samples with a
        negative sentinel, those samples are clamped to 0 here rather than
        interpolated. Confirm against the fetcher.
        """
        # GHI cannot be negative
        negative = df['GHI'] < 0
        impossible_low = negative.sum()
        if impossible_low > 0:
            print(f"Found {impossible_low} negative GHI values")
            df.loc[negative, 'GHI'] = 0

        # GHI cannot exceed solar constant (~1361 W/m²)
        too_high = df['GHI'] > self.MAX_GHI
        impossible_high = too_high.sum()
        if impossible_high > 0:
            print(f"Found {impossible_high} impossibly high GHI values")
            df.loc[too_high, 'GHI'] = self.MAX_GHI

        return df

    def ensure_temporal_consistency(self, df):
        """Smooth samples that jump by more than 50% of the previous sample.

        Flagged samples are replaced by a centred three-sample rolling mean of
        the unsmoothed series. The frame is modified in place and returned.

        Note that when the previous sample is 0 any increase counts as a
        sudden change, because the threshold ``0.5 * previous`` is 0.
        """
        step = df['GHI'].diff()

        # Comparisons against NaN are False, so the first sample (which has no
        # predecessor) is never flagged.
        mask = step.abs() > 0.5 * df['GHI'].shift()
        sudden_changes = mask.sum()
        if sudden_changes > 0:
            print(f"Found {sudden_changes} sudden changes in GHI")

            # min_periods=1 lets the window shrink at the ends of the series
            # and next to NaNs; with the default a flagged last sample would
            # be overwritten with NaN.
            smoothed = df['GHI'].rolling(window=3, center=True, min_periods=1).mean()
            df.loc[mask, 'GHI'] = smoothed[mask]

        return df

    def pattern_based_filling(self, df):
        """Fill NaNs in ``df['GHI']`` with the median of the same hour of day.

        The median is taken over all non-missing samples that share the hour.
        Hours with no valid sample at all stay NaN. The frame is modified in
        place and returned.
        """
        # One grouped pass instead of rescanning the frame per missing sample.
        # The result is the same because inserting a set's median into the set
        # leaves the median unchanged.
        hourly_median = df['GHI'].groupby(df.index.hour).transform('median')
        df['GHI'] = df['GHI'].fillna(hourly_median)
        return df

    def normalize_data(self, data):
        """Normalize GHI data using standardization (zero mean, unit std).

        Args:
            data: Array-like of GHI values.

        Returns:
            The z-scored data. A numerically constant input has no spread to
            scale by, so it is only centred (giving zeros) instead of being
            divided by zero.
        """
        mean = np.mean(data)
        std = np.std(data)
        if std <= np.finfo(float).eps * max(1.0, abs(mean)):
            return data - mean
        return (data - mean) / std

    def print_preprocessing_summary(self, df, city):
        """Print size, remaining NaN count, range, mean and std of the series."""
        ghi = df['GHI']
        print(f"\nPreprocessing summary for {city}:")
        print(f"Total data points: {len(df)}")
        print(f"Final missing values: {ghi.isna().sum()}")
        print(f"Data range: {ghi.min():.2f} to {ghi.max():.2f}")
        print(f"Mean GHI: {ghi.mean():.2f}")
        print(f"Std GHI: {ghi.std():.2f}")

def extend_main():
    """Run the pipeline end to end and return the performance summary.

    Steps: fetch, preprocess, exploratory analysis, per-city daily
    aggregates, training, evaluation, advanced plots, performance summary.

    NOTE(review): a later cell in this file defines ``extend_main`` again;
    when the cells run in order that later definition replaces this one.

    Returns:
        The summary produced by
        ``AdvancedVisualizations.create_performance_summary``.
    """
    # Get results from main analysis
    analyzer = MultiCityAnalysis()

    # Fetch raw data
    raw_city_data = analyzer.fetch_all_city_data()

    # Preprocess data
    preprocessor = GHIDataPreprocessor()
    city_data = preprocessor.preprocess_data(raw_city_data)

    # Perform EDA
    eda_analyzer = GHIExploratoryAnalysis()
    print("\nPerforming Exploratory Data Analysis...")
    dfs, summary_stats = eda_analyzer.run_complete_eda(city_data)

    # Additional analysis
    print("\nCross-city Analysis:")
    for city in city_data:
        # Group the series by calendar day once and reuse the grouping.
        daily_ghi = dfs[city]['GHI'].groupby(dfs[city].index.date)
        daily_max = daily_ghi.max()
        daily_sum = daily_ghi.sum()
        print(f"\n{city}:")
        # The preprocessor caps GHI at the solar constant in W/m², so the
        # values are W/m² rather than kW/m²; summing hourly W/m² readings
        # over a day yields Wh/m².
        print(f"Average daily maximum GHI: {daily_max.mean():.2f} W/m²")
        print(f"Average daily total GHI: {daily_sum.mean():.2f} Wh/m²/day")

    # Train models
    # NOTE(review): create_performance_summary reads hist['generator_loss'];
    # confirm that train_models returns histories with that key.
    models, training_histories = analyzer.train_models(city_data)
    metrics = analyzer.evaluate_models(models, city_data)

    # Generate advanced visualizations
    advanced_viz = AdvancedVisualizations()
    advanced_viz.plot_attention_patterns(models, city_data)
    advanced_viz.plot_temporal_stability(models, city_data)
    advanced_viz.plot_error_distribution(models, city_data)

    # Create performance summary
    summary = advanced_viz.create_performance_summary(metrics, training_histories)

    return summary

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import pandas as pd

class AdvancedVisualizations:
    """Diagnostic plots and a performance summary for the per-city models.

    Every method takes ``models`` as a mapping of city name to a dict whose
    ``'generator'`` entry is the trained forecaster, and ``city_data`` as a
    mapping of city name to a dict whose ``'normalized'`` entry is the series
    fed to the model. All outputs are written to ``save_dir``.
    """

    def __init__(self, save_dir='results/advanced_plots/', seq_length=168):
        """Create the output directory if needed.

        Args:
            save_dir: Directory that receives the plots and CSV files.
            seq_length: Number of trailing samples fed to a model as one
                input window.
        """
        self.save_dir = save_dir
        self.seq_length = seq_length
        os.makedirs(save_dir, exist_ok=True)

    def _output_path(self, filename):
        """Return the path of ``filename`` inside the output directory."""
        return os.path.join(self.save_dir, filename)

    @staticmethod
    def _stability_score(predictions, true_values):
        """Return ``1 - std(prediction window stds) / std(truth window stds)``.

        Yields ``nan`` when there are no windows, or when the spread of the
        truth windows does not vary at all, because the ratio is undefined in
        both cases.
        """
        if len(predictions) == 0 or len(true_values) == 0:
            return float('nan')
        prediction_std = np.std([np.std(p) for p in predictions])
        truth_std = np.std([np.std(t) for t in true_values])
        if truth_std == 0:
            return float('nan')
        return 1 - prediction_std / truth_std

    def plot_attention_patterns(self, models, city_data):
        """Visualize attention patterns for each city.

        Draws one heatmap per city on a 3x2 grid (so at most six cities) and
        saves it as ``attention_patterns.png``. A generator that does not
        provide ``get_attention_weights`` is reported on stdout and gets a
        placeholder panel instead of aborting the whole figure.
        """
        fig, axes = plt.subplots(3, 2, figsize=(20, 25))

        # zip() stops at the shorter input, which caps the plot at six cities.
        for ax, (city, model_dict) in zip(axes.ravel(), models.items()):
            model = model_dict['generator']
            ax.set_title(f'{city} - Attention Patterns')

            get_weights = getattr(model, 'get_attention_weights', None)
            if get_weights is None:
                print(f"Skipping attention plot for {city}: "
                      f"{type(model).__name__} has no get_attention_weights()")
                ax.text(0.5, 0.5, 'Attention weights unavailable',
                        ha='center', va='center', transform=ax.transAxes)
                continue

            window = city_data[city]['normalized'][-self.seq_length:]
            with torch.no_grad():
                attention_weights = get_weights(
                    torch.FloatTensor(window).unsqueeze(0)
                )

            # Only the first entry of the returned weights is drawn.
            # NOTE(review): presumably that is the single batch element;
            # confirm the layout returned by get_attention_weights.
            sns.heatmap(attention_weights[0].cpu().numpy(), ax=ax,
                        cmap='viridis', xticklabels=24, yticklabels=24)
            ax.set_xlabel('Input Time Steps')
            ax.set_ylabel('Output Time Steps')

        plt.tight_layout()
        plt.savefig(self._output_path('attention_patterns.png'))
        plt.close(fig)

    def plot_temporal_stability(self, models, city_data):
        """Analyze temporal stability across different horizons.

        For each horizon the model is run on windows of ``seq_length`` samples
        taken every 24 samples; the spread of the predictions is compared with
        the spread of the true values (see :meth:`_stability_score`). The
        scores are plotted per city and saved as ``temporal_stability.png``.
        """
        horizons = [1, 6, 12, 24]
        stability_scores = {city: [] for city in models}

        for city, model_dict in models.items():
            model = model_dict['generator']
            data = city_data[city]['normalized']

            for horizon in horizons:
                predictions = []
                true_values = []

                # Generate predictions for different windows
                for start in range(0, len(data) - self.seq_length - horizon, 24):
                    end = start + self.seq_length
                    with torch.no_grad():
                        pred = model(torch.FloatTensor(data[start:end]).unsqueeze(0))
                        predictions.append(pred[:, :horizon].cpu().numpy())
                    true_values.append(data[end:end + horizon])

                stability_scores[city].append(
                    self._stability_score(predictions, true_values)
                )

        # Plot stability scores
        fig = plt.figure(figsize=(12, 8))
        for city, scores in stability_scores.items():
            plt.plot(horizons, scores, 'o-', label=city)

        plt.title('Temporal Stability Analysis')
        plt.xlabel('Forecast Horizon (hours)')
        plt.ylabel('Stability Score')
        plt.legend()
        plt.grid(True)
        plt.savefig(self._output_path('temporal_stability.png'))
        plt.close(fig)

    def plot_error_distribution(self, models, city_data):
        """Analyze error distributions.

        For each city (at most six) the model predicts from the samples
        ``[-720:-24]`` and the result is compared with the last 24 samples.
        The errors are shown as a histogram with a fitted normal curve and
        saved as ``error_distribution.png``.
        """
        fig, axes = plt.subplots(3, 2, figsize=(20, 25))

        for ax, (city, model_dict) in zip(axes.ravel(), models.items()):
            model = model_dict['generator']
            data = city_data[city]['normalized']

            # NOTE(review): this window is 696 samples long while the other
            # methods feed seq_length samples; confirm the generator accepts
            # this length.
            input_seq = data[-720:-24]  # Last month minus last day
            # Flatten both sides so that a trailing singleton axis on either
            # one cannot broadcast the subtraction into a matrix.
            true_values = np.ravel(data[-24:])  # Last day

            with torch.no_grad():
                predictions = model(torch.FloatTensor(input_seq).unsqueeze(0))
            predictions = np.ravel(predictions.cpu().numpy())

            # Calculate errors
            errors = predictions - true_values

            # Fix the bin edges up front so the normal curve below can be
            # scaled with the exact bin width of the histogram.
            bin_edges = np.histogram_bin_edges(errors, bins='auto')
            sns.histplot(errors, bins=bin_edges, kde=True, ax=ax)
            ax.set_title(f'{city} - Error Distribution')
            ax.set_xlabel('Prediction Error')
            ax.set_ylabel('Frequency')

            # Add normal distribution fit. Expected count per bin is
            # pdf * N * bin width.
            mu, std = stats.norm.fit(errors)
            x = np.linspace(errors.min(), errors.max(), 100)
            bin_width = bin_edges[1] - bin_edges[0]
            ax.plot(x, stats.norm.pdf(x, mu, std) * len(errors) * bin_width,
                    'r-', lw=2, label=f'Normal: μ={mu:.2f}, σ={std:.2f}')
            ax.legend()

        plt.tight_layout()
        plt.savefig(self._output_path('error_distribution.png'))
        plt.close(fig)

    def create_performance_summary(self, metrics, training_histories):
        """Create comprehensive performance summary.

        Args:
            metrics: Mapping of city name to a dict of metric name -> value.
            training_histories: Mapping of city name to a dict whose
                ``'generator_loss'`` entry is a sequence of loss values.

        Returns:
            pandas.DataFrame: One row per city with the metrics plus
            ``Convergence_Speed`` (number of recorded generator losses) and
            ``Final_Training_Loss`` (the last one). Also written to
            ``performance_summary.csv`` and plotted to
            ``performance_summary.png``.
        """
        summary = pd.DataFrame(metrics).T

        # Key the extra columns by city so they line up with the metric rows
        # even if the two mappings list the cities in a different order.
        summary['Convergence_Speed'] = pd.Series({
            city: len(hist['generator_loss'])
            for city, hist in training_histories.items()
        })
        summary['Final_Training_Loss'] = pd.Series({
            city: hist['generator_loss'][-1]
            for city, hist in training_histories.items()
        })

        # DataFrame.plot creates its own figure, so no plt.figure() call is
        # needed. layout=(-1, 2) lets pandas pick the row count, which keeps
        # this working for any number of columns.
        summary.plot(kind='bar', subplots=True, layout=(-1, 2), figsize=(15, 20))
        plt.tight_layout()
        plt.savefig(self._output_path('performance_summary.png'))
        plt.close()

        # Save summary to CSV
        summary.to_csv(self._output_path('performance_summary.csv'))

        return summary

def extend_main():
    """Run the pipeline end to end and return the performance summary.

    Steps: fetch, preprocess, exploratory analysis, per-city daily
    aggregates, training, evaluation, advanced plots, performance summary.
    The training histories handed to the summary are rebuilt from each
    generator's ``training_history`` attribute.

    Returns:
        The summary produced by
        ``AdvancedVisualizations.create_performance_summary``.
    """
    # Get results from main analysis
    analyzer = MultiCityAnalysis()

    # Fetch raw data
    raw_city_data = analyzer.fetch_all_city_data()

    # Preprocess data
    preprocessor = GHIDataPreprocessor()
    city_data = preprocessor.preprocess_data(raw_city_data)

    # Perform EDA
    eda_analyzer = GHIExploratoryAnalysis()
    print("\nPerforming Exploratory Data Analysis...")
    dfs, summary_stats = eda_analyzer.run_complete_eda(city_data)

    # Additional analysis
    print("\nCross-city Analysis:")
    for city in city_data:
        # Group the series by calendar day once and reuse the grouping.
        daily_ghi = dfs[city]['GHI'].groupby(dfs[city].index.date)
        daily_max = daily_ghi.max()
        daily_sum = daily_ghi.sum()
        print(f"\n{city}:")
        # The preprocessor caps GHI at the solar constant in W/m², so the
        # values are W/m² rather than kW/m²; summing hourly W/m² readings
        # over a day yields Wh/m².
        print(f"Average daily maximum GHI: {daily_max.mean():.2f} W/m²")
        print(f"Average daily total GHI: {daily_sum.mean():.2f} Wh/m²/day")

    # Train models. The histories returned by train_models are not used;
    # they are rebuilt below with the keys the summary expects.
    models, _ = analyzer.train_models(city_data)
    training_histories = {}
    for city, model_dict in models.items():
        history = model_dict['generator'].training_history
        training_histories[city] = {
            'generator_loss': history['g_loss_avg'],
            'discriminator_loss': history['d_loss_avg'],
        }
    metrics = analyzer.evaluate_models(models, city_data)

    # Initialize advanced visualizations
    advanced_viz = AdvancedVisualizations()

    # Generate advanced visualizations
    advanced_viz.plot_attention_patterns(models, city_data)
    advanced_viz.plot_temporal_stability(models, city_data)
    advanced_viz.plot_error_distribution(models, city_data)

    # Create performance summary
    summary = advanced_viz.create_performance_summary(metrics, training_histories)

    return summary

In [None]:
import os
import json
import torch
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

class ThesisExperimentManager:
    """Persist the artefacts of one experiment run under ``base_dir``.

    Data, model weights, plots, metrics and attention matrices each get
    their own sub-directory. ``experiment_metadata`` accumulates a summary
    that :meth:`save_experiment_metadata` writes out as JSON.
    """

    def __init__(self, base_dir='thesis_results/'):
        """Initialize experiment manager with directory structure"""
        self.base_dir = Path(base_dir)
        self.dirs = self._create_directory_structure()
        self.experiment_metadata = {
            'timestamp': datetime.now().strftime('%Y%m%d_%H%M%S'),
            'cities': {},
            'model_params': {},
            'training_params': {},
            'results': {},
        }

    def _create_directory_structure(self):
        """Create the result sub-directories and return them keyed by role."""
        dirs = {
            'data': self.base_dir / 'data',
            'models': self.base_dir / 'models',
            'plots': self.base_dir / 'plots',
            'metrics': self.base_dir / 'metrics',
            'attention': self.base_dir / 'attention_analysis',
        }

        for dir_path in dirs.values():
            dir_path.mkdir(parents=True, exist_ok=True)

        return dirs

    @staticmethod
    def _json_default(value):
        """Convert NumPy/torch values that ``json`` cannot encode itself.

        Raises:
            TypeError: For any other unsupported type, as ``json`` would.
        """
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, torch.Tensor):
            return value.detach().cpu().tolist()
        raise TypeError(
            f'Object of type {type(value).__name__} is not JSON serializable'
        )

    def _write_json(self, path, payload):
        """Write ``payload`` to ``path`` as indented JSON."""
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=4, default=self._json_default)

    def _city_dir(self, kind, city):
        """Return (creating it if needed) the lower-cased city folder."""
        city_path = self.dirs[kind] / city.lower()
        city_path.mkdir(parents=True, exist_ok=True)
        return city_path

    def save_raw_data(self, city_data):
        """Save each city's ``'raw'`` and ``'normalized'`` arrays plus stats.

        Args:
            city_data: Mapping of city name to a dict with ``'raw'`` and
                ``'normalized'`` array entries.

        Per city this writes ``raw_data.npy``, ``normalized_data.npy`` and
        ``metadata.json`` (count, mean, std, max, min of the raw values); the
        same statistics are recorded in ``experiment_metadata['cities']``.
        """
        for city, data in city_data.items():
            city_path = self._city_dir('data', city)

            # Save raw and normalized data
            np.save(city_path / 'raw_data.npy', data['raw'])
            np.save(city_path / 'normalized_data.npy', data['normalized'])

            # Save metadata
            raw = data['raw']
            metadata = {
                'data_points': len(raw),
                'mean': float(np.mean(raw)),
                'std': float(np.std(raw)),
                'max': float(np.max(raw)),
                'min': float(np.min(raw)),
            }
            self.experiment_metadata['cities'][city] = metadata
            self._write_json(city_path / 'metadata.json', metadata)

    def save_model(self, city, model_dict):
        """Save model weights and training history for one city.

        Args:
            city: City name; its lower-cased form names the output folder.
            model_dict: Dict with ``'generator'`` and ``'discriminator'``
                modules. The generator's ``training_history`` supplies the
                ``'g_loss_avg'`` and ``'d_loss_avg'`` series.
        """
        city_path = self._city_dir('models', city)

        # Save model state
        torch.save(model_dict['generator'].state_dict(),
                   city_path / 'generator.pth')
        torch.save(model_dict['discriminator'].state_dict(),
                   city_path / 'discriminator.pth')

        # Save training history
        recorded = model_dict['generator'].training_history
        history = {
            'generator_loss': recorded['g_loss_avg'],
            'discriminator_loss': recorded['d_loss_avg'],
        }
        self._write_json(city_path / 'training_history.json', history)

    def save_metrics(self, metrics):
        """Save evaluation metrics as CSV, in the metadata and as a bar chart.

        Args:
            metrics: Mapping of city name to a dict of metric name -> value.
        """
        metrics_df = pd.DataFrame(metrics).T
        metrics_df.to_csv(self.dirs['metrics'] / 'model_metrics.csv')
        self.experiment_metadata['results']['metrics'] = metrics

        # DataFrame.plot opens its own figure, so the size is passed to it
        # directly rather than to a separate plt.figure() that would stay
        # empty and never be closed.
        ax = metrics_df.plot(kind='bar', figsize=(12, 8))
        ax.set_title('Model Performance Metrics Across Cities')
        ax.figure.tight_layout()
        ax.figure.savefig(self.dirs['plots'] / 'metrics_comparison.png')
        plt.close(ax.figure)

    def save_attention_weights(self, city, attention_weights):
        """Save an attention weight tensor as ``attention_weights.npy``.

        Args:
            city: City name; its lower-cased form names the output folder.
            attention_weights: torch tensor of weights.
        """
        attention_path = self._city_dir('attention', city)

        # detach() so tensors that still track gradients can be converted.
        np.save(attention_path / 'attention_weights.npy',
                attention_weights.detach().cpu().numpy())

    def save_experiment_metadata(self):
        """Write ``experiment_metadata`` to ``experiment_metadata.json``."""
        self._write_json(self.base_dir / 'experiment_metadata.json',
                         self.experiment_metadata)

def run_thesis_experiment():
    """Main experiment execution function.

    Runs data collection and preprocessing, exploratory analysis, model
    training, evaluation and visualization, saving the artefacts through a
    ``ThesisExperimentManager``.

    Attention weights are exported and plotted only for generators that
    provide ``get_attention_weights``; other cities are reported on stdout
    and skipped so that a missing optional method does not abort the run.

    Returns:
        dict: The experiment metadata collected by the manager.

    Raises:
        Exception: Any error raised by a step is reported on stdout and then
            re-raised unchanged.
    """
    # Initialize experiment manager
    experiment = ThesisExperimentManager()

    # Initialize analyzers
    analyzer = MultiCityAnalysis()
    preprocessor = GHIDataPreprocessor()
    eda_analyzer = GHIExploratoryAnalysis()

    # Model parameters
    model_params = {
        'seq_length': 168,
        'pred_length': 24,
        'batch_size': 32,
        'epochs': 100,
        'd_model': 256,
        'num_heads': 8,
        'num_layers': 3,
        'dropout': 0.1
    }
    experiment.experiment_metadata['model_params'] = model_params

    try:
        # 1. Data Collection and Preprocessing
        print("\nFetching and preprocessing data...")
        raw_city_data = analyzer.fetch_all_city_data()
        city_data = preprocessor.preprocess_data(raw_city_data)
        experiment.save_raw_data(city_data)

        # 2. Exploratory Data Analysis (its return values are not used here)
        print("\nPerforming EDA...")
        eda_analyzer.run_complete_eda(city_data)

        # 3. Model Training
        print("\nTraining models...")
        models, _ = analyzer.train_models(
            city_data,
            seq_length=model_params['seq_length'],
            pred_length=model_params['pred_length'],
            batch_size=model_params['batch_size'],
            epochs=model_params['epochs']
        )

        # Save models and training histories. Cities whose generator can
        # report attention weights are collected for the attention plot.
        attention_models = {}
        for city, model_dict in models.items():
            experiment.save_model(city, model_dict)

            generator = model_dict['generator']
            if not hasattr(generator, 'get_attention_weights'):
                print(f"Skipping attention export for {city}: "
                      f"{type(generator).__name__} has no get_attention_weights()")
                continue
            attention_models[city] = model_dict

            # Extract and save attention weights for the most recent window,
            # using the same length the model was trained with.
            window = city_data[city]['normalized'][-model_params['seq_length']:]
            with torch.no_grad():
                attention_weights = generator.get_attention_weights(
                    torch.FloatTensor(window).unsqueeze(0)
                )
            experiment.save_attention_weights(city, attention_weights)

        # 4. Model Evaluation
        print("\nEvaluating models...")
        metrics = analyzer.evaluate_models(models, city_data)
        experiment.save_metrics(metrics)

        # 5. Generate Advanced Visualizations
        print("\nGenerating visualizations...")
        advanced_viz = AdvancedVisualizations()
        if attention_models:
            advanced_viz.plot_attention_patterns(attention_models, city_data)
        advanced_viz.plot_temporal_stability(models, city_data)
        advanced_viz.plot_error_distribution(models, city_data)

        # 6. Save Final Experiment Metadata
        experiment.save_experiment_metadata()

        print("\nExperiment completed successfully!")
        return experiment.experiment_metadata

    except Exception as e:
        print(f"\nError during experiment: {str(e)}")
        raise

if __name__ == "__main__":
    # Run the whole experiment, then echo the metadata it returned as
    # indented JSON under a heading.
    metadata = run_thesis_experiment()
    print("\nExperiment Summary:")
    rendered_metadata = json.dumps(metadata, indent=2)
    print(rendered_metadata)


Fetching and preprocessing data...
Fetching data for Delhi...
Fetching data for Bangalore...

Performing data preprocessing...

Processing Delhi data:
Found 2319 sudden changes in GHI

Preprocessing summary for Delhi:
Total data points: 8760
Final missing values: 0
Data range: 0.00 to 1021.65
Mean GHI: 196.77
Std GHI: 265.71

Processing Bangalore data:
Found 2263 sudden changes in GHI

Preprocessing summary for Bangalore:
Total data points: 8760
Final missing values: 0
Data range: 0.00 to 1078.40
Mean GHI: 231.03
Std GHI: 300.55

Performing EDA...

Performing Exploratory Data Analysis...

Validating data for Delhi:
Number of raw data points: 8760
Number of normalized data points: 8760

Validating data for Bangalore:
Number of raw data points: 8760
Number of normalized data points: 8760

Summary Statistics for Each City:

Delhi:
count    8760.000000
mean      196.772393
std       265.707036
min         0.000000
25%         0.000000
50%        34.071667
75%       362.407500
max      102

AttributeError: 'AST' object has no attribute 'get_attention_weights'