# Imports and Setup

In [1]:
# Section 1: Imports and Setup
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
%matplotlib inline
import time
import statsmodels.api as sm
import os
# import os
import json
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
import keras.backend as K
from keras.layers import Layer
from keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau, ModelCheckpoint
import tensorflow as tf
from colorama import init, Fore, Back, Style
from keras.regularizers import l1, l2
from tensorflow.keras.optimizers import RMSprop, Adam, SGD
from tensorflow.keras.layers import ( Dense, Dropout, GRU,Bidirectional, Input)
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import Sequence
from keras.callbacks import ModelCheckpoint

In [2]:
# Available foreground colors for logging.
# The main training loop picks FORES[ds % len(FORES)] so that consecutive
# dataset banners are printed in different colorama foreground colors.
# NOTE(review): nothing in the visible cells emits Style.RESET_ALL after the
# banner, so the colour may carry over into later output — confirm.
FORES = [Fore.RED, Fore.GREEN, Fore.YELLOW, Fore.BLUE, Fore.MAGENTA, Fore.CYAN, Fore.WHITE]

In [3]:
# Hyperparameter configurations
# Each key maps to the list of candidate values tried by simple_grid_search,
# which trains one model per combination (2 values x 5 keys = 32 fits here).
# The keys match the keyword arguments of create_optimized_model.
# Single-combination variant kept for quick smoke runs:
# model_configs = {'units_1': [128],'units_2': [32],'dropout_rate': [0.1],'learning_rate': [0.001],'regularization': [0.001]}
model_configs = {'units_1': [64, 128],'units_2': [32, 64],'dropout_rate': [0.1, 0.2],'learning_rate': [0.001, 0.01],'regularization': [0.001, 0.01]}


In [4]:
# Custom Dataset Class
class MyCustomDataset(Sequence):
    """Keras ``Sequence`` serving (data, labels) mini-batches from indexable arrays.

    Args:
        data: Samples; must support ``len()`` and indexing with an integer array.
        labels: Targets indexed the same way as ``data``.
        batch_size: Number of samples per batch (the final batch may be smaller).
        **kwargs: Forwarded unchanged to the ``Sequence`` constructor.
    """

    def __init__(self, data, labels, batch_size=32, **kwargs):
        # Let the base class consume any extra keyword arguments.
        super().__init__(**kwargs)
        self.data, self.labels = data, labels
        self.batch_size = batch_size
        # Sample order used to cut batches; reshuffled in place by on_epoch_end().
        self.indexes = np.arange(len(self.data))

    def __len__(self):
        """Return the number of batches per epoch, counting a partial last batch."""
        n_batches = np.ceil(len(self.data) / self.batch_size)
        return int(n_batches)

    def __getitem__(self, index):
        """Return the ``index``-th batch as a ``(batch_data, batch_labels)`` tuple."""
        start = index * self.batch_size
        stop = start + self.batch_size
        rows = self.indexes[start:stop]
        return self.data[rows], self.labels[rows]

    def on_epoch_end(self):
        """Shuffle the sample order in place (runs unconditionally)."""
        np.random.shuffle(self.indexes)

In [5]:
# Global parameters
# Name of the per-dataset sub-directory in which model artefacts are stored.
MODEL_NAME = 'BiGru_Optimized'
# Epoch budget; within the visible cells it is only referenced by
# commented-out final-training calls — presumably used further down, TODO confirm.
N_EPOCHS = 300
# Passed as steps_per_epoch to model.fit in simple_grid_search.
STEPS_PER_EPOCH = 320
# Window length: number of past steps per sample (TimeseriesGenerator `length`
# and the first dimension of the model's Input shape).
N_INPUT = 24
# Number of input features per time step (second dimension of the Input shape).
N_FEATURES = 1
# Batch size handed to both TimeseriesGenerator instances.
BATCH_SIZE = 1

In [6]:
# Load data paths
# glob returns matches in arbitrary, OS-dependent order; sort them so that the
# dataset index `ds` used by the training loop refers to the same file on every
# run. Note that on Windows the returned paths use backslash separators.
dh = sorted(glob.glob('../DataSets/*/*/H*_INDEX_*.csv'))
print(f"Found {len(dh)} data files")

# Create initial data list
# li[i] holds the raw frame read from dh[i]; the training loop indexes both
# lists in parallel, so they must stay aligned.
li = []
for file_name in dh:
    df = pd.read_csv(file_name)
    li.append(df)
dh

Found 17 data files


['../DataSets\\GUJARAT\\ANKLESHWAR\\H_Ankl_1_2_19-3_12_22-_41_ (copy)_INDEX_Mean.csv',
 '../DataSets\\HARYANA\\AMBALA\\H_Amba_1_1_19-2_12_22-_29_ (copy)_INDEX_Median.csv',
 '../DataSets\\HARYANA\\CHARKHI_DADRI\\H_Char_1_3_20-2_12_22-_58_ (copy)_INDEX_Mean.csv',
 '../DataSets\\HARYANA\\DHARUHERA\\H_Dhar_1_1_19-2_12_22-_43_ (copy)_INDEX_Mean.csv',
 '../DataSets\\HARYANA\\FATEHABAD\\H_Fate_1_1_19-2_12_22-_32_ (copy)_INDEX_Median.csv',
 '../DataSets\\HARYANA\\HISAR\\H_Hisa_1_1_19-2_12_22-_10_ (copy)_INDEX_Mean.csv',
 '../DataSets\\HARYANA\\JIND\\H_Jind_1_1_19-2_12_22-_56_ (copy)_INDEX_Median.csv',
 '../DataSets\\HARYANA\\KURUKSHETRA\\H_Kuru_1_1_19-2_12_12-_48_ (copy)_INDEX_Mean.csv',
 '../DataSets\\HARYANA\\SONIPAT\\H_Soni_1_1_19-2_12_22-_21_ (copy)_INDEX_Mean.csv',
 '../DataSets\\HARYANA\\YAMUNA_NAGAR\\H_Yamu_1_1_19-2_12_22-_30_ (copy)_INDEX_Mean.csv',
 '../DataSets\\MADHYA_PRADESH\\SINGRAULI\\H_Sing_1_12_17-2_12_22-_48_ (copy)_INDEX_Mean.csv',
 '../DataSets\\PUNJAB\\LUDHIANA\\H_Ludh_1_5_

# Data Processing Functions

In [7]:
# Section 2: Data Processing Functions

def process_dataset(df, data_path):
    """Decompose one dataset's PM2.5 series and save the components to disk.

    Args:
        df: Raw frame with a ``DATE`` column and a ``PM2.5`` column. The frame
            is not modified; a converted copy is used internally.
        data_path: Path the frame was read from, laid out as
            ``.../<state>/<city>/<file>.csv``. Both forward-slash and
            backslash separators are accepted (glob returns the latter on
            Windows).

    Returns:
        tuple: ``(df1, dataso)`` where ``df1`` is indexed by ``Date`` with the
        columns ``Seasonality``, ``Trend``, ``Noise`` and ``Original``, and
        ``dataso`` is the output directory ``Res_Data/<state>/<city>``.

    Raises:
        ValueError: If ``data_path`` has fewer than three usable components.
    """
    # Splitting on '/' alone breaks on Windows-style paths, so normalise the
    # separators first and ignore empty / relative ('.', '..') components.
    parts = [p for p in data_path.replace('\\', '/').split('/')
             if p not in ('', '.', '..')]
    if len(parts) < 3:
        raise ValueError(
            f"Expected a path like '<root>/<state>/<city>/<file>.csv', got {data_path!r}"
        )
    state, city, file_name = parts[-3:]

    # Create output directory path
    dataso = 'Res_Data/' + state + '/' + city

    # Convert date and set index on a copy so the caller's frame is untouched.
    indexed = df.assign(DATE=pd.to_datetime(df['DATE'])).set_index('DATE')

    # Decompose time series
    decomposition = sm.tsa.seasonal_decompose(indexed['PM2.5'], model='additive')

    # Create DataFrame with components; 'Original' is the last column of the
    # input (after DATE became the index), as in the original implementation.
    df1 = pd.DataFrame(
        {
            'Seasonality': decomposition.seasonal.values,
            'Trend': decomposition.trend.values,
            'Noise': decomposition.resid.values,
            'Original': indexed.iloc[:, -1].values,
        },
        index=indexed.index.rename('Date'),
    )

    # Drop the first and last 12 rows — presumably the NaN edges that the
    # centred moving-average trend leaves for a 24-step period; TODO confirm.
    df1 = df1.iloc[12:-12, :].copy()

    # Create directory if it doesn't exist
    os.makedirs(dataso, exist_ok=True)

    # Save decomposed data under the source file's own name
    df1.to_csv(dataso + '/' + file_name)

    return df1, dataso


In [8]:
def prepare_component_data(df, component_name, dataso):
    """Build scaled train/test windows for one decomposition component.

    The component column is cached as ``<dataso>/<component_name>.csv``; if
    that file already exists it is loaded instead of being re-derived from
    ``df``.

    The series is split chronologically (70 % train / 30 % test, no shuffle)
    *before* scaling, and the ``MinMaxScaler`` is fitted on the training part
    only, so no information about the test range leaks into training. Scaled
    test values can therefore fall outside ``[0, 1]``.
    NOTE(review): models saved by earlier runs were trained on data scaled
    with a scaler fitted on the full series; retrain them after this change.

    Args:
        df: Frame whose columns are, in order, Seasonality, Trend, Noise,
            Original (only read when no cached CSV exists).
        component_name: 'Seasonality', 'Trend' or 'Noise'; any other value
            selects the fourth column ('Original').
        dataso: Directory holding the cached component CSV files.

    Returns:
        tuple: ``(train_generator, test_generator, scaler, train, test)`` where
        ``train``/``test`` are the scaled 2-D arrays and each generator yields
        windows of ``N_INPUT`` steps with the next value as target.
    """
    # Define the path for the component CSV file
    component_file_path = os.path.join(dataso, f'{component_name}.csv')

    # Check if the component CSV file already exists
    if os.path.exists(component_file_path):
        print(f"Loading existing data for {component_name} from {component_file_path}")
        component_df = pd.read_csv(component_file_path, index_col=0)
    else:
        print(f"Creating new data for {component_name}...")
        # Positional lookup; unknown names fall back to column 3 ('Original').
        column_position = {'Seasonality': 0, 'Trend': 1, 'Noise': 2}.get(component_name, 3)
        component_df = df.iloc[:, column_position:column_position + 1]

        # Save component data
        component_df.to_csv(component_file_path)

    # Split first (chronologically) ...
    train_raw, test_raw = train_test_split(component_df.values, test_size=0.30, shuffle=False)

    # ... then fit the scaler on the training part only and reuse it for test.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train = scaler.fit_transform(train_raw)
    test = scaler.transform(test_raw)

    # Create generators: the series is both the input and its own target.
    train_generator = TimeseriesGenerator(
        train, train,
        length=N_INPUT,
        batch_size=BATCH_SIZE
    )
    test_generator = TimeseriesGenerator(
        test, test,
        length=N_INPUT,
        batch_size=BATCH_SIZE
    )

    return train_generator, test_generator, scaler, train, test

In [9]:
# Custom Attention Layer
class attention(Layer):
    """Attention pooling that collapses axis 1 of its input into a weighted sum.

    A score per position along axis 1 is computed as ``tanh(x . W + b)``,
    normalised with softmax, and used to weight ``x`` before summing over
    axis 1.
    NOTE(review): assumes a 3-D input of shape (batch, timesteps, features);
    the bias is sized from ``input_shape[1]`` — confirm against the caller.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection weight and the per-position bias."""
        n_steps, feature_dim = input_shape[1], input_shape[-1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, 1),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(n_steps, 1),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        # Alignment scores squashed by tanh, with the trailing size-1 axis dropped.
        scores = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Softmax-normalised weights, re-expanded so they broadcast against x.
        weights = K.expand_dims(K.softmax(scores), axis=-1)
        # Context vector: weighted sum over axis 1.
        return K.sum(x * weights, axis=1)


In [10]:
# def simple_grid_search(train_generator, test_generator, configs):
#     """Perform manual grid search for hyperparameters"""
#     best_val_loss = float('inf')
#     best_params = None
#     best_model = None
    
#     # Try different combinations
#     for units_1 in configs['units_1']:
#         for units_2 in configs['units_2']:
#             for dropout in configs['dropout_rate']:
#                 for lr in configs['learning_rate']:
#                     for reg in configs['regularization']:
#                         print(f"\nTrying parameters: units1={units_1}, units2={units_2}, " f"dropout={dropout}, lr={lr}, reg={reg}")
                        
#                         # Create and train model
#                         model = create_optimized_model(units_1=units_1,units_2=units_2,dropout_rate=dropout,learning_rate=lr,regularization=reg)
                        
#                         # Early stopping
#                         es = EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=0)
                        
#                         # Train for fewer epochs during search
#                         history = model.fit(train_generator,validation_data=test_generator,epochs=50, steps_per_epoch=STEPS_PER_EPOCH,shuffle=False,callbacks=[es],verbose=0)
                        
#                         val_loss = min(history.history['val_loss'])
                        
#                         if val_loss < best_val_loss:
#                             best_val_loss = val_loss
#                             best_params = {'units_1': units_1,'units_2': units_2,'dropout_rate': dropout,'learning_rate': lr,'regularization': reg}
#                             best_model = model
                            
#                         print(f"Val Loss: {val_loss:.4f}")
                        
#     return best_params, best_model, best_val_loss






# def simple_grid_search(train_generator, test_generator, configs, model_dir, component):
#     """Perform manual grid search for hyperparameters"""
#     best_val_loss = float('inf')
#     best_params = None
#     best_model = None
    
#     # Check if a model already exists
#     model_path = f'{model_dir}/best_model_{component}.keras'
#     if os.path.exists(model_path):
#         print(f"Loading existing model for {component} from {model_path}")
#         best_model = tf.keras.models.load_model(model_path)
#         best_val_loss = float('inf')  # Set to inf to ensure it trains
#         return best_params, best_model, best_val_loss
    
#     # Try different combinations
#     for units_1 in configs['units_1']:
#         for units_2 in configs['units_2']:
#             for dropout in configs['dropout_rate']:
#                 for lr in configs['learning_rate']:
#                     for reg in configs['regularization']:
#                         print(f"\nTrying parameters: units1={units_1}, units2={units_2}, " f"dropout={dropout}, lr={lr}, reg={reg}")
                        
#                         # Create and train model
#                         model = create_optimized_model(units_1=units_1,units_2=units_2,dropout_rate=dropout,learning_rate=lr,regularization=reg)
                        
#                         # Early stopping
#                         es = EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=0)
                        
#                         # Train for fewer epochs during search
#                         history = model.fit(train_generator,validation_data=test_generator,epochs=50, steps_per_epoch=STEPS_PER_EPOCH,shuffle=False,callbacks=[es],verbose=0)
                        
#                         val_loss = min(history.history['val_loss'])
                        
#                         if val_loss < best_val_loss:
#                             best_val_loss = val_loss
#                             best_params = {'units_1': units_1,'units_2': units_2,'dropout_rate': dropout,'learning_rate': lr,'regularization': reg}
#                             best_model = model
                            
#                         print(f"Val Loss: {val_loss:.4f}")
                        
#     return best_params, best_model, best_val_loss





def simple_grid_search(train_generator, test_generator, configs, model_dir, component):
    """Exhaustive grid search over the hyperparameters listed in ``configs``.

    If ``<model_dir>/best_model_<component>.keras`` already exists, that model
    is loaded and returned without searching. Otherwise one model is trained
    per combination (at most 50 epochs, early stopping with patience 5) and
    the combination with the lowest validation loss wins.

    Early stopping restores the best-epoch weights, so the returned
    ``best_model`` corresponds to the reported ``best_val_loss`` rather than
    to the weights of the last (worse) epochs.

    NOTE(review): ``test_generator`` is used as validation data; if callers
    pass the held-out test set, hyperparameters are tuned on it.

    Args:
        train_generator: Training batches passed to ``model.fit``.
        test_generator: Batches passed as ``validation_data``.
        configs: Dict with the keys 'units_1', 'units_2', 'dropout_rate',
            'learning_rate' and 'regularization', each mapping to a list of
            candidate values.
        model_dir: Directory checked for a previously saved best model.
        component: Component name used in the saved-model file name.

    Returns:
        tuple: ``(best_params, best_model, best_val_loss)``. When a saved
        model is loaded, ``best_params`` is ``None`` and ``best_val_loss`` is
        ``inf``. If no combination yields a finite, comparable loss,
        ``best_params`` and ``best_model`` stay ``None``.
    """
    from itertools import product  # local import keeps the cell self-contained

    best_val_loss = float('inf')
    best_params = None
    best_model = None

    # Check if a model already exists
    model_path = f'{model_dir}/best_model_{component}.keras'
    if os.path.exists(model_path):
        print(f"Loading existing model for {component} from {model_path}")
        best_model = tf.keras.models.load_model(model_path)
        # Return None for best_params since we are not performing a search
        return None, best_model, best_val_loss

    # Same iteration order as five nested loops: the last key varies fastest.
    grid = product(
        configs['units_1'],
        configs['units_2'],
        configs['dropout_rate'],
        configs['learning_rate'],
        configs['regularization'],
    )

    for units_1, units_2, dropout, lr, reg in grid:
        print(f"\nTrying parameters: units1={units_1}, units2={units_2}, " f"dropout={dropout}, lr={lr}, reg={reg}")

        # Create and train model
        model = create_optimized_model(units_1=units_1, units_2=units_2, dropout_rate=dropout, learning_rate=lr, regularization=reg)

        # Early stopping; restore the best epoch so model and reported loss agree.
        es = EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=0,
                           restore_best_weights=True)

        # Train for fewer epochs during search
        history = model.fit(train_generator, validation_data=test_generator, epochs=50, steps_per_epoch=STEPS_PER_EPOCH, shuffle=False, callbacks=[es], verbose=0)

        val_loss = min(history.history['val_loss'])

        # Strict '<' keeps the first combination on ties.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_params = {'units_1': units_1, 'units_2': units_2, 'dropout_rate': dropout, 'learning_rate': lr, 'regularization': reg}
            best_model = model

        print(f"Val Loss: {val_loss:.4f}")

    print(f"Best parameters: {best_params}, Best validation loss: {best_val_loss:.4f}")  # Debugging line
    return best_params, best_model, best_val_loss

# Model Definition and Architecture

In [11]:
# Section 3: Model Definition and Architecture

def create_optimized_model(units_1=128, units_2=64, dropout_rate=0.2, 
                         learning_rate=0.001, regularization=0.001):
    """Build and compile the two-layer bidirectional GRU regressor.

    Architecture, in order: Input(N_INPUT, N_FEATURES) -> BiGRU(units_1,
    sequences) -> Dropout -> BiGRU(units_2, last state) -> Dropout ->
    Dense(32, relu) -> Dropout -> Dense(1). Both GRU kernels carry an L1
    penalty of strength ``regularization``.

    Args:
        units_1: Units of the first GRU (per direction).
        units_2: Units of the second GRU (per direction).
        dropout_rate: Rate shared by all three Dropout layers.
        learning_rate: Learning rate of the Adam optimizer.
        regularization: L1 factor applied to the GRU kernels.

    Returns:
        The compiled Sequential model (loss 'mse', metric 'mae').
    """
    stack = (
        # Input layer: windows of N_INPUT steps with N_FEATURES values each
        Input(shape=(N_INPUT, N_FEATURES)),
        # First recurrent block keeps the full sequence for the next GRU
        Bidirectional(GRU(units=units_1, return_sequences=True,
                          kernel_regularizer=l1(regularization))),
        Dropout(dropout_rate),
        # Second recurrent block emits only its final state
        Bidirectional(GRU(units=units_2, return_sequences=False,
                          kernel_regularizer=l1(regularization))),
        Dropout(dropout_rate),
        # Dense head producing a single regression output
        Dense(32, activation='relu'),
        Dropout(dropout_rate),
        Dense(1),
    )

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])
    return model

In [12]:
def create_callbacks(model_dir, component):
    """Return the training callbacks, all monitoring ``val_loss``.

    The list contains, in order: early stopping (patience 12), a checkpoint
    that keeps only the best model at
    ``<model_dir>/best_model_<component>.keras``, and a learning-rate
    reduction (factor 0.2, patience 5, floor 0.0001).
    """
    stop_when_stalled = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=12)

    keep_best = ModelCheckpoint(
        filepath=f'{model_dir}/best_model_{component}.keras',
        monitor='val_loss',
        mode='min',
        save_best_only=True,
        verbose=1,
    )

    shrink_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001, verbose=1)

    return [stop_when_stalled, keep_best, shrink_lr]


In [13]:
def process_predictions(prediction, pre_train, test, train, n_input, scaler):
    """Map scaled model outputs back to the original value range.

    For each split the predictions are placed next to the matching actual
    values (the split minus its first ``n_input`` rows), the pair is passed
    through ``scaler.inverse_transform`` and only the prediction column
    (column 0) is kept.

    Args:
        prediction: 2-D array of scaled predictions for the test split.
        pre_train: 2-D array of scaled predictions for the train split.
        test: Scaled test array the test predictions were generated from.
        train: Scaled train array the train predictions were generated from.
        n_input: Window length; that many leading rows have no prediction.
        scaler: Object providing ``inverse_transform``.

    Returns:
        tuple: ``(rescaled_prediction_test, rescaled_prediction_train)`` as
        1-D arrays; an empty array is returned for a split whose predictions
        or actual values are empty.
    """
    def _restore_first_column(predicted, actual):
        # Actual values that line up with the predictions.
        aligned = actual[n_input:, :]
        if not (predicted.size > 0 and aligned.size > 0):
            return np.array([])
        side_by_side = np.concatenate((predicted, aligned), axis=1)
        if not side_by_side.size > 0:
            return np.array([])
        restored = scaler.inverse_transform(side_by_side)
        if not restored.size > 0:
            return np.array([])
        return restored[:, 0]

    # Test split first, then train, matching the order of the scaler calls.
    rescaled_prediction_test = _restore_first_column(prediction, test)
    rescaled_prediction_train = _restore_first_column(pre_train, train)

    return rescaled_prediction_test, rescaled_prediction_train


In [14]:
# def save_predictions(rescaled_prediction_test, rescaled_prediction_train, 
#                     component, model_dir):
#     """Save model predictions"""
#     # Create DataFrame for real and predicted values
#     if component == 'Original':
#         test_pr = pd.DataFrame(rescaled_prediction_test, columns=['Predicted']) if rescaled_prediction_test.size > 0 else pd.DataFrame()
#         train_pr = pd.DataFrame(rescaled_prediction_train, columns=['Predicted']) if rescaled_prediction_train.size > 0 else pd.DataFrame()
#         # Add real data column
#         test_pr['Real'] = test[:, 0]  # Assuming the first column is the real data
#         train_pr['Real'] = train[:, 0]  # Assuming the first column is the real data
#         test_pr['Date'] = pd.date_range(start='2021-01-01', periods=len(test_pr))  # Replace with actual date range
#         train_pr['Date'] = pd.date_range(start='2021-01-01', periods=len(train_pr))  # Replace with actual date range
#     else:
#         # Load existing predictions if available
#         test_pr = pd.read_csv(f'{model_dir}/test_pre.csv', index_col=0) \
#                  if os.path.exists(f'{model_dir}/test_pre.csv') \
#                  else pd.DataFrame()
#         train_pr = pd.read_csv(f'{model_dir}/train_pre.csv', index_col=0) \
#                   if os.path.exists(f'{model_dir}/train_pre.csv') \
#                   else pd.DataFrame()
        
#         # Add new predictions if they are not empty
#         if rescaled_prediction_test.size > 0:
#             test_pr[f'{component[0]}_Predicted'] = rescaled_prediction_test
#         if rescaled_prediction_train.size > 0:
#             train_pr[f'{component[0]}_Predicted'] = rescaled_prediction_train
        
#         # Add real data column
#         test_pr['Real'] = test[:, 0]  # Assuming the first column is the real data
#         train_pr['Real'] = train[:, 0]  # Assuming the first column is the real data
#         test_pr['Date'] = pd.date_range(start='2021-01-01', periods=len(test_pr))  # Replace with actual date range
#         train_pr['Date'] = pd.date_range(start='2021-01-01', periods=len(train_pr))  # Replace with actual date range
    
#     # Save to files
#     test_pr.to_csv(f'{model_dir}/{component}_test_predictions.csv', index=False)
#     train_pr.to_csv(f'{model_dir}/{component}_train_predictions.csv', index=False)
    
#     return test_pr, train_pr

In [15]:
# def save_predictions(rescaled_prediction_test, rescaled_prediction_train, 
#                     component, model_dir, real_test, real_train):
#     """Save model predictions"""
#     # Create DataFrame for real and predicted values
#     test_pr = pd.DataFrame({
#         'Date': pd.date_range(start='2021-01-01', periods=len(rescaled_prediction_test)),  # Replace with actual date range
#         'Real': real_test[:, 0] if real_test.size > 0 else np.array([]),  # Assuming the first column is the real data
#         'Predicted': rescaled_prediction_test.flatten() if rescaled_prediction_test.size > 0 else np.array([])  # Flatten in case of 2D array
#     }) if rescaled_prediction_test.size > 0 else pd.DataFrame()

#     train_pr = pd.DataFrame({
#         'Date': pd.date_range(start='2021-01-01', periods=len(rescaled_prediction_train)),  # Replace with actual date range
#         'Real': real_train[:, 0] if real_train.size > 0 else np.array([]),  # Assuming the first column is the real data
#         'Predicted': rescaled_prediction_train.flatten() if rescaled_prediction_train.size > 0 else np.array([])  # Flatten in case of 2D array
#     }) if rescaled_prediction_train.size > 0 else pd.DataFrame()

#     # Save to files
#     test_pr.to_csv(f'{model_dir}/{component}_test_predictions.csv', index=False)
#     train_pr.to_csv(f'{model_dir}/{component}_train_predictions.csv', index=False)
    
#     return test_pr, train_pr

In [16]:
def save_predictions(rescaled_prediction_test, rescaled_prediction_train, 
                     component, model_dir):
    """Write the rescaled predictions of one component to CSV files.

    For ``component == 'Original'`` each frame holds a single
    ``Original_Predicted`` column. For any other component the frame starts
    from ``<model_dir>/test_pre.csv`` / ``train_pre.csv`` when such a file
    exists (otherwise from an empty frame) and gains a
    ``<component>_Predicted`` column. Empty prediction arrays add nothing.
    NOTE(review): this function never writes ``test_pre.csv`` or
    ``train_pre.csv`` itself, so those files only exist if something else
    created them — confirm whether reading them is still intended.

    Args:
        rescaled_prediction_test: 1-D array of test predictions.
        rescaled_prediction_train: 1-D array of train predictions.
        component: Component name used in column and file names.
        model_dir: Directory the CSV files are written to.

    Returns:
        tuple: ``(test_pr, train_pr)``, the frames that were written to
        ``<component>_test_predictions.csv`` and
        ``<component>_train_predictions.csv`` (without the index).
    """
    def _frame_for(values, previous_csv):
        has_values = values.size > 0
        if component == 'Original':
            if has_values:
                return pd.DataFrame(values, columns=['Original_Predicted'])
            return pd.DataFrame()
        # Load existing predictions if available
        if os.path.exists(previous_csv):
            frame = pd.read_csv(previous_csv, index_col=0)
        else:
            frame = pd.DataFrame()
        # Add new predictions if they are not empty
        if has_values:
            frame[f'{component}_Predicted'] = values
        return frame

    test_pr = _frame_for(rescaled_prediction_test, f'{model_dir}/test_pre.csv')
    train_pr = _frame_for(rescaled_prediction_train, f'{model_dir}/train_pre.csv')

    # Save to files with component-specific names
    test_pr.to_csv(f'{model_dir}/{component}_test_predictions.csv', index=False)
    train_pr.to_csv(f'{model_dir}/{component}_train_predictions.csv', index=False)

    return test_pr, train_pr

In [17]:
# def save_predictions(rescaled_prediction_test, rescaled_prediction_train, 
#                     component, model_dir):
#     """Save model predictions"""
#     if component == 'Original':
#         test_pr = pd.DataFrame(rescaled_prediction_test, columns=['o_prid']) if rescaled_prediction_test.size > 0 else pd.DataFrame()
#         train_pr = pd.DataFrame(rescaled_prediction_train, columns=['o_prid']) if rescaled_prediction_train.size > 0 else pd.DataFrame()
#     else:
#         # Load existing predictions if available
#         test_pr = pd.read_csv(f'{model_dir}/test_pre.csv', index_col=0) \
#                  if os.path.exists(f'{model_dir}/test_pre.csv') \
#                  else pd.DataFrame()
#         train_pr = pd.read_csv(f'{model_dir}/train_pre.csv', index_col=0) \
#                   if os.path.exists(f'{model_dir}/train_pre.csv') \
#                   else pd.DataFrame()
        
#         # Add new predictions if they are not empty
#         if rescaled_prediction_test.size > 0:
#             test_pr[f'{component[0]}_prid'] = rescaled_prediction_test
#         if rescaled_prediction_train.size > 0:
#             train_pr[f'{component[0]}_prid'] = rescaled_prediction_train
#     # ... existing code ...
#     # Save to files
#     test_pr.to_csv(f'{model_dir}/{component}_test_predictions.csv', index=False)  # Updated to save with component name
#     train_pr.to_csv(f'{model_dir}/{component}_train_predictions.csv', index=False)  # Updated to save with component name
#     return test_pr, train_pr

In [18]:
# def save_predictions(rescaled_prediction_test, rescaled_prediction_train, 
#                     component, model_dir):
#     """Save model predictions"""
#     if component == 'Original':
#         test_pr = pd.DataFrame(rescaled_prediction_test, columns=['o_prid']) if rescaled_prediction_test.size > 0 else pd.DataFrame()
#         train_pr = pd.DataFrame(rescaled_prediction_train, columns=['o_prid']) if rescaled_prediction_train.size > 0 else pd.DataFrame()
#     else:
#         # Load existing predictions if available
#         test_pr = pd.read_csv(f'{model_dir}/test_pre.csv', index_col=0) \
#                  if os.path.exists(f'{model_dir}/test_pre.csv') \
#                  else pd.DataFrame()
#         train_pr = pd.read_csv(f'{model_dir}/train_pre.csv', index_col=0) \
#                   if os.path.exists(f'{model_dir}/train_pre.csv') \
#                   else pd.DataFrame()
        
#         # Add new predictions if they are not empty
#         if rescaled_prediction_test.size > 0:
#             test_pr[f'{component[0]}_prid'] = rescaled_prediction_test
#         if rescaled_prediction_train.size > 0:
#             train_pr[f'{component[0]}_prid'] = rescaled_prediction_train
    
#     # Save to files
#     test_pr.to_csv(f'{model_dir}/test_pre.csv')
#     train_pr.to_csv(f'{model_dir}/train_pre.csv')
    
#     return test_pr, train_pr

In [19]:
def save_model_history(history, model_dir, component):
    """Persist a training history as a CSV table and a two-panel PNG plot.

    Writes ``<model_dir>/<component>_history.csv`` with one column per
    recorded metric, and ``<model_dir>/<component>_training_history.png``
    showing training vs. validation curves for the loss (left) and the MAE
    (right). The figure is closed after saving.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch lists; must contain 'loss', 'val_loss', 'mae', 'val_mae'.
        model_dir: Output directory.
        component: Component name used in file names and plot titles.
    """
    curves = history.history
    pd.DataFrame(curves).to_csv(f'{model_dir}/{component}_history.csv')

    # (training key, validation key, training label, validation label, y-axis label)
    panels = (
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss'),
        ('mae', 'val_mae', 'Training MAE', 'Validation MAE', 'MAE'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    for ax, (train_key, val_key, train_label, val_label, y_label) in zip(axes, panels):
        ax.plot(curves[train_key], label=train_label)
        ax.plot(curves[val_key], label=val_label)
        ax.set_title(f'{component} Model {y_label}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()

    fig.tight_layout()
    fig.savefig(f'{model_dir}/{component}_training_history.png')
    plt.close(fig)

# Training and Evaluation Loop

In [20]:
# Section 4: Training and Evaluation Loop

# Define components to process
# These names match the columns produced by process_dataset and the branches
# in prepare_component_data; one model is handled per component and dataset.
components = ['Seasonality', 'Trend', 'Noise', 'Original']

# Main training loop
for ds in range(len(dh)):
    print(f"\nProcessing dataset {ds+1}/{len(dh)}")
    print(FORES[ds % len(FORES)] + f'DATASET----------------{dh[ds]}')
    
    try:
        # Process dataset
        df1, dataso = process_dataset(li[ds], dh[ds])
        
        # Create model directory
        model_dir = dataso + '/' + MODEL_NAME
        os.makedirs(model_dir, exist_ok=True)
        
        # Store results for this dataset
        dataset_results = {
            'dataset': dh[ds],
            'components': {}
        }
        
        # Process each component
        for component in components:
            print(f"\nProcessing {component} component...")
            
            try:
                # Prepare data
                train_generator, test_generator, scaler, train, test = prepare_component_data(df1, component, dataso)
                
                print(f"Data shapes - Train: {train.shape}, Test: {test.shape}")
                
                # Perform grid search
                print("Performing hyperparameter search...")
                # # best_params, best_model, best_val_loss = simple_grid_search(train_generator, test_generator, model_configs)
                # best_params, best_model, best_val_loss = simple_grid_search(train_generator, test_generator, model_configs, model_dir, component)
                
                # # Check if best_params is None before proceeding
                # if best_params is not None:
                #     print("\nBest parameters found:")
                #     for param, value in best_params.items():
                #         print(f"{param}: {value}")
                #     print(f"Best validation loss: {best_val_loss:.4f}")
                # else:
                #     print(f"No valid parameters found for {component}. Skipping training for this component.")
                #     continue  # Skip to the next component


                # # print("\nBest parameters found:")
                # # for param, value in best_params.items():
                # #     print(f"{param}: {value}")
                # # print(f"Best validation loss: {best_val_loss:.4f}")
                
                # # Create callbacks
                # callbacks = create_callbacks(model_dir, component)
                
                # # Train final model with best parameters
                # print("\nTraining final model with best parameters...")
                # final_model = create_optimized_model(**best_params)
                
                # # Train model
                # history = final_model.fit(train_generator,validation_data=test_generator,epochs=N_EPOCHS,steps_per_epoch=STEPS_PER_EPOCH,shuffle=False,callbacks=callbacks,verbose=1)
                



                # Inside the training loop, after calling simple_grid_search
                # best_params, best_model, best_val_loss = simple_grid_search(train_generator, test_generator, model_configs, model_dir, component)

                # # Check if best_params is None before proceeding
                # if best_params is not None:
                #     print("\nBest parameters found:")
                #     for param, value in best_params.items():
                #         print(f"{param}: {value}")
                #     print(f"Best validation loss: {best_val_loss:.4f}")
                # else:
                #     # Set default parameters if no valid parameters were found
                #     best_params = {'units_1': 64, 'units_2': 32, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'regularization': 0.001}
                #     print("No valid parameters found for Seasonality. Using default parameters.")
                    
                # # Create callbacks
                # callbacks = create_callbacks(model_dir, component)

                # # Train final model with best parameters
                # print("\nTraining final model with best parameters...")
                # final_model = create_optimized_model(**best_params)

                # # Train model
                # history = final_model.fit(train_generator, validation_data=test_generator, epochs=N_EPOCHS, steps_per_epoch=STEPS_PER_EPOCH, shuffle=False, callbacks=callbacks, verbose=1)




                # # Inside the training loop, after calling simple_grid_search
                # best_params, best_model, best_val_loss = simple_grid_search(train_generator, test_generator, model_configs, model_dir, component)

                # # Check if best_params is None before proceeding
                # if best_params is not None:
                #     print("\nBest parameters found:")
                #     for param, value in best_params.items():
                #         print(f"{param}: {value}")
                #     print(f"Best validation loss: {best_val_loss:.4f}")
                # else:
                #     # Set internal default parameters if no valid parameters were found
                #     internal_params = {
                #         'units_1': 64,
                #         'units_2': 32,
                #         'dropout_rate': 0.1,
                #         'learning_rate': 0.001,
                #         'regularization': 0.001
                #     }
                #     print("No valid parameters found for Seasonality. Using internal default parameters.")
                    
                #     # Use internal parameters for model creation
                #     best_params = internal_params

                # # Create callbacks
                # callbacks = create_callbacks(model_dir, component)

                # # Add ModelCheckpoint to save the best model
                # checkpoint_path = f"{model_dir}/best_model_{component}.keras"
                # model_checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, mode='min', verbose=1)
                # callbacks.append(model_checkpoint)

                # # Train final model with best parameters
                # print("\nTraining final model with best parameters...")
                # final_model = create_optimized_model(**best_params)

                # # Train model
                # history = final_model.fit(train_generator, validation_data=test_generator, epochs=N_EPOCHS, steps_per_epoch=STEPS_PER_EPOCH, shuffle=False, callbacks=callbacks, verbose=1)

                # # Save the best hyperparameters to a file
                # with open(f"{model_dir}/best_hyperparameters_{component}.txt", "w") as f:
                #     f.write("Best Hyperparameters:\n")
                #     for param, value in best_params.items():
                #         f.write(f"{param}: {value}\n")

                # print(f"Best hyperparameters saved to {model_dir}/best_hyperparameters_{component}.txt")





                # Resolve the hyperparameters for this component: reuse the values cached by a
                # previous run when the file exists, otherwise run the grid search.
                hyperparams_file = f"{model_dir}/best_hyperparameters_{component}.txt"

                # Reset for every component. The cached-file branch below has no validation
                # loss to report, and without this reset the results dict built later would
                # either hit a NameError or silently reuse the previous component's value.
                best_val_loss = None

                if os.path.exists(hyperparams_file):
                    print(f"Loading existing best hyperparameters for {component} from {hyperparams_file}")
                    best_params = {}
                    with open(hyperparams_file, "r") as f:
                        # First line is the "Best Hyperparameters:" header written when the
                        # file is saved; every following line is "<name>: <value>".
                        for line in f.readlines()[1:]:
                            line = line.strip()
                            if not line:
                                continue  # tolerate blank or trailing lines
                            param, value = line.split(": ", 1)
                            try:
                                best_params[param] = int(value)
                            except ValueError:
                                # Decimals and scientific notation (e.g. "1e-05") land here
                                best_params[param] = float(value)
                else:
                    # Perform hyperparameter search if no file exists
                    best_params, best_model, best_val_loss = simple_grid_search(train_generator, test_generator, model_configs, model_dir, component)

                    # Check if best_params is None before proceeding
                    if best_params is not None:
                        print("\nBest parameters found:")
                        for param, value in best_params.items():
                            print(f"{param}: {value}")
                        print(f"Best validation loss: {best_val_loss:.4f}")
                    else:
                        # The search produced no usable configuration: fall back to fixed defaults
                        internal_params = {
                            'units_1': 64,
                            'units_2': 32,
                            'dropout_rate': 0.1,
                            'learning_rate': 0.001,
                            'regularization': 0.001
                        }
                        # Report the component actually being processed (was hard-coded to "Seasonality")
                        print(f"No valid parameters found for {component}. Using internal default parameters.")

                        # Use internal parameters for model creation
                        best_params = internal_params

                # Assemble the callbacks for the final fit: whatever create_callbacks returns,
                # plus a checkpoint that keeps only the model with the lowest val_loss.
                # NOTE(review): the captured training log prints the "val_loss improved ...
                # saving model" message twice per epoch, which suggests create_callbacks may
                # already contain a ModelCheckpoint for the same path - confirm.
                callbacks = create_callbacks(model_dir, component)
                checkpoint_path = f"{model_dir}/best_model_{component}.keras"
                model_checkpoint = ModelCheckpoint(
                    checkpoint_path,
                    monitor='val_loss',
                    save_best_only=True,
                    mode='min',
                    verbose=1,
                )
                callbacks.append(model_checkpoint)

                # Build a fresh model from the selected hyperparameters and train it
                print("\nTraining final model with best parameters...")
                final_model = create_optimized_model(**best_params)
                history = final_model.fit(
                    train_generator,
                    validation_data=test_generator,
                    epochs=N_EPOCHS,
                    steps_per_epoch=STEPS_PER_EPOCH,
                    shuffle=False,
                    callbacks=callbacks,
                    verbose=1,
                )

                # Persist the hyperparameters (header line followed by "<name>: <value>" lines)
                # so a later run can load them instead of searching again
                hyperparam_lines = ["Best Hyperparameters:\n"]
                hyperparam_lines.extend(f"{param}: {value}\n" for param, value in best_params.items())
                with open(hyperparams_file, "w") as f:
                    f.writelines(hyperparam_lines)

                print(f"Best hyperparameters saved to {hyperparams_file}")



                # Generate predictions for both splits with the freshly trained model
                print("Generating predictions...")
                prediction = final_model.predict(test_generator)
                pre_train = final_model.predict(train_generator)

                # Post-process the raw model outputs (process_predictions receives the scaler
                # together with the train/test data and the window length N_INPUT)
                rescaled_prediction_test, rescaled_prediction_train = process_predictions(prediction, pre_train, test, train, N_INPUT, scaler)

                # Log the shapes before saving to make shape problems easy to spot
                print(f"Shapes - Test Predictions: {rescaled_prediction_test.shape}, Train Predictions: {rescaled_prediction_train.shape}")

                # Save predictions exactly once. The previous code called save_predictions
                # twice (first inside a try/except that only printed the error, then again
                # unguarded), so every output was written twice and a failure was reported
                # twice. Any error now propagates to the enclosing per-component handler.
                test_pr, train_pr = save_predictions(rescaled_prediction_test, rescaled_prediction_train, component, model_dir)

                # Save model history
                save_model_history(history, model_dir, component)

                # Store component results (final_* are the metrics of the last trained epoch)
                dataset_results['components'][component] = {
                    'best_params': best_params,
                    'best_val_loss': best_val_loss,
                    'final_val_loss': history.history['val_loss'][-1],
                    'final_val_mae': history.history['val_mae'][-1],
                }

                print(f"Completed {component} component")
                
            except Exception as e:
                print(f"Error processing {component} component: {str(e)}")
                continue
        
        # Save dataset results summary: one row per component that finished training,
        # with its hyperparameters flattened into their own columns.
        # The previous DataFrame.from_dict({(component, field): value}, orient='index')
        # call could not work: the first value (best_params) is a dict, so pandas treated
        # the input as a dict of dicts and then failed on the scalar metric values.
        component_results = dataset_results['components']
        summary_rows = []
        for comp_name, comp_result in component_results.items():
            row = {'component': comp_name}
            row.update(comp_result['best_params'])
            row.update({key: val for key, val in comp_result.items() if key != 'best_params'})
            summary_rows.append(row)
        results_df = pd.DataFrame(summary_rows)
        results_df.to_csv(f'{model_dir}/model_summary.csv', index=False)

        # Only components that completed have metrics; plotting against the full
        # `components` list raised a length mismatch whenever one of them had failed.
        trained_components = list(component_results.keys())
        val_losses = [data['final_val_loss'] for data in component_results.values()]
        val_maes = [data['final_val_mae'] for data in component_results.values()]

        # Create summary plots
        plt.figure(figsize=(15, 5))

        # Plot final validation losses
        plt.subplot(1, 2, 1)
        plt.bar(trained_components, val_losses)
        plt.title('Final Validation Loss by Component')
        plt.xticks(rotation=45)
        plt.ylabel('Loss')

        # Plot final MAE
        plt.subplot(1, 2, 2)
        plt.bar(trained_components, val_maes)
        plt.title('Final Validation MAE by Component')
        plt.xticks(rotation=45)
        plt.ylabel('MAE')

        plt.tight_layout()
        plt.savefig(f'{model_dir}/final_metrics_summary.png')
        plt.close()

        print(f"Completed dataset {ds+1}")
        
    except Exception as e:
        print(f"Error processing dataset {ds}: {str(e)}")
        continue

print("\nTraining completed for all datasets!")


Processing dataset 1/17
[31mDATASET----------------../DataSets\GUJARAT\ANKLESHWAR\H_Ankl_1_2_19-3_12_22-_41_ (copy)_INDEX_Mean.csv

Processing Seasonality component...
Loading existing data for Seasonality from Res_Data\GUJARAT\ANKLESHWAR\Seasonality.csv
Data shapes - Train: (23457, 1), Test: (10054, 1)
Performing hyperparameter search...

Trying parameters: units1=128, units2=32, dropout=0.1, lr=0.001, reg=0.001


  self._warn_if_super_not_called()


Val Loss: 0.0125
Best parameters: {'units_1': 128, 'units_2': 32, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'regularization': 0.001}, Best validation loss: 0.0125

Best parameters found:
units_1: 128
units_2: 32
dropout_rate: 0.1
learning_rate: 0.001
regularization: 0.001
Best validation loss: 0.0125

Training final model with best parameters...
[1m319/320[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 9ms/step - loss: 1.5403 - mae: 0.1670
Epoch 1: val_loss improved from inf to 0.07557, saving model to Res_Data\GUJARAT\ANKLESHWAR/BiGru_Opt/best_model_Seasonality.keras

Epoch 1: val_loss improved from inf to 0.07557, saving model to Res_Data\GUJARAT\ANKLESHWAR/BiGru_Opt/best_model_Seasonality.keras
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 91ms/step - loss: 1.5351 - mae: 0.1667 - val_loss: 0.0756 - val_mae: 0.0484 - learning_rate: 0.0010
Best hyperparameters saved to Res_Data\GUJARAT\ANKLESHWAR/BiGru_Opt/best_hyperparameters_Seasonality.txt
Generating

  self._warn_if_super_not_called()


Val Loss: 0.0263
Best parameters: {'units_1': 128, 'units_2': 32, 'dropout_rate': 0.1, 'learning_rate': 0.001, 'regularization': 0.001}, Best validation loss: 0.0263

Best parameters found:
units_1: 128
units_2: 32
dropout_rate: 0.1
learning_rate: 0.001
regularization: 0.001
Best validation loss: 0.0263

Training final model with best parameters...
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.2984 - mae: 0.0567
Epoch 1: val_loss improved from inf to 0.03154, saving model to Res_Data\GUJARAT\ANKLESHWAR/BiGru_Opt/best_model_Trend.keras

Epoch 1: val_loss improved from inf to 0.03154, saving model to Res_Data\GUJARAT\ANKLESHWAR/BiGru_Opt/best_model_Trend.keras
[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 93ms/step - loss: 1.2959 - mae: 0.0567 - val_loss: 0.0315 - val_mae: 0.1184 - learning_rate: 0.0010
Best hyperparameters saved to Res_Data\GUJARAT\ANKLESHWAR/BiGru_Opt/best_hyperparameters_Trend.txt
Generating predictions...
[

  self._warn_if_super_not_called()


KeyboardInterrupt: 

In [21]:
# Export this notebook to HTML by shelling out to nbconvert; the shell redirect
# sends the rendered page to <Pre_Ds><MODEL_NAME>.html.
filename = 'AO_BiGru'
Pre_Ds = 'Res_Data/'
source_notebook = f'{filename}.ipynb'
html_target = f'{Pre_Ds}{MODEL_NAME}.html'
vip = f'jupyter nbconvert --to html {source_notebook} --stdout > {html_target}'
os.system(vip)  # the return value is displayed as the cell output

0

In [89]:
from IPython.core.display import Javascript, display_javascript

def get_notebook_name():
    """Return the notebook's file name when the kernel environment exposes it.

    The JPY_SESSION_NAME environment variable is checked first; when it is set,
    its final path component is returned (presumably the variable holds the
    notebook path as set by recent Jupyter Server releases - confirm for your
    setup).

    Otherwise the original JavaScript hook is used: it asks the front end to run
    ``notebook_name = "<name>"`` in the kernel. That hook only works where the
    ``IPython.notebook`` JS object exists, it takes effect after the cell has
    finished, and in that case this function returns None (the value returned by
    ``display_javascript``) exactly as before.
    """
    session_path = os.getenv('JPY_SESSION_NAME')
    if session_path:
        return os.path.basename(session_path)

    js = Javascript("""
    var kernel = IPython.notebook.kernel;
    kernel.execute('notebook_name = "' + IPython.notebook.notebook_name + '"');
    """)
    return display_javascript(js)

def get_notebook_path():
    """Return the notebook's path when the kernel environment exposes it.

    The JPY_SESSION_NAME environment variable is checked first and returned
    unchanged when set (presumably the notebook path as set by recent Jupyter
    Server releases; it may be absolute rather than relative to the server
    root - confirm for your setup).

    Otherwise the original JavaScript hook is used: it asks the front end to run
    ``notebook_path = "<path>"`` in the kernel. That hook only works where the
    ``IPython.notebook`` JS object exists, it takes effect after the cell has
    finished, and in that case this function returns None (the value returned by
    ``display_javascript``) exactly as before.
    """
    session_path = os.getenv('JPY_SESSION_NAME')
    if session_path:
        return session_path

    js = Javascript("""
    var kernel = IPython.notebook.kernel;
    kernel.execute('notebook_path = "' + IPython.notebook.notebook_path + '"');
    """)
    return display_javascript(js)
notebook_name = get_notebook_name()
notebook_path = get_notebook_path()
%whos

Variable                 Type                   Data/Info
---------------------------------------------------------
Adam                     type                   <class 'keras.src.optimizers.adam.Adam'>
BATCH_SIZE               int                    1
Back                     AnsiBack               <colorama.ansi.AnsiBack o<...>ct at 0x000002C68EEF0F10>
Bidirectional            type                   <class 'keras.src.layers.<...>rectional.Bidirectional'>
Dense                    type                   <class 'keras.src.layers.core.dense.Dense'>
Dropout                  type                   <class 'keras.src.layers.<...>ization.dropout.Dropout'>
EarlyStopping            type                   <class 'keras.src.callbac<...>_stopping.EarlyStopping'>
FORES                    list                   n=7
Fore                     AnsiFore               <colorama.ansi.AnsiFore o<...>ct at 0x000002C68EEF0E80>
GRU                      type                   <class 'keras.src.layers.rnn.gru.

In [90]:
# Show the captured notebook name and path, one per line
print(notebook_name, notebook_path, sep="\n")

None
None


In [91]:
# Re-check the captured notebook name and path, one per line
for captured_value in (notebook_name, notebook_path):
    print(captured_value)

None
None


In [92]:
# Read the JPY_SESSION_NAME environment variable (None when it is not set) and display it.
# Note: the variable name here is misspelled; the following cell assigns `session_name`.
sessaon_name = os.environ.get('JPY_SESSION_NAME')
sessaon_name

In [93]:
# Read the JPY_SESSION_NAME environment variable (None when it is not set) and display it
session_name = os.environ.get('JPY_SESSION_NAME')
session_name