In [4]:
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import set_random_seed


In [None]:

# Function to preprocess the dataset
def preprocess_data(file_path, target_columns, look_back=5):
    """
    Load the training workbook, standardize it, and window it for an RNN.

    The first row of the parsed frame is promoted to column names and dropped
    from the data. Every cell is then coerced to a number (unparseable cells
    become NaN) and NaNs are replaced by their column mean. Features and
    targets are standardized with two freshly fitted ``StandardScaler``
    instances and sliced into overlapping windows: sample ``i`` holds feature
    rows ``i .. i + look_back - 1`` and its label is target row
    ``i + look_back``.

    Parameters
    ----------
    file_path : str or path-like
        Excel file, passed unchanged to ``pd.read_excel``.
    target_columns : list of str
        Columns to predict; every other column is used as a feature.
    look_back : int, default 5
        Number of consecutive rows in each input window. Must be >= 1.

    Returns
    -------
    X_seq : np.ndarray
        Shape ``(n_rows - look_back, look_back, n_features)``.
    y_seq : np.ndarray
        Shape ``(n_rows - look_back, n_targets)``.
    scaler_X, scaler_y : StandardScaler
        The scalers fitted on the features and targets, to be reused on
        other data and to invert predictions.

    Raises
    ------
    ValueError
        If ``look_back`` is smaller than 1, or the sheet has too few rows to
        build even one ``(window, label)`` pair.
    """
    # Fail fast, before any file I/O, on a window size that cannot work.
    if look_back < 1:
        raise ValueError(f"look_back must be >= 1, got {look_back}")

    data = pd.read_excel(file_path)
    data.columns = data.iloc[0]
    data = data.iloc[1:]
    data = data.apply(pd.to_numeric, errors="coerce")
    data = data.fillna(data.mean())

    X = data.drop(columns=target_columns).values
    y = data[target_columns].values

    # Each sample needs look_back feature rows plus one following label row.
    # Without this check the function would return shape-(0,) arrays and the
    # caller would fail later with an unrelated indexing error.
    n_sequences = len(X) - look_back
    if n_sequences < 1:
        raise ValueError(
            f"Need more than look_back={look_back} data rows to build a "
            f"sequence, but the file only has {len(X)}"
        )

    # Standardize the features and targets independently
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    # Window i covers rows [i, i + look_back); keep the 3D layout for the RNN
    X_seq = np.array([X[i:i + look_back] for i in range(n_sequences)])
    # The label of window i is row i + look_back, i.e. simply y[look_back:]
    y_seq = np.array(y[look_back:])

    return X_seq, y_seq, scaler_X, scaler_y



In [None]:
# Function to preprocess testing data using the same scalers
def preprocess_test_data(file_path, target_columns, scaler_X, scaler_y, look_back=5):
    """
    Load the testing workbook and window it using already-fitted scalers.

    Cleaning mirrors the training path: the first row of the parsed frame is
    promoted to column names and dropped, every cell is coerced to a number
    (unparseable cells become NaN) and NaNs are replaced by the column mean
    of *this* file. Features and targets are then transformed (not re-fitted)
    with the supplied scalers and sliced into overlapping windows: sample
    ``i`` holds feature rows ``i .. i + look_back - 1`` and its label is
    target row ``i + look_back``.

    Parameters
    ----------
    file_path : str or path-like
        Excel file, passed unchanged to ``pd.read_excel``.
    target_columns : list of str
        Columns to predict; every other column is used as a feature.
    scaler_X, scaler_y : fitted scaler
        Objects exposing ``transform``; applied to the features and targets.
    look_back : int, default 5
        Number of consecutive rows in each input window. Must be >= 1.

    Returns
    -------
    X_seq : np.ndarray
        Shape ``(n_rows - look_back, look_back, n_features)``.
    y_seq : np.ndarray
        Shape ``(n_rows - look_back, n_targets)``, still in scaled units.

    Raises
    ------
    ValueError
        If ``look_back`` is smaller than 1, or the sheet has too few rows to
        build even one ``(window, label)`` pair.
    """
    # Fail fast, before any file I/O, on a window size that cannot work.
    if look_back < 1:
        raise ValueError(f"look_back must be >= 1, got {look_back}")

    data = pd.read_excel(file_path)
    data.columns = data.iloc[0]
    data = data.iloc[1:]
    data = data.apply(pd.to_numeric, errors="coerce")
    data = data.fillna(data.mean())

    X = data.drop(columns=target_columns).values
    y = data[target_columns].values

    # Each sample needs look_back feature rows plus one following label row.
    # Without this check the function would return shape-(0,) arrays that
    # only fail later, inside the model's predict call.
    n_sequences = len(X) - look_back
    if n_sequences < 1:
        raise ValueError(
            f"Need more than look_back={look_back} data rows to build a "
            f"sequence, but the file only has {len(X)}"
        )

    # Reuse the statistics learned on the training data; never re-fit here
    X = scaler_X.transform(X)
    y = scaler_y.transform(y)

    # Window i covers rows [i, i + look_back); keep the 3D layout for the RNN
    X_seq = np.array([X[i:i + look_back] for i in range(n_sequences)])
    # The label of window i is row i + look_back, i.e. simply y[look_back:]
    y_seq = np.array(y[look_back:])

    return X_seq, y_seq



In [None]:
# Input workbooks (Kaggle dataset mount) and the columns the model predicts.
# Every column of the workbooks that is not listed in target_columns is
# treated as an input feature by the preprocessing functions.
training_file = '/kaggle/input/heterogenous-dataset/Training Dataset.xlsx'
testing_file = '/kaggle/input/heterogenous-dataset/Testing Dataset.xlsx'
target_columns = [
    'Cloud_Throughput',
    'Total_Energy_Consumption',
    'Total_Exec_Time',
]


In [None]:
# ========================= TRAINING PHASE =========================
# Seed Python, NumPy and the Keras backend so weight initialisation, dropout
# masks and batch shuffling repeat on every "Restart & Run All".
SEED = 42
set_random_seed(SEED)

look_back = 5  # Number of past time steps for prediction
X_train, y_train, scaler_X, scaler_y = preprocess_data(training_file, target_columns, look_back)

# Build the RNN model: two stacked SimpleRNN layers (the first returns the
# full sequence so the second can consume it), each followed by dropout, and
# a linear head with one output per target column.
model = Sequential([
    SimpleRNN(64, activation='relu', return_sequences=True, input_shape=(look_back, X_train.shape[2]), kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    SimpleRNN(32, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.3),
    Dense(y_train.shape[1], activation='linear')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0003), loss='mse', metrics=['mae'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()

# Callbacks: stop once val_loss stalls for 15 epochs (restoring the best
# weights) and halve the learning rate after 7 stalled epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=7, verbose=1)

# Train the model; Keras holds out the last 20% of the sequences for validation
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=200,
    batch_size=16,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)

# Save the model and scalers
model.save('rnn_model_fixed.keras')
joblib.dump((scaler_X, scaler_y), 'rnn_scalers_fixed.pkl')
print("RNN model and scalers saved successfully!")

# Evaluate training performance in the original (unscaled) units.
# NOTE: X_train still contains the validation tail, so these figures cover
# the fitted and the held-out sequences together.
y_train_pred = model.predict(X_train)
y_train = scaler_y.inverse_transform(y_train)
y_train_pred = scaler_y.inverse_transform(y_train_pred)

train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
train_mae = mean_absolute_error(y_train, y_train_pred)

print("\nRNN Training Metrics:")
print(f"RMSE: {train_rmse:.4f}, MAE: {train_mae:.4f}")

# The aggregate above averages over targets measured in different units, so
# also report each target on its own.
train_rmse_per_target = np.sqrt(mean_squared_error(y_train, y_train_pred, multioutput='raw_values'))
train_mae_per_target = mean_absolute_error(y_train, y_train_pred, multioutput='raw_values')
for target_name, target_rmse, target_mae in zip(target_columns, train_rmse_per_target, train_mae_per_target):
    print(f"  {target_name}: RMSE: {target_rmse:.4f}, MAE: {target_mae:.4f}")



In [None]:
# ========================= TESTING PHASE =========================
# Preprocess the testing data using the same scalers
X_test, y_test = preprocess_test_data(testing_file, target_columns, scaler_X, scaler_y, look_back)

# Predict and evaluate on the testing data, back in the original units
y_test_pred = model.predict(X_test)
y_test = scaler_y.inverse_transform(y_test)
y_test_pred = scaler_y.inverse_transform(y_test_pred)

test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
test_mae = mean_absolute_error(y_test, y_test_pred)

print("\nRNN Testing Metrics:")
print(f"RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}")

# The aggregate above averages over targets measured in different units, so
# also report each target on its own.
test_rmse_per_target = np.sqrt(mean_squared_error(y_test, y_test_pred, multioutput='raw_values'))
test_mae_per_target = mean_absolute_error(y_test, y_test_pred, multioutput='raw_values')
for target_name, target_rmse, target_mae in zip(target_columns, test_rmse_per_target, test_mae_per_target):
    print(f"  {target_name}: RMSE: {target_rmse:.4f}, MAE: {target_mae:.4f}")

# Save predictions next to the ground truth. The "Pred_" columns are derived
# from target_columns so they stay aligned with the model outputs if the
# target list is ever reordered or extended.
y_test_combined = pd.DataFrame(y_test, columns=target_columns)
for column_index, target_name in enumerate(target_columns):
    y_test_combined[f'Pred_{target_name}'] = y_test_pred[:, column_index]

y_test_combined.to_excel('testing_predictions_rnn_fixed.xlsx', index=False)
print("Testing predictions saved to 'testing_predictions_rnn_fixed.xlsx'.")
