In [5]:
import numpy as np
from sklearn.neural_network import BernoulliRBM
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from deap import base, creator, tools, algorithms
import random
import pandas as pd

In [6]:
df = pd.read_csv('./datasets/final_train_dataset.csv')

In [7]:
df.index = pd.to_datetime(df.Datetime)
new_df = df[['Average_Receive_bps', 'Average_Transmit_bps']]
new_df.head()

Unnamed: 0_level_0,Average_Receive_bps,Average_Transmit_bps
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-08-01 00:00:00,338950304.0,65747312.0
2023-08-01 00:05:00,338950304.0,65747312.0
2023-08-01 00:10:00,338873216.0,61819992.0
2023-08-01 00:15:00,338873216.0,61819992.0
2023-08-01 00:20:00,338956608.0,69735288.0


In [8]:
# Normalize the data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(new_df)

# Define sequence length and features
sequence_length = 10  # Number of time steps in each sequence
num_features = len(new_df.columns)

# Create sequences and corresponding labels
sequences = []
labels = []
for i in range(len(scaled_data) - sequence_length):
    seq = scaled_data[i:i+sequence_length]
    label = scaled_data[i+sequence_length][1]  # Assuming 'average_receive_bps' is at index 1
    sequences.append(seq)
    labels.append(label)

# Convert to numpy arrays
sequences = np.array(sequences)
labels = np.array(labels)

# Split into train and test sets
train_x, test_x, train_y, test_y = train_test_split(sequences, labels, test_size=0.2, random_state=42)

In [13]:
# Define the RBM model
def create_rbm(n_components):
    rbm = BernoulliRBM(n_components=n_components, learning_rate=0.01, n_iter=20, random_state=42)
    return rbm


In [14]:
# Define fitness function for Genetic Algorithm (using RMSE)
def evaluate_rbm(individual):
    n_components = individual[0]
    rbm = create_rbm(n_components=n_components)
    rbm.fit(train_x.reshape(len(train_x), -1))  # Flatten the sequences
    transformed_train = rbm.transform(train_x.reshape(len(train_x), -1))
    transformed_test = rbm.transform(test_x.reshape(len(test_x), -1))
    
    # Use RBM output as features for a regression model
    from sklearn.linear_model import LinearRegression
    model = LinearRegression()
    model.fit(transformed_train, train_y)
    predictions = model.predict(transformed_test)
    
    rmse = np.sqrt(np.mean((predictions - test_y) ** 2))
    return (rmse,)

In [19]:
import random
from deap import base, creator, tools, algorithms

# Step 1: Set up the individual and fitness function
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Step 2: Define attributes, individual, and population
toolbox.register("attribute", random.random)  # Each gene is a random float
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attribute, n=10)  # 10 genes per individual
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Step 3: Define the evaluation function
def eval_individual(individual):
    return sum(individual),  # Fitness is the sum of genes

toolbox.register("evaluate", eval_individual)

# Step 4: Define the genetic operators
toolbox.register("mate", tools.cxTwoPoint)  # Two-point crossover
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1)  # Gaussian mutation
toolbox.register("select", tools.selTournament, tournsize=3)  # Tournament selection

# Step 5: Set parameters for the Genetic Algorithm
population_size = 50  # Ensure the population size is greater than 1
num_generations = 50  # Number of generations to evolve
prob_crossover = 0.7  # Crossover probability
prob_mutation = 0.2  # Mutation probability

# Step 6: Initialize the population
population = toolbox.population(n=population_size)

# Step 7: Run the Genetic Algorithm
algorithms.eaSimple(population, toolbox, cxpb=prob_crossover, mutpb=prob_mutation, ngen=num_generations, verbose=True)




gen	nevals
0  	50    
1  	47    
2  	33    
3  	46    
4  	42    
5  	38    
6  	38    
7  	41    
8  	39    
9  	39    
10 	38    
11 	42    
12 	43    
13 	44    
14 	32    
15 	37    
16 	39    
17 	40    
18 	48    
19 	33    
20 	42    
21 	39    
22 	36    
23 	30    
24 	43    
25 	35    
26 	43    
27 	33    
28 	33    
29 	37    
30 	39    
31 	39    
32 	37    
33 	44    
34 	44    
35 	40    
36 	33    
37 	42    
38 	35    
39 	40    
40 	39    
41 	41    
42 	36    
43 	44    
44 	35    
45 	35    
46 	37    
47 	35    
48 	30    
49 	39    
50 	38    


([[10.948585686794102,
   2.2719509448840567,
   5.195954116645609,
   7.067031385063283,
   3.925880846690479,
   8.382582050922634,
   5.848406455776617,
   6.8880423746571156,
   6.870494700968175,
   2.258490637833377],
  [10.948585686794102,
   2.2719509448840567,
   5.195954116645609,
   7.067031385063283,
   3.925880846690479,
   8.382582050922634,
   5.848406455776617,
   6.8880423746571156,
   6.870494700968175,
   2.258490637833377],
  [10.322082973336624,
   2.2719509448840567,
   5.195954116645609,
   7.067031385063283,
   3.925880846690479,
   8.382582050922634,
   5.848406455776617,
   6.8880423746571156,
   6.870494700968175,
   2.258490637833377],
  [10.948585686794102,
   2.2719509448840567,
   5.195954116645609,
   7.067031385063283,
   3.925880846690479,
   8.382582050922634,
   5.848406455776617,
   6.8880423746571156,
   6.870494700968175,
   2.258490637833377],
  [10.948585686794102,
   2.2719509448840567,
   5.195954116645609,
   6.475276747272741,
   4.705252484

In [24]:
# Cast or round best_n_components to an integer
best_n_components = int(round(best_individual[0]))

print(f"Best number of components (hidden units): {best_n_components}")

# Train the final RBM with the best number of components
final_rbm = create_rbm(n_components=best_n_components)
final_rbm.fit(train_x.reshape(len(train_x), -1))


Best number of components (hidden units): 11


Model Evaluation on Test Set:

In [25]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Transform the test data using the final RBM model
transformed_test_x = final_rbm.transform(test_x.reshape(len(test_x), -1))

# Use the same regression model (Linear Regression or any other) for evaluation
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(final_rbm.transform(train_x.reshape(len(train_x), -1)), train_y)

# Predict on the test data
test_predictions = model.predict(transformed_test_x)

# RMSE Calculation
rmse = np.sqrt(mean_squared_error(test_y, test_predictions))
print(f"Test RMSE: {rmse}")

# Optionally, evaluate with MAE or R²
from sklearn.metrics import mean_absolute_error, r2_score
mae = mean_absolute_error(test_y, test_predictions)
r2 = r2_score(test_y, test_predictions)

print(f"Test MAE: {mae}")
print(f"Test R²: {r2}")


Test RMSE: 0.023575358226864136
Test MAE: 0.013280260834332812
Test R²: 0.6945109925734346


In [27]:
# Cross Validation

In [28]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

# Transform the data using the RBM
transformed_data = final_rbm.transform(sequences.reshape(len(sequences), -1))

# Initialize the regression model (or any other suitable model)
model = LinearRegression()

# Perform 5-fold cross-validation
cv_scores = cross_val_score(model, transformed_data, labels, cv=5, scoring='neg_mean_squared_error')

# Convert the negative MSE scores to positive RMSE values
rmse_scores = np.sqrt(-cv_scores)
print(f"Cross-Validation RMSE Scores: {rmse_scores}")
print(f"Mean RMSE: {rmse_scores.mean()}")

# Optionally, calculate MAE or other metrics
cv_mae_scores = cross_val_score(model, transformed_data, labels, cv=5, scoring='neg_mean_absolute_error')
mae_scores = -cv_mae_scores
print(f"Cross-Validation MAE Scores: {mae_scores}")
print(f"Mean MAE: {mae_scores.mean()}")


Cross-Validation RMSE Scores: [0.01396956 0.01660539 0.01797163 0.03691916 0.03181982]
Mean RMSE: 0.023457109936809245
Cross-Validation MAE Scores: [0.01007478 0.01131784 0.01168793 0.01740186 0.01681648]
Mean MAE: 0.01345977781231308
