# Testing Prediction Accuracy and Error Ratio for CPU Usage with VM Allocation Simulation

<span style="color:red">CPU Usage Prediction and VM Allocation Simulation - </span>

In [10]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import simpy

# Load the dataset
file_path = 'C:/Users/ratho/OneDrive/Desktop/dataset1.txt'
data = pd.read_csv(file_path, header=None)
cpu_usage = data.values.flatten()

sequence_length = 10

def create_sequences(data, seq_length):
    sequences = []
    targets = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i + seq_length])
        targets.append(data[i + seq_length])
    return np.array(sequences), np.array(targets)

X, y = create_sequences(cpu_usage, sequence_length)
X_flattened = X.reshape(-1, sequence_length)
y_flattened = y

# Scale the data
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_scaled = scaler_X.fit_transform(X_flattened)
y_scaled = scaler_y.fit_transform(y_flattened.reshape(-1, 1)).flatten()

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, train_size=0.7, shuffle=False)

# Define and train the Linear Regression model
model_lr = LinearRegression()
model_lr.fit(X_train, y_train)

# Predict
lr_predictions = model_lr.predict(X_test)

# Rescale predictions
lr_predictions_inv = scaler_y.inverse_transform(lr_predictions.reshape(-1, 1)).flatten()
y_test_inv = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()

# Evaluate the model
mse_lr = mean_squared_error(y_test_inv, lr_predictions_inv)
rmse_lr = np.sqrt(mse_lr)
r2_lr = r2_score(y_test_inv, lr_predictions_inv)

print(f'Linear Regression Model - MSE: {mse_lr:.2f}, RMSE: {rmse_lr:.2f}, R²: {r2_lr:.2f}')

# SimPy Datacenter Simulation

class Datacenter:
    def __init__(self, env, capacity=10):
        self.env = env
        self.server = simpy.Resource(env, capacity=capacity)

    def process_vm_request(self, vm, duration):
        with self.server.request() as req:
            yield self.env.timeout(duration)

class VMAllocator:
    def __init__(self, env, threshold=80):
        self.env = env
        self.threshold = threshold
        self.cumulative_cpu_usage = 0
        self.vm_counter = 0

    def allocate_vms(self, predicted_value, actual_value):
        self.cumulative_cpu_usage += predicted_value
        difference = abs(predicted_value - actual_value)

        if difference > 10:
            print(f"Significant prediction error: Predicted {predicted_value}%, Actual {actual_value}%, Difference {difference}%.")


        if self.cumulative_cpu_usage > self.threshold:
            num_vms = int((self.cumulative_cpu_usage - self.threshold) // 10) + 1
            print(f"Allocating {num_vms} VM(s) due to high cumulative usage ({self.cumulative_cpu_usage}%).")
            self.cumulative_cpu_usage = 0 


env = simpy.Environment()

datacenter = Datacenter(env)
vm_allocator = VMAllocator(env)


last_n_values = X_scaled[-1]  
predicted_values = []
actual_values = y_test_inv[:10]  

for i, actual_value in enumerate(actual_values):
    predicted_value = scaler_y.inverse_transform(
        model_lr.predict([last_n_values]).reshape(-1, 1)
    ).flatten()[0]
    predicted_values.append(predicted_value)
 
    vm_allocator.allocate_vms(predicted_value, actual_value)

    last_n_values = np.append(last_n_values[1:], scaler_y.transform([[predicted_value]])).flatten()

    vm = f"VM_{i}_Predicted_{predicted_value:.2f}"
    env.process(datacenter.process_vm_request(vm, 10))

# Run the simulation
env.run(until=50)


Linear Regression Model - MSE: 55.45, RMSE: 7.45, R²: 0.68
Significant prediction error: Predicted 16.649491727565827%, Actual 4.0%, Difference 12.649491727565827%.
Significant prediction error: Predicted 16.752317624233118%, Actual 2.0%, Difference 14.752317624233118%.
Significant prediction error: Predicted 17.060068917402834%, Actual 2.0%, Difference 15.060068917402834%.
Significant prediction error: Predicted 16.99174158174463%, Actual 6.0%, Difference 10.99174158174463%.
Significant prediction error: Predicted 17.16361499632829%, Actual 0.0%, Difference 17.16361499632829%.
Allocating 1 VM(s) due to high cumulative usage (84.6172348472747%).
Significant prediction error: Predicted 16.896928129245335%, Actual 5.0%, Difference 11.896928129245335%.
Significant prediction error: Predicted 17.114791284605236%, Actual 4.0%, Difference 13.114791284605236%.
Significant prediction error: Predicted 16.80633342173532%, Actual 0.0%, Difference 16.80633342173532%.
Significant prediction error: 