In [18]:
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from dataset import MyDataset

# device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Assuming the 'dataset.csv' path is correct and available
train_dataset = MyDataset("dataset.csv", mode='train', test_size=1/3, random_state=42)
test_dataset = MyDataset("dataset.csv", mode='test', test_size=1/3, random_state=42)

train_dataloader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=False)  # Usually, we don't shuffle the test data


def normalize(x):
        iterations_range = (5000, 120000)
        processes_range = (1, 8)
        length_of_message_range = (50, 500)
        min = torch.tensor([iterations_range[0], processes_range[0], length_of_message_range[0]]).to(device)
        max = torch.tensor([iterations_range[1], processes_range[1], length_of_message_range[1]]).to(device)
        x -= min
        x /= max
        return x

In [19]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleAutoencoder(nn.Module):
    def __init__(self):
        super(SimpleAutoencoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(3, 128),
            nn.ReLU(True),
            nn.Linear(128, 2)
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(2, 128),
            nn.ReLU(True),
            nn.Linear(128, 3),
            nn.Sigmoid()  # Use Sigmoid to ensure output is between 0 and 1 (since our data is normalized)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


In [20]:
# Define the model
model = SimpleAutoencoder().to(device)

# Loss function
criterion = nn.MSELoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 500  # You can adjust this

for epoch in range(num_epochs):
    for input, _ in train_dataloader:
        # normalize the input
        input = normalize(input.to(device))
        # Forward pass
        output = model(input)
        loss = criterion(output, input)
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Log progress
    if epoch % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/500], Loss: 0.0904
Epoch [101/500], Loss: 0.0013
Epoch [201/500], Loss: 0.0011
Epoch [301/500], Loss: 0.0009
Epoch [401/500], Loss: 0.0008


In [21]:
import pandas as pd

# Ensure the model is in evaluation mode
model.eval()

# Prepare a container for our data
data_to_save = []

# Loop through the test dataset
for input, _ in test_dataloader:
    # Normalize and move the input to the appropriate device
    input = normalize(input.to(device))
    
    # Forward pass through the encoder only
    encoded = model.encoder(input).detach().cpu().numpy()
    
    # Convert input to numpy for easy processing
    original_input = input.detach().cpu().numpy()
    
    # Collect the original inputs and their encoded representations
    for original, enc in zip(original_input, encoded):
        data_to_save.append(list(original) + list(enc))

# Convert to DataFrame
df = pd.DataFrame(data_to_save, columns=['Input1', 'Input2', 'Input3', 'Encoded1', 'Encoded2'])

# Save to CSV
csv_file_path = './encoded_test_data.csv'
df.to_csv(csv_file_path, index=False)

csv_file_path


'./encoded_test_data.csv'

In [5]:
example_input, _ = next(iter(test_dataloader))
example_input = example_input.to(device)
example_input = normalize(example_input)

with torch.no_grad():
    encoded = model.encoder(example_input)
    decoded = model.decoder(encoded)

print(f'Input: {example_input[0]}')
print(f'Encoded: {encoded[0]}')
print(f'Decoded: {decoded[0]}')

Input: tensor([0.1250, 0.3750, 0.6000], device='cuda:0')
Encoded: tensor([ 1.0498, -0.3082], device='cuda:0')
Decoded: tensor([0.1587, 0.3468, 0.6019], device='cuda:0')


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PredictionNN(nn.Module):
    def __init__(self):
        super(PredictionNN, self).__init__()
        # Input layer takes the 2D output from the encoder
        self.layer1 = nn.Linear(2, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, 128)
        # Adjusted output layer to produce a vector of size 2
        self.output_layer = nn.Linear(128, 2)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))
        # No activation function here; can be added based on the specific requirements of the output
        x = F.relu(self.output_layer(x))
        return x


In [7]:
# Define the model
pred_model = PredictionNN().to(device)

# Loss function
criterion = nn.MSELoss()

# Optimizer
optimizer = torch.optim.Adam(pred_model.parameters(), lr=0.0001)

# Training loop
num_epochs = 300 

for epoch in range(num_epochs):
    for input, label in train_dataloader:
        # normalize the input
        input = normalize(input.to(device))
        label = label.to(device)
        encoded = model.encoder(input)
        # Forward pass
        output = pred_model(encoded)
        loss = criterion(output, label)
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Log progress
    if epoch % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/300], Loss: 29.3529
Epoch [101/300], Loss: 25.5227
Epoch [201/300], Loss: 26.3190


In [15]:
import pandas as pd

# Ensure the model is in evaluation mode
pred_model.eval()

# Container for data
data_to_save = []

# No gradients needed for inference
with torch.no_grad():
    for input, label in test_dataloader:
        # Normalize and move the input to the appropriate device
        input = normalize(input.to(device))
        label = label.to(device)
        
        # Encode the input
        encoded = model.encoder(input)
        
        # Perform prediction
        predicted = pred_model(encoded).cpu().numpy()
        
        # Collect actual labels, predicted values, and encodings
        actual_labels = label.cpu().numpy()
        encodings = encoded.cpu().numpy()
        
        for actual, pred, enc in zip(actual_labels, predicted, encodings):
            data_to_save.append(list(enc) + list(actual) + list(pred))

# Convert to DataFrame
columns = ['Encoded1', 'Encoded2', 'Actual1', 'Actual2', 'Predicted1', 'Predicted2']
# columns = ['Encoded1', 'Encoded2', 'Actual', 'Predicted']
df = pd.DataFrame(data_to_save, columns=columns)

# Save to CSV
csv_file_path = './prediction_results.csv'
df.to_csv(csv_file_path, index=False)

csv_file_path



'./prediction_results.csv'

In [58]:
example_input, label = next(iter(dataloader))
example_input, label = example_input.to(device), label.to(device)
example_input = normalize(example_input)

with torch.no_grad():
    encoded = model.encoder(example_input)
    decoded = model.decoder(encoded)

    prediction = pred_model(encoded)

print(f'Input: {example_input[0]}')
print(f'Encoded: {encoded[0]}')
print(f'Decoded: {decoded[0]}')
print(f'Label: {label[0]}')
print(f'Prediction: {prediction[0]}')

Input: tensor([0.7083, 0.6250, 0.4000], device='cuda:0')
Encoded: tensor([ 1.0198, -0.4764], device='cuda:0')
Decoded: tensor([0.7099, 0.6244, 0.3986], device='cuda:0')
Label: tensor([37.9658,  2.0052], device='cuda:0')
Prediction: tensor([36.2300,  2.6684], device='cuda:0')


In [17]:
import pandas as pd

# Load the data
df = pd.read_csv('./prediction_results.csv')

# Extract Actual2 and Predicted2 columns
y_actual = df['Actual'].to_numpy()
y_predicted = df['Predicted'].to_numpy()

# Define the function
def calculate_cpu_usage_error_rate(y_actual, y_predicted):
    errors = 0
    error_list = []
    for actual, predicted in zip(y_actual, y_predicted):
        actualError = abs(actual - predicted) / actual
        error_list.append(actualError)
        if not (int(actual) <= predicted < int(actual) + 1):
            errors += 1
    error_rate = errors / len(y_actual)
    return error_rate, error_list

# Apply the function
error_rate, error_list = calculate_cpu_usage_error_rate(y_actual, y_predicted)

# Optionally, save the error list to a new CSV file
error_df = pd.DataFrame({'Error': error_list})
error_csv_path = './cpu_usage_error_list.csv'
error_df.to_csv(error_csv_path, index=False)

(error_rate, error_csv_path)


(0.58, './cpu_usage_error_list.csv')

In [23]:
import pandas as pd

# Load the data
df = pd.read_csv('./prediction_results.csv')

# Extract Actual2 and Predicted2 columns
y_actual = df['Actual1'].to_numpy()
y_predicted = df['Predicted1'].to_numpy()

# Define the function
def calculate_memory_usage_error_rate(y_actual, y_predicted):
        """
        Calculate the error rate for memory usage predictions based on the criteria:
        A prediction is considered an error if it falls outside the +5 and -5 range of the actual value.
        
        Args:
            y_actual (array-like): The actual memory usage values.
            y_predicted (array-like): The predicted memory usage values.
            
        Returns:
            float: The error rate.
            list: The list of errors.
        """
        errors = 0
        error_list = []
        for actual, predicted in zip(y_actual, y_predicted):
            actualError = abs(actual - predicted)/actual
            error_list.append(actualError)
            if not (actual - 5 <= predicted <= actual + 5):
                errors += 1
        
        error_rate = errors / len(y_actual)
        return error_rate,error_list

# Apply the function
error_rate, error_list = calculate_memory_usage_error_rate(y_actual, y_predicted)

# Optionally, save the error list to a new CSV file
error_df = pd.DataFrame({'Error': error_list})
error_csv_path = './memory_usage_error_list.csv'
error_df.to_csv(error_csv_path, index=False)

(error_rate, error_csv_path)


(0.024, './memory_usage_error_list.csv')