In [53]:
!pip3 install numpy
!pip3 install pandas
!pip3 install torch
!pip3 install scikit-learn
!pip3 install tensorflow
!pip3 install setuptools



## Resource Optimization Model

In [101]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Function to generate synthetic data (simplified assumptions)
def generate_synthetic_data(num_samples=1000, seed=None):
    """Generate synthetic load metrics together with target outbound settings.

    Every row holds five input features followed by two targets, in this order:
    ``request_rate, byte_rate, ram_usage, cpu_usage, mean_byte_per_request,
    outbound_message_size, outbound_frequency``.

    Parameters
    ----------
    num_samples : int, default 1000
        Number of rows to generate. Zero (or a negative value) yields an empty
        array that still has seven columns, so column slicing keeps working.
    seed : int or None, default None
        When given, rows are drawn from a private ``np.random.RandomState(seed)``
        so the result is reproducible and the global NumPy RNG is left untouched.
        When ``None`` the global ``np.random`` state is used.

    Returns
    -------
    numpy.ndarray
        Float array with ``max(num_samples, 0)`` rows and 7 columns.
    """
    num_columns = 7
    # The np.random module and RandomState expose the same `uniform` API, so the
    # draw order (and therefore the stream under a global seed) is unchanged.
    rng = np.random if seed is None else np.random.RandomState(seed)

    rows = []
    for _ in range(num_samples):
        request_rate = rng.uniform(1, 10)  # requests per second (1-10 requests/sec)
        byte_rate = rng.uniform(1000, 10000)  # byte rate in bytes/sec (1KB to 10KB/sec)
        ram_usage = rng.uniform(100, 3000)  # RAM usage in MB
        cpu_usage = rng.uniform(10, 1000)  # CPU usage in millicores (1/1000 of a core)
        mean_byte_per_request = rng.uniform(100, 2000)  # mean bytes per request (100 bytes to 2KB)

        # Targets follow simplified assumptions: message size shrinks as RAM/CPU
        # pressure grows (floored at 1), frequency is 50-80% of the request rate.
        outbound_message_size = max(1, 1000 - (ram_usage * 0.5 + cpu_usage * 0.3))
        outbound_frequency = max(0.01, request_rate * rng.uniform(0.5, 0.8))

        rows.append([request_rate, byte_rate, ram_usage, cpu_usage, mean_byte_per_request,
                     outbound_message_size, outbound_frequency])

    if not rows:
        # np.array([]) would be 1-D and break `data[:, :5]`-style slicing downstream.
        return np.empty((0, num_columns), dtype=float)

    return np.array(rows, dtype=float)

# Prepare the synthetic data.
# Seed the global NumPy RNG first so the generated dataset -- and everything
# fitted on it below -- is reproducible after a kernel restart.
np.random.seed(42)
data = generate_synthetic_data()

# Split the data into input (X) and output (y)
X = data[:, :5]  # Inputs: request rate, byte rate, ram usage, cpu usage, mean byte per request
y = data[:, 5:]  # Outputs: message size, output frequency

# Split BEFORE scaling: the held-out rows must not influence the scaler's
# mean/variance, otherwise test-set information leaks into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing: standardize the features using statistics of the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled view of the full feature matrix, kept under its existing name so any
# code that still refers to X_scaled keeps working.
X_scaled = scaler.transform(X)

# Base estimator whose hyperparameters are tuned below
model = RandomForestRegressor(random_state=42)

# Search space explored exhaustively by the grid search (3 * 4 * 3 * 3 * 2 = 216 candidates)
param_grid = dict(
    n_estimators=[50, 100, 200],    # number of trees in the forest
    max_depth=[10, 20, 30, None],   # depth limit of each tree (None = no limit)
    min_samples_split=[2, 5, 10],   # minimum samples needed to split an internal node
    min_samples_leaf=[1, 2, 4],     # minimum samples required at a leaf node
    bootstrap=[True, False],        # whether trees are built on bootstrap samples
)

# 5-fold cross-validated search scored by negated MSE, run in a single process
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

# Get the best parameters from the grid search
best_params = grid_search.best_params_
print(f"Best Hyperparameters: {best_params}")

# Use the best model found by GridSearchCV
best_model = grid_search.best_estimator_

# Predict on the held-out test set
y_pred = best_model.predict(X_test)

# Overall Mean Squared Error (MSE): uniform average over both targets
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error after Hyperparameter Tuning: {mse:.2f}')

# The two targets live on very different scales (message size in bytes, up to
# roughly 1000; frequency in messages/sec, single digits), so the averaged MSE
# is dominated by message size. Report each target separately as well.
mse_message_size, mse_frequency = mean_squared_error(y_test, y_pred, multioutput='raw_values')
print(f'  Outbound message size MSE: {mse_message_size:.2f}')
print(f'  Outbound frequency MSE: {mse_frequency:.4f}')

# Function to predict the optimal outbound message size and frequency
def predict_optimal_params(request_rate, byte_rate, ram_usage, cpu_usage, mean_byte_per_request,
                           *, model=None, feature_scaler=None):
    """Predict the outbound message size and frequency for one workload snapshot.

    Parameters
    ----------
    request_rate : float
        Incoming requests per second.
    byte_rate : float
        Incoming bytes per second.
    ram_usage : float
        RAM usage in MB.
    cpu_usage : float
        CPU usage in millicores.
    mean_byte_per_request : float
        Mean number of bytes per request.
    model : object with ``predict``, optional
        Fitted two-output regressor. Defaults to the notebook-level ``best_model``.
    feature_scaler : object with ``transform``, optional
        Fitted feature scaler. Defaults to the notebook-level ``scaler``.

    Returns
    -------
    tuple of (float, float)
        ``(outbound_message_size, outbound_frequency)`` as plain Python floats.
        The size is floored at 1 byte. The frequency is floored at 0.01 msg/sec
        and then capped at 90% of ``request_rate``; the cap is applied last, so
        for a very small ``request_rate`` the result can fall below the floor.

    Notes
    -----
    The default model is a random forest, which cannot extrapolate: inputs far
    outside the ranges seen during training give predictions confined to the
    range of the training targets.
    """
    # Fall back to the notebook-level fitted objects when none are injected.
    if model is None:
        model = best_model
    if feature_scaler is None:
        feature_scaler = scaler

    input_data = np.array([[request_rate, byte_rate, ram_usage, cpu_usage, mean_byte_per_request]])
    input_data_scaled = feature_scaler.transform(input_data)  # same scaling as at training time

    # One input row -> one prediction row holding [message_size, frequency]
    prediction = model.predict(input_data_scaled)
    raw_size, raw_frequency = prediction[0][0], prediction[0][1]

    # Convert to built-in floats so the return type does not depend on which
    # clamp branch fired (int literal vs. NumPy scalar).
    outbound_message_size = max(1.0, float(raw_size))  # at least 1 byte
    outbound_frequency = max(0.01, float(raw_frequency))  # at least 0.01 msg/sec

    # Constraint: outbound frequency must stay below the request rate
    outbound_frequency = min(outbound_frequency, float(request_rate) * 0.9)

    return outbound_message_size, outbound_frequency

# Example usage: a single observed workload snapshot.
# NOTE(review): most of these values lie outside the ranges produced by
# generate_synthetic_data (e.g. request rate 1-10 req/sec, byte rate 1000-10000
# bytes/sec) -- confirm the prediction is meaningful this far out of range.
request_rate, byte_rate = 159.99, 20637.01  # requests/sec, bytes/sec
ram_usage = 150.60  # RAM usage in MB
# NOTE(review): recorded as 4.71% CPU, but the model's CPU feature is in
# millicores -- confirm the unit conversion.
cpu_usage = 4.71
mean_byte_per_request = 45  # bytes per request

# Ask the tuned model for the recommended message size and send frequency
optimal_message_size, optimal_frequency = predict_optimal_params(
    request_rate=request_rate,
    byte_rate=byte_rate,
    ram_usage=ram_usage,
    cpu_usage=cpu_usage,
    mean_byte_per_request=mean_byte_per_request,
)

print(f"Optimal Message Size: {optimal_message_size:.2f} bytes")
print(f"Optimal Output Frequency: {optimal_frequency:.2f} messages/sec")



Fitting 5 folds for each of 216 candidates, totalling 1080 fits


BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.