In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [9]:
# Read the scored server-metrics log (metrics plus performance score) into a DataFrame
df = pd.read_csv("scored_server_logs.csv")

# Predictors (independent variables): the three metric columns fed to the model
X = df.loc[:, ["load_avg", "cpu_percent", "memory_percent"]]

# Response (dependent variable): the performance score the model should reproduce
y = df.loc[:, "actual_score"]

In [10]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a plain linear regression on the training portion.
# The fit call is deliberately the last expression of the cell.
model = LinearRegression()
model.fit(X_train, y_train)

In [11]:
# Score the held-out rows, then measure the mean squared error against the true scores
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [12]:
# Print learned weights: one line per feature, pairing each column name of X with
# the coefficient at the same position in model.coef_, formatted to 4 decimals.
# NOTE(review): the pairing is only meaningful if `model` was fitted on these
# columns in this order — true for the training cell above; confirm X has not
# been redefined with different columns before re-running this cell.
print("Learned weights (coefficients):")
for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature}: {coef:.4f}")

Learned weights (coefficients):
load_avg: -0.4291
cpu_percent: -0.2001
memory_percent: -0.0966


In [13]:
# Report the linear model's intercept and the test-set mean squared error.
# NOTE: `mse` is a notebook-global that the Random Forest section reassigns, so
# this cell shows the linear model's error only when run before that section.
print(f"\nIntercept (bias term): {model.intercept_:.4f}")
print(f"Mean Squared Error on Test Set: {mse:.2f}")


Intercept (bias term): 94.3078
Mean Squared Error on Test Set: 13.16


Random Forest Regressor

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [17]:
# Reload the scored log; the file must provide the 'actual_score' column used as the label
df = pd.read_csv("scored_server_logs.csv")

# === Features and label ===
X = df.loc[:, ["load_avg", "cpu_percent", "memory_percent"]]
y = df.loc[:, "actual_score"]

In [18]:
# 80/20 train/test split with a fixed seed so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# === Train Random Forest ===
# 100 trees, seeded for reproducible results; fit() returns the fitted estimator
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# === Evaluate ===
# Predict the held-out rows, then compute both error metrics from the same predictions
y_pred = rf_model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)



In [19]:
# Report the Random Forest's test-set metrics (`mse` and `r2` come from the evaluation cell).
print("🌲 Random Forest Regressor")  # plain literal: no placeholders, so no f-prefix
print(f"➡️  Mean Squared Error: {mse:.2f}")
print(f"➡️  R² Score: {r2:.3f}")

# === Save the model for later use ===
# Kept as the cell's final expression so the list of written file names that
# joblib.dump returns is displayed as the cell output.
joblib.dump(rf_model, "rf_server_score_model.pkl")

🌲 Random Forest Regressor
➡️  Mean Squared Error: 17.48
➡️  R² Score: 0.098


['rf_server_score_model.pkl']

In [None]:
import pandas as pd

def clean_csv(csv_file):
    """Deduplicate a server-metrics CSV and write the result back over the file.

    Reads ``csv_file``, labels its six columns ``timestamp``, ``server_id``,
    ``cpu_percent``, ``memory_percent``, ``load_avg`` and
    ``active_connections``, collapses rows that share the same
    (timestamp, server_id) pair into one row holding the mean of each metric,
    rounds the result to 2 decimal places and overwrites the original file.

    Column labelling:
        * If the header already consists of exactly the six expected names,
          columns are matched by name, so a file whose columns appear in a
          different order is not mislabelled.
        * Otherwise the six names are assigned by position, which requires the
          file to have exactly six columns in the order listed above.

    Args:
        csv_file: Path of the CSV file to clean; it is both read and overwritten.

    Returns:
        None. The outcome is reported with a printed message; any exception
        raised while cleaning is caught and printed rather than re-raised.
    """
    expected_columns = [
        "timestamp",
        "server_id",
        "cpu_percent",
        "memory_percent",
        "load_avg",
        "active_connections",
    ]
    key_columns = expected_columns[:2]
    metric_columns = expected_columns[2:]

    try:
        # Read the CSV
        df = pd.read_csv(csv_file)

        # Ensure proper column names. A blind positional rename would attach the
        # wrong label to the data whenever the header order differs, so trust
        # the header when it already carries exactly the expected names.
        header_matches = (
            len(df.columns) == len(expected_columns)
            and set(df.columns) == set(expected_columns)
        )
        if header_matches:
            df = df[expected_columns]
        else:
            # Unknown header: fall back to positional naming (raises, and is
            # reported below, if the column count is not six).
            df.columns = expected_columns

        # Treat both grouping keys as strings; astype returns a new frame, so
        # the reordered selection above is never modified in place.
        df = df.astype({column: str for column in key_columns})

        # Group by timestamp + server_id, and average the numeric fields
        cleaned_df = df.groupby(key_columns, as_index=False).agg(
            {column: "mean" for column in metric_columns}
        )

        # Round the values to 2 decimal places for cleanliness
        cleaned_df = cleaned_df.round(2)

        # Overwrite the original file
        cleaned_df.to_csv(csv_file, index=False)

        print(f"✅ Cleaned and updated CSV: {csv_file}")

    except Exception as e:
        # Best-effort helper: report the problem instead of stopping the notebook.
        print(f"❌ Error cleaning CSV: {e}")
