In [14]:
import pandas as pd
import numpy as np
import time
import psutil

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, BayesianRidge
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [15]:
# Load the raw table and drop the identifier plus the target's own error
# column ('e_Sint'), neither of which is used as a feature below.
df = pd.read_csv('../data/data.csv').drop(columns=['ID', 'e_Sint'])

# Pick the train/test rows *before* imputing.  Splitting row positions with
# the same test_size / random_state selects the same rows that
# train_test_split(X, y, ...) would, because the shuffle depends only on the
# number of samples and the seed.
train_idx, test_idx = train_test_split(
    np.arange(len(df)), test_size=0.2, random_state=42
)

# Impute missing values with column means computed on the training rows only,
# so that no statistic of the held-out rows leaks into the fitted models.
train_means = df.iloc[train_idx].mean(numeric_only=True)
df = df.fillna(train_means)

# 'Sint' is the regression target; every remaining column is a feature.
X = df.drop(columns=['Sint'])
y = df['Sint']

X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

In [16]:
# Define models
# Full catalogue of candidate regressors, keyed by display name.
# Estimators with internal randomness are seeded (same seed as the
# train/test split) so repeated runs give identical scores.
# NOTE: the training cell further down rebinds `models` to a smaller subset.
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet Regression": ElasticNet(),
    "SVR": SVR(),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(random_state=42),
    "XGBoost": XGBRegressor(random_state=42),
    "Bayesian Ridge": BayesianRidge()
}


In [17]:
# Summarise the cleaned frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5762 entries, 0 to 5761
Data columns (total 16 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   RAdeg      5762 non-null   float64
 1   DEdeg      5762 non-null   float64
 2   e_RAdeg    5762 non-null   float64
 3   e_DEdeg    5762 non-null   float64
 4   RApeak     5762 non-null   float64
 5   DEpeak     5762 non-null   float64
 6   Sint       5762 non-null   float64
 7   Speak      5762 non-null   float64
 8   e_Speak    5762 non-null   float64
 9   rmspeak    5762 non-null   float64
 10  e_rmspeak  5762 non-null   float64
 11  thetamaj   5762 non-null   float64
 12  thetamin   5762 non-null   float64
 13  PA         5762 non-null   float64
 14  alpha      5762 non-null   float64
 15  e_alpha    5762 non-null   float64
dtypes: float64(16)
memory usage: 720.4 KB


In [18]:
# Rebind `models` to the four regressors that are actually trained and
# compared in the cells below (insertion order is the reporting order).
models = dict([
    ("Linear Regression", LinearRegression()),
    ("Ridge Regression", Ridge()),
    ("Lasso Regression", Lasso()),
    ("Random Forest", RandomForestRegressor(random_state=42)),
])

# Fit each regressor on the training split and record its hold-out
# MSE and R² under the model's display name.
results = {}

for label, estimator in models.items():
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)
    results[label] = {
        "MSE": mean_squared_error(y_test, predictions),
        "R²": r2_score(y_test, predictions),
    }

In [19]:
# Thresholds for the mock-up verdicts printed per model.  They are heuristics
# (not calibrated) and do not depend on the model, so they are set once here.
MAE_LIMIT = 0.5 * y.mean()      # "Good" robustness if MAE is below half the mean target
FAST_TRAIN_SECONDS = 1.0        # "Fast" if fitting takes less than this
STABLE_R2 = 0.75                # "Stable" if hold-out R² exceeds this

# Refit every model in `models`, timing training and inference separately,
# then print hold-out metrics followed by the mock-up verdicts.
for name, model in models.items():
    print(f"\nModel: {name}")

    # Train model.  perf_counter() is monotonic and high-resolution, which
    # matters here because several fits finish in a few milliseconds.
    start_train = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - start_train

    # Predict
    start_pred = time.perf_counter()
    y_pred = model.predict(X_test)
    inference_time = time.perf_counter() - start_pred

    # Metrics on the held-out split
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Print evaluation
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"Inference Time: {inference_time:.6f} seconds")
    print(f"Training Time: {train_time:.6f} seconds")

    # Mock-up analyses (adjust logic as needed)
    robustness = "Good" if mae < MAE_LIMIT else "Needs Improvement"
    scalability = "Fast" if train_time < FAST_TRAIN_SECONDS else "Moderate/Slow"
    # NOTE(review): this samples CPU utilisation for 0.1 s *after* fit/predict
    # have already finished, so it reflects whatever the machine is doing at
    # that moment rather than the cost of this model.  Treat it as a
    # placeholder, not an energy measurement.
    energy_use = f"Approx {psutil.cpu_percent(interval=0.1)}%"
    adaptability = "Stable" if r2 > STABLE_R2 else "Possibly Overfitting or Underfitting"

    print(f"Robustness Analysis: {robustness}")
    print(f"Scalability Analysis: {scalability}")
    print(f"Energy Analysis: {energy_use}")
    print(f"Adaptability Analysis: {adaptability}")


Model: Linear Regression
R² Score: 0.9445
MSE: 6.0014
RMSE: 2.4498
MAE: 0.6997
Inference Time: 0.001121 seconds
Training Time: 0.008746 seconds
Robustness Analysis: Needs Improvement
Scalability Analysis: Fast
Energy Analysis: Approx 55.8%
Adaptability Analysis: Stable

Model: Ridge Regression
R² Score: 0.9485
MSE: 5.5674
RMSE: 2.3595
MAE: 0.6433
Inference Time: 0.000626 seconds
Training Time: 0.001427 seconds
Robustness Analysis: Needs Improvement
Scalability Analysis: Fast
Energy Analysis: Approx 47.1%
Adaptability Analysis: Stable

Model: Lasso Regression
R² Score: 0.9472
MSE: 5.7120
RMSE: 2.3900
MAE: 0.6586
Inference Time: 0.000628 seconds
Training Time: 0.001744 seconds
Robustness Analysis: Needs Improvement
Scalability Analysis: Fast
Energy Analysis: Approx 52.1%
Adaptability Analysis: Stable

Model: Random Forest
R² Score: 0.9464
MSE: 5.7926
RMSE: 2.4068
MAE: 0.2603
Inference Time: 0.019205 seconds
Training Time: 3.580269 seconds
Robustness Analysis: Good
Scalability Analysis: 