In [4]:
# Build a regression model to predict SOC using time, voltage, current, and max_temperature
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

# Forward slashes are accepted by pandas on Windows as well as on POSIX systems,
# so the notebook is not tied to one OS (the model/results paths further down
# already use this form).
df = pd.read_csv("../unibo-powertools-dataset/unibo-powertools-dataset/test_result_trial_end_cleaned_v1.0.csv")

# Select features and target
features = ['time', 'voltage', 'current', 'max_temperature']
target = 'SOC'

# Drop rows with missing values in selected columns
model_df = df.dropna(subset=features + [target])

X = model_df[features]
y = model_df[target]

# Split data
# NOTE(review): rows are shuffled individually. If the CSV holds consecutive
# samples of the same discharge test, neighbouring rows end up in both train and
# test and the scores below may be optimistic -- consider a per-test (grouped)
# split. TODO confirm against the dataset layout.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestRegressor(n_estimators=20, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# Four decimals (same precision as the later result cells): with two decimals an
# R^2 of 0.9968 is displayed as a misleading "1.00".
print(f"Test MSE: {mse:.4f}")
print(f"Test R^2: {r2:.4f}")

Test MSE: 1.15
Test R^2: 1.00


In [6]:
# Save the trained model with a unique name including version info
import datetime
import os

import joblib

# Define model and dataset version info
model_version = "v1.0"
dataset_version = "test_result_trial_end_v1.0"
model_type = "RandomForestRegressor"
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# joblib.dump does not create missing parent folders, so make sure the target
# directory exists (no-op when it is already there).
models_dir = "../models"
os.makedirs(models_dir, exist_ok=True)

model_filename = f"{models_dir}/SOC_{model_type}_{model_version}_{dataset_version}_{timestamp}.joblib"
joblib.dump(model, model_filename)
print(f"Model saved as: {model_filename}")

Model saved as: ../models/SOC_RandomForestRegressor_v1.0_test_result_trial_end_v1.0_20250804_003236.joblib


In [7]:
# Create comprehensive results tracking system
import os
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Folder that collects the metric table and feature-importance exports.
# exist_ok=True keeps a re-run of this cell from failing when it is already there.
results_dir = '../results'
os.makedirs(name=results_dir, exist_ok=True)

# Calculate comprehensive metrics
def calculate_metrics(y_true, y_pred, model_name, dataset_name, features_used,
                      train_size=None, run_timestamp=None):
    """Compute regression metrics for one model run and return them as a flat dict.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted target values, paired by position.
    model_name, dataset_name : str
        Labels stored verbatim in the result row.
    features_used : sequence of str
        Feature names; stored comma-joined together with their count.
    train_size : int, optional
        Number of training samples. When omitted, falls back to
        ``len(y_train)`` read from the notebook namespace.
    run_timestamp : str, optional
        Identifier of the run. When omitted, falls back to the notebook-level
        ``timestamp`` variable.

    Returns
    -------
    dict
        Keys: model_name, dataset_name, features_used, num_features,
        train_size, test_size, mse, rmse, mae, r2_score, mape, timestamp.
        ``mape`` is a percentage computed over samples whose true value is
        non-zero; it is NaN when every true value is zero.
    """
    # Work on plain arrays so the element-wise arithmetic below pairs values by
    # position; two pandas Series with different indexes would be aligned by label.
    y_true_arr = np.asarray(y_true, dtype=float)
    y_pred_arr = np.asarray(y_pred, dtype=float)

    mse = mean_squared_error(y_true_arr, y_pred_arr)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true_arr, y_pred_arr)
    r2 = r2_score(y_true_arr, y_pred_arr)

    # Mean Absolute Percentage Error. Dividing by a true value of exactly 0
    # yields inf/NaN and poisons the mean, so those samples are left out.
    nonzero = y_true_arr != 0
    if nonzero.any():
        relative_error = (y_true_arr[nonzero] - y_pred_arr[nonzero]) / y_true_arr[nonzero]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        mape = np.nan

    # Backward-compatible fallbacks to the notebook globals used previously.
    if train_size is None:
        train_size = len(y_train)
    if run_timestamp is None:
        run_timestamp = timestamp

    return {
        'model_name': model_name,
        'dataset_name': dataset_name,
        'features_used': ', '.join(features_used),
        'num_features': len(features_used),
        'train_size': train_size,
        'test_size': len(y_true),
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'r2_score': r2,
        'mape': mape,
        'timestamp': run_timestamp
    }

# Calculate metrics for current model
current_metrics = calculate_metrics(
    y_test, y_pred,
    model_type,
    dataset_version,
    features
)

# Load existing results (if any) and append the current run
results_file = os.path.join(results_dir, 'model_results.csv')
new_result = pd.DataFrame([current_metrics])
if os.path.exists(results_file):
    # Read timestamps as text so they compare equal to the in-memory string
    previous_results = pd.read_csv(results_file, dtype={'timestamp': str})
    results_df = pd.concat([previous_results, new_result], ignore_index=True)
else:
    # Nothing logged yet: start the table from this run instead of
    # concatenating onto an empty frame
    results_df = new_result

# Re-running this cell logs the same run again (same model, dataset and
# timestamp); keep only the most recent copy so the CSV does not collect
# duplicate rows.
results_df = results_df.drop_duplicates(
    subset=['model_name', 'dataset_name', 'timestamp'], keep='last'
).reset_index(drop=True)

# Save updated results
results_df.to_csv(results_file, index=False)

print(f"Results saved to: {results_file}")
print("\nCurrent Model Performance:")
print(f"Model: {current_metrics['model_name']}")
print(f"Dataset: {current_metrics['dataset_name']}")
print(f"Features: {current_metrics['features_used']}")
print(f"Test Size: {current_metrics['test_size']}")
print(f"MSE: {current_metrics['mse']:.4f}")
print(f"RMSE: {current_metrics['rmse']:.4f}")
print(f"MAE: {current_metrics['mae']:.4f}")
print(f"R² Score: {current_metrics['r2_score']:.4f}")
print(f"MAPE: {current_metrics['mape']:.2f}%")

# Display all results (the table always holds at least the current run here)
print("\n=== All Model Results ===")
print(results_df[['model_name', 'dataset_name', 'r2_score', 'rmse', 'mae', 'mape']].round(4))

# Save detailed results with feature importance if available
if hasattr(model, 'feature_importances_'):
    feature_importance = pd.DataFrame({
        'feature': features,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)

    print("\n=== Feature Importance ===")
    print(feature_importance)

    # Save feature importance
    importance_file = os.path.join(results_dir, f'feature_importance_{model_type}_{timestamp}.csv')
    feature_importance.to_csv(importance_file, index=False)

Results saved to: ../results\model_results.csv

Current Model Performance:
Model: RandomForestRegressor
Dataset: test_result_trial_end_v1.0
Features: time, voltage, current, max_temperature
Test Size: 81153
MSE: 1.1479
RMSE: 1.0714
MAE: 0.2397
R² Score: 0.9968
MAPE: 12.05%

=== All Model Results ===
              model_name                dataset_name  r2_score    rmse  \
0  RandomForestRegressor  test_result_trial_end_v1.0    0.9968  1.0714   

      mae    mape  
0  0.2397  12.048  

=== Feature Importance ===
           feature  importance
1          voltage    0.893856
2          current    0.047865
0             time    0.047764
3  max_temperature    0.010514


In [8]:
# Example: Train different models for comparison
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor
import datetime

# Test different models
models_to_test = {
    'LinearRegression': LinearRegression(),
    'DecisionTreeRegressor': DecisionTreeRegressor(random_state=42),
    'GradientBoostingRegressor': GradientBoostingRegressor(random_state=42, n_estimators=50)
}

# joblib.dump does not create missing parent folders
os.makedirs("../models", exist_ok=True)

# Train and evaluate each model, collecting one metrics row per model
comparison_rows = []
for model_name, model_instance in models_to_test.items():
    print(f"\nTraining {model_name}...")

    # Train the model
    model_instance.fit(X_train, y_train)

    # Make predictions
    y_pred_new = model_instance.predict(X_test)

    # Calculate metrics
    timestamp_new = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    metrics = calculate_metrics(
        y_test, y_pred_new,
        model_name,
        dataset_version,
        features
    )

    # calculate_metrics stamps the row with the notebook-level timestamp;
    # replace it with the time this particular model was evaluated
    metrics['timestamp'] = timestamp_new
    comparison_rows.append(metrics)

    # Save the model (version taken from model_version so the file name cannot
    # drift from the version recorded for the first model)
    model_filename_new = f"../models/SOC_{model_name}_{model_version}_{dataset_version}_{timestamp_new}.joblib"
    joblib.dump(model_instance, model_filename_new)

    print(f"Model saved: {model_filename_new}")
    print(f"R² Score: {metrics['r2_score']:.4f}")
    print(f"RMSE: {metrics['rmse']:.4f}")

# Append all new rows in one concat instead of growing the frame per iteration
results_df = pd.concat([results_df, pd.DataFrame(comparison_rows)], ignore_index=True)

# Save updated results
results_df.to_csv(results_file, index=False)
print(f"\nAll results updated in: {results_file}")

# Display final comparison
print("\n=== Final Model Comparison ===")
comparison_cols = ['model_name', 'r2_score', 'rmse', 'mae', 'mape']
print(results_df[comparison_cols].round(4).to_string(index=False))


Training LinearRegression...
Model saved: ../models/SOC_LinearRegression_v1.0_test_result_trial_end_v1.0_20250804_004225.joblib
R² Score: 0.8599
RMSE: 7.0773

Training DecisionTreeRegressor...
Model saved: ../models/SOC_DecisionTreeRegressor_v1.0_test_result_trial_end_v1.0_20250804_004228.joblib
R² Score: 0.9948
RMSE: 1.3635

Training GradientBoostingRegressor...
Model saved: ../models/SOC_GradientBoostingRegressor_v1.0_test_result_trial_end_v1.0_20250804_004300.joblib
R² Score: 0.9480
RMSE: 4.3139

All results updated in: ../results\model_results.csv

=== Final Model Comparison ===
               model_name  r2_score   rmse    mae      mape
    RandomForestRegressor    0.9968 1.0714 0.2397   12.0480
         LinearRegression    0.8599 7.0773 5.4789 1467.9599
    DecisionTreeRegressor    0.9948 1.3635 0.2473   12.4251
GradientBoostingRegressor    0.9480 4.3139 3.2723  191.8751
