# 🤖 Notebook: model_training.ipynb

**Goal:** Train and evaluate a Linear Regression baseline.

### Include
- Load `data/cleaned_pulsebat.csv`
- Train/test split
- Fit `LinearRegression`
- Metrics: R², MSE, MAE
- Plot: Predicted vs Actual SOH
- Save model to `models/soh_linear_model.pkl` (joblib)

### Deliverable
- Saved model + performance plots


In [6]:
# Task 1 – Train Linear Regression Model
# --------------------------------------
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import joblib

# --- Choose data/model locations from the current working directory ---
# When the kernel runs inside a folder named "notebooks", the project folders
# are one level up; otherwise paths are taken relative to the working directory.
_in_notebooks_dir = os.path.basename(os.getcwd()) == "notebooks"
DATA_PATH = "../data/cleaned_pulsebat.csv" if _in_notebooks_dir else "data/cleaned_pulsebat.csv"
MODEL_DIR = "../models" if _in_notebooks_dir else "models"

print("Loading data from:", DATA_PATH)

# 1) Read the cleaned dataset into a DataFrame
df = pd.read_csv(DATA_PATH)

# 2) Build the design matrix from columns U1..U21 and the target vector from SOH
target_col = "SOH"
feature_cols = ["U" + str(idx) for idx in range(1, 22)]

y = df[target_col].values
X = df[feature_cols].values

# 3) Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

shapes_message = (
    f"Shapes → X_train: {X_train.shape}, X_test: {X_test.shape}, "
    f"y_train: {y_train.shape}, y_test: {y_test.shape}"
)
print(shapes_message)

# 4) Fit the linear baseline (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

print("✅ Model trained.")
print("Intercept:", model.intercept_)
print("Coefficients (len):", len(model.coef_))

# 5) Persist the fitted estimator so later cells/sessions can reload it
model_path = os.path.join(MODEL_DIR, "soh_linear_model.pkl")
os.makedirs(MODEL_DIR, exist_ok=True)
joblib.dump(model, model_path)
print(f"💾 Saved model → {model_path}")


Loading data from: ../data/cleaned_pulsebat.csv
Shapes → X_train: (536, 21), X_test: (134, 21), y_train: (536,), y_test: (134,)
✅ Model trained.
Intercept: 4.863921407176073
Coefficients (len): 21
💾 Saved model → ../models\soh_linear_model.pkl


In [7]:
# Task 2 - Compute R², MSE, and MAE
# --------------------------------------

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import pandas as pd
import joblib
import os

# --- Resolve paths whether running from project root or /notebooks ---
# Uses the same working-directory check as the training cell, so this cell reads
# and writes inside the project even when the kernel was started from the
# project root (hard-coded "../" paths would point outside the project there).
if os.path.basename(os.getcwd()) == "notebooks":
    DATA_DIR = "../data"
    MODEL_DIR = "../models"
else:
    DATA_DIR = "data"
    MODEL_DIR = "models"
DATA_PATH = f"{DATA_DIR}/cleaned_pulsebat.csv"
METRICS_PATH = f"{DATA_DIR}/model_evaluation_results.csv"

# --- Load trained model if not in memory ---
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files produced by this project.
if "model" not in locals():
    model_path = f"{MODEL_DIR}/soh_linear_model.pkl"
    model = joblib.load(model_path)
    print(f"🔁 Loaded model from {model_path}")

# --- Ensure X_test and y_test exist (if running in a new session) ---
# Rebuilds the split with the same test_size and random_state as the training
# cell so the evaluation rows match the ones held out during training.
if "X_test" not in locals() or "y_test" not in locals():
    df = pd.read_csv(DATA_PATH)
    feature_cols = [f"U{i}" for i in range(1, 22)]
    target_col = "SOH"
    X = df[feature_cols].values
    y = df[target_col].values

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

# --- Step 1: Predict test data ---
y_pred = model.predict(X_test)

# --- Step 2: Calculate evaluation metrics ---
# R² is the coefficient of determination (share of variance explained),
# not a classification-style accuracy, despite the label printed below.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

# --- Step 3: Print results ---
print("======================================")
print("📈 Model Evaluation Metrics")
print("======================================")
print(f"R² Score (Accuracy):       {r2:.4f}")
print(f"Mean Squared Error (MSE):  {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print("======================================\n")

# --- Step 4: Create results table ---
evaluation_table = pd.DataFrame({
    "Metric": ["R² Score", "Mean Squared Error", "Mean Absolute Error"],
    "Value": [r2, mse, mae]
})

from IPython.display import display
display(evaluation_table)

# --- Step 5: Save metrics ---
# Written next to the input data, inside the resolved project data directory.
os.makedirs(DATA_DIR, exist_ok=True)
evaluation_table.to_csv(METRICS_PATH, index=False)
print(f"✅ Evaluation metrics saved to '{METRICS_PATH}'")


📈 Model Evaluation Metrics
R² Score (Accuracy):       0.6561
Mean Squared Error (MSE):  0.0015
Mean Absolute Error (MAE): 0.0303



Unnamed: 0,Metric,Value
0,R² Score,0.656088
1,Mean Squared Error,0.001498
2,Mean Absolute Error,0.030275


✅ Evaluation metrics saved to '../data/model_evaluation_results.csv'
