<a href="https://colab.research.google.com/github/rb7488/SIC_Project/blob/main/02_baseline_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**This notebook focuses on:**

* Loading preprocessed UCI HAR data
* Training baseline machine learning models
* Evaluating their performance
* Creating a reference benchmark for adaptive inference

These baseline models help us understand accuracy vs. complexity trade-offs.

In [None]:
# Mount Google Drive at /content/drive (the `google.colab` module is only
# available inside a Colab runtime). BASE_PATH in the next cell points into
# this mount, so this cell must run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Project root on the mounted Drive. Later cells read the preprocessed arrays
# from f"{BASE_PATH}/processed_data" and write fitted models to
# f"{BASE_PATH}/models".
BASE_PATH = "/content/drive/MyDrive/Adaptive_Inference_Project"

In [None]:
import numpy as np
import os

# Resolve the four arrays expected under processed_data/ (the error message
# below points at Notebook-1 as their producer). Building each path exactly
# once keeps the existence check and the loads from drifting apart.
DATA_DIR = f"{BASE_PATH}/processed_data"
required_files = [
    f"{DATA_DIR}/{name}.npy"
    for name in ("X_train", "X_test", "y_train", "y_test")
]

# Fail fast, reporting every missing file at once instead of only the first.
missing_files = [path for path in required_files if not os.path.exists(path)]
if missing_files:
    raise FileNotFoundError(
        f"{', '.join(missing_files)} not found. Please run Notebook-1 first."
    )

# Unpacking order matches the order of `required_files`.
X_train, X_test, y_train, y_test = (np.load(path) for path in required_files)

# Sanity checks: every sample needs a label, and both splits must share the
# same feature layout, otherwise the models below would fail (or silently
# train on misaligned data).
if X_train.shape[0] != y_train.shape[0] or X_test.shape[0] != y_test.shape[0]:
    raise ValueError(
        "Feature/label sample counts do not match: "
        f"train {X_train.shape[0]} vs {y_train.shape[0]}, "
        f"test {X_test.shape[0]} vs {y_test.shape[0]}. "
        "Please re-run Notebook-1."
    )
if X_train.shape[1:] != X_test.shape[1:]:
    raise ValueError(
        "Train and test feature shapes differ: "
        f"{X_train.shape[1:]} vs {X_test.shape[1:]}. Please re-run Notebook-1."
    )

print("Data loaded successfully")
print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

Data loaded successfully
Train shape: (7352, 561)
Test shape: (2947, 561)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# 🧪 Baseline Model 1: Logistic Regression

Low complexity, fast inference.

In [None]:
# Fit the logistic-regression baseline, allowing the solver up to 1000 iterations.
# `fit` returns the estimator itself, so construction and training can be chained.
lr_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the fitted model on the held-out test split.
y_pred_lr = lr_model.predict(X_test)
lr_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_lr)

print("Logistic Regression Accuracy:", lr_accuracy)

Logistic Regression Accuracy: 0.9552086868001357


In [None]:
# Per-class precision / recall / F1 for the logistic-regression predictions.
lr_report = classification_report(y_true=y_test, y_pred=y_pred_lr)
print(lr_report)

              precision    recall  f1-score   support

           0       1.00      0.99      1.00       537
           1       0.97      0.88      0.92       491
           2       0.89      0.97      0.93       532
           3       0.94      0.99      0.97       496
           4       0.99      0.94      0.96       420
           5       0.96      0.95      0.95       471

    accuracy                           0.96      2947
   macro avg       0.96      0.95      0.95      2947
weighted avg       0.96      0.96      0.96      2947



# 🧪 Baseline Model 2: Support Vector Machine (SVM)

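Non-linear (RBF-kernel) classifier with a higher computational cost than logistic regression. Note that in the run recorded below its test accuracy (≈0.952) is slightly lower than logistic regression's (≈0.955), not higher.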

In [None]:
# Fit the RBF-kernel support vector classifier on the training split.
# `fit` returns the estimator itself, so construction and training can be chained.
svm_model = SVC(kernel="rbf").fit(X_train, y_train)

# Score the fitted model on the held-out test split.
y_pred_svm = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_svm)

print("SVM Accuracy:", svm_accuracy)

SVM Accuracy: 0.9518154054971157


In [None]:
# Per-class precision / recall / F1 for the SVM predictions.
svm_report = classification_report(y_true=y_test, y_pred=y_pred_svm)
print(svm_report)

              precision    recall  f1-score   support

           0       0.99      1.00      1.00       537
           1       0.94      0.90      0.92       491
           2       0.92      0.95      0.93       532
           3       0.96      0.97      0.97       496
           4       0.98      0.92      0.95       420
           5       0.93      0.97      0.95       471

    accuracy                           0.95      2947
   macro avg       0.95      0.95      0.95      2947
weighted avg       0.95      0.95      0.95      2947



# 🧪 Baseline Model 3: Random Forest

Robust and handles feature interactions well.

In [None]:
# Random-forest baseline: 100 trees, fixed seed so the run is repeatable,
# and n_jobs=-1 to use every available core.
rf_params = {"n_estimators": 100, "random_state": 42, "n_jobs": -1}
rf_model = RandomForestClassifier(**rf_params).fit(X_train, y_train)

# Score the fitted forest on the held-out test split.
y_pred_rf = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_rf)

print("Random Forest Accuracy:", rf_accuracy)

Random Forest Accuracy: 0.9260264675941635


In [None]:
# Per-class precision / recall / F1 for the random-forest predictions.
rf_report = classification_report(y_true=y_test, y_pred=y_pred_rf)
print(rf_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.91      0.89      0.90       491
           2       0.90      0.92      0.91       532
           3       0.89      0.96      0.92       496
           4       0.97      0.86      0.91       420
           5       0.89      0.90      0.90       471

    accuracy                           0.93      2947
   macro avg       0.93      0.92      0.92      2947
weighted avg       0.93      0.93      0.93      2947



# 📊 Model Comparison

In [None]:
# One row per baseline (name, test accuracy), in the order the models were trained.
accuracy_rows = [
    ("Logistic Regression", lr_accuracy),
    ("SVM", svm_accuracy),
    ("Random Forest", rf_accuracy),
]
results = pd.DataFrame(accuracy_rows, columns=["Model", "Accuracy"])

# Bare last expression so the notebook renders the table.
results

Unnamed: 0,Model,Accuracy
0,Logistic Regression,0.955209
1,SVM,0.951815
2,Random Forest,0.926026


In [None]:
import joblib
import os

# Persist each fitted baseline as a pickle under <BASE_PATH>/models.
models_dir = f"{BASE_PATH}/models"
os.makedirs(models_dir, exist_ok=True)

# File stem -> fitted estimator; dict order fixes the order the files are written.
fitted_models = {
    "logistic_regression": lr_model,
    "svm": svm_model,
    "random_forest": rf_model,
}
for file_stem, fitted_model in fitted_models.items():
    joblib.dump(fitted_model, f"{models_dir}/{file_stem}.pkl")

print("Baseline models saved successfully")

Baseline models saved successfully
