In [1]:
import pandas as pd
import os
import joblib
from sklearn.metrics import classification_report, accuracy_score

In [None]:
def evaluate_model():
    dataset_path = os.path.join("..", "Datasets", "mitbih_test.csv")
    model_path = os.path.join("..", "models", "heart_model.pkl")

    if not os.path.exists(dataset_path):
        raise FileNotFoundError(f"Dataset not found at {dataset_path}")
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Trained model not found at {model_path}. Run train.py first.")

In [None]:
# Define dataset path
dataset_path = os.path.join("..", "Datasets", "mitbih_test.csv")

# Load dataset
data = pd.read_csv(dataset_path)
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

In [None]:
# load model
model = joblib.load(os.path.join("..", "models", "heart_model.pkl"))


In [None]:
#predict
y_pred = model.predict(x)


In [None]:
# Evaluate
    print("✅ Evaluation Results:")
    print("Accuracy:", accuracy_score(y, y_pred))
    print(classification_report(y, y_pred))

if __name__ == "__main__":
    evaluate_model()

In [2]:
# notebooks/train_and_evaluate.ipynb

import pandas as pd
import os
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

#  Load dataset
dataset_path = "../Datasets/mitbih_test.csv"
data = pd.read_csv(dataset_path)

# Features & Target
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save model
os.makedirs("../models", exist_ok=True)
joblib.dump(model, "../models/heart_model.pkl")

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# ✅ Example prediction on one row
sample = X_test.iloc[0].values.reshape(1, -1)
print("Sample Prediction:", model.predict(sample))


Accuracy: 0.9652888787394382
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98      3625
         1.0       0.96      0.51      0.67       106
         2.0       0.97      0.84      0.90       289
         3.0       0.87      0.38      0.53        34
         4.0       0.99      0.92      0.96       325

    accuracy                           0.97      4379
   macro avg       0.95      0.73      0.81      4379
weighted avg       0.97      0.97      0.96      4379

Sample Prediction: [0.]




In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib

# Load dataset
data = pd.read_csv("../Datasets/mitbih_test.csv")

X = data.iloc[:, :-1]   # features: all columns except last
y = data.iloc[:, -1]    # labels: last column

# Split data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save model + scaler
joblib.dump(model, "../models/heart2_model.pkl")
joblib.dump(scaler, "../models/scaler2.pkl")

print(" Model and Scaler saved in models/ folder")


 Model and Scaler saved in models/ folder


### Train and evaluate models from datasets


In [4]:
# imports and load
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import joblib
from sklearn.metrics import accuracy_score, classification_report

root = Path("..")  # if notebook is in notebooks/
df = pd.read_csv(root / "Datasets" / "mitbih_test.csv", header=None)
print(df.shape)
df.head()



(21892, 188)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
0,1.0,0.758264,0.11157,0.0,0.080579,0.078512,0.066116,0.049587,0.047521,0.035124,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.908425,0.783883,0.531136,0.362637,0.3663,0.344322,0.333333,0.307692,0.296703,0.300366,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.730088,0.212389,0.0,0.119469,0.10177,0.10177,0.110619,0.123894,0.115044,0.132743,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.910417,0.68125,0.472917,0.229167,0.06875,0.0,0.004167,0.014583,0.054167,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.57047,0.399329,0.238255,0.147651,0.0,0.003356,0.040268,0.080537,0.07047,0.090604,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# prepare X,y
X = df.iloc[:, :-1].astype(float).values
y = df.iloc[:, -1].astype(int).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)



In [6]:
# Train
clf = RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1)
clf.fit(X_train_s, y_train)
y_pred = clf.predict(X_test_s)
print("Acc:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, digits=4))



Acc: 0.964832153459694
              precision    recall  f1-score   support

           0     0.9638    0.9981    0.9806      3624
           1     0.9394    0.5586    0.7006       111
           2     0.9710    0.8069    0.8814       290
           3     0.8421    0.5000    0.6275        32
           4     0.9867    0.9193    0.9518       322

    accuracy                         0.9648      4379
   macro avg     0.9406    0.7566    0.8284      4379
weighted avg     0.9644    0.9648    0.9622      4379



In [7]:
# save models
joblib.dump(clf, root / "models" / "mitbih_1_rf.pkl")
joblib.dump(scaler, root / "models" / "mitbih_1_scaler.pkl")
print("saved models/")


saved models/
