In [5]:
import pandas as pd, numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


# Read the feature table from disk and preview its first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# previously saved index — confirm, and consider index_col=0 if so.
dataset_csv = 'vag_dataset.csv'
df = pd.read_csv(dataset_csv)
df.head()



Unnamed: 0,knee_condition,severity_level,treatment_advised,rms_amplitude,peak_frequency,spectral_entropy,zero_crossing_rate,mean_frequency
0,normal,,No Treatment,1.051421,20.0,-1799.219746,0.001,39.864485
1,normal,,No Treatment,1.071817,20.0,-2010.82733,0.0,40.652874
2,osteoarthritis,Mild,Physiotherapy,1.098748,20.0,-2172.177329,0.0,39.961072
3,osteoarthritis,Severe,Surgery,1.063046,20.0,-1915.991388,0.0,39.86016
4,ligament_injury,Severe,Surgery,1.222318,20.0,-3331.987202,0.001,44.74768


In [6]:
# Columns of `df` used as model inputs, in the order the model will see them.
NUM_FEATS = [
    "rms_amplitude", "peak_frequency", "spectral_entropy",
    "zero_crossing_rate", "mean_frequency",
]

# Independent copy of the feature columns, so later changes to X leave df untouched.
X = df.loc[:, NUM_FEATS].copy()

# Target string labels: surrounding whitespace removed and lower-cased so that
# spelling variants of the same condition collapse into one class.
raw_labels = df["knee_condition"]
y = raw_labels.str.strip().str.lower()


In [7]:
# Encode the string class labels as integers (0, 1, 2, …). The fitted encoder
# keeps the label order in `le.classes_` for decoding predictions later.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# Split BEFORE scaling: fitting the scaler on the full table would let the
# validation rows influence the mean/variance used for preprocessing
# (data leakage), which biases the validation metrics.
X_tr_raw, X_val_raw, y_tr, y_val = train_test_split(
    X, y_enc, test_size=0.2, random_state=42, stratify=y_enc
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to the validation rows.
scaler = StandardScaler()
X_tr = scaler.fit_transform(X_tr_raw)
X_val = scaler.transform(X_val_raw)

# Retained so any cell that still references X_scaled keeps working; it is now
# the full table transformed with the train-only scaler.
X_scaled = scaler.transform(X)

In [8]:
# Random-forest settings gathered in one place; the fixed random_state keeps
# the fitted forest reproducible across runs.
rf_params = dict(n_estimators=200, max_depth=None, random_state=42)
clf = RandomForestClassifier(**rf_params)
clf.fit(X_tr, y_tr)

# Predict on the held-out split and print per-class precision / recall / F1,
# using the encoder's class names as row labels.
y_pred = clf.predict(X_val)
report = classification_report(y_val, y_pred, target_names=le.classes_)
print(report)

                 precision    recall  f1-score   support

ligament_injury       1.00      1.00      1.00       174
         normal       0.78      0.82      0.80       159
 osteoarthritis       0.82      0.78      0.80       167

       accuracy                           0.87       500
      macro avg       0.87      0.87      0.87       500
   weighted avg       0.87      0.87      0.87       500



In [9]:
import joblib

# Persist the fitted classifier together with the scaler and label encoder it
# was trained with, so all three can be restored from a single file.
artifact = {"model": clf, "scaler": scaler, "encoder": le}
joblib.dump(artifact, "vag_feature_classifier.pkl")


['vag_feature_classifier.pkl']