In [6]:
# Folder that is scanned for labelled .wav files. The path is relative, so it
# resolves against the kernel's current working directory.
dataset = "dataset"

In [2]:
%pip install librosa

Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.1.0-py3-none-any.whl.metadata (9.0 kB)
Collecting numba>=0.51.0 (from librosa)
  Downloading numba-0.63.1-cp311-cp311-win_amd64.whl.metadata (2.8 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-1.0.0-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Collecting lazy_loader>=0.1 (from librosa)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Collecting msgpack>=1.0 (from librosa)
  Downloading msgpack-1.1.2-cp311-cp311-win_amd64.whl.metadata (8.4 kB)
Collecting llvmlite<0.47,>=0.46.0dev0 (from numba>=0.51.0->librosa)
  Downloading llvmlite-0.46.0-cp311-cp311-win_amd64.whl.metadata (4.9 kB)
Collecting num

In [3]:


import os
import librosa
import numpy as np
import joblib

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [7]:
# -------- CONFIGURATION --------
# Root folder of the audio files, the sample rate every clip is loaded at,
# and the number of MFCC coefficients extracted per clip.
DATASET_PATH = dataset
SAMPLE_RATE, N_MFCC = 16000, 40

# Accumulators for the loading step: one feature vector and one label per file.
X, y = [], []

In [8]:
# -------- LOAD DATA --------
# Every sub-directory of DATASET_PATH is treated as one class: the directory
# name is the label of each .wav file found inside it. Non-directories and
# non-.wav files are skipped.
#
# Rows are collected in fresh lists so this cell can be re-run on its own:
# X and y are rebound to ndarrays at the end, and ndarrays have no .append.
feature_rows = []
label_rows = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and therefore the seeded train/test split) stable across runs and machines.
for label in sorted(os.listdir(DATASET_PATH)):
    label_path = os.path.join(DATASET_PATH, label)
    if not os.path.isdir(label_path):
        continue

    for file in sorted(os.listdir(label_path)):
        if not file.lower().endswith(".wav"):
            continue

        file_path = os.path.join(label_path, file)
        audio, sr = librosa.load(file_path, sr=SAMPLE_RATE)

        # Average the MFCC matrix over its frame axis so every clip, whatever
        # its length, is summarised by a single N_MFCC-long vector.
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=N_MFCC)
        mfcc_mean = np.mean(mfcc.T, axis=0)

        feature_rows.append(mfcc_mean)
        label_rows.append(label)

# Fail here with a clear message instead of letting an empty array reach the
# train/test split further down.
if not feature_rows:
    raise ValueError(f"No .wav files found under {DATASET_PATH!r}")

X = np.array(feature_rows)
y = np.array(label_rows)

print("Dataset shape:", X.shape, y.shape)

Dataset shape: (48, 40) (48,)


In [9]:
# -------- LABEL ENCODING --------
# Turn the string labels into integer class ids. The fitted encoder is kept
# so ids can be mapped back to names (label_encoder.classes_) later on.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# -------- TRAIN / TEST SPLIT --------
# Hold out a quarter of the samples, stratified on the class id so each class
# keeps its proportion in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    stratify=y_encoded,
    test_size=0.25,
    random_state=42,
)

In [10]:
# -------- SCALING (EXPLICIT) --------
# The scaler is fitted on the training split only; both splits are then
# transformed with those training-set statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# -------- MODEL (RAW) --------
model = LogisticRegression(class_weight="balanced", max_iter=1000)

# Left as the cell's last expression so the fitted estimator is displayed.
model.fit(X_train_scaled, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [11]:
# Predicted (integer-encoded) classes for the scaled held-out split.
y_pred = model.predict(X_test_scaled)

In [12]:
# LabelEncoder assigns ids 0..n_classes-1 in the order of classes_. Passing
# that full id list as `labels` keeps target_names and the confusion-matrix
# rows/columns aligned with the encoder even when a class happens to be absent
# from both y_test and y_pred (without it, classification_report raises on the
# length mismatch and the matrix silently drops the missing class).
class_ids = np.arange(len(label_encoder.classes_))

print("\nClassification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=class_ids,
        target_names=label_encoder.classes_,
    )
)

# Rows are true classes, columns are predicted classes, both in class_ids order.
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_ids))



Classification Report:
              precision    recall  f1-score   support

       click       1.00      1.00      1.00         3
 open_chrome       0.75      1.00      0.86         3
open_notepad       1.00      0.50      0.67         2
 scroll_down       1.00      1.00      1.00         2
   scroll_up       1.00      1.00      1.00         2

    accuracy                           0.92        12
   macro avg       0.95      0.90      0.90        12
weighted avg       0.94      0.92      0.91        12


Confusion Matrix:
[[3 0 0 0 0]
 [0 3 0 0 0]
 [0 1 1 0 0]
 [0 0 0 2 0]
 [0 0 0 0 2]]


In [13]:
import pickle

# Everything required to turn a new clip into a predicted command name is
# stored together, so the pieces cannot drift apart between training and use.
model_bundle = {
    "model": model,
    "scaler": scaler,
    "label_encoder": label_encoder,
    # Feature-extraction settings the scaler and model were fitted on. Inference
    # code has to load audio at this sample rate and extract this many MFCCs,
    # otherwise its feature vectors will not match what the model expects.
    "sample_rate": SAMPLE_RATE,
    "n_mfcc": N_MFCC,
}

with open("voice_command_raw_model.pkl", "wb") as f:
    pickle.dump(model_bundle, f)

print("✅ Pickle file created: voice_command_raw_model.pkl")


✅ Pickle file created: voice_command_raw_model.pkl


In [None]:
import pickle

# NOTE: unpickling runs code embedded in the file, so only load a bundle that
# was written by a trusted source.
with open("voice_command_raw_model.pkl", "rb") as f:
    data = pickle.load(f)

# Pull the three fitted objects back out of the bundle by key.
model, scaler, label_encoder = (
    data[key] for key in ("model", "scaler", "label_encoder")
)

print("Loaded classes:", label_encoder.classes_)
