In [1]:
# Import the necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
df_mv = pd.read_csv("mfcc_mean_variance.csv")

In [6]:
# Data Exploration:
# Display the first few rows of the dataset
df_mv.head()

# Check for missing values
df_mv.isnull().sum()

# Summary statistics
df_mv.describe()

# Count unique labels
df_mv['label'].value_counts()

S1    18994
N     18452
S2      865
S3       81
Name: label, dtype: int64

In [7]:
# Data Preprocessing:
# Split into X (features) and y (target)
X = df_mv.drop('label', axis=1)
y = df_mv['label']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Feature Scaling:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Model Training:
model = LogisticRegression(max_iter=1000)  # max iter = 1000
model.fit(X_train, y_train)

In [10]:
# Model Evaluation:

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Display the classification report
print(classification_report(y_test, y_pred))

# Display the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.86
              precision    recall  f1-score   support

           N       0.85      0.88      0.87      3730
          S1       0.87      0.84      0.85      3741
          S2       0.75      0.66      0.70       190
          S3       0.62      0.56      0.59        18

    accuracy                           0.86      7679
   macro avg       0.77      0.73      0.75      7679
weighted avg       0.86      0.86      0.86      7679

Confusion Matrix:
[[3295  421   12    2]
 [ 579 3140   22    0]
 [   5   56  125    4]
 [   0    0    8   10]]


In [11]:
# Save the Model
import joblib
joblib.dump(model, 'logistic_regression_model.pkl')

['logistic_regression_model.pkl']