In [18]:
import pandas as pd
import numpy as np
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,classification_report, precision_score, recall_score, f1_score, confusion_matrix

In [19]:
data = pd.read_csv('/content/vocal_gender_features_new.csv')

In [20]:
data.head(5)

Unnamed: 0,mean_spectral_centroid,std_spectral_centroid,mean_spectral_bandwidth,std_spectral_bandwidth,mean_spectral_contrast,mean_spectral_flatness,mean_spectral_rolloff,zero_crossing_rate,rms_energy,mean_pitch,...,mfcc_9_std,mfcc_10_mean,mfcc_10_std,mfcc_11_mean,mfcc_11_std,mfcc_12_mean,mfcc_12_std,mfcc_13_mean,mfcc_13_std,label
0,2247.331739,1158.537748,1870.415462,370.405241,21.44071,0.036879,4419.438073,0.169241,0.082552,1592.1033,...,21.73624,2.303085,8.983318,-17.410305,9.115154,0.301804,10.452693,-3.080832,10.146248,0
1,1790.719889,996.554825,1757.898617,410.710318,21.513383,0.018936,3635.742188,0.108068,0.055477,1112.6351,...,13.937135,-0.953942,10.831742,-0.088775,10.29769,-7.281142,10.926579,-0.450248,8.489134,0
2,1977.923363,1010.148667,1747.099555,461.458379,20.476283,0.032616,3873.291016,0.144633,0.060388,1557.5225,...,14.900779,0.260098,14.031009,-0.42067,10.810292,-0.199829,11.986182,3.372986,9.285437,0
3,2037.76555,1311.44063,1745.224852,419.056484,19.516014,0.028482,3826.584507,0.148933,0.029559,1481.0868,...,15.957924,-1.486122,14.461978,-8.479608,12.550333,3.997028,9.912608,-6.946966,10.574301,0
4,1739.383829,1092.623322,1623.135563,450.088465,20.26062,0.025737,3452.903892,0.117961,0.069078,1424.5352,...,15.105562,-3.366364,13.943447,-6.561539,11.944948,1.410639,13.110976,-3.252258,9.866687,0


In [21]:
data.shape

(16148, 44)

In [22]:
data = data.drop_duplicates()

In [23]:
data = data.loc[:, ~data.columns.duplicated()]
print(f"\nDataset shape after removing duplicate columns: {data.shape}")


Dataset shape after removing duplicate columns: (15070, 44)


In [24]:
missing_values = data.isnull().sum()
print("\nMissing Values:\n", missing_values)
data.fillna(data.mean(numeric_only=True), inplace=True)


Missing Values:
 mean_spectral_centroid     0
std_spectral_centroid      0
mean_spectral_bandwidth    0
std_spectral_bandwidth     0
mean_spectral_contrast     0
mean_spectral_flatness     0
mean_spectral_rolloff      0
zero_crossing_rate         0
rms_energy                 0
mean_pitch                 0
min_pitch                  0
max_pitch                  0
std_pitch                  0
spectral_skew              0
spectral_kurtosis          0
energy_entropy             0
log_energy                 0
mfcc_1_mean                0
mfcc_1_std                 0
mfcc_2_mean                0
mfcc_2_std                 0
mfcc_3_mean                0
mfcc_3_std                 0
mfcc_4_mean                0
mfcc_4_std                 0
mfcc_5_mean                0
mfcc_5_std                 0
mfcc_6_mean                0
mfcc_6_std                 0
mfcc_7_mean                0
mfcc_7_std                 0
mfcc_8_mean                0
mfcc_8_std                 0
mfcc_9_mean              

In [26]:
import numpy as np

def remove_outliers(data, cols):
    data_cleaned = data.copy()  # Keep original dataset intact
    for col in cols:
        Q1 = data_cleaned[col].quantile(0.25)
        Q3 = data_cleaned[col].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        data_cleaned = data_cleaned[(data_cleaned[col] >= lower_bound) & (data_cleaned[col] <= upper_bound)]
    return data_cleaned

In [27]:
data.shape


(15070, 44)

In [47]:
def extract_features(file_path, mfcc=True, chroma=True, mel=True):
    y, sr = librosa.load(file_path, mono=True)
    features = []
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13), axis=1)
        features.extend(mfccs)
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr), axis=1)
        features.extend(chroma)
    if mel:
        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr), axis=1)
        features.extend(mel)
    return features

In [48]:
female ="/content/female.wav"
male ="/content/male1.wav.wav"

In [49]:
# Function to load and preprocess the dataset
def load_and_preprocess_data(female, male):
    features = []
    labels = []

    # Load scream data
    import os
    for filename in os.listdir(female):
        path = os.path.join(feamale, filename)
        feature = extract_features(path)
        features.append(feature)
        labels.append(1)  # Use integer labels, e.g., 1 for scream

    # Load non-scream data
    for filename in os.listdir(male):
        path = os.path.join(male, filename)
        feature = extract_features(path)
        features.append(feature)
        labels.append(0)  # Use integer labels, e.g., 0 for non-scream

    X = np.array(features)
    y = np.array(labels)

    # Encode labels using LabelEncoder
    le = LabelEncoder()
    y = le.fit_transform(y)

    # Convert to one-hot encoding
    y = to_categorical(y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    return X_train, X_test, y_train, y_test, le

In [51]:
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f'Test loss: {loss:.4f}')
    print(f'Test accuracy: {accuracy:.4f}')

In [52]:
from sklearn.preprocessing import StandardScaler

# Select numerical columns
numerical_cols = data.select_dtypes(include=[np.number]).columns

# Standardize the numerical features
scaler = StandardScaler()
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

In [53]:
x = data.drop(columns=['label'])
y = data['label']

In [54]:
data.columns

Index(['mean_spectral_centroid', 'std_spectral_centroid',
       'mean_spectral_bandwidth', 'std_spectral_bandwidth',
       'mean_spectral_contrast', 'mean_spectral_flatness',
       'mean_spectral_rolloff', 'zero_crossing_rate', 'rms_energy',
       'mean_pitch', 'min_pitch', 'max_pitch', 'std_pitch', 'spectral_skew',
       'spectral_kurtosis', 'energy_entropy', 'log_energy', 'mfcc_1_mean',
       'mfcc_1_std', 'mfcc_2_mean', 'mfcc_2_std', 'mfcc_3_mean', 'mfcc_3_std',
       'mfcc_4_mean', 'mfcc_4_std', 'mfcc_5_mean', 'mfcc_5_std', 'mfcc_6_mean',
       'mfcc_6_std', 'mfcc_7_mean', 'mfcc_7_std', 'mfcc_8_mean', 'mfcc_8_std',
       'mfcc_9_mean', 'mfcc_9_std', 'mfcc_10_mean', 'mfcc_10_std',
       'mfcc_11_mean', 'mfcc_11_std', 'mfcc_12_mean', 'mfcc_12_std',
       'mfcc_13_mean', 'mfcc_13_std', 'label'],
      dtype='object')

In [55]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=42)

In [56]:
# If labels are numbers (e.g., 0.0, 1.0) but should be categories:
data['label'] = data['label'].astype(str)  # Convert to string

# Apply Label Encoding
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(data['label'])  # Convert categorical labels to integers

print("Encoded labels:", dict(enumerate(le.classes_)))  # Show mapping

Encoded labels: {0: '-1.371861584601527', 1: '0.7289365131471784'}


In [57]:
from sklearn.preprocessing import LabelEncoder

# Convert target column to categorical labels
le = LabelEncoder()
y = le.fit_transform(y)  # This will convert text labels to numbers (e.g., 'Male' -> 0, 'Female' -> 1, etc.)

print("Unique target values after encoding:", set(y))

Unique target values after encoding: {0, 1}


In [58]:

print("final unique values in y:", set(y))


final unique values in y: {0, 1}


In [60]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

models = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier()
}

# Train and evaluate models
for name, model in models.items():
    scores = cross_val_score(model, x, y, cv=cv, scoring='accuracy')
    print(f"{name} Accuracy: {scores.mean():.4f}")

Logistic Regression Accuracy: 0.9921
K-Nearest Neighbors Accuracy: 0.9989
Support Vector Machine Accuracy: 0.9994
Naive Bayes Accuracy: 0.9270
Decision Tree Accuracy: 0.9564
Random Forest Accuracy: 0.9947


In [61]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Split dataset
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

evaluation_results = {}

for name, model in models.items():
    model.fit(x_train, y_train)  # Train model
    y_pred = model.predict(x_test)  # Predict labels

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    evaluation_results[name] = {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1-score": f1
    }

# Print results
for name, metrics in evaluation_results.items():
    print(f"{name}: {metrics}")

Logistic Regression: {'Accuracy': 0.9927007299270073, 'Precision': 0.9927007299270073, 'Recall': 0.9927007299270073, 'F1-score': 0.9927007299270073}
K-Nearest Neighbors: {'Accuracy': 0.9986728599867286, 'Precision': 0.9986732348959745, 'Recall': 0.9986728599867286, 'F1-score': 0.998672562000144}
Support Vector Machine: {'Accuracy': 0.9993364299933643, 'Precision': 0.999337103668498, 'Recall': 0.9993364299933643, 'F1-score': 0.999336281000072}
Naive Bayes: {'Accuracy': 0.9366290643662907, 'Precision': 0.9393723721614109, 'Recall': 0.9366290643662907, 'F1-score': 0.9371779611752948}
Decision Tree: {'Accuracy': 0.9552090245520902, 'Precision': 0.9551992105058291, 'Recall': 0.9552090245520902, 'F1-score': 0.9552040025975863}
Random Forest: {'Accuracy': 0.9950232249502322, 'Precision': 0.9950401468738806, 'Recall': 0.9950232249502322, 'F1-score': 0.9950170064861044}


In [62]:
best_model = max(evaluation_results, key=lambda k: evaluation_results[k]["Accuracy"])
print(f"Best Model: {best_model} with Accuracy: {evaluation_results[best_model]['Accuracy']:.4f}")

Best Model: Support Vector Machine with Accuracy: 0.9993


In [63]:
from sklearn.model_selection import GridSearchCV

# Define parameter grid for SVM
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto'],
}

# Initialize GridSearchCV with SVM model
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=4, scoring='accuracy')

# Fit the grid search
grid_search.fit(x_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print("Best parameters found: ", best_params)

# Train the model with best parameters
svm_model = grid_search.best_estimator_

Best parameters found:  {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}


In [64]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Split data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Make predictions
y_pred = svm_model.predict(x_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-score: 1.0


In [82]:
# Ensure your target variable 'y' is correctly defined
y = data['label']  # Replace 'gender' with the actual column name of your target variable

# Check the distribution of the target classes
class_distribution = y.value_counts()

print(class_distribution)

label
Male      1
Female    1
Name: count, dtype: int64


In [67]:
svm_model = SVC(probability=True)
class_weight='balanced'

In [68]:
from sklearn.model_selection import train_test_split

# Assuming 'gender' is the target column and the rest are features
X = data.drop('label', axis=1)  # Remove the target column from features
y = data['label']  # Target column

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from imblearn.over_sampling import SMOTE

# Apply SMOTE to balance the dataset
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Check the new class distribution
print(y_resampled.value_counts())



label
-1.371861584601527    9841
0.7289365131471784    9841
Name: count, dtype: int64


In [69]:
print(y_resampled.value_counts())

label
-1.371861584601527    9841
0.7289365131471784    9841
Name: count, dtype: int64


In [75]:
import os
import pandas as pd

# Folder where your audio files are stored
audio_folder = "/content/voice dataset"  # Change this to your actual folder path

# Get list of all audio files
audio_files = [f for f in os.listdir(audio_folder) if f.endswith(".wav")]

# Create a DataFrame with file paths and manual labels
data = pd.DataFrame({
    "file_path": [os.path.join(audio_folder, f) for f in audio_files],
    "label": ["Male", "Female",]  # Replace this with actual labels
})

# Save to CSV for future use
data.to_csv("updated_dataset.csv", index=False)

In [78]:
import numpy as np
import pandas as pd
import librosa
import os
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Function to extract 43 features from an audio file
def extract_features(audio_path):
    y, sr = librosa.load(audio_path, sr=None)
    features = np.hstack([
        np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=43), axis=1),  # 13 MFCCs
        np.mean(librosa.feature.chroma_stft(y=y, sr=sr), axis=1),  # 12 Chroma
        np.mean(librosa.feature.melspectrogram(y=y, sr=sr), axis=1)[:10],  # 10 Mel Spectrogram
        np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),  # 1 Spectral Centroid
        np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)),  # 1 Spectral Rolloff
        np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)),  # 1 Spectral Bandwidth
        np.mean(librosa.feature.zero_crossing_rate(y)),  # 1 Zero Crossing Rate
        np.mean(librosa.feature.rms(y=y)),  # 1 Root Mean Square Energy
        np.var(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=3), axis=1)  # 3 MFCC Variances
    ])
    return features

# Load dataset (Make sure it has correct file paths and labels)
  # Update with the actual dataset file

# Extract features for all audio files
x = np.array([extract_features(file) for file in data['file_path']])  # file_path = column name with audio file paths
y = data['label'].values  # Gender labels (Male/Female/Other)

# Train-Test Split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Train SVM Model
model = SVC(kernel='linear')
model.fit(x, y)

# Save the trained model
joblib.dump(model, 'modelfile.pkl')

# Download the model file for VS Code
from google.colab import files
files.download('modelfile.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>