# Installing and Importing Required Libraries

In [None]:
pip install librosa

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
import librosa.display
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from google.colab import drive
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import numpy as np
import pandas as pd



# Loading the Preprocessed File and Checking the Labels

In [None]:
# Mount Google Drive into the Colab filesystem so the dataset under /content/drive can be read
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the CSV index of the dataset (the code below reads its 'file', 'speaker' and 'label' columns)
file_path = '/content/drive/My Drive/deepvoiceguard/dataset.csv'
df = pd.read_csv(file_path)

# Rows whose label is missing (NaN). This only detects absent labels;
# it does not validate the label values themselves.
missing_labels = df[df['label'].isna()]

if missing_labels.empty:
    # Report exactly what was verified: no label is missing.
    print("No missing labels found: every file has a label.")
else:
    print("There are files with missing labels:")
    print(missing_labels)

# Display how many files carry each label (useful for spotting class imbalance)
label_distribution = df['label'].value_counts()
print("\nLabel distribution:")
print(label_distribution)

Building the file paths of all files listed in the CSV file, together with their speaker and their Spoof/Bona fide label

In [None]:
# Build three parallel lists (audio path, speaker, label) in CSV row order.
# Iterating the columns directly avoids label-based `df[col][i]` lookups,
# which only work when the DataFrame index is exactly 0..n-1.
file_path = [
    f"/content/drive/MyDrive/deepvoiceguard/preprocessed_dataset/{name}.wav"
    for name in df['file']
]
speaker = list(df['speaker'])
label = list(df['label'])

In [None]:
# Sanity check: print the entry at index 8 of each list so it can be compared with the matching CSV row
print(file_path[8],speaker[8],label[8])

# Extracting Features and Creating Spectrograms

 In this project, we extract three primary features: MFCCs, Chroma, and Zero Crossing Rate (ZCR).

 MFCCs (Mel-Frequency Cepstral Coefficients) are a compact representation of the short-term power spectrum of a sound signal on the mel scale.

 Chroma features, or chromagrams, represent the 12 different pitch classes (semitones) of the musical octave. Each pitch class collects the energy of that note across all octaves, so the representation does not depend on the octave in which the note occurs.

 ZCR is the rate at which the audio signal changes sign, from positive to negative or vice versa. It is a rough measure of the frequency content and noisiness of the signal.


 By combining MFCC, Chroma, and Zero Crossing Rate features, we can capture a comprehensive set of characteristics from the audio signal. MFCCs provide detailed information about the spectral properties, Chroma features capture harmonic content, and ZCR gives insights into the frequency content and noisiness. Together, these features form a robust foundation for training machine learning models to recognize and classify different types of audio signals, whether it be for spoof detection, speaker recognition, or other audio analysis tasks.

In [None]:
# Function for extracting MFCC, Chroma, and ZCR features
def extract_features(file_path, duration=3, sr=22050, n_mfcc=13):
    """Summarise one audio file as a single flat feature vector.

    The vector is the concatenation of the per-coefficient MFCC means, the
    per-bin chroma means and the mean zero crossing rate, each averaged over
    all analysis frames of the clip.

    Args:
        file_path: Path of the audio file to load.
        duration: Upper bound, in seconds, on how much audio is loaded.
        sr: Sampling rate the audio is resampled to when loading.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        A 1-D NumPy array of features, or None when loading or feature
        extraction fails (the error is printed rather than raised).
    """
    try:
        signal, sr = librosa.load(file_path, sr=sr, duration=duration)

        # Average every MFCC coefficient across the time frames
        mfcc_profile = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc).T.mean(axis=0)

        # Average every chroma bin across the time frames
        chroma_profile = librosa.feature.chroma_stft(y=signal, sr=sr).T.mean(axis=0)

        # A single scalar: the zero crossing rate averaged over all frames
        zcr_level = librosa.feature.zero_crossing_rate(signal).mean()

        # Join the three summaries into one flat vector
        return np.concatenate((mfcc_profile, chroma_profile, np.atleast_1d(zcr_level)))

    except Exception as e:
        # Best effort: report the problem and let the caller skip this file
        print(f"Error encountered while parsing file: {file_path}")
        print(str(e))
        return None

In [None]:
# Extract the features of every file and collect [feature_vector, label] pairs in `data`.
# Files whose features cannot be extracted are skipped and counted in `c`.
data = []
c = 0  # number of files for which feature extraction failed
for i, (path, file_label) in enumerate(zip(file_path, label)):
    try:
        features = extract_features(path)
    except Exception as e:
        # extract_features normally reports its own errors and returns None;
        # this guard keeps the loop alive if anything else goes wrong.
        c += 1
        print(f"Error processing file: {path}")
        print(str(e))
        continue

    if features is None:
        c += 1
        print(f"Failed to extract features for file: {path}")
    else:
        data.append([features, file_label])
        print(f"file {i} extraction Done....\n")

print(f"Feature extraction finished: {len(data)} succeeded, {c} failed.")

In [None]:
# Directory to save spectrogram images; it is created if it does not exist yet.
# NOTE(review): this path is outside the mounted Drive folder, so presumably the
# images do not persist across Colab sessions — confirm this is intended.
spectrogram_dir = '/content/spectrograms'
os.makedirs(spectrogram_dir, exist_ok=True)

def create_spectrogram(file_path, output_path):
    """Render the mel spectrogram of an audio file and save it as an image.

    Args:
        file_path: Path of the audio file to load (resampled to 22050 Hz).
        output_path: Destination path of the saved figure.

    Returns:
        None. Any failure is printed instead of raised, so a batch run can
        continue with the next file.
    """
    fig = None
    try:
        # Load audio file
        y, sr = librosa.load(file_path, sr=22050)

        # Compute the mel spectrogram and convert power to decibels,
        # relative to the loudest bin
        S = librosa.feature.melspectrogram(y=y, sr=sr)
        S_dB = librosa.power_to_db(S, ref=np.max)

        # Create the plot only once the data is ready
        fig = plt.figure(figsize=(10, 4))
        librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel-frequency spectrogram')
        plt.tight_layout()

        # Save plot to file
        plt.savefig(output_path)
    except Exception as e:
        print(f"Error creating spectrogram for {file_path}: {str(e)}")
    finally:
        # Always release the figure, even when plotting or saving failed;
        # otherwise open figures pile up over a long batch run.
        if fig is not None:
            plt.close(fig)

# `file_path` is the list of audio paths; each spectrogram image is named after
# the position of its audio file in that list ("0.png", "1.png", ...).
for i, files_path in enumerate(file_path):
    output_path = os.path.join(spectrogram_dir, f"{i}.png")
    create_spectrogram(files_path, output_path)


# Extracting Features To Create a Dataset and Train Models

In [None]:
# Stack the per-file feature vectors into one matrix (one row per file)
# and collect the matching labels.
X = np.array([features for features, _ in data])
y = np.array([target for _, target in data])

# Encode the string labels as integers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets. Stratifying keeps the class
# proportions the same in both splits, which matters when classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Standardize features: the scaler is fitted on the training split only and
# then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Random Forest Classifier

In [None]:
# Hyperparameter tuning for Random Forest
rf_param_grid = {
    'n_estimators': [100, 200, 300],
    # 'auto' was removed from scikit-learn (it was an alias of 'sqrt' for
    # classifiers), so it both failed on recent versions and duplicated 'sqrt'.
    'max_features': ['sqrt', 'log2'],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# A fixed seed makes the search reproducible; n_jobs=-1 runs the
# cross-validation fits on all available cores.
rf_grid = GridSearchCV(
    RandomForestClassifier(random_state=42),
    rf_param_grid,
    refit=True,
    verbose=2,
    cv=5,
    n_jobs=-1,
)
rf_grid.fit(X_train, y_train)

print(f"Best parameters for Random Forest: {rf_grid.best_params_}")
# With refit=True this is the best configuration retrained on the full training set
rf_best = rf_grid.best_estimator_


In [None]:
# Score the tuned Random Forest on the held-out test split
y_pred_rf = rf_best.predict(X_test)
rf_report = classification_report(y_test, y_pred_rf)
rf_accuracy = accuracy_score(y_test, y_pred_rf)

print("Random Forest Classification Report:")
print(rf_report)
print("Random Forest Accuracy:", rf_accuracy)


## SVM

In [None]:
# Hyperparameter tuning for SVM.
# The grid is split per kernel because `gamma` is ignored by the linear kernel:
# a single combined grid would refit identical linear models once per gamma value.
svm_param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10, 100]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001]},
]

# n_jobs=-1 runs the cross-validation fits on all available cores
svm_grid = GridSearchCV(SVC(), svm_param_grid, refit=True, verbose=2, cv=5, n_jobs=-1)
svm_grid.fit(X_train, y_train)

print(f"Best parameters for SVM: {svm_grid.best_params_}")
# With refit=True this is the best configuration retrained on the full training set
svm_best = svm_grid.best_estimator_

In [None]:
# Score the tuned SVM on the held-out test split
y_pred_svm = svm_best.predict(X_test)
svm_report = classification_report(y_test, y_pred_svm)
svm_accuracy = accuracy_score(y_test, y_pred_svm)

print("SVM Classification Report:")
print(svm_report)
print("SVM Accuracy:", svm_accuracy)



## Using Models like KNN, XGBoost and Gradient Boosting to Check Whether They Give Higher Accuracy than SVM and RF

In [None]:
# Function to perform Grid Search and return the best model
def grid_search_model(model, param_grid, X_train, y_train, cv=5, verbose=2, n_jobs=-1):
    """Tune `model` with a cross-validated grid search and return the best estimator.

    Args:
        model: Unfitted scikit-learn compatible estimator.
        param_grid: Parameter grid (dict, or list of dicts) passed to GridSearchCV.
        X_train: Training feature matrix.
        y_train: Training labels.
        cv: Cross-validation setting passed to GridSearchCV (5 folds by default).
        verbose: Verbosity level passed to GridSearchCV.
        n_jobs: Number of parallel jobs; -1 uses all available cores.

    Returns:
        The estimator with the best cross-validation score, refitted on the
        whole training set (GridSearchCV's default `refit=True`).
    """
    grid_search = GridSearchCV(model, param_grid, cv=cv, verbose=verbose, n_jobs=n_jobs)
    grid_search.fit(X_train, y_train)
    print(f"Best parameters for {model.__class__.__name__}: {grid_search.best_params_}")
    return grid_search.best_estimator_

# K-Nearest Neighbours: tune the neighbourhood size and the vote weighting
knn_param_grid = dict(
    n_neighbors=[3, 5, 7, 9],
    weights=['uniform', 'distance'],
)
knn_estimator = KNeighborsClassifier()
knn_best = grid_search_model(knn_estimator, knn_param_grid, X_train, y_train)

# Gradient Boosting: tune ensemble size, shrinkage and tree depth
gbm_param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
)
gbm_estimator = GradientBoostingClassifier()
gbm_best = grid_search_model(gbm_estimator, gbm_param_grid, X_train, y_train)

# XGBoost: same search space as Gradient Boosting
xgb_param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
)
xgb_estimator = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
xgb_best = grid_search_model(xgb_estimator, xgb_param_grid, X_train, y_train)

# Score every tuned model on the held-out test split
models = dict([
    ("KNN", knn_best),
    ("Gradient Boosting", gbm_best),
    ("XGBoost", xgb_best),
])

for model_name in models:
    model = models[model_name]
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\n{model_name} Classification Report:")
    print(report)
    print(f"{model_name} Accuracy: {accuracy}")
