Import Libraries

In [None]:
import librosa
import numpy as np
import pandas as pd
import os

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def extract_features(audio_path):
    """Summarise one audio file as a vector of time-averaged MFCCs.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        A 1-D array with one entry per MFCC coefficient (20 are requested),
        each being that coefficient's mean over all frames.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, native_rate = librosa.load(audio_path, sr=None)
    coefficients = librosa.feature.mfcc(y=waveform, sr=native_rate, n_mfcc=20)
    # Transpose to (frames, coefficients) and collapse the frame axis.
    return coefficients.T.mean(axis=0)

def augment_audio(audio, sample_rate, *, noise_level=0.005, n_steps=4, rate=0.8):
    """Return a noisy, pitch-shifted and time-stretched copy of ``audio``.

    The three effects are applied in sequence: additive Gaussian noise,
    then a pitch shift, then a time stretch.

    Args:
        audio: 1-D waveform array.
        sample_rate: Sampling rate of ``audio``, passed to the pitch shifter.
        noise_level: Multiplier applied to the standard-normal noise before
            it is added to the signal.
        n_steps: Number of steps to shift the pitch by (semitones under
            librosa's default of 12 bins per octave).
        rate: Time-stretch factor handed to ``librosa.effects.time_stretch``;
            values below 1 slow the signal down.

    Returns:
        The augmented waveform. Its length generally differs from the
        input's because of the time stretch.
    """
    # Noise comes from NumPy's global RNG, so call np.random.seed(...)
    # beforehand when repeatable augmentation is required.
    noise = np.random.randn(len(audio))
    noisy = audio + noise_level * noise

    shifted = librosa.effects.pitch_shift(noisy, sr=sample_rate, n_steps=n_steps)

    return librosa.effects.time_stretch(shifted, rate=rate)

def extract_augmented_features(file_path):
    """Load a file, augment the waveform, and return its time-averaged MFCCs.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A 1-D array with one entry per MFCC coefficient (20 are requested),
        computed on the output of ``augment_audio`` and averaged over frames.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    waveform, native_rate = librosa.load(file_path, sr=None)
    perturbed = augment_audio(waveform, native_rate)
    coefficients = librosa.feature.mfcc(y=perturbed, sr=native_rate, n_mfcc=20)
    # Transpose to (frames, coefficients) and collapse the frame axis.
    return coefficients.T.mean(axis=0)

In [None]:
def load_data(base_dir, protocol_path, augment=False):
    """Turn a protocol file into a feature matrix and a label vector.

    The protocol is read as a headerless, space-separated table whose
    columns are named speaker_id, filename, system_id, null and class_name.
    Each row's audio is expected at ``<base_dir>/<filename>.flac``.

    Args:
        base_dir: Directory holding the ``.flac`` files.
        protocol_path: Path of the protocol text file.
        augment: When true, features come from ``extract_augmented_features``;
            otherwise from ``extract_features``.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays in protocol order.
        A label is 1 when class_name equals 'bonafide' and 0 otherwise.
    """
    column_names = ['speaker_id', 'filename', 'system_id', 'null', 'class_name']
    protocol = pd.read_csv(protocol_path, sep=' ', header=None, names=column_names)

    # Pick the extractor once; the flag does not change between rows.
    featurize = extract_augmented_features if augment else extract_features

    features = []
    labels = []
    for stem, class_name in zip(protocol['filename'], protocol['class_name']):
        flac_path = os.path.join(base_dir, stem + '.flac')
        features.append(featurize(flac_path))
        labels.append(int(class_name == 'bonafide'))

    return np.array(features), np.array(labels)

In [None]:
def plot_mfccs(features, labels):
    """Scatter-plot the first two feature columns, coloured by class.

    Args:
        features: 2-D array; column 0 is drawn on the x-axis and column 1
            on the y-axis.
        labels: Per-row class labels, where 1 is shown as 'Real' and any
            other value as 'Fake' (``load_data`` emits 1 for bonafide and
            0 otherwise).
    """
    # Translate the numeric labels into class names *before* plotting so
    # that seaborn itself builds the legend. Overriding the legend text
    # afterwards with plt.legend(labels=[...]) pairs the strings with the
    # artists on the axes in artist order, which is not guaranteed to match
    # the classes and can silently mislabel the colours.
    class_names = np.where(np.asarray(labels) == 1, 'Real', 'Fake')

    plt.figure(figsize=(10, 6))
    ax = sns.scatterplot(
        x=features[:, 0],
        y=features[:, 1],
        hue=class_names,
        hue_order=['Fake', 'Real'],
        palette='viridis',
        alpha=0.5,
    )
    plt.title('MFCCs Feature Distribution')
    plt.xlabel('MFCC 1')
    plt.ylabel('MFCC 2')
    # Rebuild the legend from seaborn's own labelled handles, adding a title.
    ax.legend(title='Label')
    plt.show()

def plot_confusion_matrix(y_true, y_pred):
    """Draw the 2x2 confusion matrix for the Fake (0) / Real (1) classes.

    Args:
        y_true: Ground-truth labels (0 or 1).
        y_pred: Predicted labels (0 or 1).
    """
    # Pin the label set so the matrix is always 2x2 in [Fake, Real] order.
    # Without it, a class missing from both inputs shrinks the matrix and
    # the fixed tick labels below no longer line up with the cells.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    class_names = ['Fake', 'Real']
    plt.figure(figsize=(6, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix')
    plt.show()

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout

def create_model(input_shape=(40, 44, 1)):
    """Build and compile a small 2-D CNN for binary classification.

    The network has two Conv2D (3x3, ReLU) stages with 32 and 64 filters,
    each followed by 2x2 max pooling. These feed a flattened Dense(64, ReLU)
    layer, Dropout(0.5), and a single sigmoid output unit. It is compiled
    with the Adam optimizer, binary cross-entropy loss and an accuracy
    metric.

    Args:
        input_shape: Shape of one input sample as passed to the first
            Conv2D layer. Defaults to ``(40, 44, 1)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): load_data in this file produces 20-element mean-MFCC
    # vectors, which do not match a (40, 44, 1) input -- confirm that the
    # CNN is fed differently shaped features, or pass a matching shape.
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model


With Augmentation

In [None]:
# Where the training audio (.flac) and its protocol file live
base_dir = './input/asvspoof-2019-dataset/LA/LA/ASVspoof2019_LA_train/flac'
protocol_path = './input/asvspoof-2019-dataset/LA/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt'

# Build features and labels from the augmented version of every file
AugX, Augy = load_data(base_dir, protocol_path, augment=True)

In [None]:
# Visualise the first two features of the augmented set
plot_mfccs(AugX, Augy)

# Hold out 25% of the samples for evaluation; the seed fixes the split
AugX_train, AugX_test, Augy_train, Augy_test = train_test_split(
    AugX,
    Augy,
    test_size=0.25,
    random_state=42,
)

# Fit logistic regression on the training portion (fit returns the estimator)
model = LogisticRegression(max_iter=1000).fit(AugX_train, Augy_train)

# Score the held-out portion
Aug_predictions = model.predict(AugX_test)
Aug_accuracy = accuracy_score(Augy_test, Aug_predictions)
print(f'Accuracy for logistical Regression Model with Augmented Audio: {Aug_accuracy:.2f}')

plot_confusion_matrix(Augy_test, Aug_predictions)

Without Augmentation

In [None]:
# Build features and labels from the unmodified audio (no augmentation)
X, y = load_data(base_dir, protocol_path, augment=False)

Logistic Regression Model

In [None]:
# Visualise the first two features of the non-augmented set
plot_mfccs(X, y)

# Hold out 25% of the samples for evaluation; the seed fixes the split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Fit logistic regression on the training portion (fit returns the estimator)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score the held-out portion
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f'Accuracy: {accuracy:.2f}')
plot_confusion_matrix(y_test, predictions)

CNN Model