<a href="https://colab.research.google.com/github/surajvarma1/Deepfake-audio-detection/blob/main/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset CSV from the Colab filesystem
data = pd.read_csv('/content/DATASET-balanced.csv')

# Separate features (every column except LABEL) and labels
X = data.drop(columns=['LABEL'])

# Encode labels: 'FAKE' -> 0, 'REAL' -> 1
y = data['LABEL'].map({'FAKE': 0, 'REAL': 1})

# Series.map turns every value that is missing from the mapping into NaN.
# Fail loudly here instead of letting NaN targets reach the classifiers.
if y.isna().any():
    unexpected_labels = sorted(data.loc[y.isna(), 'LABEL'].astype(str).unique())
    raise ValueError(
        f"Unexpected LABEL values (expected 'FAKE' or 'REAL'): {unexpected_labels}"
    )

# Split data into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: fit the scaler on the training split only, then apply
# the same transformation to the test split so no test statistics leak in.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
import librosa
import numpy as np

def extract_features(file_path):
    """Extract a flat feature vector from one audio file.

    The file is loaded with ``librosa.load(..., sr=None)`` and the following
    librosa features are reduced with ``np.mean``:

    * mel spectrogram, averaged along axis 1 (one value per row of the
      spectrogram; presumably one per mel band, TODO confirm)
    * spectral rolloff, spectral bandwidth, spectral centroid, RMS and
      chroma STFT, each averaged over the whole feature array (one scalar each)

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        1-D ``numpy.ndarray`` containing the mel means followed by
        ``[rolloff, spectral_bandwidth, spectral_centroid, rms, chroma_stft]``.

    NOTE(review): the result is meant to be passed to a scaler/model fitted on
    the training CSV, so the number and order of values returned here must
    match that file's feature columns. TODO confirm against the dataset.
    """
    audio, sr = librosa.load(file_path, sr=None)

    mel_means = np.mean(librosa.feature.melspectrogram(y=audio, sr=sr), axis=1)

    scalar_means = [
        np.mean(librosa.feature.spectral_rolloff(y=audio, sr=sr)),
        np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sr)),
        np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr)),
        np.mean(librosa.feature.rms(y=audio)),
        np.mean(librosa.feature.chroma_stft(y=audio, sr=sr)),
    ]

    # Each scalar mean is zero-dimensional, and np.concatenate rejects 0-d
    # inputs. Grouping them in one list gives a 1-D sequence that can be
    # joined with the mel means.
    return np.concatenate([mel_means, scalar_means])

# Example usage:
# features = extract_features('path_to_audio_file.mp3')
# features = scaler.transform([features])


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Train SVM classifier
# probability=True makes SVC fit its probability estimates with an internal,
# randomized cross-validation. Fixing random_state keeps predict_proba
# reproducible between runs (42 matches the seed used for the train/test split).
svm_classifier = SVC(kernel='linear', probability=True, random_state=42)
svm_classifier.fit(X_train, y_train)

# Evaluate SVM classifier on the held-out test split
y_pred_svm = svm_classifier.predict(X_test)
print('SVM Classifier Report:')
print(classification_report(y_test, y_pred_svm))
print('SVM Classifier Accuracy:', accuracy_score(y_test, y_pred_svm))


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define the deep learning model
def create_model(input_shape):
    """Build and compile the dense network used for binary classification.

    Args:
        input_shape: Number of features per sample; it is wrapped into a
            one-element tuple for the first layer's ``input_shape``.

    Returns:
        A compiled ``Sequential`` model: Dense(256) -> Dropout(0.5) ->
        Dense(128) -> Dropout(0.5) -> Dense(64) -> Dense(1, sigmoid),
        using the Adam optimizer, binary cross-entropy loss and an
        accuracy metric.
    """
    stack = [
        Dense(256, activation='relu', input_shape=(input_shape,)),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        # Single sigmoid unit: the output is one value in [0, 1] per sample.
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(stack)
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Train the model
# Seed the Python, NumPy and TensorFlow random generators first so that weight
# initialisation, dropout and batch shuffling repeat from run to run (42 matches
# the seed used for the train/test split).
# NOTE(review): bit-exact repeatability on GPU may need further settings (TODO confirm).
from tensorflow.keras.utils import set_random_seed
set_random_seed(42)

input_shape = X_train.shape[1]
model = create_model(input_shape)
# validation_split=0.2 holds out a fraction of the training data for per-epoch validation
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Deep Learning Model Accuracy: {accuracy}')


In [None]:
def predict(file_path, model, svm_classifier, scaler):
    """Classify one audio file with both the SVM and the deep-learning model.

    Args:
        file_path: Path of the audio file; passed to ``extract_features``.
        model: Object whose ``predict`` returns the network output for a batch
            (presumably the Keras model trained in this notebook; verify
            against the caller).
        svm_classifier: Classifier exposing ``predict`` and ``predict_proba``.
        scaler: Fitted transformer exposing ``transform``.

    Returns:
        dict with four entries:
            'svm_prediction'  -- 'real' when the SVM label equals 1, else 'fake'
            'svm_probability' -- first row of ``predict_proba``
            'dl_prediction'   -- 'real' when the first network output exceeds
                                 0.5, else 'fake'
            'dl_probability'  -- first row of the network output
    """
    # Wrap the feature vector in a list so the scaler receives a one-row batch.
    sample = scaler.transform([extract_features(file_path)])

    # SVM: hard label plus class probabilities
    svm_label = svm_classifier.predict(sample)
    svm_scores = svm_classifier.predict_proba(sample)

    # Deep learning model: raw sigmoid output
    net_output = model.predict(sample)

    if svm_label[0] == 1:
        svm_verdict = 'real'
    else:
        svm_verdict = 'fake'

    if net_output[0] > 0.5:
        net_verdict = 'real'
    else:
        net_verdict = 'fake'

    return {
        'svm_prediction': svm_verdict,
        'svm_probability': svm_scores[0],
        'dl_prediction': net_verdict,
        'dl_probability': net_output[0],
    }

# Example usage:
# result = predict('path_to_audio_file.mp3', model, svm_classifier, scaler)
# print(result)
