## scaling the features and training ML model

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import librosa
import numpy as np

## loading features and labels extracted previously

In [2]:
import pickle
with open('features.pkl', 'rb') as f:
    final_features = pickle.load(f)
with open('labels.pkl', 'rb') as f:
    labels = pickle.load(f)

In [3]:
scaler = StandardScaler()
mfcc_features_scaled = scaler.fit_transform(final_features)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(final_features, labels, test_size=0.2, random_state=42, stratify=labels)


## training svm model (93% accuracy)

In [5]:
svm_model = SVC(kernel='linear') 
svm_model.fit(X_train, y_train)



In [7]:
# Step 5: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
precision = precision_score(y_test, y_pred, average='binary')  # Assumes binary classification
conf_matrix = confusion_matrix(y_test, y_pred)
print (precision)
print(conf_matrix)

Model Accuracy: 92.59%
0.9264069264069265
[[1208   51]
 [  97  642]]


## loading the pre-written functions

In [8]:
from transformers import Wav2Vec2Model, Wav2Vec2Processor
import torch
import transformers
transformers.logging.set_verbosity_error()

In [9]:
from sklearn.decomposition import PCA

In [10]:
import dill
with open('mfcc_extraction.pkl', 'rb') as f:
    mfcc_extraction = dill.load(f)

with open('wav2vec2_extraction.pkl', 'rb') as f:
    wav2vec2_extraction = dill.load(f)

with open('reduce_features.pkl', 'rb') as f:
    reduce_features = dill.load(f)



In [11]:
def preprocess_checkaudio(path):
    f1 = mfcc_extraction(path)
    f2=wav2vec2_extraction(path)
    f2=reduce_features(f2, max_components=50)
    combine_features=np.hstack((f1, f2))
    ff = scaler.transform(combine_features.reshape(1, -1))  # Convert 1D to 2D
    return ff
    

## testing the model on some human and AI voices

In [12]:
path=r"check_data\fakeaud1.wav"
f= preprocess_checkaudio(path)
pred=svm_model.predict(f)
if pred==1:
    print("This is human voice")
else:
    print("This is fake voice")

This is fake voice


In [13]:
path=r"check_data\fakeaud3.wav"
f= preprocess_checkaudio(path)
pred=svm_model.predict(f)
if pred==1:
    print("This is human voice")
else:
    print("This is fake voice")

This is fake voice


In [14]:
path=r"check_data\checkaudio.wav"
f= preprocess_checkaudio(path)
pred=svm_model.predict(f)
if pred==1:
    print("This is human voice")
else:
    print("This is fake voice")

This is fake voice


In [15]:
path=r"check_data\checkaudio2.wav"
f= preprocess_checkaudio(path)
pred=svm_model.predict(f)
if pred==1:
    print("This is human voice")
else:
    print("This is fake voice")

This is fake voice


## training random-forest ML model(98% accuracy)

In [16]:
rf_model = RandomForestClassifier(n_estimators=50, random_state=42)  # n_estimators is the number of trees in the forest
rf_model.fit(X_train, y_train)

In [21]:
with open('rf_model.pkl', 'wb') as f:
    pickle.dump(rf_model, f)

In [17]:
y_pred = rf_model.predict(X_test)

# Step 5: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='binary')  # Assumes binary classification
conf_matrix = confusion_matrix(y_test, y_pred)
print(accuracy)
print(precision)
conf_matrix

0.9704704704704705
0.9885057471264368


array([[1251,    8],
       [  51,  688]], dtype=int64)

In [18]:
path1= r"check_data\fakeaud1.wav"
f1= preprocess_checkaudio(path1)
pred= rf_model.predict(f1)
if pred==0:
    print("this is real audio")
else:
    print("this is fake audio")



this is real audio


In [19]:
path1= r"C:\Users\U$ER\Documents\audio processing\check_data\checkaudio.wav"
f1= preprocess_checkaudio(path1)
pred= rf_model.predict(f1)
if pred==0:
    print("this is real audio")
else:
    print("this is fake audio")



this is real audio


In [20]:
path1= r"C:\Users\U$ER\Documents\audio processing\check_data\checkaudio2.wav"
f1= preprocess_checkaudio(path1)
pred= rf_model.predict(f1)
if pred==0:
    print("this is real audio")
else:
    print("this is fake audio")



this is real audio
