In [None]:
#!pip install pydub
#!pip install ffmpeg
!pip show scikit-learn

Name: scikit-learn
Version: 1.2.2
Summary: A set of python modules for machine learning and data mining
Home-page: http://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: /usr/local/lib/python3.10/dist-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: bigframes, fastai, imbalanced-learn, librosa, mlxtend, qudida, sklearn-pandas, yellowbrick


In [165]:
import os
import glob
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib
import pydub
from sklearn.calibration import CalibratedClassifierCV
from tempfile import TemporaryFile

In [None]:
def create_dataset(directory, label):
    X, y = [], []
    audio_files = glob.glob(os.path.join(directory, "*.wav"))
    for audio_path in audio_files:
        mfcc_features = extract_mfcc_features(audio_path)
        if mfcc_features is not None:
            X.append(mfcc_features)
            y.append(label)
        else:
            print(f"Skipping audio file {audio_path}")

    print("Number of samples in", directory, ":", len(X))
    print("Filenames in", directory, ":", [os.path.basename(path) for path in audio_files])
    return X, y

1 .  Get the data into arrays and label them

In [None]:
def extract_mfcc_features(audio_path, n_mfcc=13, n_fft=2048, hop_length=512):
    try:
        audio_data, sr = librosa.load(audio_path, sr=None)
    except Exception as e:
        print(f"Error loading audio file {audio_path}: {e}")
        return None

    mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
    return np.mean(mfccs.T, axis=0)

**2.Train the model**

In [None]:
def train_model(X, y):
    unique_classes = np.unique(y)
    print("Unique classes in y_train:", unique_classes)

    if len(unique_classes) < 2:
        raise ValueError("Atleast 2 set is required to train")

    print("Size of X:", X.shape)
    print("Size of y:", y.shape)

    class_counts = np.bincount(y)
    if np.min(class_counts) < 2:
        print("Combining both classes into one for training")
        X_train, y_train = X, y
        X_test, y_test = None, None
    else:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

        print("Size of X_train:", X_train.shape)
        print("Size of X_test:", X_test.shape)
        print("Size of y_train:", y_train.shape)
        print("Size of y_test:", y_test.shape)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    if X_test is not None:
        X_test_scaled = scaler.transform(X_test)

        svm_classifier = SVC(kernel='linear', random_state=42)

#  To capture the probabilities along with classes
        probs_classifier = CalibratedClassifierCV(svm_classifier)
        probs_classifier.fit(X_train_scaled, y_train)

        y_pred = probs_classifier.predict(X_test_scaled)
        probs = probs_classifier.predict_proba(X_test_scaled)

        accuracy = accuracy_score(y_test, y_pred)
        confusion_mtx = confusion_matrix(y_test, y_pred)

        print("Accuracy:", accuracy)
        print("Confusion Matrix:")
        print(confusion_mtx)

    else:
        print("Insufficient samples for stratified splitting. Combine both classes into one for training.")
        print("Train on all available data.")

        svm_classifier = SVC(kernel='linear', random_state=42)
        probs_classifier = CalibratedClassifierCV(svm_classifier)
        probs_classifier.fit(X_train_scaled, y_train)

    # Save the trained SVM model and scaler

    model_filename = "/content/ad/models/svm/svm_model.pkl"
    scaler_filename = "/content/ad/models/svm/svm_scaler.pkl"
    print("Saving the model at location:", model_filename)
    joblib.dump(probs_classifier, model_filename)
    joblib.dump(scaler, scaler_filename)
    return y_pred, probs

**3.Predict Single audio input**

In [None]:
def analyze_audio(input_audio_path):
    model_filename = "/content/ad/models/svm/svm_model.pkl"
    scaler_filename = "/content/ad/models/svm/svm_scaler.pkl"
    svcprb_classifier = joblib.load(model_filename)
    scaler = joblib.load(scaler_filename)

    if not os.path.exists(input_audio_path):
        print("Error: The specified file does not exist.")
        return
    elif not input_audio_path.lower().endswith(".wav"):
        print("Error: The specified file is not a .wav file.")
        return

    mfcc_features = extract_mfcc_features(input_audio_path)

    if mfcc_features is not None:
        mfcc_features_scaled = scaler.transform(mfcc_features.reshape(1, -1))

        prediction = svcprb_classifier.predict(mfcc_features_scaled)
        probs= svcprb_classifier.predict_proba(mfcc_features_scaled)

        if prediction[0] == 1:
            print("Classified : Human")
            print("Human Probability score: {:.2f}%".format(probs[0][1]*100))
        else:
            print("Classified : AI")
            print("AI Probability score: {:.2f}%".format(probs[0][0]*100))
    else:
        print("Error: Unable to process the input audio.")

In [None]:
#!mkdir /content/ad/auracle_hackathon_2024_data/for/for2sec
#!gsutil -m cp -r gs://auracle_fakeoreal_data/for-2sec-complete/for-2seconds/training /content/ad/auracle_hackathon_2024_data/for/for2sec

In [None]:
human_dir = r"/content/ad/auracle_hackathon_2024_data/for/for2sec/training/real"
ai_dir = r"/content/ad/auracle_hackathon_2024_data/for/for2sec/training/fake"

X_human, y_human = create_dataset(human_dir, label=1)
X_ai, y_ai = create_dataset(ai_dir, label=0)

# Check if each class has at least two samples
if len(X_human) < 2 or len(X_ai) < 2:
    print("Each class should have at least two samples for stratified splitting.")
    print("Combining both classes into one for training.")
    X = np.vstack((X_human, X_ai))
    y = np.hstack((y_human, y_ai))
else:
    X = np.vstack((X_human, X_ai))
    y = np.hstack((y_human, y_ai))


Number of samples in /content/ad/auracle_hackathon_2024_data/for/for2sec/training/real : 6978
Filenames in /content/ad/auracle_hackathon_2024_data/for/for2sec/training/real : ['file21218.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file2127.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file1094.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file15498.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file8943.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file28239.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file7182.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file27936.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file7893.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file5340.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file5633.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file968.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', 'file21629.wav_16k.wav_norm.wav_mono.wav_silence.wav_2sec.wav', '

In [None]:
train_model(X, y)

Unique classes in y_train: [0 1]
Size of X: (13956, 13)
Size of y: (13956,)
Size of X_train: (11164, 13)
Size of X_test: (2792, 13)
Size of y_train: (11164,)
Size of y_test: (2792,)
Accuracy: 0.791189111747851
Confusion Matrix:
[[1124  272]
 [ 311 1085]]
Saving the model at location: /content/ad/models/svm/svm_model.pkl


(array([1, 0, 1, ..., 0, 1, 1]),
 array([[0.04370632, 0.95629368],
        [0.54737542, 0.45262458],
        [0.44615389, 0.55384611],
        ...,
        [0.5729446 , 0.4270554 ],
        [0.19014824, 0.80985176],
        [0.47454614, 0.52545386]]))

**MP3 to WAV for Model scoring**

In [163]:
!gsutil -m cp -r gs://auracle_hackathon_2024_data/test_human/wavs_human_LJ017-0276.mp3 /content/ad/auracle_hackathon_2024_data/test_human
example_file_path = '/content/ad/auracle_hackathon_2024_data/test_human/wavs_human_LJ017-0276.mp3'


Copying gs://auracle_hackathon_2024_data/test_human/wavs_human_LJ017-0276.mp3...
/ [0/1 files][    0.0 B/225.8 KiB]   0% Done                                    / [1/1 files][225.8 KiB/225.8 KiB] 100% Done                                    
Operation completed over 1 objects/225.8 KiB.                                    


In [None]:
wav_output_file = '/content/ad/staging/mp3_wav_svm/converted.wav'

In [None]:
from pydub import AudioSegment
from pydub.utils import make_chunks

sound = AudioSegment.from_file(example_file_path)
sound.export(wav_output_file, format="wav")

myaudio = AudioSegment.from_file(wav_output_file , "wav")
chunk_length_ms = 2000 # pydub calculates in millisec
chunks = make_chunks(myaudio, chunk_length_ms) #Make chunks of two sec
exported_chunk = ''

#Export all of the individual chunks as wav files
for i, chunk in enumerate(chunks):
    chunk_name = '/content/ad/staging/mp3_wav_svm/' + "chunk{0}.wav".format(i)
    if len(chunks) == 1 :
      exported_chunk=chunk_name
      break
    elif len(chunks) > 1 and i == 2:
      exported_chunk=chunk_name
      chunk.export(chunk_name, format="wav")
    elif i > 2:
      break

In [None]:
analyze_audio(exported_chunk)

Classified : AI
AI Probability score: 85.13%
