In [3]:
import numpy as np
import os
import pickle
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Function to extract MFCC features from audio data
def extract_features(audio_data, sample_rate=22050, n_mfcc=13):
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc)
    mfccs_mean = np.mean(mfccs.T, axis=0)
    return mfccs_mean

In [5]:
# Function to load audio data and extract features
def load_audio_data(directory):
    X = []
    y = []

    for speaker_dir in os.listdir(directory):
        speaker_path = os.path.join(directory, speaker_dir)
        if os.path.isdir(speaker_path):
            for audio_file in os.listdir(speaker_path):
                audio_path = os.path.join(speaker_path, audio_file)
                audio_data, sample_rate = librosa.load(audio_path, sr=None)
                features = extract_features(audio_data, sample_rate)
                X.append(features)
                y.append(speaker_dir)

    return np.array(X), np.array(y)

In [6]:
# Load audio data
data_directory = "/content/drive/MyDrive/Colab Notebooks/DatasetSpeaker/speeches"
X, y = load_audio_data(data_directory)

In [7]:
# Split data into training and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.1, random_state=42)

In [8]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [9]:
train_accuracy = accuracy_score(y_train, model.predict(X_train))
valid_accuracy = accuracy_score(y_valid, model.predict(X_valid))
print("Training accuracy:", train_accuracy)
print("Validation accuracy:", valid_accuracy)

Training accuracy: 1.0
Validation accuracy: 0.975609756097561


In [10]:
with open("speaker_identification_model.pkl", "wb") as f:
    pickle.dump(model, f)

In [None]:
import librosa
import numpy as np
import pickle

# Function to extract MFCC features from audio data
def extract_features(audio_data, sample_rate=22050, n_mfcc=13):
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc)
    mfccs_mean = np.mean(mfccs.T, axis=0)
    return mfccs_mean

# Function to load the external audio file, extract features, and predict the speaker
def predict_speaker(audio_file, model_path="speaker_identification_model.pkl"):
    # Load the trained model
    with open(model_path, "rb") as f:
        model = pickle.load(f)

    # Load the audio file and extract features
    audio_data, sample_rate = librosa.load(audio_file, sr=None)
    features = extract_features(audio_data, sample_rate)

    # Predict the speaker
    predicted_speaker = model.predict([features])[0]

    return predicted_speaker

# Test with an external audio file
audio_file_path = "/home/rahul/Desktop/BeatBot/uploads/lewis22.wav"  # Update with the path to your audio file
predicted_speaker = predict_speaker(audio_file_path)
print("Predicted speaker:", predicted_speaker)


Predicted speaker: Lewis_Capaldi
