In [None]:
pip install pyunicorn

In [None]:
pip install netCDF4

In [None]:
pip install audiomentations

**Recurrence Plot**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pyunicorn.timeseries.recurrence_plot import RecurrencePlot
from scipy.io import wavfile

# Read the wav file
sample_rate, audio_data = wavfile.read('03.wav')

# Down-mix multi-channel audio to mono by averaging the channels
if audio_data.ndim > 1:
    audio_data = np.mean(audio_data, axis=1)

# Number of audio samples covered by each recurrence plot
segment_size = 600

# Only whole segments are plotted; a trailing partial segment is dropped,
# matching the `len(audio_data) // segment_size` rule used by the feature
# extraction function later in this notebook.
num_segments = len(audio_data) // segment_size

for segment_index in range(num_segments):
    # Slice out the current fixed-length segment
    start = segment_index * segment_size
    audio_data_segment = audio_data[start:start + segment_size]

    # Recurrence plot with a fixed distance threshold of 0.1 on the normalized segment
    rp = RecurrencePlot(audio_data_segment, dim=1, tau=10, threshold=0.1, normalize=True)

    # Draw the recurrence matrix on its own figure
    fig, ax = plt.subplots()
    ax.imshow(rp.recurrence_matrix(), cmap='binary', origin='lower')
    # Title carries the 1-based segment number (not the sample offset)
    ax.set_title(f'Recurrence plot for segment {segment_index + 1}')
    ax.set_xlabel('Time (samples)')
    ax.set_ylabel('Time (samples)')
    plt.show()

    # Release the figure so a long recording does not accumulate open figures
    plt.close(fig)

**Conv2D Model**

In [None]:
import zipfile

# Unpack every member of dataset.zip into /content
with zipfile.ZipFile('dataset.zip', mode='r') as dataset_archive:
    dataset_archive.extractall(path='/content')

In [None]:
import numpy as np
import os
from pyunicorn.timeseries.recurrence_plot import RecurrencePlot
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

def extract_recurrence_plot_features(audio_data, segment_size, dim=1, tau=10,
                                     threshold=0.1, normalize=True):
    """Split a signal into fixed-length segments and compute a recurrence matrix for each.

    Args:
        audio_data: 1-D sequence of samples (anything supporting ``len`` and slicing).
        segment_size: Number of samples per segment; must be positive.
        dim: Embedding dimension forwarded to ``RecurrencePlot``.
        tau: Embedding delay forwarded to ``RecurrencePlot``.
        threshold: Fixed recurrence threshold forwarded to ``RecurrencePlot``.
        normalize: Normalization flag forwarded to ``RecurrencePlot``.

    Returns:
        A list with one ``rp.recurrence_matrix()`` result per complete segment.
        Samples past the last complete segment are ignored, so the list is
        empty when ``audio_data`` is shorter than ``segment_size``.

    Raises:
        ValueError: If ``segment_size`` is not positive.
    """
    if segment_size <= 0:
        raise ValueError(f"segment_size must be a positive integer, got {segment_size!r}")

    # Integer division drops the trailing partial segment so every matrix
    # is built from exactly `segment_size` samples.
    num_segments = len(audio_data) // segment_size

    features = []
    for i in range(num_segments):
        start = i * segment_size
        audio_data_segment = audio_data[start:start + segment_size]

        # Recurrence plot of this segment (pyunicorn)
        rp = RecurrencePlot(audio_data_segment, dim=dim, tau=tau,
                            threshold=threshold, normalize=normalize)
        features.append(rp.recurrence_matrix())

    return features

# Function to load audio data and extract recurrence plot features
def load_data_and_extract_features(data_dir, segment_size):
    """Walk ``data_dir/<speaker>/<word>/*.wav`` and collect recurrence-plot features.

    Each first-level directory name is used as the label for every feature
    extracted from the wav files beneath it. Non-directory entries at the
    speaker and word levels, and files without a ``.wav`` extension
    (case-insensitive), are skipped.

    Args:
        data_dir: Root directory of the dataset.
        segment_size: Samples per segment, passed to
            ``extract_recurrence_plot_features``.

    Returns:
        Tuple ``(features, labels)`` of numpy arrays: the stacked per-segment
        features and the matching speaker directory names, in the same order.
    """
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for speaker in sorted(os.listdir(data_dir)):
        speaker_dir = os.path.join(data_dir, speaker)
        if not os.path.isdir(speaker_dir):
            continue

        for word in sorted(os.listdir(speaker_dir)):
            word_dir = os.path.join(speaker_dir, word)
            if not os.path.isdir(word_dir):
                continue

            for audio_file in sorted(os.listdir(word_dir)):
                # Accept .wav regardless of case so e.g. "A.WAV" is not silently dropped
                if not audio_file.lower().endswith('.wav'):
                    continue

                audio_path = os.path.join(word_dir, audio_file)
                _sample_rate, audio_data = wavfile.read(audio_path)

                # Down-mix multi-channel audio to mono by averaging the channels
                if audio_data.ndim > 1:
                    audio_data = np.mean(audio_data, axis=1)

                extracted_features = extract_recurrence_plot_features(audio_data, segment_size)
                features.extend(extracted_features)
                labels.extend([speaker] * len(extracted_features))

    return np.array(features), np.array(labels)

# Dataset root and the number of audio samples behind each recurrence plot
data_dir = '/content/dataset'
segment_size = 600

# One recurrence matrix per audio segment, labelled with its speaker folder name
features, labels = load_data_and_extract_features(data_dir, segment_size)

if len(features) == 0:
    print("No features extracted from the audio data.")
else:
    # The first sample's shape is the reference every other sample must match
    feature_shape = features[0].shape
    shapes_agree = all(sample.shape == feature_shape for sample in features)

    if not shapes_agree:
        print("Features have different shapes. Please ensure all features have the same shape.")
    else:
        features = np.array(features)

        # No feature preprocessing (e.g., normalization) is applied at this point

        # Map each distinct label to an integer index, then one-hot encode
        label_names = np.unique(labels)
        num_classes = len(label_names)
        label_mapping = dict(zip(label_names, range(num_classes)))
        label_indices = np.array([label_mapping[name] for name in labels])
        labels = to_categorical(label_indices, num_classes=num_classes)

        # Hold out 20% of the samples; the fixed seed makes the split repeatable
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, test_size=0.2, random_state=42
        )

        # Conv2D expects a trailing channel axis; the matrices have a single channel
        input_shape = (feature_shape[0], feature_shape[1], 1)
        X_train = np.expand_dims(X_train, axis=-1)
        X_test = np.expand_dims(X_test, axis=-1)

        # Two conv/pool stages followed by a dense classifier head
        model = Sequential([
            Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
            MaxPooling2D(pool_size=(2, 2)),
            Conv2D(64, kernel_size=(3, 3), activation='relu'),
            MaxPooling2D(pool_size=(2, 2)),
            Flatten(),
            Dense(128, activation='relu'),
            Dense(num_classes, activation='softmax'),
        ])

        model.compile(
            optimizer=Adam(),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        # The held-out split doubles as validation data during training
        model.fit(
            X_train,
            y_train,
            epochs=10,
            batch_size=32,
            validation_data=(X_test, y_test),
        )

        # Report loss and accuracy on the held-out split
        loss, accuracy = model.evaluate(X_test, y_test)
        print(f'Test Loss: {loss:.4f}')
        print(f'Test Accuracy: {accuracy:.4f}')

        # Persist the trained model to disk
        model.save('speaker_recognition_model.h5')


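**Delete Folder** (removes `/content/dataset`; re-extract the dataset before running the AlexNet cell below, which reads from the same path)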

In [None]:
import shutil

# Specify the path of the folder to be deleted
# NOTE: the AlexNet cell further down reads from this same path, so the
# dataset must be extracted again before that cell is run.
folder_path = '/content/dataset'

# Delete the folder and all of its contents. A folder that is already gone is
# reported instead of raised, so re-running this cell is harmless.
try:
    shutil.rmtree(folder_path)
except FileNotFoundError:
    print(f"Nothing to delete: '{folder_path}' does not exist.")

**AlexNet Model**

In [None]:
import numpy as np
import os
from pyunicorn.timeseries.recurrence_plot import RecurrencePlot
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

def extract_recurrence_plot_features(audio_data, segment_size, dim=1, tau=10,
                                     threshold=0.1, normalize=True):
    """Split a signal into fixed-length segments and compute a recurrence matrix for each.

    Args:
        audio_data: 1-D sequence of samples (anything supporting ``len`` and slicing).
        segment_size: Number of samples per segment; must be positive.
        dim: Embedding dimension forwarded to ``RecurrencePlot``.
        tau: Embedding delay forwarded to ``RecurrencePlot``.
        threshold: Fixed recurrence threshold forwarded to ``RecurrencePlot``.
        normalize: Normalization flag forwarded to ``RecurrencePlot``.

    Returns:
        A list with one ``rp.recurrence_matrix()`` result per complete segment.
        Samples past the last complete segment are ignored, so the list is
        empty when ``audio_data`` is shorter than ``segment_size``.

    Raises:
        ValueError: If ``segment_size`` is not positive.
    """
    if segment_size <= 0:
        raise ValueError(f"segment_size must be a positive integer, got {segment_size!r}")

    # Integer division drops the trailing partial segment so every matrix
    # is built from exactly `segment_size` samples.
    num_segments = len(audio_data) // segment_size

    features = []
    for i in range(num_segments):
        start = i * segment_size
        audio_data_segment = audio_data[start:start + segment_size]

        # Recurrence plot of this segment (pyunicorn)
        rp = RecurrencePlot(audio_data_segment, dim=dim, tau=tau,
                            threshold=threshold, normalize=normalize)
        features.append(rp.recurrence_matrix())

    return features

# Function to load audio data and extract recurrence plot features
def load_data_and_extract_features(data_dir, segment_size):
    """Walk ``data_dir/<speaker>/<word>/*.wav`` and collect recurrence-plot features.

    Each first-level directory name is used as the label for every feature
    extracted from the wav files beneath it. Non-directory entries at the
    speaker and word levels, and files without a ``.wav`` extension
    (case-insensitive), are skipped.

    Args:
        data_dir: Root directory of the dataset.
        segment_size: Samples per segment, passed to
            ``extract_recurrence_plot_features``.

    Returns:
        Tuple ``(features, labels)`` of numpy arrays: the stacked per-segment
        features and the matching speaker directory names, in the same order.
    """
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for speaker in sorted(os.listdir(data_dir)):
        speaker_dir = os.path.join(data_dir, speaker)
        if not os.path.isdir(speaker_dir):
            continue

        for word in sorted(os.listdir(speaker_dir)):
            word_dir = os.path.join(speaker_dir, word)
            if not os.path.isdir(word_dir):
                continue

            for audio_file in sorted(os.listdir(word_dir)):
                # Accept .wav regardless of case so e.g. "A.WAV" is not silently dropped
                if not audio_file.lower().endswith('.wav'):
                    continue

                audio_path = os.path.join(word_dir, audio_file)
                _sample_rate, audio_data = wavfile.read(audio_path)

                # Down-mix multi-channel audio to mono by averaging the channels
                if audio_data.ndim > 1:
                    audio_data = np.mean(audio_data, axis=1)

                extracted_features = extract_recurrence_plot_features(audio_data, segment_size)
                features.extend(extracted_features)
                labels.extend([speaker] * len(extracted_features))

    return np.array(features), np.array(labels)

# Where the extracted dataset lives, and how many samples make up one segment
data_dir = '/content/dataset'
segment_size = 600

# Recurrence matrices for every segment, paired with speaker labels
features, labels = load_data_and_extract_features(data_dir, segment_size)

if len(features) == 0:
    print("No features extracted from the audio data.")
else:
    # Use the first sample's shape as the reference for the whole set
    feature_shape = features[0].shape
    has_mismatch = any(item.shape != feature_shape for item in features)

    if has_mismatch:
        print("Features have different shapes. Please ensure all features have the same shape.")
    else:
        features = np.array(features)

        # Features are used as extracted; no normalization step is applied

        # Integer-encode the labels via a name -> index lookup, then one-hot encode
        label_names = np.unique(labels)
        num_classes = len(label_names)
        label_mapping = {name: index for index, name in enumerate(label_names)}
        encoded_labels = np.array([label_mapping[name] for name in labels])
        labels = to_categorical(encoded_labels, num_classes=num_classes)

        # 80/20 train/test split with a fixed seed
        split = train_test_split(features, labels, test_size=0.2, random_state=42)
        X_train, X_test, y_train, y_test = split

        # Append the single channel axis required by the 2D convolution layers
        input_shape = (feature_shape[0], feature_shape[1], 1)
        X_train = X_train[..., np.newaxis]
        X_test = X_test[..., np.newaxis]

        # AlexNet-style stack: five convolutions with three pooling stages,
        # then two 4096-unit dense layers with dropout and a softmax output
        model = Sequential([
            Conv2D(96, kernel_size=(11, 11), strides=(4, 4), activation='relu', input_shape=input_shape),
            MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
            Conv2D(256, kernel_size=(5, 5), padding='same', activation='relu'),
            MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
            Conv2D(384, kernel_size=(3, 3), padding='same', activation='relu'),
            Conv2D(384, kernel_size=(3, 3), padding='same', activation='relu'),
            Conv2D(256, kernel_size=(3, 3), padding='same', activation='relu'),
            MaxPooling2D(pool_size=(3, 3), strides=(2, 2)),
            Flatten(),
            Dense(4096, activation='relu'),
            Dropout(0.5),
            Dense(4096, activation='relu'),
            Dropout(0.5),
            Dense(num_classes, activation='softmax'),
        ])

        model.compile(
            optimizer=Adam(),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        # Train for 10 epochs; the held-out split is also used for validation
        model.fit(
            X_train,
            y_train,
            epochs=10,
            batch_size=32,
            validation_data=(X_test, y_test),
        )

        # Score the trained network on the held-out split
        loss, accuracy = model.evaluate(X_test, y_test)
        print(f'Test Loss: {loss:.4f}')
        print(f'Test Accuracy: {accuracy:.4f}')

        # NOTE(review): same file name as the Conv2D cell earlier in this notebook,
        # so running both cells leaves only the most recently saved model on disk.
        model.save('speaker_recognition_model.h5')
