In [37]:
# Import the libraries used throughout the notebook

import os
import json
import sounddevice as sd
import soundfile as sf
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import lbg

# Definition of Variables


In [38]:
# Recording settings
duration_s = 2  # presumably the length of one recording in seconds (see record_audio)
fs = 44100      # sampling rate in Hz, used for every load / MFCC call below

# Parameters swept by the evaluation cell
mfcc = [20]         # numbers of MFCC coefficients to try
cb_depths = [4]     # codebook sizes handed to lbg.generate_codebook
frame_lenght = 1024  # STFT window (n_fft); the spelling is kept because other cells read this name
hop_lenght = 512     # STFT hop; same remark about the spelling

# Append the framing parameters of this run to the results log.
with open('results.txt', 'a') as results_log:
    results_log.writelines([
        f"frame lenght\t{frame_lenght}\n",
        f"hop lenght\t{hop_lenght}\n",
        "\n",
    ])

In [39]:
def record_audio(seconds, user_name, entry, fs=44100):
    """Record a mono clip from the default input device and optionally save it.

    Parameters
    ----------
    seconds : float
        Duration of the recording in seconds.
    user_name : str
        Name of the speaker. When empty (falsy) the clip is only returned and
        nothing is written to disk.
    entry : int or str
        Index appended to the file name (``voice_<user_name>_<entry>.wav``).
    fs : int, optional
        Sampling frequency in Hz. Defaults to 44100, the value that was
        previously hard-coded.

    Returns
    -------
    audio : numpy.ndarray
        The recorded float32 samples as returned by ``sd.rec`` with one
        channel (callers flatten it with ``reshape(-1)``).
    fs : int
        The sampling frequency that was used.
    """
    print("Recording...")

    # Start the capture, then block until the requested number of frames is in.
    audio = sd.rec(int(seconds * fs), samplerate=fs, channels=1, dtype=np.float32)
    sd.wait()

    print("Finished recording.")

    if user_name:
        filename = f"voice_{user_name}_{entry}.wav"
        user_dir = os.path.join(os.getcwd(), 'Users_samples', user_name)
        # Make sure the destination exists; writing used to fail when the
        # user's folder had not been created beforehand.
        os.makedirs(user_dir, exist_ok=True)
        file_path = os.path.join(user_dir, filename)
        # Save audio to file
        sf.write(file_path, audio, fs, subtype='PCM_24')
        print(f"Audio recorded and saved as {file_path}")

    return audio, fs

def extract_mfcc_file(audio_file, num_mfcc, frame_length, hop_length, sr=22050):
    """Load an audio file and return its MFCC matrix.

    Parameters
    ----------
    audio_file : str
        Path of the audio file to load.
    num_mfcc : int
        Number of MFCC coefficients to compute.
    frame_length : int
        STFT window size, forwarded as ``n_fft``.
    hop_length : int
        Number of samples between successive frames.
    sr : int or None, optional
        Sampling rate the file is resampled to while loading. The default
        (22050) is librosa's own default and therefore keeps the previous
        behaviour; pass the notebook-wide ``fs`` to obtain features that are
        comparable with those from ``extract_mfcc``, or ``None`` to keep the
        file's native rate.

    Returns
    -------
    numpy.ndarray
        The array returned by ``librosa.feature.mfcc`` (callers in this
        notebook transpose it to get one row per frame).
    """
    # Load audio file at the requested rate
    y, sr = librosa.load(audio_file, sr=sr)

    # Extract MFCCs using the rate the audio actually has after loading
    return librosa.feature.mfcc(y=y, sr=sr, n_mfcc=num_mfcc, n_fft=frame_length, hop_length=hop_length)

def extract_mfcc(audio_waveform, sr, num_mfcc, frame_length, hop_length):
    """Compute the MFCCs of an in-memory waveform.

    ``frame_length`` is forwarded to librosa as ``n_fft``; every other argument
    is passed through under its librosa name. The array produced by
    ``librosa.feature.mfcc`` is returned unchanged.
    """
    return librosa.feature.mfcc(
        y=audio_waveform,
        sr=sr,
        n_mfcc=num_mfcc,
        n_fft=frame_length,
        hop_length=hop_length,
    )

def calculate_distortion(sample_mfcc, codebook, average=False):
    """Quantisation distortion of a set of feature vectors against a codebook.

    Each row of ``sample_mfcc`` is assigned to its nearest codebook centroid
    (Euclidean distance) and the distances are accumulated.

    Parameters
    ----------
    sample_mfcc : array-like, shape (n_frames, n_features)
        One feature vector per row.
    codebook : array-like, shape (n_centroids, n_features)
        Centroids; a nested list (e.g. loaded from JSON) is accepted.
    average : bool, optional
        When True, return the mean distortion per frame instead of the total,
        which makes samples of different length comparable. Defaults to False
        (total distortion, the original behaviour).

    Returns
    -------
    float
        Total (or mean) distance to the nearest centroid; 0.0 when there are
        no frames.

    Raises
    ------
    ValueError
        If the inputs are not 2-D, the codebook is empty, or the feature
        dimensions differ.
    """
    frames = np.asarray(sample_mfcc, dtype=float)
    centroids = np.asarray(codebook, dtype=float)

    if frames.ndim != 2 or centroids.ndim != 2:
        raise ValueError("sample_mfcc and codebook must both be 2-D (rows are vectors)")
    if centroids.shape[0] == 0:
        raise ValueError("codebook must contain at least one centroid")
    if frames.shape[1] != centroids.shape[1]:
        # Checked explicitly: broadcasting would otherwise silently accept a
        # codebook whose feature dimension is 1.
        raise ValueError("sample_mfcc and codebook must have the same number of features")

    # Pairwise distances, shape (n_frames, n_centroids), computed once.
    distances = np.linalg.norm(frames[:, None, :] - centroids[None, :, :], axis=2)

    # Distance of every frame to its nearest centroid.
    nearest = distances.min(axis=1)
    total_distortion = float(nearest.sum())

    if average:
        return total_distortion / frames.shape[0] if frames.shape[0] else 0.0
    return total_distortion

def create_user_folder(user_name):
    """Ensure ``Users_samples/<user_name>`` exists under the working directory.

    Prints whether the folder was created or was already there.

    Returns
    -------
    (int, str)
        The number of regular files currently in the folder, and the folder's
        absolute path.
    """
    target_dir = os.path.join(os.getcwd(), 'Users_samples', user_name)

    if os.path.exists(target_dir):
        print(f"Folder '{user_name}' already exists.")
    else:
        os.makedirs(target_dir)
        print(f"Folder '{user_name}' created successfully.")

    # Count plain files only; sub-directories are ignored.
    n_files = sum(
        1
        for entry in os.listdir(target_dir)
        if os.path.isfile(os.path.join(target_dir, entry))
    )
    return n_files, target_dir

def scale_number(unscaled, to_min, to_max, from_min, from_max):
    """Linearly map ``unscaled`` from [from_min, from_max] to [to_min, to_max].

    Raises ``ZeroDivisionError`` for plain Python numbers when
    ``from_min == from_max`` (the source range is empty).
    """
    return (to_max-to_min)*(unscaled-from_min)/(from_max-from_min)+to_min


def scale_list(l, to_min, to_max):
    """Rescale every element of ``l`` so its min/max map to ``to_min``/``to_max``.

    Parameters
    ----------
    l : iterable of numbers
        Values to rescale; any iterable is accepted.
    to_min, to_max : number
        Bounds of the target range.

    Returns
    -------
    list
        The rescaled values, in the original order; ``[]`` for empty input.

    Raises
    ------
    ZeroDivisionError
        If all values are equal (there is no range to scale from), as before.
    """
    values = list(l)
    if not values:
        return []
    # Compute the source range once instead of once per element.
    from_min, from_max = min(values), max(values)
    return [scale_number(value, to_min, to_max, from_min, from_max) for value in values]


# Add users to database and create filesystem

In [40]:
# user_name = input("Enter your name: ")
# entry, folder = create_user_folder(user_name)
# c=0
# while c < 5:
#     input()
#     audio_file, fs = record_audio(duration_s, user_name, entry+1)
#     audio = audio_file.reshape(-1)
#     c+=1
#     entry+=1

In [41]:
# Speaker-identification sweep.
#
# For every (number of MFCCs, codebook depth) combination this cell
#   1. builds one LBG codebook per speaker found in Users_samples,
#   2. stores the codebooks as JSON and reads them back,
#   3. identifies every file of the test set in Sample by minimum distortion,
#   4. appends accuracy / distortion statistics to results.txt and
#      AllResults.txt, and
#   5. saves a scatter plot of the centroids.

# --- Paths (independent of the swept parameters) ---------------------------
user_folder = 'Users_samples'
users_folder = os.path.join(os.getcwd(), user_folder)

codebook_folder_rel = 'Codebooks'
codebook_folder = os.path.join(os.getcwd(), codebook_folder_rel)
os.makedirs(codebook_folder, exist_ok=True)  # the dump below needs the folder to exist
codebook_path = os.path.join(codebook_folder, 'Codebook')

sample_folder = os.path.join(os.getcwd(), 'Sample')

# STFT settings from the configuration cell (those names carry a typo).
frame_length = frame_lenght
hop_length = hop_lenght

# --- Test set: <name>_<index>.wav, index starting at 0 ----------------------
samples_per_user = {'Katia': 20, 'Facundo': 10, 'Cesar': 20, 'Alanny': 10}
users = list(samples_per_user)
sample_filenames = [
    [f'{name}_{index}.wav' for index in range(count)]
    for name, count in samples_per_user.items()
]

# --- Enrolment audio (loaded once; it does not depend on the parameters) ----
# Sorted so that the concatenation order is reproducible across machines.
folder_names = sorted(
    name for name in os.listdir(users_folder)
    if os.path.isdir(os.path.join(users_folder, name))
)

audio_per_user = {}
for folder_name in folder_names:
    folder_path = os.path.join(users_folder, folder_name)

    clips = []
    for filename in sorted(os.listdir(folder_path)):
        # Only audio files are read
        if not filename.endswith(('.wav', '.mp3')):
            continue
        print(f"Reading audio file: {filename}")
        audio, sr = librosa.load(os.path.join(folder_path, filename), sr=fs)
        # NOTE(review): an earlier version computed a peak-normalised copy of
        # `audio` here but never used it. The raw signal is kept so enrolment
        # and test audio are treated alike; if normalisation is wanted, apply
        # it to both.
        clips.append(audio)

    if not clips:
        print(f"No audio files found for '{folder_name}'. Skipping.")
        continue
    audio_per_user[folder_name] = np.concatenate(clips)

if not audio_per_user:
    raise RuntimeError(f"No enrolment audio found under {users_folder}")

# --- Parameter sweep ---------------------------------------------------------
for num_mfcc in mfcc:
    for cb_depth in cb_depths:
        # Create codebooks
        mfccs_per_user = {}
        codebook_per_user = {}
        for user, audio_user in audio_per_user.items():
            mfccs_per_user[user] = extract_mfcc(audio_user, fs, num_mfcc, frame_length, hop_length)
            # generate_codebook receives one feature vector per row
            cb, cb_abs_w, cb_rel_w = lbg.generate_codebook(mfccs_per_user[user].T, cb_depth)
            codebook_per_user[user] = cb

        with open(codebook_path, 'w') as f:
            json.dump(codebook_per_user, f)

        print("Finished creating Codebook.")

        # Read the codebooks back once per parameter set (not once per file).
        print("Loading Codebook ... ")
        with open(codebook_path, 'r') as f:
            stored_codebook = json.load(f)

        parameter_accuracy = 0       # correct identifications among enrolled speakers
        enrolled_sample_count = 0    # number of test files belonging to enrolled speakers
        cb_users_min_distortions = []  # winning distortions for enrolled speakers
        out_cb_min_distortions = []    # winning distortions for speakers without a codebook

        print(f"mfcc\t{num_mfcc}")
        print(f"codebook depth\t{cb_depth}")
        print("----------------")
        print("----------------")

        with open('results.txt', 'a') as results_file, \
                open('AllResults.txt', 'a') as all_results_file:
            for log_file in (results_file, all_results_file):
                log_file.write(f"mfcc\t{num_mfcc}\n")
                log_file.write(f"codebook depth\t{cb_depth}\n")
                log_file.write("\n")

            for user1, user_samples in zip(users, sample_filenames):
                # A speaker is "in the codebook" when a codebook was stored for
                # them, regardless of their position in `users`.
                is_enrolled = user1 in stored_codebook
                user_accuracy = 0

                for filename in user_samples:
                    sample_path = os.path.join(sample_folder, filename)
                    audio_file, sr = librosa.load(sample_path, sr=fs)
                    print(filename)
                    all_results_file.write(f'{filename}\n')

                    audio = audio_file.reshape(-1)
                    mfccs = extract_mfcc(audio, fs, num_mfcc, frame_length, hop_length)

                    # Distortion of this sample against every stored codebook
                    print("User\tDistortion")
                    print("----------------")
                    user_distortion = {
                        user: calculate_distortion(mfccs.T, cb)
                        for user, cb in stored_codebook.items()
                    }
                    min_key = min(user_distortion, key=user_distortion.get)
                    min_distortion = user_distortion[min_key]

                    all_results_file.write(f"{min_key}\t{min_distortion}\n")
                    print(min_key)
                    print(min_distortion)
                    print("----------------")

                    if min_key == user1:
                        user_accuracy += 1
                    if is_enrolled:
                        cb_users_min_distortions.append(min_distortion)
                    else:
                        out_cb_min_distortions.append(min_distortion)

                # Percentage over this user's own number of samples (it used to
                # be divided by a fixed 20 even for users with 10 samples).
                user_percent = user_accuracy * 100 / len(user_samples) if user_samples else 0.0
                results_file.write(f'{user1}\t{user_percent}%\n')

                if is_enrolled:
                    parameter_accuracy += user_accuracy
                    enrolled_sample_count += len(user_samples)

            # Overall accuracy is taken over enrolled speakers only: a speaker
            # without a codebook can never be identified correctly.
            if enrolled_sample_count:
                parameter_percent = parameter_accuracy * 100 / enrolled_sample_count
            else:
                parameter_percent = 0.0
            cb_max = max(cb_users_min_distortions, default=float('nan'))
            out_cb_min = min(out_cb_min_distortions, default=float('nan'))

            results_file.write(f'Parameter accuracy \t{parameter_percent}%\n')
            results_file.write(f'Distortion thresholds \t CB max = {cb_max}\tOut of CB min = {out_cb_min}\n')
            results_file.write('---------------------------------------\n\n\n')

        # Plot two coefficients of every speaker's centroids (needs num_mfcc >= 3).
        feature1_idx = 1
        feature2_idx = 2

        fig, ax = plt.subplots(figsize=(12, 8))
        for user in mfccs_per_user:
            cb_stored = np.array(stored_codebook[user])
            ax.scatter(cb_stored[:, feature1_idx], cb_stored[:, feature2_idx],
                       label=f'Centroid of {user}', marker='x')

        ax.set_xlabel(f'Feature {feature1_idx+1}')
        ax.set_ylabel(f'Feature {feature2_idx+1}')
        ax.set_title('MFCC and Centroid')
        ax.legend()
        ax.grid(True)
        fig.savefig(f"mfcc_{num_mfcc}_cb_{cb_depth}.png")
        # Show before closing; closing first left nothing to display.
        plt.show()
        plt.close(fig)

Reading audio file: voice_Cesar_1.wav
Reading audio file: voice_Cesar_2.wav
Reading audio file: voice_Cesar_3.wav
Reading audio file: voice_Cesar_4.wav
Reading audio file: voice_Cesar_5.wav
Reading audio file: voice_Facundo_1.wav
Reading audio file: voice_Facundo_2.wav
Reading audio file: voice_Facundo_3.wav
Reading audio file: voice_Facundo_4.wav
Reading audio file: voice_Facundo_5.wav
Reading audio file: voice_Katia_1.wav
Reading audio file: voice_Katia_2.wav
Reading audio file: voice_Katia_3.wav
Reading audio file: voice_Katia_4.wav
Reading audio file: voice_Katia_5.wav
Finished creating Codebook.
mfcc	20
codebook depth	4
----------------
----------------
Katia_0.wav
Loading Codebook ... 
User	Distortion
----------------
Katia
7139.4021956480165
----------------
Katia_1.wav
Loading Codebook ... 
User	Distortion
----------------
Facundo
10476.919398090839
----------------
Katia_2.wav
Loading Codebook ... 
User	Distortion
----------------
Katia
8294.276642354904
----------------
Katia

# Create codebooks

In [42]:
# user_folder='Users_samples'
# users_folder= os.path.join(os.getcwd(), user_folder)

# codebook_folder_rel='Codebooks'
# codebook_folder= os.path.join(os.getcwd(), codebook_folder_rel)

# folder_names = [name for name in os.listdir(users_folder) if os.path.isdir(os.path.join(users_folder, name))]


# audio_per_user = {}
# mfccs_per_user = {}
# codebook_per_user = {}

# for folder_name in folder_names:
#     folder_path = os.path.join(users_folder, folder_name)
#     # Check if folder exists
#     if not os.path.exists(folder_path) or not os.path.isdir(folder_path):
#         print(f"Folder '{folder_name}' does not exist or is not a directory. Skipping.")
#         continue

#     # Read audio files in the folder
#     concatenated_audio = []
#     for filename in os.listdir(folder_path):
#         file_path = os.path.join(folder_path, filename)

#         # Check if file is an audio file
#         if filename.endswith('.wav') or filename.endswith('.mp3'):
#             print(f"Reading audio file: {filename}")
#             audio, sr = librosa.load(file_path, sr=fs)
#             normalized_audio = audio / np.max(np.abs(audio))
#             concatenated_audio.extend(audio)


#     audio_per_user[folder_name] = concatenated_audio
  
# for user in audio_per_user:
#     num_mfcc = num_mfcc  # Number of MFCC coefficients, global variable
#     frame_length = frame_lenght  # Frame length for STFT
#     hop_length = hop_lenght  # Hop length for STFT
#     audio_user=np.array(audio_per_user[user])
    
#     mfccs_per_user[user] = extract_mfcc(audio_user, fs, num_mfcc, frame_length, hop_length)
#     cb, cb_abs_w, cb_rel_w = lbg.generate_codebook(mfccs_per_user[user].T, cb_depth)
#     codebook_per_user[user] = cb
    
# with open(os.path.join(codebook_folder, 'Codebook'), 'w') as f:
#     json.dump(codebook_per_user, f)  
    
# print("Finished creating Codebook.")    

# Read the codebook and identify speaker

In [43]:
# sample_filenames = ['Alanny.wav', 'Facundo.wav', 'Cesar.wav', 'Katia.wav']
# sample_folder_rel='Sample'
# sample_folder= os.path.join(os.getcwd(), sample_folder_rel)
# print(f"mfcc\t{num_mfcc}")
# print(f"codebook depth\t{cb_depth}")
# # print("----------------")
# # print("----------------")
# # with open('results.txt', 'a') as file:
# #     file.write(f"mfcc\t{num_mfcc}\n")
# #     file.write(f"codebook depth\t{cb_depth}\n")
# #     file.write("----------------\n")
# #     file.write("----------------\n")
# for filename in sample_filenames:

#     sample_path= os.path.join(sample_folder, filename)
#     audio_file, sr = librosa.load(sample_path, sr=fs)
#     print(filename)
#     # with open('results.txt', 'a') as file:
#     #     file.write(f'{filename}\n')
#     # entry = input("Press a key to enter a voice sample for identification: ")
#     # audio_file, fs = record_audio(2, "", "")

#     audio = audio_file.reshape(-1)

#     num_mfcc = num_mfcc  # Number of MFCC coefficients
#     frame_length = frame_lenght  # Frame length for STFT
#     hop_length = hop_lenght  # Hop length for STFT

#     mfccs = extract_mfcc(audio, fs, num_mfcc, frame_length, hop_length)

#     # print("Loading Codebook ... ")

#     with open(os.path.join(codebook_folder, 'Codebook'), 'r') as f:
#             stored_codebook = json.load(f)

#     # calculate distortions

#     # print("User\tDistortion")
#     # print("----------------")
#     # with open('results.txt', 'a') as file:
#     #     file.write("User\tDistortion\n")
#     #     file.write("----------------\n")
#     user_distortion = {}
#     for user, cb in stored_codebook.items():
#         distortion = calculate_distortion(mfccs.T, cb)
#         # print(f"{user}\t{distortion}")
#         # with open('results.txt', 'a') as file:
#         #     file.write(f"{user}\t{distortion}\n")
#         if user not in user_distortion:
#             user_distortion[user] = distortion
#     min_key = min(user_distortion, key=user_distortion.get)
#     print(min_key)
#     print(user_distortion.get(min_key))
#     print("----------------")

#     # with open('results.txt', 'a') as file:
#     #     file.write("----------------\n")
#     #     file.write("----------------\n")
#     #     file.write("----------------\n")
# # with open('results.txt', 'a') as file:
# #     file.write('---------------------------------------\n')
# print("\n")
# print("\n")
# print("\n")


# Plot the features and centroids

In [44]:
# feature1_idx = 1  
# feature2_idx = 2

# plt.figure(figsize=(12, 8)) 

# for user, sample_mfcc in mfccs_per_user.items():
#     # Plot MFCC
#     #plt.scatter(sample_mfcc[feature1_idx,:], sample_mfcc[feature2_idx,:], label=user)
#     # Plot centroid
#     cb_stored=np.array(stored_codebook[user])
#     plt.scatter(cb_stored[:,feature1_idx], cb_stored[:,feature2_idx], label=f'Centroid of {user}', marker='x')
   
# plt.xlabel(f'Feature {feature1_idx+1}')
# plt.ylabel(f'Feature {feature2_idx+1}')
# plt.title(f'MFCC and Centroid')
# plt.legend()
# plt.grid(True)
# plt.savefig(f"mfcc_{num_mfcc}_cb_{cb_depth}.png")
# plt.close()
# plt.show()

In [45]:
# ## MODIFICATIONS FOR TESTING
# sample_folder_rel='Sample'
# sample_folder= os.path.join(os.getcwd(), sample_folder_rel)
# sample_filename_rel='sample2.wav'
# sample_path= os.path.join(sample_folder, sample_filename_rel)
# # Read audio
# audio_file, sr = librosa.load(sample_path, sr=fs)

# # ## MODIFICATIONS FOR TESTING
# # sample_folder_rel='Users_samples'
# # sample_folder= os.path.join(os.getcwd(), sample_folder_rel)
# # sample_filename_rel='Cesar'
# # sample_folder= os.path.join(sample_folder, sample_filename_rel)
# # sample_filename_rel='voice_Cesar_1.wav'
# # sample_path= os.path.join(sample_folder, sample_filename_rel)
# # # Read audio
# # audio_file, sr = librosa.load(sample_path, sr=fs)

In [46]:

# # entry = input("Press a key to enter a voice sample for identification: ")
# # audio_file, fs = record_audio(2, "", "")

# audio = audio_file.reshape(-1)

# num_mfcc = num_mfcc  # Number of MFCC coefficients
# frame_length = frame_lenght  # Frame length for STFT
# hop_length = hop_lenght  # Hop length for STFT

# mfccs = extract_mfcc(audio, fs, num_mfcc, frame_length, hop_length)

# print("Loading Codebook ... ")

# with open(os.path.join(codebook_folder, 'Codebook'), 'r') as f:
#         stored_codebook = json.load(f)

# # calculate distortions

# print("User\tDistortion")
# print("----------------")
# for user, cb in stored_codebook.items():
#     distortion = calculate_distortion(mfccs.T, cb)
#     print(f"{user}\t{distortion}")
    

