In [122]:
# This notebook is written in Kaggle

import numpy as np
import pandas as pd
import librosa
import os

In [133]:
# Root directory of the competition dataset inside the Kaggle environment.
# The training folders are joined onto this path further down in this cell.
INPUT_PATH = "/kaggle/input/turkish-academy-voice-challenge-2023"
# Number of MFCC coefficients requested from librosa for each audio file.
# It is also the number of feature columns in the training frame (the frame
# has one extra column for the label).
MFCC_COEF_COUNT = 50  # Size of MFCC vectors

def extract_mfccs(file_full_path):
    """Summarise one audio file as a single averaged MFCC vector.

    The file is loaded with librosa's default settings, ``MFCC_COEF_COUNT``
    coefficients are computed for it, and the resulting matrix is averaged
    so that one value per coefficient remains.

    Parameters
    ----------
    file_full_path : str
        Path of the audio file to read.

    Returns
    -------
    numpy.ndarray
        The mean MFCC vector. Presumably of length ``MFCC_COEF_COUNT``
        (librosa documents the MFCC matrix as coefficients x frames) --
        confirm against the librosa version in use.
    """
    waveform, sample_rate = librosa.load(file_full_path)
    coefficient_matrix = librosa.feature.mfcc(
        y=waveform,
        sr=sample_rate,
        n_mfcc=MFCC_COEF_COUNT,
    )
    # Transpose first, then reduce over the leading axis: this collapses the
    # second axis of the matrix returned by librosa (the per-frame axis,
    # according to librosa's documentation).
    return np.mean(coefficient_matrix.T, axis=0)

# Names of the top-level training folders. Each folder name is also used
# verbatim as the class label stored in the last column of ``train_df``.
folder_names = ["0_AZIZSANCAR", "1_BIYKEMBOZKURT", "2_CAHITARF", "3_CANANDAGDEVIREN", "4_KORAYKAVUKCUOGLU"]

# Every training folder contains one nested folder that holds the audio files.
# The nested folder normally repeats the outer name; the entries below are the
# exceptions where the numeric prefix differs (index mismatch in the dataset).
nested_folder_overrides = {"4_KORAYKAVUKCUOGLU": "7_KORAYKAVUKCUOGLU"}

# Collect one row per audio file and build the frame once at the end.
# Enlarging a DataFrame row by row re-allocates on every insert, so a plain
# list of rows is both faster and simpler.
train_rows = []
for folder_name in folder_names:
    nested_folder_name = nested_folder_overrides.get(folder_name, folder_name)
    folder_full_path = os.path.join(INPUT_PATH, folder_name, nested_folder_name)
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and therefore the training run) reproducible.
    for file_name in sorted(os.listdir(folder_full_path)):
        file_full_path = os.path.join(folder_full_path, file_name)
        if not os.path.isfile(file_full_path):
            continue  # Skip sub-directories and other non-file entries.

        mfccs = extract_mfccs(file_full_path)  # Mean MFCC vector for this audio file.
        train_rows.append([*mfccs, folder_name])  # Features first, label last.

# Columns 0 .. MFCC_COEF_COUNT - 1 hold the features and the final column
# holds the label. Passing ``columns`` explicitly keeps that layout even when
# no audio file was found and ``train_rows`` is empty.
train_df = pd.DataFrame(train_rows, columns=range(MFCC_COEF_COUNT + 1))

In [134]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report 
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier

# Every column except the last one is a feature column.
train_X = train_df.iloc[:, :-1]

# The last column carries the label (the training folder name); encode it
# as integer class ids for the classifier.
speaker_labels = train_df.iloc[:, -1]
label_encoder = LabelEncoder()
train_y = label_encoder.fit_transform(speaker_labels)

# Multi-layer perceptron classifier (the model the author judged best suited
# to this task). The fixed random_state keeps initialisation repeatable.
mlp = MLPClassifier(random_state=42, max_iter=200)
mlp.fit(train_X, train_y)  # Train the classifier on the full training set.

In [135]:
# Where the submission file is written.
SUBMISSION_CSV_PATH = "/kaggle/working/submission.csv"
# Folder with the audio files to classify. Derived from INPUT_PATH so the
# dataset root is spelled out in one place only; the resulting path is
# unchanged.
TEST_PATH = os.path.join(INPUT_PATH, "Test")

# Keep regular files only, mirroring the guard used when building the training
# set, and sort so the submission rows come out in a reproducible order.
test_file_names = sorted(
    file_name
    for file_name in os.listdir(TEST_PATH)
    if os.path.isfile(os.path.join(TEST_PATH, file_name))
)

if test_file_names:
    # Stack one mean MFCC vector per file into a 2-D matrix and classify all
    # files with a single predict call instead of one call per file.
    test_X = np.vstack(
        [extract_mfccs(os.path.join(TEST_PATH, file_name)) for file_name in test_file_names]
    )
    test_predictions = mlp.predict(test_X)
else:
    # Nothing to classify: still emit a header-only submission file.
    test_predictions = []

# Write through pandas so fields are quoted when needed (for example a file
# name containing a comma). The header row stays "FileName,Class".
submission_df = pd.DataFrame({"FileName": test_file_names, "Class": test_predictions})
submission_df.to_csv(SUBMISSION_CSV_PATH, index=False)