# Imports

In [77]:
# By Nguyen Ba Phi (S3VYH3) with the help of ChatGPT

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from sklearn.model_selection import train_test_split

import numpy as np
from scipy.spatial.distance import cdist
from sklearn.metrics import accuracy_score

from sklearn.mixture import GaussianMixture

from itertools import product


# Data Preparation 

In [78]:
# Data Preparation
# Path to your dataset
# NOTE(review): this is a machine-specific absolute path; point it at your own copy
# of the file when running the notebook elsewhere.
data_path = "/Users/killercookie/Documents/GitHub/School-Code/AML/PetersonBarney/verified_pb.data"

# Load the dataset; column names are supplied explicitly via `names`
columns = ['M/F/C', 'SPKR', 'Phoneme_Number', 'Phoneme_Ascii', 'F0', 'F1', 'F2', 'F3']
# sep=r"\s+" splits on runs of whitespace. It replaces delim_whitespace=True, which
# pandas has deprecated (the warning is hidden by the global warnings filter above).
df = pd.read_csv(data_path, sep=r"\s+", names=columns)

# Separate features and labels (Phoneme_Ascii as the target label, F0-F3 as features)
X = df[['F0', 'F1', 'F2', 'F3']]  # Acoustic features
y = df['Phoneme_Ascii']  # Target: Phoneme

# Splitting into 80/10/10 train, validation, and test sets:
# first hold out 20%, then cut that hold-out in half. Fixed seeds keep the split reproducible.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Display data splits
print(f"Training set size: {X_train.shape}")
print(f"Validation set size: {X_val.shape}")
print(f"Test set size: {X_test.shape}")

Training set size: (1216, 4)
Validation set size: (152, 4)
Test set size: (152, 4)


# Model Selection

## Baseline Model

In [79]:
# Baseline Model
# One centroid per phoneme: the mean feature vector of that phoneme's training samples
samples_by_phoneme = X_train.groupby(by=y_train)
centroids = samples_by_phoneme.mean()

def predict_centroid(X, centroids):
    """Label each sample with the phoneme of its nearest centroid.

    Parameters
    ----------
    X : array-like of samples, one row per sample, with the same columns as
        `centroids`.
    centroids : pandas DataFrame with one row per phoneme; the row index holds
        the phoneme labels.

    Returns
    -------
    pandas Index of predicted labels, one entry per row of `X`.
    """
    # Pairwise Euclidean distances (samples x centroids), reduced to the
    # position of the closest centroid for every sample
    nearest = cdist(X, centroids, metric='euclidean').argmin(axis=1)
    # Translate centroid positions back into phoneme labels
    return centroids.index[nearest]

# Classify the validation samples with the nearest-centroid baseline
y_val_pred = predict_centroid(X_val, centroids)

# Report the share of validation samples labelled correctly
accuracy = accuracy_score(y_true=y_val, y_pred=y_val_pred)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 42.76%


## GMM Model

In [80]:
# GMM Model
# Fit one 3-component, full-covariance GMM per phoneme on that phoneme's training
# samples (the component count can be tuned) and keep the fitted models keyed by
# phoneme label. `fit` returns the fitted estimator, so it can be stored directly.
gmm_models = {
    label: GaussianMixture(
        n_components=3, covariance_type='full', random_state=42
    ).fit(X_train[y_train == label])
    for label in y_train.unique()
}

def predict_gmm(X, gmm_models):
    """Label each sample with the phoneme whose model scores it highest.

    Parameters
    ----------
    X : samples to classify, one row per sample.
    gmm_models : dict mapping a phoneme label to a fitted model that exposes
        `score_samples(X)`, returning one score per sample.

    Returns
    -------
    NumPy array of predicted labels, one entry per row of `X`. When several
    models tie, the label that comes first in `gmm_models` is chosen.
    """
    labels = list(gmm_models)
    # One column of per-sample scores for every phoneme model, in dict order
    scores = np.column_stack([gmm_models[label].score_samples(X) for label in labels])
    # Position of the best-scoring model for each sample, mapped back to its label
    winners = scores.argmax(axis=1)
    return np.array(labels)[winners]

# Classify the validation samples with the per-phoneme GMMs
y_val_pred_gmm = predict_gmm(X_val, gmm_models)

# Report the share of validation samples labelled correctly
accuracy_gmm = accuracy_score(y_true=y_val, y_pred=y_val_pred_gmm)
print(f"GMM Validation Accuracy: {accuracy_gmm * 100:.2f}%")

GMM Validation Accuracy: 73.03%


### GMM Hyperparameter Tuning

In [81]:
# GMM hyperparameter tuning
# Grid of settings to evaluate on the validation set
n_components_range = [1, 2, 3, 4, 5]
covariance_types = ['full', 'tied', 'diag', 'spherical']

# Best validation accuracy seen so far and the settings that produced it
best_accuracy = 0
best_params = {}

# Exhaustive search over every (n_components, covariance_type) pair
for n_components, covariance_type in product(n_components_range, covariance_types):
    # Candidate-scoped names are used on purpose: reusing `gmm_models`,
    # `y_val_pred_gmm` and `accuracy_gmm` here would leave those notebook-level
    # variables holding the last grid candidate instead of the values from the
    # GMM Model cell.
    candidate_models = {}

    # Train a GMM for each phoneme with the current settings
    for phoneme in y_train.unique():
        X_phoneme = X_train[y_train == phoneme]

        # Never request more mixture components than there are samples
        effective_components = min(n_components, X_phoneme.shape[0])

        gmm = GaussianMixture(n_components=effective_components, covariance_type=covariance_type, random_state=42)
        gmm.fit(X_phoneme)
        candidate_models[phoneme] = gmm

    # Score this candidate on the validation set
    candidate_pred = predict_gmm(X_val, candidate_models)
    candidate_accuracy = accuracy_score(y_val, candidate_pred)

    # Strict '>' keeps the first setting that reaches a given accuracy
    if candidate_accuracy > best_accuracy:
        best_accuracy = candidate_accuracy
        best_params = {
            'n_components': n_components,
            'covariance_type': covariance_type,
        }

# Output the best parameters and accuracy
print(f"Best GMM Validation Accuracy: {best_accuracy * 100:.2f}%")
print(f"Best Parameters: {best_params}")

Best GMM Validation Accuracy: 73.03%
Best Parameters: {'n_components': 3, 'covariance_type': 'full'}


# Final Evaluation based on test data

In [82]:
# Refit one GMM per phoneme on the training split with the tuned settings
tuned_components = best_params['n_components']
tuned_covariance = best_params['covariance_type']

best_gmm_models = {}
for label in y_train.unique():
    label_samples = X_train.loc[y_train == label]
    # Never request more mixture components than there are samples
    component_count = min(tuned_components, len(label_samples))
    best_gmm_models[label] = GaussianMixture(
        n_components=component_count,
        covariance_type=tuned_covariance,
        random_state=42,
    ).fit(label_samples)

# Classify the held-out test samples with the tuned models
y_test_pred_gmm = predict_gmm(X_test, best_gmm_models)

# Report the share of test samples labelled correctly
test_accuracy_gmm = accuracy_score(y_true=y_test, y_pred=y_test_pred_gmm)
print(f"Best GMM Test Accuracy: {test_accuracy_gmm * 100:.2f}%")

Best GMM Test Accuracy: 75.00%
