IN THIS NOTEBOOK, CORRELATION ANALYSES ARE CONDUCTED BETWEEN EACH OF THE TARGET SPEECH FEATURES (MFCC, X-VECTOR AND EMBEDDING) AND THE CORRESPONDING SILHOUETTE SCORES

STEP 1: load and preprocess data

In [None]:
# Load the processed feature table from a pickle file.
# NOTE(review): pd.read_pickle unpickles the file, and unpickling can execute
# arbitrary code -- only load pickle files that come from a trusted source.
import pandas as pd

loaded_features_df = pd.read_pickle('FILE_PATH_THAT_STORES_THE_DATA')

STEP 2: prepare data at both the speaker level and the sample level for further analysis

STEP 2.1: prepare MFCCs and MFCC-based silhouette scores

In [None]:
# Collect the MFCC vectors that the MFCC-based silhouette scores are computed on.

import numpy as np

# Every entry of 'mean_mfcc_segments' is iterated as a sequence of MFCC
# vectors; all rows are flattened into one array, one vector per sample.
mfccs = np.array([
    segment_mfcc
    for segment_list in loaded_features_df['mean_mfcc_segments']
    for segment_mfcc in segment_list
])

In [None]:
# Build one cluster label per sample of the form "<file_name>_<speaker>".
label_df = pd.read_csv("FILE_STORE_SELECTED_SEGMENTS.csv")

file_names = label_df['file_name']
speakers = label_df['speaker']

# Keep the combined label in the table and expose it as a plain array of
# per-sample labels for the silhouette computations.
label_df['full_label'] = file_names + '_' + speakers
label_per_sample = label_df['full_label'].values

In [None]:
# MFCC-based silhouette scores: one score per sample, then one mean per label.

from sklearn.metrics import silhouette_samples

# Silhouette coefficient of every sample, using the per-sample labels as clusters.
sil_per_sample_m = silhouette_samples(mfccs, label_per_sample)

# Average the per-sample scores within each label.
# NOTE(review): np.unique returns the labels in sorted order, so this list is
# ordered by label, while the speaker-level feature lists used in STEP 3 are
# read in loaded_features_df row order -- confirm that both orders agree
# before correlating them.
silhouette_speaker_mfcc = [
    sil_per_sample_m[label_per_sample == speaker_label].mean()
    for speaker_label in np.unique(label_per_sample)
]

STEP 2.2: prepare xvectors and xvector-based silhouette scores

In [None]:
# Sample-level MFCC data kept for the sample-level analysis (STEP 4).

# silhouette score of every sample
sil_mfcc_per_sam = sil_per_sample_m

# full MFCC vector of every sample (coefficient-wise analysis)
mfcc_vec_per_sam = mfccs

# a single value per sample: the mean over that sample's MFCC coefficients
mfcc_val_per_sam = mfccs.mean(axis=1)

In [None]:
# Collect the x-vectors that the x-vector-based silhouette scores are computed on.

# Flatten all rows of 'xvectors' into one array, then drop axis 1.
# np.squeeze(..., axis=1) raises if that axis does not have length 1.
# assumes each stored x-vector carries a singleton leading axis, e.g. (1, dim)
# -- TODO confirm
xvectors = np.squeeze(
    np.array([
        xvec
        for row_xvectors in loaded_features_df['xvectors']
        for xvec in row_xvectors
    ]),
    axis=1,
)


In [None]:
# X-vector-based silhouette scores: one score per sample, then one mean per label.

# Silhouette coefficient of every sample, using the per-sample labels as clusters.
sil_per_sample_x = silhouette_samples(xvectors, label_per_sample)

# Average the per-sample scores within each label.
# NOTE(review): np.unique returns the labels in sorted order; the speaker-level
# x-vector values used in STEP 3.3 are read in loaded_features_df row order --
# confirm that both orders agree.
silhouette_speaker_xvector = [
    sil_per_sample_x[label_per_sample == speaker_label].mean()
    for speaker_label in np.unique(label_per_sample)
]


In [None]:
# Sample-level x-vector data kept for the sample-level analysis (STEP 4.3).

# silhouette score of every sample
sil_xvector_per_sam = sil_per_sample_x

# a single value per sample: the mean over that sample's x-vector entries
xvector_per_sam = xvectors.mean(axis=1)

STEP 2.3: prepare embeddings and embedding-based silhouette scores

In [None]:
#get the speaker level silhouette score based on embedding

def calculate_mean_embedding(embeddings) -> list:
    """Average the embeddings of each segment along axis 0.

    Args:
        embeddings: Iterable with one entry per segment; every entry is
            passed to ``np.mean(..., axis=0)``.

    Returns:
        A list holding one mean embedding per segment, in input order.
        An empty input yields an empty list.
    """
    return [
        np.mean(embedding_per_segment, axis=0)
        for embedding_per_segment in embeddings
    ]

def calculate_mean_speaker_emb(embeddings):
    """Reduce a collection of embeddings to one scalar.

    The embeddings are first averaged along axis 0, and the resulting
    vector is then averaged over all of its entries.

    Args:
        embeddings: Array-like of embeddings, stacked along axis 0.

    Returns:
        The mean of the axis-0 mean vector.
    """
    return np.mean(np.mean(embeddings, axis=0))

# Add the per-segment mean embeddings, and the scalar derived from them, to the table.
loaded_features_df['mean_embedding_segments'] = (
    loaded_features_df['embeddings'].apply(calculate_mean_embedding)
)
loaded_features_df['mean_speaker_emb_value'] = (
    loaded_features_df['mean_embedding_segments'].apply(calculate_mean_speaker_emb)
)

# DataFrame built from the extended table (not referenced again in the cells shown here).
add_mean_embedding_df = pd.DataFrame(loaded_features_df)

# One scalar per table row, in row order (used for the speaker-level analysis).
speaker_embeddings = list(loaded_features_df['mean_speaker_emb_value'])

# Flatten the per-row lists of mean embeddings into one array, one embedding per sample.
embeddings = np.array([
    segment_embedding
    for row_embeddings in loaded_features_df['mean_embedding_segments']
    for segment_embedding in row_embeddings
])


In [None]:
# Embedding-based silhouette scores: one score per sample, then one mean per label.

# Silhouette coefficient of every sample, using the per-sample labels as clusters.
sil_per_sample_e = silhouette_samples(embeddings, label_per_sample)

# Average the per-sample scores within each label.
# NOTE(review): np.unique returns the labels in sorted order; speaker_embeddings
# is built in loaded_features_df row order -- confirm that both orders agree.
silhouette_speaker_emb = [
    sil_per_sample_e[label_per_sample == speaker_label].mean()
    for speaker_label in np.unique(label_per_sample)
]


In [None]:
# Sample-level embedding data kept for the sample-level analysis (STEP 4.4).

# silhouette score of every sample
sil_embedding_per_samp = sil_per_sample_e

# a single value per sample: the mean over that sample's embedding entries
embedding_per_sam = embeddings.mean(axis=1)

STEP 3: speaker level correlation analysis

STEP 3.1: conduct speaker level correlation analysis between MFCC coefficients and MFCC-based silhouette scores

In [None]:
# Speaker-level MFCC features for the correlation analysis, in table row order.

# coefficient-wise: the 'speaker_mfcc_vector' entry of every row
speaker_mfcc_vector = list(loaded_features_df['speaker_mfcc_vector'])

# single value: the 'speaker_mfcc_value' entry of every row
speaker_mfcc = list(loaded_features_df['speaker_mfcc_value'])


In [None]:
# Speaker level: scatter every MFCC coefficient against the MFCC-based
# silhouette scores and print the Pearson and Spearman correlations.
# One figure per coefficient (13 in total).
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr

speaker_mfcc_vector = np.array(speaker_mfcc_vector)
silhouette_speaker_mfcc = np.array(silhouette_speaker_mfcc)

for coeff_idx in range(13):
    # column holding this coefficient for every speaker
    coeff_values = speaker_mfcc_vector[:, coeff_idx]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(silhouette_speaker_mfcc, coeff_values, color='b', s=50)
    ax.set_title(f"MFCC Coefficient {coeff_idx + 1} vs Silhouette Score (speaker-based)", fontsize=16)
    ax.set_xlabel("Silhouette Score", fontsize=12)
    ax.set_ylabel(f"MFCC Coefficient {coeff_idx + 1}", fontsize=12)
    ax.grid(True)
    fig.tight_layout()
    plt.show()

    # Pearson correlation: strength of the linear relationship
    pearson_corr, pearson_p_value = pearsonr(coeff_values, silhouette_speaker_mfcc)
    print(f'Linear correlation: {pearson_corr}')

    # Spearman's rank correlation: strength of the monotonic relationship
    spearman_corr, spearman_p_value = spearmanr(coeff_values, silhouette_speaker_mfcc)
    print(f"Non-linear correlation: {spearman_corr}")

STEP 3.2: conduct speaker level correlation analysis between mean MFCC value and MFCC-based silhouette scores

In [None]:
# Speaker level: scatter the single mean MFCC value of every speaker against
# the MFCC-based silhouette scores and print the correlations (one figure).
speaker_mfcc = np.array(speaker_mfcc)
silhouette_speaker_mfcc = np.array(silhouette_speaker_mfcc)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(silhouette_speaker_mfcc, speaker_mfcc, color='b', s=50)
ax.set_title("MFCC vs Silhouette Score (speaker-based)", fontsize=16)
ax.set_xlabel("Silhouette Score", fontsize=12)
ax.set_ylabel("MFCC", fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()

# Pearson correlation: strength of the linear relationship
pearson_corr, pearson_p_value = pearsonr(speaker_mfcc, silhouette_speaker_mfcc)
print(f'Linear correlation: {pearson_corr}')

# Spearman's rank correlation: strength of the monotonic relationship
spearman_corr, spearman_p_value = spearmanr(speaker_mfcc, silhouette_speaker_mfcc)
print(f"Non-linear correlation: {spearman_corr}")

STEP 3.3: conduct speaker level correlation analysis between mean xvector value and xvector-based silhouette scores

In [None]:
# Speaker-level x-vector values for the correlation analysis: the
# 'speaker_mean_xvector_value' entry of every row, in table row order.
speaker_xvector = list(loaded_features_df['speaker_mean_xvector_value'])

In [None]:
# Speaker level: scatter the mean x-vector value of every speaker against the
# x-vector-based silhouette scores and print the correlations (one figure).
speaker_xvector = np.array(speaker_xvector)
silhouette_speaker_xvector = np.array(silhouette_speaker_xvector)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(silhouette_speaker_xvector, speaker_xvector, color='b', s=50)
ax.set_title("X-vector vs Silhouette Score (speaker-based)", fontsize=16)
ax.set_xlabel("Silhouette Score", fontsize=12)
ax.set_ylabel("X-vector", fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()

# Drop any length-1 axes before handing the values to the correlation functions.
speaker_xvector = np.squeeze(speaker_xvector)

# Pearson correlation: strength of the linear relationship
pearson_corr, pearson_p_value = pearsonr(speaker_xvector, silhouette_speaker_xvector)
print(f'Linear correlation: {pearson_corr}')

# Spearman's rank correlation: strength of the monotonic relationship
spearman_corr, spearman_p_value = spearmanr(speaker_xvector, silhouette_speaker_xvector)
print(f"Non-linear correlation: {spearman_corr}")

STEP 3.4: conduct speaker level correlation analysis between mean embedding values and embedding-based silhouette scores

In [23]:
# Prepare the inputs of the speaker-level embedding correlation analysis.
# speaker_embeddings was already built together with the embeddings in
# STEP 2.3 and is used as it is; only the silhouette scores need an alias
# under the name used by the plotting cell.
silhouette_speaker_embedding = silhouette_speaker_emb

In [None]:
# Speaker level: scatter the mean embedding value of every speaker against the
# embedding-based silhouette scores and print the correlations (one figure).
# This cell only touches the embedding variables, so it does not depend on
# the x-vector cells having been run.
speaker_embeddings = np.array(speaker_embeddings)
silhouette_speaker_embedding = np.array(silhouette_speaker_embedding)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(silhouette_speaker_embedding, speaker_embeddings, color='b', s=50)
ax.set_title("Embeddings vs Silhouette Score (speaker-based)", fontsize=16)
ax.set_xlabel("Silhouette Score", fontsize=12)
ax.set_ylabel("Embeddings", fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()

# Pearson correlation: strength of the linear relationship
pearson_corr, pearson_p_value = pearsonr(speaker_embeddings, silhouette_speaker_embedding)
print(f'Linear correlation: {pearson_corr}')

# Spearman's rank correlation: strength of the monotonic relationship
spearman_corr, spearman_p_value = spearmanr(speaker_embeddings, silhouette_speaker_embedding)
print(f"Non-linear correlation: {spearman_corr}")

STEP 4: sample level correlation analysis

STEP 4.1: conduct sample level correlation analysis between MFCC coefficients and MFCC-based silhouette scores

In [None]:
# Sample level: scatter every MFCC coefficient against the MFCC-based
# silhouette scores of the samples and print the correlations.
# One figure per coefficient (13 in total).

mfcc_vec_per_sam = np.array(mfcc_vec_per_sam)
sil_mfcc_per_sam = np.array(sil_mfcc_per_sam)

for coeff_idx in range(13):
    # column holding this coefficient for every sample
    coeff_values = mfcc_vec_per_sam[:, coeff_idx]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(sil_mfcc_per_sam, coeff_values, color='b', s=50)
    ax.set_title(f"MFCC Coefficient {coeff_idx + 1} vs Silhouette Score (sample-based)", fontsize=16)
    ax.set_xlabel("Silhouette Score", fontsize=12)
    ax.set_ylabel(f"MFCC Coefficient {coeff_idx + 1}", fontsize=12)
    ax.grid(True)
    fig.tight_layout()
    plt.show()

    # Pearson correlation: strength of the linear relationship
    pearson_corr, pearson_p_value = pearsonr(coeff_values, sil_mfcc_per_sam)
    print(f'Linear correlation: {pearson_corr}')

    # Spearman's rank correlation: strength of the monotonic relationship
    spearman_corr, spearman_p_value = spearmanr(coeff_values, sil_mfcc_per_sam)
    print(f"Non-linear correlation: {spearman_corr}")


STEP 4.2: conduct sample level correlation analysis between mean MFCC values and MFCC-based silhouette scores

In [None]:
# Sample level: scatter the single mean MFCC value of every sample against
# the MFCC-based silhouette scores and print the correlations (one figure).
mfcc_val_per_sam = np.array(mfcc_val_per_sam)
sil_mfcc_per_sam = np.array(sil_mfcc_per_sam)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(sil_mfcc_per_sam, mfcc_val_per_sam, color='b', s=50)
ax.set_title("MFCC vs Silhouette Score (sample-based)", fontsize=16)
ax.set_xlabel("Silhouette Score", fontsize=12)
ax.set_ylabel("MFCC", fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()

# Pearson correlation: strength of the linear relationship
pearson_corr, pearson_p_value = pearsonr(mfcc_val_per_sam, sil_mfcc_per_sam)
print(f'Linear correlation: {pearson_corr}')

# Spearman's rank correlation: strength of the monotonic relationship
spearman_corr, spearman_p_value = spearmanr(mfcc_val_per_sam, sil_mfcc_per_sam)
print(f"Non-linear correlation: {spearman_corr}")

STEP 4.3: conduct sample level correlation analysis between mean xvector values and xvector-based silhouette scores

In [None]:
# Sample level: scatter the mean x-vector value of every sample against the
# x-vector-based silhouette scores and print the correlations (one figure).
# Only the sample-level arrays are used here; xvector_per_sam comes from
# np.mean(xvectors, axis=1), so no extra squeezing is applied.

xvector_per_sam = np.array(xvector_per_sam)
sil_xvector_per_sam = np.array(sil_xvector_per_sam)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(sil_xvector_per_sam, xvector_per_sam, color='b', s=50)
ax.set_title("X-vector vs Silhouette Score (sample-based)", fontsize=16)
ax.set_xlabel("Silhouette Score", fontsize=12)
ax.set_ylabel("X-vector", fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()

# Pearson correlation: strength of the linear relationship
pearson_corr, pearson_p_value = pearsonr(xvector_per_sam, sil_xvector_per_sam)
print(f'Linear correlation: {pearson_corr}')

# Spearman's rank correlation: strength of the monotonic relationship
spearman_corr, spearman_p_value = spearmanr(xvector_per_sam, sil_xvector_per_sam)
print(f"Non-linear correlation: {spearman_corr}")

STEP 4.4: conduct sample level correlation analysis between mean embedding values and embedding-based silhouette scores

In [None]:
# Sample level: scatter the mean embedding value of every sample against the
# embedding-based silhouette scores and print the correlations (one figure).
# Only the sample-level embedding arrays are used here; embedding_per_sam
# comes from np.mean(embeddings, axis=1), so no extra squeezing is applied.

embedding_per_sam = np.array(embedding_per_sam)
sil_embedding_per_samp = np.array(sil_embedding_per_samp)

fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(sil_embedding_per_samp, embedding_per_sam, color='b', s=50)
ax.set_title("Embeddings vs Silhouette Score (sample-based)", fontsize=16)
ax.set_xlabel("Silhouette Score", fontsize=12)
ax.set_ylabel("Embeddings", fontsize=12)
ax.grid(True)
fig.tight_layout()
plt.show()

# Pearson correlation: strength of the linear relationship
pearson_corr, pearson_p_value = pearsonr(embedding_per_sam, sil_embedding_per_samp)
print(f'Linear correlation: {pearson_corr}')

# Spearman's rank correlation: strength of the monotonic relationship
spearman_corr, spearman_p_value = spearmanr(embedding_per_sam, sil_embedding_per_samp)
print(f"Non-linear correlation: {spearman_corr}")