In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# later releases dropped the bare 'seaborn' alias. Prefer the new name when it is
# available and fall back to the legacy one on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [None]:
# Load the per-epoch mean clustering performance tables, one CSV per
# embedding technique (TF-IDF, Doc2Vec, Sentence BERT), in that order.
tfidf_mean_perf, doc2vec_mean_perf, sbert_mean_perf = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../results/Finding optimal epochs/mean_performance/mean_performance-tfidf_kmeans.csv',
        '../results/Finding optimal epochs/mean_performance/mean_performance-doc2vec_kmeans.csv',
        '../results/Finding optimal epochs/mean_performance/mean_performance-sbert_kmeans.csv',
    )
)

In [None]:
# Tag every row with the embedding technique it came from, so the tables can
# be told apart once they are stacked into a single frame.
for frame, label in (
    (tfidf_mean_perf, 'TF-IDF'),
    (doc2vec_mean_perf, 'Doc2Vec'),
    (sbert_mean_perf, 'Sentence BERT'),
):
    frame['feature-ext'] = label

In [None]:
# Stack the three tagged tables row-wise into one long frame; the original row
# labels are discarded in favour of a fresh 0..n-1 index.
mean_perf = pd.concat(
    objs=[tfidf_mean_perf, doc2vec_mean_perf, sbert_mean_perf],
    axis=0,
    ignore_index=True,
)

In [None]:
# AMI per epoch, one line per embedding technique, saved as a PDF.
plt.figure(figsize=(15,8))
# `ci` is expressed in percent (0-100): 90 draws a 90% confidence band, whereas
# the previous 0.90 requested a 0.9% interval that collapses onto the mean line.
# NOTE: sns.lineplot returns the matplotlib Axes; the name `fig` is kept as-is.
fig = sns.lineplot(data=mean_perf, x='epoch', y='AMI', hue='feature-ext', ci=90)
# Re-draw the legend at a fixed spot in the lower-right area with a readable title.
plt.legend(bbox_to_anchor=(0.9, 0.20), loc='center', title='Embedding techniques', borderaxespad=0)
plt.title('AMI by epoch')
plt.savefig('../results/Finding optimal epochs/plots/AMI by epoch.pdf', dpi=300, bbox_inches='tight')

In [None]:
# ARI per epoch, one line per embedding technique, saved as a PDF.
plt.figure(figsize=(15,8))
# `ci` is expressed in percent (0-100): 90 draws a 90% confidence band, whereas
# the previous 0.90 requested a 0.9% interval that collapses onto the mean line.
# NOTE: sns.lineplot returns the matplotlib Axes; the name `fig` is kept as-is.
fig = sns.lineplot(data=mean_perf, x='epoch', y='ARI', hue='feature-ext', ci=90)
# Re-draw the legend at a fixed spot in the lower-right area with a readable title.
plt.legend(bbox_to_anchor=(0.9, 0.20), loc='center', title='Embedding techniques', borderaxespad=0)
plt.title('ARI by epoch')
plt.savefig('../results/Finding optimal epochs/plots/ARI by epoch.pdf', dpi=300, bbox_inches='tight')

In [None]:
# NMI per epoch, one line per embedding technique, saved as a PDF.
plt.figure(figsize=(15,8))
# `ci` is expressed in percent (0-100): 90 draws a 90% confidence band, whereas
# the previous 0.90 requested a 0.9% interval that collapses onto the mean line.
# NOTE: sns.lineplot returns the matplotlib Axes; the name `fig` is kept as-is.
fig = sns.lineplot(data=mean_perf, x='epoch', y='NMI', hue='feature-ext', ci=90)
# Re-draw the legend at a fixed spot in the lower-right area with a readable title.
plt.legend(bbox_to_anchor=(0.9, 0.20), loc='center', title='Embedding techniques', borderaxespad=0)
plt.title('NMI by epoch')
plt.savefig('../results/Finding optimal epochs/plots/NMI by epoch.pdf', dpi=300, bbox_inches='tight')

In [None]:
# For each embedding technique, print the rows at which AMI, ARI and NMI
# reach their maximum (one row per metric, in that order).
for label, perf in (
    ('TF-IDF', tfidf_mean_perf),
    ('Doc2Vec', doc2vec_mean_perf),
    ('Sentence BERT', sbert_mean_perf),
):
    print(label)
    # idxmax() yields, per metric column, the index label of its largest value.
    max_index = perf[['AMI','ARI','NMI']].idxmax()
    print(perf.loc[max_index])