In [None]:
! python -V
import os
import numpy as np
import pandas as pd
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import seaborn as sns
from experiment import Experiment

In [None]:
# Words whose rows are picked out of the dataset by the analyses below.
experiment_words = ['sit', 'plant', 'wonder', 'relation']

# Keyword arguments for the project-local Experiment class, gathered in one
# place so they are easy to find and adjust.
experiment_config = dict(
    mapping='mapping_english.txt',
    dictionary='cmudict-0.7b-with-vitz-nonce',
    encoding='latin1',
    words=experiment_words,
)
exp = Experiment(**experiment_config)

# Presumably a pandas DataFrame; the code below reads its 'actual' and
# 'obtained' columns plus one column per scoring method.
dataset = exp.get_dataset()

In [None]:
def similarity_scores(methods):
    """Correlate each method's predictions with the obtained values, per word.

    Reads the module-level ``dataset`` and ``experiment_words``.

    Parameters
    ----------
    methods : iterable of str
        Names of the ``dataset`` columns that hold each method's predictions.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``experiment_words`` and one column per method.
        Each cell is the Pearson correlation coefficient between the
        ``'obtained'`` column and the method's column, computed over the rows
        whose ``'actual'`` value equals that word.
    """
    # The row selection depends only on the word, so do it once per word
    # rather than twice for every (method, word) combination.
    rows_per_word = [dataset[dataset['actual'] == word] for word in experiment_words]

    columns = {}
    for method in methods:
        columns[method] = [
            # corrcoef returns the 2x2 correlation matrix; [0, 1] is the
            # coefficient between the two inputs.
            np.corrcoef(rows['obtained'].to_numpy(), rows[method].to_numpy())[0, 1]
            for rows in rows_per_word
        ]
    return pd.DataFrame(columns, index=experiment_words)

def draw_plot(methods):
    """Draw a grouped bar chart of per-word correlations with a value table.

    The scores come from ``similarity_scores(methods)``. Each word is one
    group of bars and each method one bar colour. The x axis is hidden and
    replaced by a table below the plot that lists the scores rounded to five
    decimals, with one colour-coded row per method.

    Parameters
    ----------
    methods : iterable of str
        Method names forwarded to ``similarity_scores``.

    Returns
    -------
    None
        The figure is created as a side effect on a new matplotlib figure.
    """
    scores = similarity_scores(methods)

    base_colors = ['orange', 'lightskyblue', 'darkseagreen', 'palevioletred', 'silver']
    # Exactly one colour per plotted column, cycling through the palette when
    # there are more columns than base colours. The table indexes its row
    # colours by row number, so a fixed 5-entry list would raise IndexError
    # as soon as more than five methods are requested.
    colors = [base_colors[i % len(base_colors)] for i in range(scores.shape[1])]

    _, ax = plt.subplots(facecolor='w')
    scores.plot.bar(ax=ax, width=0.8, legend=False, figsize=(12, 4), color=colors, fontsize=18)
    ax.patch.set_facecolor('w')
    ax.set_ylabel('Pearson\ncorrelation\ncoefficient\n', fontsize=18)
    ax.set_xlim(-0.5, len(scores) - .5)

    # Start the y axis slightly below the smallest score, snapped to one
    # decimal; the upper limit is fixed at 1.
    lowest_score = scores.min(numeric_only=True).to_numpy().min()
    ax.set_ylim(np.around(lowest_score - 0.05, decimals=1), 1)

    # The table underneath takes over the role of the x tick labels.
    ax.axes.get_xaxis().set_visible(False)
    table = pd.plotting.table(ax, np.round(scores.T, 5), loc='bottom', cellLoc='center', rowColours=colors)
    table.set_fontsize(18)
    table.scale(1, 2)

# One figure per group of methods to compare side by side.
for method_group in (
    ['unigram', 'bigram', 'bigram p=2.5'],
    ['vw_predicted', 'bigram p=2.5', 'bigram p=2.5 VW'],
):
    draw_plot(method_group)

In [None]:
# NOTE(review): 1, 5 and 33 are presumably the start, stop and number of
# steps of the penalty sweep; confirm against Experiment.penalty_analysis.
penalties = exp.penalty_analysis(experiment_words, 1, 5, 33, bigram=True, vowel=False)

# Index label at which the 'avg' column reaches its maximum.
best_penalty = penalties['avg'].idxmax()
print(best_penalty)

# Drop the aggregate column so only the remaining columns are drawn as lines.
penalties = penalties.drop(columns=['avg'])

fig, ax = plt.subplots(facecolor='w')
penalties.plot.line(ax=ax, figsize=(10, 4), fontsize=16)
ax.set_xlabel('Penalty', fontsize=16)
ax.set_ylabel('Pearson\ncorrelation\ncoefficient\n', fontsize=16)
ax.legend(loc='center right', bbox_to_anchor=(1.25, 0.5), shadow=True, ncol=1, fontsize=16)

# Mark the penalty with the best average score.
ax.axvline(best_penalty, color='k', linestyle='--')
# x is in data coordinates and y is a fraction of the axes height, so the
# label sits at mid-height next to the line whatever range the curves cover.
ax.text(best_penalty, 0.5, ' max of average', rotation=0, fontsize=12,
        transform=ax.get_xaxis_transform())