In [20]:
import os
import shutil
import numpy as np
import pandas as pd
import statsmodels.stats.api as sms

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [21]:
# Experiment settings. They are interpolated into the names of the result
# files read below, so they select which run is being plotted.
n_topics = 2
min_tau, max_tau = 1, 800

# 1-based iteration numbers, used as the x-axis of every curve.
n_iters = 60
iters = list(range(1, n_iters + 1))

In [4]:
def read_results(res_name):
    """Load one results file into a dict keyed by 0-based line index.

    The file name is built from ``res_name`` and the notebook-level settings
    ``n_topics``, ``min_tau`` and ``max_tau`` as
    ``<res_name>_topics<n_topics>_tau<min_tau>_<max_tau>.txt`` and is opened
    relative to the current working directory.

    Parameters
    ----------
    res_name : str
        Prefix of the results file (e.g. ``'auc'``).

    Returns
    -------
    dict
        Maps each line number to a 1-D float ``numpy`` array holding the
        whitespace-separated numbers found on that line.
    """
    path = '{}_topics{}_tau{}_{}.txt'.format(
        res_name, n_topics, min_tau, max_tau)
    with open(path, 'r') as fin:
        return {
            line_no: np.array(row.split(), dtype=float)
            for line_no, row in enumerate(fin)
        }

In [None]:
# Plot the mean AUC per iteration with a confidence band around it.
figure(figsize=(6, 5))

# Dict: line index of the results file -> array of scores on that line
# (presumably one score per cross-validation fold -- TODO confirm).
auc_folds_iter = read_results('auc')

mean_auc = [np.mean(fold_aucs) for fold_aucs in auc_folds_iter.values()]

# t-based confidence interval of the mean for every iteration.
# Iterate over the dict VALUES (the score arrays): iterating the dict itself
# yields the integer line indices, which are not the data to summarise.
left_edge = []
right_edge = []
for fold_aucs in auc_folds_iter.values():
    left, right = sms.DescrStatsW(fold_aucs).tconfint_mean()
    left_edge.append(left)
    right_edge.append(right)


fill_between(iters, left_edge, right_edge, color='violet')
plot(iters, mean_auc, color='magenta', lw=3)

xlabel('iteration', fontsize=18)
xlim([1, n_iters])
# Drop the first automatic tick and add an explicit tick at iteration 1.
xticks(list(xticks()[0][1:]) + [1])

ylabel('AUC', fontsize=18)
title('AUC_CV', fontsize=18)
#text(35, 0.7, r'$|T|=2$', fontsize=18)
#text(35, 0.6, r'$\tau \in range(1, 800)$', fontsize=18)

#savefig(
#    os.path.join(
#        new_dir, 'auc_topics{}_tau{}_{}.eps'.format(
#            n_topics, int(
#                np.min(tau)), int(
#                    np.max(tau)))), format='eps')

show()

In [None]:
# Plot the mean LogLoss per iteration with a confidence band around it.
figure(figsize=(6, 5))

# Dict: line index of the results file -> array of scores on that line
# (presumably one score per cross-validation fold -- TODO confirm).
logloss_folds_iter = read_results('logloss')

mean_logloss = [np.mean(fold_losses)
                for fold_losses in logloss_folds_iter.values()]

# t-based confidence interval of the mean for every iteration.
# The interval must be computed from this cell's own logloss arrays; it must
# not depend on a variable left over from the AUC cell, and the loop has to
# walk the dict values rather than its integer keys.
left_edge = []
right_edge = []
for fold_losses in logloss_folds_iter.values():
    left, right = sms.DescrStatsW(fold_losses).tconfint_mean()
    left_edge.append(left)
    right_edge.append(right)

fill_between(iters, left_edge, right_edge, color='violet')
plot(iters, mean_logloss, color='magenta', lw=3)

xlabel('iteration', fontsize=18)
xlim([1, n_iters])
# Drop the first automatic tick and add an explicit tick at iteration 1.
xticks(list(xticks()[0][1:]) + [1])

ylabel('LogLoss', fontsize=18)
title('LogLoss_CV', fontsize=18)
#text(35, 0.65, r'$|T|=2$', fontsize=18)
#text(35, 0.6, r'$\tau \in range(1, 800)$', fontsize=18)
#savefig(
#    'logloss_topics{}_tau{}_{}.eps'.format(
#        n_topics, int(
#            np.min(tau)), int(
#               np.max(tau))), format='eps')
show()

In [None]:
# Mean perplexity per iteration for the p(c|t) results file.
figure(figsize=(6, 5))

perplexity_c = read_results('perplexity_c')

# Average the numbers stored on each line of the results file.
mean_perplexity_c = list(map(np.mean, perplexity_c.values()))

plot(iters, mean_perplexity_c, lw=3, color='magenta')

# Axis cosmetics: keep the automatic ticks except the first one and add an
# explicit tick at iteration 1.
xlabel('iteration', fontsize=18)
xlim([1, n_iters])
tick_locs = xticks()[0]
xticks(list(tick_locs[1:]) + [1])
ylabel('Perplexity', fontsize=18)
title('Perplexity_CV' + r'$p(c|t)$', fontsize=18)

#text(35, 1.7, r'$|T|=2$', fontsize=18)
#text(35, 1.6, r'$\tau \in range(1, 800)$', fontsize=18)

#savefig(
#    os.path.join(
#        new_dir, 'perplexity_c_topics{}_tau{}_{}.eps'.format(
#            n_topics, int(
#                np.min(tau)), int(
#                    np.max(tau)))), format='eps')
show()

In [None]:
# Mean perplexity per iteration for the p(gram3|t) results file.
figure(figsize=(6, 5))

perplexity_gram3 = read_results('perplexity_gram3')

# Average the numbers stored on each line of the results file.
mean_perplexity_gram3 = list(map(np.mean, perplexity_gram3.values()))

plot(iters, mean_perplexity_gram3, lw=3, color='magenta')

# Axis cosmetics: keep the automatic ticks except the first one and add an
# explicit tick at iteration 1.
xlabel('iteration', fontsize=18)
xlim([1, n_iters])
tick_locs = xticks()[0]
xticks(list(tick_locs[1:]) + [1])
ylabel('Perplexity', fontsize=18)
title('Perplexity_CV' + r'$p(gram3|t)$', fontsize=18)

#text(35, 1.7, r'$|T|=2$', fontsize=18)
#text(35, 1.6, r'$\tau \in range(1, 800)$', fontsize=18)

#savefig(
#    os.path.join(
#        new_dir, 'perplexity_gram3_topics{}_tau{}_{}.eps'.format(
#            n_topics, int(
#                np.min(tau)), int(
#                    np.max(tau)))), format='eps')
show()

In [None]:
# Mean sparsity per iteration for the three result files, on one set of axes.
figure(figsize=(6, 5))

sparsity_phi_c = read_results('sparsity_phi_c')
sparsity_phi_gram3 = read_results('sparsity_phi_gram3')
sparsity_theta = read_results('sparsity_theta')

# Average the numbers stored on each line of every results file.
mean_sparsity_phi_c = list(map(np.mean, sparsity_phi_c.values()))
mean_sparsity_phi_gram3 = list(map(np.mean, sparsity_phi_gram3.values()))
mean_sparsity_theta = list(map(np.mean, sparsity_theta.values()))

# Draw order matters: the legend labels below are matched to the curves in
# the order they are plotted.
for curve, colour in (
        (mean_sparsity_phi_c, 'magenta'),
        (mean_sparsity_phi_gram3, 'purple'),
        (mean_sparsity_theta, 'green')):
    plot(iters, curve, color=colour, lw=3)

xlabel('iteration', fontsize=18)
xlim([1, n_iters])
# Keep the automatic ticks except the first one and add a tick at iteration 1.
tick_locs = xticks()[0]
xticks(list(tick_locs[1:]) + [1])
ylabel('Sparsity', fontsize=18)
title('Sparsity_CV', fontsize=18)
legend(
    [r'$p(c|t)$', r'$p(gram3|t)$', r'$p(t|d)$'],
    loc='upper left',
    fontsize=15)
#text(10, 0.3, r'$|T|=2$', fontsize=18)
#text(10, 0.25, r'$\tau \in range(1, 800)$', fontsize=18)

#savefig(
#    os.path.join(
#        new_dir, 'sparsity_topics{}_tau{}_{}.eps'.format(
#            n_topics, int(
#                np.min(tau)), int(
#                    np.max(tau)))), format='eps')
show()

In [None]:
import seaborn as sns
# Heatmap of the table stored in the ptc CSV for the current settings.
figure(figsize=(6, 5))

file_src = 'ptc_topics{}_tau{}_{}.csv'.format(n_topics, min_tau, max_tau)
# The first CSV column is used as the row index, not as data.
ptc = pd.read_csv(file_src, index_col=0, sep=',')

# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values yields the same ndarray and is available on every pandas version.
sns.heatmap(ptc.values)
# Hand-placed column captions (coordinates are in data units of the heatmap).
text(0.2, 2.1, '$topic0$', fontsize=18)
text(1.29, 2.1, '$topic1$', fontsize=18)

#savefig(os.path.join(new_dir, '{}.eps'.format(file_src)), format = 'eps')

show()