In [None]:
import os
import pandas as pd
import numpy as np
%matplotlib inline
from matplotlib import pylab as plt

# Location of the CSV result files plus the algorithm / experiment identifiers.
# Together they form the input path <basefolder>/<algo>/<expid>/<problem>;
# algo and expid are also embedded in the name of the saved figure.
basefolder = "data/onlineregression"
algo = "stabu"
expid = "tabu0"

In [None]:
# Problem instances analysed in this notebook: ta41, ta42, ..., ta50.
problems = ["ta{}".format(number) for number in range(41, 51)]

print("will generate probability plots for \n", problems)

In [None]:
# Collect all data in one data frame.
# Every file under <basefolder>/<algo>/<expid>/<problem>/ is read as a
# header-less CSV whose columns are named by `colnames`.
colnames = ['algo','expid','problem','runid','seed','time','epoch','muopt','accuracy']

frames = []
for problem in problems:
    directory = os.path.join(basefolder, algo, expid, problem)
    # Sorted so that the row order does not depend on the order in which the
    # file system happens to list the directory.
    for file in sorted(os.listdir(directory)):
        path = os.path.join(directory, file)
        if not os.path.isfile(path):
            # os.listdir also returns sub-directories; read_csv cannot open those.
            continue
        frames.append(pd.read_csv(path, header=None, names=colnames))

# Concatenate once instead of growing the frame inside the loop: repeated
# concat copies all previously loaded rows on every iteration, and seeding the
# result with an empty all-object frame can influence the resulting dtypes.
# ignore_index gives every row a unique label instead of repeating 0..n per file.
if frames:
    dataset = pd.concat(frames, ignore_index=True)
else:
    # Nothing was found: keep an empty frame with the expected columns so the
    # following cells still see the same schema.
    dataset = pd.DataFrame([], columns=colnames)

dataset.head()

In [None]:
# Per-epoch summary of `muopt` and `accuracy`.
#
# For every epoch on the grid below, the min / max / mean over all rows of a
# problem is computed for each problem separately, and those per-problem values
# are then averaged over the problems. Results are stored as parallel lists
# indexed like `ep`:
#   minmu / maxmu / avgmu     -- statistics of the "muopt" column
#   minacc / maxacc / avgacc  -- statistics of the "accuracy" column
epochstart = 0
epochend = 201
step = 1

# The problem filter does not depend on the epoch, so split the table once
# here instead of re-scanning the full data set for every (epoch, problem)
# pair. The "problem" column stores the instance name with a ".txt" suffix.
rows_by_problem = [
    dataset[dataset.problem == "{}.txt".format(problem)] for problem in problems
]

ep = []
minmu, maxmu, avgmu = [], [], []
minacc, maxacc, avgacc = [], [], []

for epoch in range(epochstart, epochend, step):
    minforproblem, maxforproblem, avgforproblem = [], [], []
    minaccforproblem, maxaccforproblem, avgaccforproblem = [], [], []

    for problem_rows in rows_by_problem:
        # Select the rows of this epoch once and reuse them for both columns.
        epoch_rows = problem_rows[problem_rows.epoch == epoch]
        mus = epoch_rows["muopt"]
        acc = epoch_rows["accuracy"]

        minforproblem.append(np.min(mus))
        maxforproblem.append(np.max(mus))
        avgforproblem.append(np.mean(mus))

        minaccforproblem.append(np.min(acc))
        maxaccforproblem.append(np.max(acc))
        avgaccforproblem.append(np.mean(acc))

    # Average the per-problem statistics over all problems.
    ep.append(epoch)
    minmu.append(np.mean(minforproblem))
    maxmu.append(np.mean(maxforproblem))
    avgmu.append(np.mean(avgforproblem))

    minacc.append(np.mean(minaccforproblem))
    maxacc.append(np.mean(maxaccforproblem))
    avgacc.append(np.mean(avgaccforproblem))

In [None]:
# Two stacked panels sharing the epoch axis: accuracy on top, the optimal
# parameter theta below. Each panel shows the averaged curve as a dashed line
# and a shaded band between the averaged per-problem minimum and maximum.
fig, axs = plt.subplots(2, 1, sharex=True)
fig.subplots_adjust(hspace=0)

# Label derived from the configured problem list so the annotation and the
# output file name follow `problems` (assumes the list is a contiguous range,
# e.g. ta41 ... ta50 -> "ta41-ta50").
instances = "{}-{}".format(problems[0], problems[-1])

# Upper panel: accuracy. The band limits are rounded to two decimals.
axs[0].plot(ep, avgacc, ls='dashed', color="red", label="average accuracy")
axs[0].fill_between(ep, np.round(minacc, 2), np.round(maxacc, 2), alpha=0.3)
axs[0].set_ylim(0.7, 1.1)
axs[0].set_yticks(np.arange(0.75, 1.00, 0.05))
axs[0].legend()
axs[0].text(15, 1.0, "Taillard's Instances {}".format(instances))
axs[0].grid()

# Lower panel: optimal parameter theta.
axs[1].plot(ep, avgmu, ls='dashed', color="blue", label=r'average optimal parameter $\theta$')
axs[1].fill_between(ep, minmu, maxmu, alpha=0.3)
axs[1].set_ylim(0.0, 0.6)
axs[1].set_yticks(np.arange(0.00, 0.60, 0.1))
axs[1].legend()
axs[1].grid()
# With sharex=True only the bottom panel needs the x label.
axs[1].set_xlabel("epoch")

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figs', exist_ok=True)
fig.savefig('figs/acc{}-{}-{}.png'.format(instances, algo, expid), dpi=300)