In [1]:
import inspect
import logging
import os

import pandas as pd

from csrank.util import setup_logger
from experiments.util import lp_metric_dict
import numpy as np
from experiments.dbconnection import DBConnector

Using TensorFlow backend.


Current learners 
 
feta_ranker => <class 'csrank.objectranking.feta_object_ranker.FETAObjectRanker'>
ranknet => <class 'csrank.objectranking.rank_net.RankNet'>
cmpnet => <class 'csrank.objectranking.cmp_net.CmpNet'>
err => <class 'csrank.objectranking.expected_rank_regression.ExpectedRankRegression'>
ranksvm => <class 'csrank.objectranking.rank_svm.RankSVM'>
fate_ranker => <class 'csrank.objectranking.fate_object_ranker.FATEObjectRanker'>
listnet => <class 'csrank.objectranking.list_net.ListNet'>
feta_choice => <class 'csrank.choicefunctions.feta_choice.FETAChoiceFunction'>
fate_choice => <class 'csrank.choicefunctions.fate_choice.FATEChoiceFunction'>
feta_dc => <class 'csrank.discretechoice.feta_discrete_choice.FETADiscreteChoiceFunction'>
fate_dc => <class 'csrank.discretechoice.fate_discrete_choice.FATEDiscreteChoiceFunction'>
ranknet_dc => <class 'csrank.discretechoice.ranknet_discrete_choice.RankNetDiscreteChoiceFunction'>
cmpnet_dc => <class 'csrank.discretechoice.cmpnet_discret

In [2]:
DIR_PATH = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
log_path = os.path.join(DIR_PATH, 'logs', 'results.log')
setup_logger(log_path=log_path)
logger = logging.getLogger('Result parsing')
config_file_path = os.path.join(DIR_PATH, 'config', 'clusterdb.json')
DATASET = "mnist_dc"
learning_problem = "discrete_choice"
results_table = 'results.{}'.format(learning_problem)
schema = 'masterthesis'

In [3]:
self = DBConnector(config_file_path=config_file_path, is_gpu=False, schema=schema)

In [4]:
keys = list(lp_metric_dict[learning_problem].keys())
keys[-1] = keys[-1].format(6)
metrics = ', '.join([x for x in keys])
metrics

'CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6'

In [5]:
self.init_connection()
avail_jobs = "{}.avail_jobs".format(self.schema)
select_st = "SELECT dataset_params, learner, {3} from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id where {1}.dataset=\'{2}\'".format(
    results_table, avail_jobs, DATASET, metrics)
print(select_st)
self.cursor_db.execute(select_st)

SELECT dataset_params, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN masterthesis.avail_jobs ON results.discrete_choice.job_id = masterthesis.avail_jobs.job_id where masterthesis.avail_jobs.dataset='mnist_dc'


In [6]:
data = []
for job in self.cursor_db.fetchall():
    values = list(job.values())
    keys = list(job.keys())
    columns = ['dataset'] + keys[1:]
    vals = [job['dataset_params']['dataset_type'].upper()] + values[1:]
    data.append(vals)

In [7]:
self.init_connection()
avail_jobs = "{}.avail_jobs".format("pymc3")
select_st = "SELECT dataset_params, learner, {3} from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id where {1}.dataset=\'{2}\'".format(
    results_table, avail_jobs, DATASET, metrics)
print(select_st)
self.cursor_db.execute(select_st)
for job in self.cursor_db.fetchall():
    values = list(job.values())
    keys = list(job.keys())
    columns = ['dataset'] + keys[1:]
    vals = [job['dataset_params']['dataset_type'].upper()] + values[1:]
    data.append(vals)

SELECT dataset_params, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN pymc3.avail_jobs ON results.discrete_choice.job_id = pymc3.avail_jobs.job_id where pymc3.avail_jobs.dataset='mnist_dc'


In [8]:
df_full = pd.DataFrame(data, columns=columns)
#df_full['zeroonerankaccuracy'] = 1 - df_full['zeroonerankloss']
df_full.head()

Unnamed: 0,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
0,MEDIAN,fate_dc,0.9506,0.9849,0.9932,0.9963,0.9978,0.9988
1,UNIQUE,ranksvm_dc,0.1098,0.2103,0.3074,0.4037,0.4982,0.5916
2,UNIQUE,fate_dc,0.9593,0.9854,0.9937,0.9963,0.9976,0.9986
3,UNIQUE,fate_dc,0.9387,0.9783,0.991,0.9954,0.9979,0.999
4,UNIQUE,fate_dc,0.9621,0.9843,0.9931,0.9955,0.997,0.998


In [9]:
grouped = df_full.groupby(['dataset', 'learner'])
data = []
for name, group in grouped:
    one_row = [name[0], str(name[1]).upper()]
    std = group.std(axis=0).values
    mean = group.mean(axis=0).values
    if np.all(np.isnan(std)):
        one_row.extend(["{:.4f}".format(m) for m in mean])
    else:
        one_row.extend(["{:.3f}+-{:.3f}".format(m, s) for m, s in zip(mean, std)])
    data.append(one_row)

In [10]:
df = pd.DataFrame(data, columns=columns)
df.sort_values(by='dataset')
df_path = os.path.join(DIR_PATH, 'results' , DATASET+'.csv')
df.to_csv(df_path)
df

Unnamed: 0,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
0,LARGEST,FATE_DC,0.977+-0.004,0.994+-0.001,0.997+-0.001,0.998+-0.001,0.999+-0.000,1.000+-0.000
1,LARGEST,FETA_DC,0.947+-0.025,0.964+-0.024,0.971+-0.021,0.975+-0.019,0.979+-0.015,0.983+-0.012
2,LARGEST,GENERALIZED_EXTREME_VALUE,0.916+-0.001,0.968+-0.001,0.984+-0.001,0.991+-0.000,0.995+-0.000,0.997+-0.000
3,LARGEST,MULTINOMIAL_LOGIT_MODEL,0.916+-0.001,0.968+-0.001,0.984+-0.000,0.991+-0.000,0.995+-0.000,0.997+-0.000
4,LARGEST,NESTED_LOGIT_MODEL,0.931+-0.005,0.975+-0.002,0.988+-0.000,0.993+-0.000,0.996+-0.000,0.997+-0.000
5,LARGEST,PAIRED_COMBINATORIAL_LOGIT,0.9155,0.9671,0.9843,0.9911,0.9948,0.9968
6,LARGEST,RANKNET_DC,0.977+-0.002,0.993+-0.001,0.996+-0.001,0.998+-0.001,0.999+-0.000,0.999+-0.000
7,LARGEST,RANKSVM_DC,0.906+-0.006,0.962+-0.004,0.982+-0.003,0.991+-0.002,0.995+-0.001,0.997+-0.001
8,MEDIAN,FATE_DC,0.955+-0.004,0.985+-0.001,0.994+-0.000,0.997+-0.000,0.998+-0.000,0.999+-0.000
9,MEDIAN,FETA_DC,0.762+-0.046,0.926+-0.023,0.979+-0.006,0.991+-0.002,0.995+-0.001,0.997+-0.001


In [11]:
grouped = df.groupby(['dataset'])
for name, group in grouped:
    df_path = os.path.join(DIR_PATH, 'results' , name.lower()+'.csv')
    group.to_csv(df_path)

In [12]:
import numpy as np
np.arange(48,87)

X_train = np.arange(40).reshape(4,5,2)

learner_params = {}
learner_params['n_objects'], learner_params['n_object_features'] = X_train.shape[1:]

In [13]:
from datetime import datetime
self.schema = 'pymc3'
avail_jobs = "{}.avail_jobs".format(self.schema)
running_jobs = "{}.running_jobs".format(self.schema)
fold_id = 1
cluster_id=1234
self.fetch_job_arguments(cluster_id=cluster_id)
self.init_connection(cursor_factory=None)
job_desc = dict(self.job_description)
job_desc['fold_id'] = fold_id
job_id = job_desc['job_id']
del job_desc['job_id']
learner, dataset, dataset_type = job_desc['learner'],  job_desc['dataset'], job_desc['dataset_params']['dataset_type']
select_job = "SELECT job_id from {} where fold_id = {} AND learner = \'{}\' AND dataset = \'{}\' AND dataset_params->>'dataset_type' = \'{}\'".format(
    avail_jobs, fold_id, learner, dataset, dataset_type)
self.cursor_db.execute(select_job)

if self.cursor_db.rowcount == 0:
    keys = list(job_desc.keys())
    columns = ', '.join(keys)
    index = keys.index('fold_id')
    keys[index] = str(fold_id)
    values_str = ', '.join(keys)
    insert_job = "INSERT INTO {0} ({1}) SELECT {2} FROM {0} where {0}.job_id = {3} RETURNING job_id".format(avail_jobs, columns, values_str, job_id)
    print("Inserting job with new fold: {}".format(insert_job))
    self.cursor_db.execute(insert_job)    
job_id = self.cursor_db.fetchone()[0]
print("Job {} with fold id {} updated/inserted".format(fold_id, job_id))
start = datetime.now()
update_job = """UPDATE {} set job_allocated_time = %s WHERE job_id = %s""".format(avail_jobs)
self.cursor_db.execute(update_job, (start, job_id))
select_job = """SELECT * FROM {0} WHERE {0}.job_id = {1} AND {0}.interrupted = {2} FOR UPDATE""".format(
    running_jobs, job_id, True)
self.cursor_db.execute(select_job)
count_ = len(self.cursor_db.fetchall())
if count_ == 0:
    insert_job = """INSERT INTO {0} (job_id, cluster_id ,finished, interrupted) 
                    VALUES ({1}, {2},FALSE, FALSE)""".format(running_jobs, job_id, cluster_id)
    self.cursor_db.execute(insert_job)
    if self.cursor_db.rowcount == 1:
        print("The job {} is updated in runnung jobs".format(job_id))
else:
    print("Job with job_id {} present in the updating and row locked".format(job_id))
    update_job = """UPDATE {} set cluster_id = %s, interrupted = %s WHERE job_id = %s""".format(
        running_jobs)
    self.cursor_db.execute(update_job, (cluster_id, 'FALSE', job_id))
    if self.cursor_db.rowcount == 1:
        print("The job {} is updated in runnung jobs".format(job_id))
self.close_connection()

jobs available []
Error as the all jobs are already assigned to another nodes list index out of range


TypeError: 'NoneType' object is not iterable