In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
import warnings
# Suppress every warning for the rest of the session. Note that this also hides
# NumPy RuntimeWarnings (e.g. taking the mean of an empty selection), so such
# cases silently produce NaN instead of being reported.
warnings.filterwarnings('ignore')

In [5]:
def extract_data_three_cols_format(filename):
    '''Get loss and AUC data from log files (exp #2-6) with three columns format:
    number of epoch, number of iteration, loss/AUC

    Rows are grouped into consecutive runs that share the same epoch number
    and the third column is averaged within each run. Every row of a run
    contributes to its mean, including the first row after an epoch change,
    and the log may start at any epoch number.

    Parameters
    ----------
    filename : str
        The file location of the log file (tab-separated, one record per line)

    Returns
    -------
    numpy.ndarray
        1-D array with the mean loss/AUC of each epoch, in file order
    numpy.ndarray
        2-D array with all parsed rows (epoch, iteration, loss/AUC)
    '''
    with open(filename, 'r') as f:
        # Skip blank lines instead of blindly dropping the last one, so a file
        # without a trailing newline keeps its final record.
        rows = [line.split('\t') for line in f.read().split('\n') if line.strip()]
    values = np.array([[float(cell) for cell in row] for row in rows])

    if values.size == 0:
        # Empty log: nothing to average.
        return np.array([]), np.zeros(shape=(0, 3))

    epochs, metric = values[:, 0], values[:, 2]
    # Positions where the epoch number differs from the previous row, i.e. the
    # first row of every new epoch run.
    run_starts = np.where(epochs[1:] != epochs[:-1])[0] + 1
    mean_values = [run.mean() for run in np.split(metric, run_starts)]
    return np.array(mean_values), values

def extract_data_channels(filename, n_channels=12):
    '''Get AUC data from log files (exp #9-17) with four columns format:
    number of epoch, number of iteration, number of channel, AUC

    The result is indexed by epoch number, so column 0 (epoch 0) is dropped
    before returning and the returned columns correspond to epochs
    1..max_epoch inclusive. The highest logged epoch is therefore the last
    column of the result.

    Parameters
    ----------
    filename : str
        The file location of the log file (tab-separated, one record per line)
    n_channels : int, optional
        Number of channels to aggregate; channel ids 0..n_channels-1 are
        looked up in the third column. Defaults to 12.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_channels, max_epoch) with the mean AUC per channel
        per epoch. Entries with no logged rows are NaN.
    '''
    with open(filename, 'r') as f:
        # Skip blank lines instead of blindly dropping the last one, so a file
        # without a trailing newline keeps its final record.
        rows = [line.split('\t') for line in f.read().split('\n') if line.strip()]
    values = np.array([[float(cell) for cell in row] for row in rows])

    if values.size == 0:
        # Empty log: no epochs to report.
        return np.zeros(shape=(n_channels, 0))

    max_epoch = int(np.max(values[:, 0]))
    # One column per epoch number 0..max_epoch (inclusive) so that the last
    # logged epoch is not lost; unfilled cells stay NaN.
    aucs = np.full((n_channels, max_epoch + 1), np.nan)

    for channel in range(n_channels):
        channel_rows = values[values[:, 2] == channel][:, [0, 3]]
        for epoch in range(max_epoch + 1):
            epoch_aucs = channel_rows[channel_rows[:, 0] == epoch][:, 1]
            if epoch_aucs.size:
                aucs[channel, epoch] = epoch_aucs.mean()
    # Column 0 belongs to epoch 0, which is excluded from the result.
    return aucs[:, 1:]

In [88]:
data_dir = './experiments'

data = pd.read_csv('../MultiTox/database/data/tox21_10k_data_all_no_salts.csv')

# Every column except the SMILES string is treated as a property; each one
# gets its own result column in the experiments table below.
props = [column for column in data.columns if column != 'SMILES']

experiments_description = pd.read_excel(
    os.path.join(data_dir, 'Experiments_description.xlsx'),
    index_col='Number of experiment',
)
experiments_description = experiments_description.reindex(
    columns=experiments_description.columns.tolist() + props
)

for i in range(2, 18):
    # Experiments below 9 log AUC in the three-column format; from 9 onwards
    # the AUC logs carry an extra channel column.
    per_channel = i >= 9

    # AUC logs: train first, then test (keeps the order in which new result
    # columns are created).
    for split in ('train', 'test'):
        filename = '{}_log_{}_auc.txt'.format(i, split)
        try:
            if per_channel:
                # Last epoch column: one AUC per channel.
                aucs = extract_data_channels(os.path.join(data_dir, filename))[:, -1]
                if split == 'test':
                    # Per-property test AUCs are stored alongside the mean.
                    experiments_description.loc[i, props] = aucs
                experiments_description.loc[i, split + ' auc'] = aucs.mean()
            else:
                aucs = extract_data_three_cols_format(os.path.join(data_dir, filename))[0]
                experiments_description.loc[i, split + ' auc'] = aucs[-1]
        except FileNotFoundError:
            # Best effort: an experiment without this log keeps an empty cell.
            pass

    # Loss logs always use the three-column format: test first, then train.
    for split in ('test', 'train'):
        filename = '{}_log_{}_loss.txt'.format(i, split)
        try:
            losses = extract_data_three_cols_format(os.path.join(data_dir, filename))[0]
            experiments_description.loc[i, split + ' loss'] = losses[-1]
        except FileNotFoundError:
            # Best effort: an experiment without this log keeps an empty cell.
            pass

In [93]:
# Write the experiments table, including the result columns filled in above,
# to an Excel workbook in the current working directory.
experiments_description.to_excel('./Tox21_experiments_description_and_results.xlsx')