# Load packages

In [0]:
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.utils.data as utils

import matplotlib
import matplotlib.pyplot as plt

from google.colab import drive

import pandas as pd
import numpy as np
import time
from sklearn.model_selection import KFold
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

import os
import re

In [0]:
# Mount Google Drive; every dataset and result path used below lives under /content/gdrive.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Device config


In [0]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on runtimes without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# K-Fold

## Methods

### Load data

In [0]:
import re

def tryint(s):
    """Return ``int(s)`` when ``s`` parses as an integer, otherwise ``s`` unchanged."""
    try:
        converted = int(s)
    except ValueError:
        converted = s
    return converted
     
def alphanum_key(s):
    """Build a natural-sort key: split ``s`` into text chunks and integer chunks.

    Example: "z23a" -> ["z", 23, "a"]
    """
    chunks = re.split('([0-9]+)', s)
    return list(map(tryint, chunks))

def sort_nicely(l):
    """Sort ``l`` in place in natural ("human") order and return the same list.

    Digit runs are compared numerically (see ``alphanum_key``), so "f2" sorts before "f10".
    """
    l[:] = sorted(l, key=alphanum_key)
    return l

# (filename marker, result key, merge with np.vstack instead of np.concatenate).
# Order matters: the first matching marker wins, and 'c_up' is a substring of 'tc_up',
# so the more specific markers have to be tested first.
_MATRIX_FILE_KINDS = (
    ('btc', 'btc_meta_data', False),
    ('tc_up', 'classes_t', False),
    ('c_up', 'classes', False),
    ('md_up', 'user_data', True),
    ('dp_up', 'matrices', True),
    ('dt_up', 'dates', False),
)


def _merge_chunks(chunks, use_vstack):
    """Join the per-file arrays of one kind along the first axis with a single call."""
    if len(chunks) == 1:
        # A lone chunk is returned untouched (np.vstack would promote a 1-D array to 2-D).
        return chunks[0]
    return np.vstack(chunks) if use_vstack else np.concatenate(chunks, axis=0)


def _load_split(base_str):
    """Load every recognised .npy chunk in ``base_str`` (naturally sorted) and merge per kind."""
    chunks = {key: [] for _, key, _ in _MATRIX_FILE_KINDS}
    for file_name in sort_nicely(os.listdir(base_str)):
        for marker, key, _ in _MATRIX_FILE_KINDS:
            if marker in file_name:
                chunks[key].append(np.load(base_str + file_name))
                break

    missing = [key for key, parts in chunks.items() if not parts]
    if missing:
        raise FileNotFoundError('No files for {} found in {}'.format(missing, base_str))

    return {key: _merge_chunks(chunks[key], use_vstack)
            for _, key, use_vstack in _MATRIX_FILE_KINDS}


def _drop_label_rows(arr, drop):
    """Remove the samples at positions ``drop`` from a label/date array.

    Deleting along axis 0 keeps multi-column label rows intact; calling np.delete
    without an axis would flatten such an array and remove the wrong elements.
    Arrays with a single value per sample are still returned 1-D.
    """
    kept = np.delete(arr, drop, axis=0)
    if kept.ndim > 1 and kept.size == kept.shape[0]:
        return kept.ravel()
    return kept


def _drop_samples(split, drop):
    """Remove the samples at positions ``drop`` from every array of a split (in the dict)."""
    for key in ('matrices', 'user_data', 'btc_meta_data'):
        split[key] = np.delete(split[key], drop, axis=0)
    for key in ('classes_t', 'classes', 'dates'):
        split[key] = _drop_label_rows(split[key], drop)


def load_matrices(combination):
    """Load the train and test arrays for one experiment and align their users.

    Args:
        combination: sequence whose first two items are the model type and the
            mode; both are used as directory names below the data root.

    Returns:
        A 12-tuple ``(train_classes, train_user_data, train_matrices, train_dates,
        train_btc_meta_data, train_classes_t, test_classes, test_user_data,
        test_matrices, test_dates, test_btc_meta_data, test_classes_t)``.

    Raises:
        FileNotFoundError: if a split directory lacks files for one of the kinds.
    """
    model_type, mode = combination[0], combination[1]
    root = '/content/gdrive/My Drive/projects/data/btc/{}/{}/'.format(model_type, mode)

    train = _load_split(root + 'train/')
    print(train['classes'].shape, train['user_data'].shape, train['matrices'].shape,
          train['dates'].shape, train['btc_meta_data'].shape, train['classes_t'].shape)
    test = _load_split(root + 'test/')

    # Re-assign tokens for userIds as this makes normalization easier (no precision errors).
    # The user id is stored in the last column of the user data.
    unique_users = np.unique(np.concatenate((train['user_data'][:, -1], test['user_data'][:, -1])))
    user_ids = {raw_id: token + 1 for token, raw_id in enumerate(unique_users)}
    for split in (train, test):
        user_data = split['user_data']
        for row in range(len(user_data)):
            user_data[row, -1] = user_ids[user_data[row, -1]]

    # Keep only users present in both sets: first prune train against test,
    # then prune test against the already pruned train.
    in_test = np.isin(train['user_data'][:, -1], np.unique(test['user_data'][:, -1]))
    _drop_samples(train, np.flatnonzero(~in_test))

    in_train = np.isin(test['user_data'][:, -1], np.unique(train['user_data'][:, -1]))
    _drop_samples(test, np.flatnonzero(~in_train))

    ordered_keys = ('classes', 'user_data', 'matrices', 'dates', 'btc_meta_data', 'classes_t')
    return tuple(split[key] for split in (train, test) for key in ordered_keys)

In [0]:
# ['keywords_tfidf_p', 'count', True, False]
def load_data_kfold(combination):
    """Load the arrays for ``combination`` and wrap them as two TensorDatasets.

    Each dataset yields ``(matrices, labels, user_data, dates, btc_meta_data)``;
    the labels come from the ``classes_t`` arrays when the global ``multi_class``
    is truthy and from the ``classes`` arrays otherwise.

    Returns:
        ``(full_dataset, test)``: the complete training dataset and the test dataset.
    """
    loaded = load_matrices(combination)
    print('c1')

    def as_dataset(classes, user_data, matrices, dates, btc_meta_data, classes_t):
        # Argument order mirrors one half of the tuple returned by load_matrices.
        labels = classes_t if multi_class else classes
        return torch.utils.data.TensorDataset(
            torch.from_numpy(matrices),
            torch.from_numpy(labels),
            torch.from_numpy(user_data.astype(float)),
            torch.from_numpy(dates.astype(float)),
            torch.from_numpy(btc_meta_data.astype(float)),
        )

    # load_matrices returns the six train arrays first, then the six test arrays.
    test = as_dataset(*loaded[6:])
    full_dataset = as_dataset(*loaded[:6])
    return full_dataset, test

### Early stopping class

In [0]:
class EarlyStopping:
    """
    Early stops the training if validation loss doesn't improve after a given patience.
    CREDITS GO TO: https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py
    """
    def __init__(self, patience=7, verbose=False):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # negated best validation loss seen so far
        self.early_stop = False   # set to True once `patience` is exhausted
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model):
        """Record one validation loss and update ``counter`` / ``early_stop``.

        Args:
            val_loss: validation loss of the epoch that just finished.
            model: the model being trained; only forwarded to ``save_checkpoint``.
        """
        score = -val_loss

        if self.best_score is not None and score < self.best_score:
            # Worse than the best epoch so far.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # First observation or an improvement (ties count as improvement).
            # The first observation is checkpointed too, so val_loss_min starts
            # at a real loss instead of staying at infinity.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Remember the new best validation loss (and report it when verbose).'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).')
        # Persisting the model weights is currently disabled; only the loss is tracked.
        self.val_loss_min = val_loss

### Model

In [0]:
class ConvNet(nn.Module):
    """1-D CNN: four conv blocks, two dense blocks and a final linear layer.

    The layer sizes depend on the module-level globals ``INPUT_SIZE_CNN`` (conv
    input channels) and ``FCN_INPUT_SIZE`` (input width of the last linear layer),
    which must be set before the model is constructed.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Todo: add batchnorm for all layers?

        # Convolutional blocks.
        self.conv1 = nn.Conv1d(INPUT_SIZE_CNN, 256, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm1d(256)

        self.conv2 = nn.Conv1d(256, 128, kernel_size=5, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(128)

        self.conv3 = nn.Conv1d(128, 64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm1d(64)

        self.conv4 = nn.Conv1d(64, 64, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm1d(64)

        # Fully connected blocks.
        self.fc1 = nn.Linear(5 * 64, 128)
        self.bn5 = nn.BatchNorm1d(128)
        self.drop1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(128, 32)
        self.bn6 = nn.BatchNorm1d(32)
        self.drop2 = nn.Dropout(0.5)

        # Output layer; FCN_INPUT_SIZE accounts for user features concatenated in forward().
        self.fc3 = nn.Linear(FCN_INPUT_SIZE, num_classes)
        self.bn7 = nn.BatchNorm1d(num_classes)
        self.drop3 = nn.Dropout(0.5)

        self.out_act = nn.Sigmoid()

    def forward(self, x, user_data, btc_meta_data, batch_size, use_btc, use_user):
        """Run the network.

        ``btc_meta_data`` is concatenated to ``x`` along dim 1 when ``use_btc`` is
        set; ``user_data`` is concatenated to the dense features before the last
        linear layer when ``use_user`` is set. The sigmoid is applied only when
        the global ``multi_class`` is falsy.
        """
        if use_btc:
            x = torch.cat((x, btc_meta_data), 1)
        x = x.view(batch_size, INPUT_SIZE_CNN, 7).float()

        conv_blocks = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, norm in conv_blocks:
            x = norm(F.relu(conv(x)))

        # Flatten everything but the batch dimension.
        x = x.view(x.shape[0], -1)

        dense_blocks = (
            (self.fc1, self.bn5, self.drop1),
            (self.fc2, self.bn6, self.drop2),
        )
        for dense, norm, drop in dense_blocks:
            x = drop(norm(F.relu(dense(x))))

        if use_user:
            x = torch.cat((x, user_data.float()), 1)
        x = self.drop3(self.bn7(self.fc3(x)))

        return x if multi_class else self.out_act(x)

### Evaluate user-specific data

In [0]:
from sklearn.metrics import accuracy_score, roc_auc_score, average_precision_score

def validate_model(val_loader, multi_class, batch_size, combination):
    """Evaluate the global ``model`` on ``val_loader`` without gradient tracking.

    Relies on the globals ``model``, ``criterion``, ``device``, ``epoch`` and
    ``num_epochs``.

    Returns:
        ``(mean_loss, score, y, pred)`` where ``score`` is the accuracy in
        multi-class mode and ``[precision, recall]`` otherwise.
    """
    use_btc, use_user = combination[2], combination[3]
    predictions, targets, batch_losses = [], [], []

    model.eval()
    with torch.no_grad():
        for images, labels, user_data, dates, btc_meta_data in val_loader:
            images, labels = images.to(device), labels.to(device)
            user_data, btc_meta_data = user_data.to(device), btc_meta_data.to(device)
            outputs = model(images, user_data, btc_meta_data, batch_size, use_btc, use_user)

            if multi_class:
                class_idx = torch.max(labels, 1)[1]
                loss = criterion(outputs, class_idx.view(-1, 1))
                predictions.extend(torch.max(outputs, 1)[1].cpu().numpy())
                targets.extend(class_idx.cpu().numpy())
            else:
                loss = criterion(outputs, labels.float().view(-1, 1))
                # Threshold the sigmoid output at 0.5.
                predictions.extend([1 if x > 0.5 else 0 for x in outputs.cpu().numpy()])
                targets.extend(labels.cpu().numpy())
            batch_losses.append(loss.cpu().numpy())

    mean_loss = np.mean(batch_losses)

    if multi_class:
        score = accuracy_score(targets, predictions)
    else:
        score = [precision_score(targets, predictions), recall_score(targets, predictions)]

    print('Epoch [{}/{}], val MAP/acc: {}, val loss: {}'
          .format(epoch + 1, num_epochs, score, mean_loss))

    return mean_loss, score, targets, predictions

def train_model(train_loader, multi_class, batch_size, combination):
    """Train the global ``model`` for one pass over ``train_loader``.

    Relies on the globals ``model``, ``criterion``, ``optimizer``, ``device``,
    ``epoch`` and ``num_epochs``.

    Args:
        train_loader: loader yielding ``(images, labels, user_data, dates, btc_meta_data)``.
        multi_class: when truthy, labels are reduced with ``torch.max(labels, 1)``
            and accuracy is reported; otherwise outputs are thresholded at 0.5.
        batch_size: forwarded to the model, which reshapes its input with it.
        combination: experiment description; items 2 and 3 toggle the btc and
            user meta data inputs of the model.

    Returns:
        ``(mean_loss, score, y, pred)`` where ``score`` is the accuracy in
        multi-class mode and ``[precision, recall]`` otherwise.
    """
    losses = []
    pred = []
    y = []
    n_steps = len(train_loader)
    # Log roughly three times per epoch. max(1, ...) avoids a modulo by zero
    # when the loader holds fewer than three batches.
    log_every = max(1, n_steps // 3)

    model.train()
    for i, (images, labels, user_data, dates, btc_meta_data) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        user_data = user_data.to(device)
        btc_meta_data = btc_meta_data.to(device)
        outputs = model(images, user_data, btc_meta_data, batch_size, combination[2], combination[3])

        if multi_class:
            loss = criterion(outputs, torch.max(labels, 1)[1].view(-1, 1))
            pred.extend(torch.max(outputs, 1)[1].detach().cpu().numpy())
            y.extend(torch.max(labels, 1)[1].detach().cpu().numpy())
        else:
            loss = criterion(outputs, labels.float().view(-1, 1))
            pred.extend([1 if x > 0.5 else 0 for x in outputs.detach().cpu().numpy()])
            y.extend(labels.detach().cpu().numpy())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.detach().cpu().numpy())

        if i % log_every == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, n_steps, np.mean(losses)))

    if multi_class:
        auc = accuracy_score(y, pred)
    else:
        auc = [precision_score(y, pred), recall_score(y, pred)]

    return np.mean(losses), auc, y, pred

def test_model(test_loader, multi_class, batch_size, combination):
    """Score the global ``model`` on ``test_loader`` without gradient tracking.

    Relies on the globals ``model`` and ``device``.

    Returns:
        ``(score, y, pred)`` where ``score`` is the accuracy in multi-class mode
        and ``[precision, recall]`` otherwise.
    """
    use_btc, use_user = combination[2], combination[3]
    predictions, targets = [], []

    model.eval()
    with torch.no_grad():
        for images, labels, user_data, dates, btc_meta_data in test_loader:
            images, labels = images.to(device), labels.to(device)
            user_data, btc_meta_data = user_data.to(device), btc_meta_data.to(device)
            outputs = model(images, user_data, btc_meta_data, batch_size, use_btc, use_user)

            if multi_class:
                predictions.extend(torch.max(outputs, 1)[1].cpu().numpy())
                targets.extend(torch.max(labels, 1)[1].cpu().numpy())
            else:
                # Threshold the sigmoid output at 0.5.
                predictions.extend([1 if x > 0.5 else 0 for x in outputs.cpu().numpy()])
                targets.extend(labels.cpu().numpy())

    if multi_class:
        score = accuracy_score(targets, predictions)
    else:
        score = [precision_score(targets, predictions), recall_score(targets, predictions)]

    print('Test score: {}'.format(score))
    return score, targets, predictions

### Aggregate users

In [0]:
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns


def conf_matrix(combination, batch_size, conf_matrix, score_type):
    """Draw ``conf_matrix`` as an annotated heatmap and save it to Google Drive.

    The file name is built from the first four items of ``combination``,
    ``batch_size`` and ``score_type``. The figure is closed after saving.
    """
    class_names = [0, 1]  # name  of classes
    tick_marks = np.arange(len(class_names))

    fig, ax = plt.subplots()
    ax.set_xticks(tick_marks)
    ax.set_xticklabels(class_names)
    ax.set_yticks(tick_marks)
    ax.set_yticklabels(class_names)

    # create heatmap
    sns.heatmap(pd.DataFrame(conf_matrix), annot=True, fmt='g', ax=ax)
    ax.xaxis.set_label_position("top")
    ax.set_ylabel('Actual label')
    ax.set_xlabel('Predicted label')

    out_path = '/content/gdrive/My Drive/projects/data/btc/result_conf_matrix/{}_{}_{}_{}_{}_{}'.format(
        combination[0], combination[1], int(combination[2]), int(combination[3]), batch_size, score_type)
    fig.savefig(out_path)
    plt.close(fig)

def mean_pred_test(combination, train_df, test_df):
    """Aggregate test predictions per date by averaging the model outputs.

    For every distinct ``date`` in ``test_df`` the mean of ``output`` is
    thresholded at 0.5 and compared with the first ``true`` value of that date.
    ``combination`` and ``train_df`` are accepted but not used.

    Returns:
        ``(confusion_matrix, precision, recall)`` over the per-date predictions.
    """
    y_test, y_pred = [], []
    for date in test_df['date'].unique():
        day_rows = test_df[test_df['date'] == date]
        day_label = day_rows['true'].unique()[0]
        day_mean = day_rows['output'].mean()
        y_pred.append(1.0 if day_mean > 0.5 else 0.0)
        y_test.append(day_label)

    confusion = metrics.confusion_matrix(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    return confusion, precision, recall

def _date_user_matrix(df, user_ids_lr, n_users):
    """Build one feature row (per-user outputs) and one label per distinct date of ``df``."""
    rows, labels = [], []
    for date in df['date'].unique():
        day = df[df['date'] == date]
        arr = np.zeros(n_users)
        for usr, pred in zip(day['user'], day['output']):
            arr[user_ids_lr[usr]] = pred
        rows.append(arr)
        # The label of the last row of the day stands for the whole day.
        labels.append([day['true'].iloc[-1]])

    # Stack once, so the result is 2-D even when there is only a single date.
    features = np.vstack(rows) if rows else np.empty((0, n_users))
    return features, np.array(labels)


def dataset_LR(train_df, test_df):
    """Turn per-user predictions into per-date feature vectors for the aggregation model.

    Both frames need the columns ``output``, ``true``, ``user`` and ``date``.
    Each distinct date becomes one sample whose features are the ``output``
    values of the users that posted on that date (zero for all other users).

    Returns:
        ``(X_train, y_train, X_test, y_test)``; the feature arrays have shape
        ``(n_dates, n_columns)`` and the label arrays ``(n_dates, 1)``.
    """
    train_users = train_df['user'].unique()
    test_users = test_df['user'].unique()

    # Column layout: train users first, then test users. A user present in both
    # splits is mapped to its later (test) column; the earlier column stays zero.
    users = np.concatenate((train_users, test_users))
    user_ids_lr = {user: column for column, user in enumerate(users)}

    X_train, y_train = _date_user_matrix(train_df, user_ids_lr, len(users))
    X_test, y_test = _date_user_matrix(test_df, user_ids_lr, len(users))
    return X_train, y_train, X_test, y_test
    
    
def LR(combination, train_df, test_df):
    """Aggregate per-user predictions per date with a logistic regression.

    The per-date feature vectors come from ``dataset_LR``; the model is fitted
    on the train dates and evaluated on the test dates. ``combination`` is
    accepted but not used.

    Returns:
        ``(confusion_matrix, precision, recall)`` on the test dates.
    """
    X_train, y_train, X_test, y_test = dataset_LR(train_df, test_df)
    # dataset_LR returns labels as column vectors; scikit-learn expects 1-D targets.
    y_train = np.ravel(y_train)
    y_test = np.ravel(y_test)

    # instantiate the model (using the default parameters)
    logreg = LogisticRegression()

    # fit the model with data (atleast_2d guards against a single-date split)
    logreg.fit(np.atleast_2d(X_train), y_train)

    y_pred = logreg.predict(np.atleast_2d(X_test))
    return metrics.confusion_matrix(y_test, y_pred), precision_score(y_test, y_pred), recall_score(y_test, y_pred)

def _collect_predictions(loader, combination):
    """Run the global ``model`` over ``loader`` and return one DataFrame row per sample.

    Columns: ``output`` (first model output), ``true`` (label), ``user`` (last
    user-data column), ``date`` and ``pred_max`` (argmax over the outputs).
    """
    batches = []
    model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
    with torch.no_grad():
        for images, labels, user_data, dates, btc_meta_data in loader:
            images = images.to(device)
            labels = labels.to(device)
            user_data = user_data.to(device)
            btc_meta_data = btc_meta_data.to(device)
            outputs = model(images, user_data, btc_meta_data, batch_size, combination[2], combination[3])
            predicted = torch.max(outputs, 1)[1].cpu().numpy()

            # column_stack keeps a (samples, 5) layout even for a one-sample batch.
            batches.append(np.column_stack((
                outputs.cpu().numpy()[:, 0],
                labels.cpu().numpy(),
                user_data.cpu().numpy()[:, -1],
                dates.cpu().numpy(),
                predicted,
            )))

    if not batches:
        raise ValueError('The data loader yielded no batches.')

    stacked = np.concatenate(batches, axis=0)
    return pd.DataFrame({'output': stacked[:, 0], 'true': stacked[:, 1],
                         'user': stacked[:, 2], 'date': stacked[:, 3],
                         'pred_max': stacked[:, 4]})


def aggregate_evaluation_test(combination):
    """Aggregate per-user predictions into per-date predictions and score them.

    Relies on the globals ``model``, ``device``, ``batch_size``, ``train_loader``
    and ``test_loader``.

    Returns:
        ``(LR_conf, LR_prec, LR_recall, mean_conf, mean_prec, mean_recall)``:
        confusion matrix, precision and recall of the logistic-regression
        aggregation followed by those of the mean-output aggregation.

    Raises:
        ValueError: if one of the loaders yields no batches.
    """
    train_df = _collect_predictions(train_loader, combination)
    test_df = _collect_predictions(test_loader, combination)

    mean_conf, mean_prec, mean_recall = mean_pred_test(combination, train_df, test_df)
    LR_conf, LR_prec, LR_recall = LR(combination, train_df, test_df)

    return LR_conf, LR_prec, LR_recall, mean_conf, mean_prec, mean_recall

## Iterate

In [0]:
# LR_conf, LR_prec, LR_recall, mean_conf, mean_prec, mean_recall = aggregate_evaluation_test(combination)
            
# print('Finished fold, scores: {}'.format([LR_prec, LR_recall, mean_prec, mean_recall]))

In [0]:
# Hyper parameters
num_epochs = 35
batch_sizes = [128, 64, 512, 256, 1024, 2048]
n_folds = 5
multi_class = False
num_classes = 1
size_user_info = 8
size_btc_md = 6
patience_early_stopping = 1
patience_reduce_lr = 1
learning_rate = 0.001


# Add combinations, make sure custom_features has 'count' due to mapping structure.
# 20 tests in total, 8 each for TFIDF/TFIDF-P, 4 for custom features.
# Booleans being the usage of btc meta-data and user meta-data or not.
# A few entries carry a fifth element (512); nothing below reads it.
# NOTE(review): the list used to be closed one entry early, leaving the last
# combination dangling as a syntax error; it is part of the list again.
combinations = [['custom_features', 'count', False, False],
                ['custom_features', 'count', True, False],
                ['custom_features', 'count', False, True],
                ['custom_features', 'count', True, True],

                ['keywords_tfidf', 'binary', False, False, 512],
                ['keywords_tfidf', 'binary', True, False, 512],
                ['keywords_tfidf', 'binary', False, True],
                ['keywords_tfidf', 'binary', True, True],

                ['keywords_tfidf', 'count', False, False, 512],
                ['keywords_tfidf', 'count', True, False],
                ['keywords_tfidf', 'count', False, True],
                ['keywords_tfidf', 'count', True, True],

                ['keywords_tfidf_p', 'binary', False, False, 512],
                ['keywords_tfidf_p', 'binary', True, False],
                ['keywords_tfidf_p', 'binary', False, True],
                ['keywords_tfidf_p', 'binary', True, True],

                ['keywords_tfidf_p', 'count', False, False],
                ['keywords_tfidf_p', 'count', True, False],
                ['keywords_tfidf_p', 'count', False, True],
                ['keywords_tfidf_p', 'count', True, True]]

prev_comb = [None, None, None, None]
for combination in combinations:
    print('-------------------------------')
    print('-------------------------------')
    print('STARTING NEW COMBINATION {}'.format(combination))
    print('-------------------------------')
    print('-------------------------------')
    # The data only depends on the model type and mode, so reload it only when
    # one of the two changes.
    if prev_comb[0:2] != combination[0:2]:
        if 'full_dataset' in globals():
            del full_dataset

        full_dataset, test_dataset = load_data_kfold(combination)
        # Remember what is loaded; without this the check above never matches
        # and the data is reloaded for every combination.
        prev_comb[0:2] = combination[0:2]

    # Layer sizes read by ConvNet: btc meta data widens the conv input,
    # user meta data widens the input of the last linear layer.
    INPUT_SIZE_CNN = 126 if 'custom' in combination[0] else 500
    FCN_INPUT_SIZE = 32
    if combination[2]:
        INPUT_SIZE_CNN = INPUT_SIZE_CNN + size_btc_md
    if combination[3]:
        FCN_INPUT_SIZE = FCN_INPUT_SIZE + size_user_info

    kf = KFold(n_splits=n_folds, shuffle=False)
    results = {}
    for batch_size in batch_sizes:
        # Add learning rates that match the batch sizes.
        print('-------------------------------')
        print('Starting training for new batch size {}'.format(batch_size))
        print('-------------------------------')
        train_scores = []
        val_scores = []
        test_scores = []

        test_agg = []
        mean_conf_matrices = []
        LR_conf_matrices = []

        # drop_last everywhere: the model reshapes its input with a fixed batch_size.
        test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                                  batch_size=batch_size,
                                                  drop_last=True,
                                                  shuffle=False)
        for train_indexes, validation_indexes in kf.split(full_dataset):
            train = torch.utils.data.dataset.Subset(full_dataset, train_indexes)
            validation = torch.utils.data.dataset.Subset(full_dataset, validation_indexes)

            # Fresh model, optimizer and schedulers for every fold.
            model = ConvNet(num_classes).to(device)
            criterion = nn.BCELoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
            early_stopping = EarlyStopping(patience=patience_early_stopping,
                                           verbose=True)
            scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                                   'min', verbose=True,
                                                                   patience=patience_reduce_lr)

            # Set train and validation data loaders.
            train_loader = torch.utils.data.DataLoader(dataset=train,
                                                       batch_size=batch_size,
                                                       drop_last=True,
                                                       shuffle=True)
            val_loader = torch.utils.data.DataLoader(dataset=validation,
                                                     batch_size=batch_size,
                                                     drop_last=True,
                                                     shuffle=True)
            del train, validation

            total_step = len(train_loader)
            print('-------------------------------')
            print('Starting training for new fold')
            print('-------------------------------')
            for epoch in range(num_epochs):
                train_loss, train_score, _, _ = train_model(train_loader, multi_class,
                                                            batch_size, combination)
                val_loss, val_score, _, _ = validate_model(val_loader, multi_class,
                                                           batch_size, combination)
                print('At end of epoch, average (training) loss: {}, score: {} '.format(train_loss, train_score))
                print('At end of epoch, average (validation) loss: {}, score: {} '.format(val_loss, val_score))
                early_stopping(val_loss, model)
                scheduler.step(val_loss)

                if early_stopping.early_stop:
                    print("EARLY STOPPAGE AFTER {} EPOCHS".format(epoch))
                    break
            test_score, y, pred = test_model(test_loader, multi_class, batch_size, combination)
            LR_conf, LR_prec, LR_recall, mean_conf, mean_prec, mean_recall = aggregate_evaluation_test(combination)

            print('Finished fold, scores: {}'.format([LR_prec, LR_recall, mean_prec, mean_recall]))

            train_scores.append(train_score)
            val_scores.append(val_score)
            test_scores.append(test_score)
            test_agg.append([LR_prec, LR_recall, mean_prec, mean_recall])
            mean_conf_matrices.append(mean_conf)
            LR_conf_matrices.append(LR_conf)

        # Take means and add to dict.
        results[batch_size] = [np.mean(train_scores, axis=0), np.mean(val_scores, axis=0),
                               np.mean(test_scores, axis=0), np.mean(test_agg, axis=0)]

        # Save confusion matrix as mean of all folds.
        conf_matrix(combination, batch_size, np.mean(LR_conf_matrices, axis=0), 'LR')
        conf_matrix(combination, batch_size, np.mean(mean_conf_matrices, axis=0), 'mean')

        print('-------------------------------')
        print('{}-fold validation has been executed, mean LR_prec, LR_recall, mean_prec, mean_recall: {}'.
              format(n_folds, np.mean(test_agg, axis=0)))

    # Save results for model type and all batch size combinations.
    np.save('/content/gdrive/My Drive/projects/data/btc/results/{}_{}_{}_{}'.format(combination[0], combination[1], int(combination[2]), int(combination[3])), np.array(results))
    print('Saved results')

# Process final results

In [0]:
# Read the results from the directory the training cell writes them to.
# NOTE(review): this used to point at '.../Colab Notebooks/data/btc/results/' - confirm
# no results still live there.
base_str = '/content/gdrive/My Drive/projects/data/btc/results/'
files = os.listdir(base_str)

results = {}
for file in files:
    if not file.endswith('.npy'):
        continue
    # splitext removes the extension; str.strip('.npy') would strip any of the
    # characters '.', 'n', 'p', 'y' from both ends of the name.
    key = os.path.splitext(file)[0]
    # Each file holds a pickled dict wrapped in a 0-d object array, which needs
    # allow_pickle=True; [()] unwraps the dict. Only load files this notebook wrote:
    # unpickling untrusted files can execute arbitrary code.
    results[key] = np.load(base_str + file, allow_pickle=True)[()]

In [0]:
# One row per (experiment file, batch size):
# experiment, batch size, feature type, meta-data flags, then precision/recall for
# train, validation, test, the LR aggregation and the mean aggregation.
columns = ['Experiment', 'Batch_size', 'type', 'btc_meta_data', 'user_meta_data',
           'Train Precision', 'Train Recall', 'Val Precision', 'Val Recall',
           'Test Precision', 'Test Recall',
           'LR Precision', 'LR Recall', 'Mean Precision', 'Mean Recall']
final = []
for k in results.keys():
    # File names look like '<model>_<mode>_<btc flag>_<user flag>'.
    k_replaced = k.replace('keywords_', '').replace('_count_', '').replace('_binary_', '')[:-3]

    t = 'Binary' if 'binary' in k else 'Count'
    # Booleans being the usage of btc meta-data and user meta-data or not.
    b = 'Yes' if k[-3] == '1' else 'No'
    u = 'Yes' if k[-1] == '1' else 'No'

    # Use the batch sizes that were actually stored instead of a hard-coded list,
    # which raised a KeyError for sizes that were never trained.
    for batch_size in sorted(results[k]):
        merge = [item for sublist in results[k][batch_size] for item in sublist]
        temp = [k_replaced, batch_size, t, b, u]
        temp.extend(merge)
        final.append(temp)

final = pd.DataFrame.from_records(final, columns=columns)
# Whole-value replacement on the column; chained indexing assignment
# (final['Experiment'][mask] = ...) may silently write to a temporary copy.
final['Experiment'] = final['Experiment'].replace({'tfidf_p': 'Partial TF-IDF', 'tfidf': 'TF-IDF'})
final = final[final['Experiment'] != 'custom_features']

In [0]:
# Each combination of experiment, type, btc meta data and user meta data was inspected
# by hand with this cell to find the maximum scores; adjust the three filters as needed.
is_selected = (
    final['Experiment'].eq('Partial TF-IDF')
    & final['type'].eq('Binary')
    & final['user_meta_data'].eq('No')
)
a = final[is_selected]

a.sort_values(by=['LR Precision', 'Mean Precision'], ascending=False)