<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Dataset-parsers-and-cleaning-functions" data-toc-modified-id="Dataset-parsers-and-cleaning-functions-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Dataset parsers and cleaning functions</a></span><ul class="toc-item"><li><span><a href="#Test-data" data-toc-modified-id="Test-data-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Test data</a></span></li></ul></li><li><span><a href="#Training" data-toc-modified-id="Training-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Training</a></span><ul class="toc-item"><li><span><a href="#Creating-a-new-data-structure-for-all-valid-data-and-pickling-it" data-toc-modified-id="Creating-a-new-data-structure-for-all-valid-data-and-pickling-it-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Creating a new data structure for all valid data and pickling it</a></span></li></ul></li><li><span><a href="#Multi-Class-Classifier:-Train/Test-Functions" data-toc-modified-id="Multi-Class-Classifier:-Train/Test-Functions-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Multi Class Classifier: Train/Test Functions</a></span></li><li><span><a href="#Multi-Class-Classifier:-SOTA-Comparison" data-toc-modified-id="Multi-Class-Classifier:-SOTA-Comparison-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Multi Class Classifier: SOTA Comparison</a></span><ul class="toc-item"><li><span><a href="#Multi-Layer-Perceptron-(0-Hidden-Layers)" data-toc-modified-id="Multi-Layer-Perceptron-(0-Hidden-Layers)-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Multi-Layer Perceptron (0 Hidden Layers)</a></span></li><li><span><a href="#Multi-Layer-Perceptron-(1-Hidden-Layer)" data-toc-modified-id="Multi-Layer-Perceptron-(1-Hidden-Layer)-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>Multi-Layer Perceptron (1 Hidden Layer)</a></span></li><li><span><a 
href="#Multi-Layer-Perceptron-(2-Hidden-Layers)" data-toc-modified-id="Multi-Layer-Perceptron-(2-Hidden-Layers)-5.3"><span class="toc-item-num">5.3&nbsp;&nbsp;</span>Multi-Layer Perceptron (2 Hidden Layers)</a></span></li><li><span><a href="#Multi-Layer-Perceptron-(2-Hidden-Layers,-with-Dropout)" data-toc-modified-id="Multi-Layer-Perceptron-(2-Hidden-Layers,-with-Dropout)-5.4"><span class="toc-item-num">5.4&nbsp;&nbsp;</span>Multi-Layer Perceptron (2 Hidden Layers, with Dropout)</a></span></li></ul></li></ul></div>

# Imports

In [None]:
# Required imports
import os
import numpy as np
import pandas as pd
import gzip
import glob
import pickle
from io import StringIO
import importlib.machinery

from sklearn.model_selection import train_test_split as TT_split
# Models
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.multiclass import OneVsRestClassifier as OvR

from sklearn.metrics import accuracy_score,confusion_matrix,balanced_accuracy_score
from sklearn.metrics import precision_recall_fscore_support,classification_report
#from sklearn.metrics import multilabel_confusion_matrix # Only available in dev .21

# Need Pytorch for multilabel classifications
import torch
from torch.autograd import Variable as V
from torch import nn,optim
import torch.nn.functional as F
import torch.utils.data as utils
#import skorch [Scikit-learn wrapper around Pytorch so allowing for K-fold cross-validation]
from sklearn.model_selection import KFold
from skorch import NeuralNetClassifier
random_state=10

In [None]:
# Data location and sample user
prefix='dataset/Extrasensory_uuid_fl_uTAR/'
cross_validation_user_loc='dataset/cv_5_folds/'
user_sample='3600D531-0C55-44A7-AE95-A7A38519464E.features_labels'

# Dataset parsers and cleaning functions

In [None]:
# Dataset parsers for header/ body for CSVs
def parse_header_of_csv(csv_str):
    """Split the CSV header line into sensor-feature names and label names.

    The header is expected to have the layout
    ``timestamp,<feature columns>,label:<name>...,label_source``.

    Args:
        csv_str: Full text of the CSV file; only the first line is inspected.

    Returns:
        A ``(feature_names, label_names)`` tuple of lists. Label names are
        returned with the ``label:`` prefix removed.

    Raises:
        ValueError: If ``csv_str`` contains no newline (from ``str.index``) or
            if no column starts with ``label:``.
        AssertionError: If the first/last column is not ``timestamp`` /
            ``label_source``, or a column between the first label and the last
            column does not start with ``label:``.
    """
    # Isolate the headline columns:
    headline = csv_str[:csv_str.index('\n')]
    columns = headline.split(',')

    # The first column should be timestamp:
    assert columns[0] == 'timestamp'
    # The last column should be label_source:
    assert columns[-1] == 'label_source'

    # Search for the column of the first label. Without this explicit check a
    # header lacking any label column would fail later with an unbound variable.
    first_label_ind = next(
        (ci for (ci, col) in enumerate(columns) if col.startswith('label:')),
        None)
    if first_label_ind is None:
        raise ValueError("CSV header contains no 'label:' columns")

    # Feature columns come after timestamp and before the labels:
    feature_names = columns[1:first_label_ind]

    # Then come the labels, till the one-before-last column. In the CSV the
    # label names carry the prefix 'label:', which is not needed afterwards.
    label_names = []
    for label in columns[first_label_ind:-1]:
        assert label.startswith('label:')
        label_names.append(label.replace('label:', ''))

    return (feature_names, label_names)

def parse_body_of_csv(csv_str,n_features):
    """Parse the numeric body of the CSV into features, labels and timestamps.

    Args:
        csv_str: Full text of the CSV file, header line included (it is skipped).
        n_features: Number of sensor-feature columns that follow the timestamp.

    Returns:
        A tuple ``(X, Y, M, timestamps)``:
        X is the float matrix of the ``n_features`` sensor columns,
        Y is a boolean label matrix (True where the label value is > 0),
        M is a boolean matrix that is True where the label value is NaN,
        timestamps is the first column cast to int.
        The last column of the file is not returned.
    """
    # Read the entire CSV body into a single numeric matrix. ndmin=2 keeps the
    # result two-dimensional even when the file holds a single data row, so the
    # column slicing below works for any number of rows.
    full_table = np.loadtxt(StringIO(csv_str),delimiter=',',skiprows=1,ndmin=2)

    # Timestamp is the primary key for the records (examples):
    timestamps = full_table[:,0].astype(int)

    # Read the sensor features:
    X = full_table[:,1:(n_features+1)]

    # Read the binary label values, and the 'missing label' indicators.
    # This should have values of either 0., 1. or NaN.
    trinary_labels_mat = full_table[:,(n_features+1):-1]
    M = np.isnan(trinary_labels_mat) # M is the missing label matrix

    # Missing labels are replaced by 0 before thresholding, so they end up False in Y.
    Y = np.where(M,0,trinary_labels_mat) > 0. # Y is the label matrix

    return (X,Y,M,timestamps)

def read_user_data(directory):
    """Load one user's gzipped CSV file and parse it.

    Args:
        directory: Path of the ``.csv.gz`` file to read.

    Returns:
        ``(X, Y, M, timestamps, feature_names, label_names)`` as produced by
        ``parse_body_of_csv`` and ``parse_header_of_csv``.
    """
    file_name = directory.split("/")[-1]
    print('Reading {}'.format(file_name))

    # The archive is read as raw bytes and decoded explicitly as UTF-8.
    with gzip.open(directory,'rb') as gz_file:
        csv_text = gz_file.read().decode("utf-8")

    # The header gives the column names; its feature count tells the body
    # parser where the label columns start.
    feature_names,label_names = parse_header_of_csv(csv_text)
    X,Y,M,timestamps = parse_body_of_csv(csv_text,len(feature_names))

    return (X,Y,M,timestamps,feature_names,label_names)

In [None]:
# Clean labels
def clean_labels(input_label):
    """Turn a raw dataset label into a human-readable string.

    A trailing underscore becomes ')', a double underscore becomes ' (', the
    remaining underscores become spaces, everything after the first character
    is lower-cased, and the fragment 'i m' is rewritten as "I'm".
    Example: 'DRINKING__ALCOHOL_' -> 'Drinking (alcohol)'.

    Args:
        input_label: Raw label name (str).

    Returns:
        The cleaned label; an empty input is returned unchanged.
    """
    # The body previously read an undefined local `label` instead of the
    # parameter, so every call failed; bind it explicitly.
    label = input_label
    if not label:
        return label
    if label.endswith('_'):
        label = label[:-1] + ')'
    label = label.replace('__',' (').replace('_',' ')
    label = label[0] + label[1:].lower()
    label = label.replace('i m','I\'m')
    return label

In [None]:
# Get a summary of the sensor feature
'''
# Summarize features as we are only using phone_acc,phone_gyro,phone_mag,phone_loc,phone_audio,
# phone_app,phone_battery,phone_use,phone_callstat,phone_wifi,phone_lf,phone_time
# We are ignoring the use of the smartwatch features. There are definitely features that will be used
# much more (e.g. than the phone_callstat) but we'll leave that up to the ML algorithm.
'''
def summarize_features(feature_list):
    """Map each raw feature name to the name of its sensor group.

    Args:
        feature_list: Sequence of raw feature-column names (str).

    Returns:
        A numpy string array with one entry per input name. Names that match
        none of the known prefixes get the empty string, so they never compare
        equal to a sensor-group name.
    """
    # (prefix of the raw column name, sensor-group name)
    prefix_to_sensor = (
        ('raw_acc','phone_acc'),
        ('proc_gyro','phone_gyro'),
        ('raw_magnet','phone_mag'),
        ('watch_acc','watch_acc'),
        ('watch_heading','watch_dir'),
        ('location','phone_loc'),
        ('audio_naive','phone_audio'),
        ('discrete:app_state','phone_app'),
        ('discrete:battery','phone_battery'),
        ('discrete:on','phone_use'),
        ('discrete:ringer','phone_callstat'),
        ('discrete:wifi','phone_wifi'),
        ('lf','phone_lf'),
        ('discrete:time','phone_time'),
    )

    summary_feature_list = []
    for feature in feature_list:
        sensor = ''
        for prefix,sensor_name in prefix_to_sensor:
            # No early exit: like the original chain of independent `if`s,
            # the last matching prefix wins.
            if feature.startswith(prefix):
                sensor = sensor_name
        summary_feature_list.append(sensor)

    # Building the array from the finished list sizes the string dtype to the
    # group names. np.empty_like(feature_list) sized it to the *input* names
    # (truncating group names when those are short) and left unmatched
    # entries uninitialized.
    return np.array(summary_feature_list,dtype=str)

In [None]:
# Custom dictionary class with help for duplicate keys
class Customdictionary(dict):
    """dict whose item assignment appends to a per-key list instead of overwriting.

    ``d[k] = v`` creates ``d[k] == [v]`` on first use and appends ``v`` to the
    existing list on every later assignment to the same key.
    """
    def __setitem__(self,key,value):
        # First assignment for this key: store an empty list through the plain
        # dict implementation (bypassing this override).
        if key not in self:
            super().__setitem__(key,[])
        self[key].append(value)

## Test data

In [None]:
# Reading sample data
sample_loc='{}/{}.csv.gz'.format(prefix,user_sample)
(x_user,y_user,missedlabel_user,
 tstamp_user,featurename_user,labelname_user)=read_user_data(sample_loc)

# Dataset summaries for this user
print('Data shape input for user (Len minutes/num examples, num sensors): ',x_user.shape) # Timestep examples, number of sensors
print('Label shape for user (Len minutes, num labels): ',y_user.shape,'\n') # Timestep examples, labels

# Per-label count of positive examples, paired with the label name and
# ordered from most to least frequent.
countlabels_user=y_user.sum(axis=0)
labelname_countlabel_user=sorted(zip(labelname_user,countlabels_user),
                                 key=lambda pair:pair[1],reverse=True)

print('Sensor feature names:\n')
# Sensor group of every raw feature column
feature_names=summarize_features(featurename_user)

for i,sensor_feature in enumerate(featurename_user):
    sensor_group=feature_names[i]
    print('{} :: {} ::--> {}\n'.format(i,sensor_group,sensor_feature))

print('Activities and counts:')
print(labelname_countlabel_user)

<span style="color:red">
    ISSUE: Some labels (e.g. "Phone location: bag") were never filled out by some users for any timestep, and therefore show up as np.nan. The label sum above was a check to see whether the same label had also been left unfilled by the other users (and hence would have a count of zero), which would allow that label to be removed completely. The lowest count was (Elevator: 200), so no label can be dropped on that basis.
    I cannot blindly remove rows just because a particular label wasn't filled out for any timestep by a user. For the single-label case this would be fine, but in the multi-label case it would mean that the other, valid labels in those rows are ignored. The only option I have so far is to naively convert all NaNs in the labels to zeros. This could mean a loss of accuracy (the user might have been doing the activity in the label but omitted annotating it, so we would be training on an incorrectly labelled feature vector), but there is no alternative so far.
</span>

# Training

In [None]:
# Choosing sensor labels
'''
Summary sensor choices are: phone_acc,phone_gyro,phone_mag,watch_acc,watch_dir,phone_loc,phone_audio,
phone_app,phone_battery,phone_use,phone_callstat,phone_wifi,phone_lf,phone_time
In this project, we aren't using watch_acc,watch_dir (no smartwatch)
'''

def choose_sensors(X_train,used_sensors,summarized_feature_names):
    """Keep only the feature columns that belong to the requested sensors.

    Args:
        X_train: 2-D feature matrix with one column per feature.
        used_sensors: Iterable of sensor-group names to keep.
        summarized_feature_names: Per-column sensor-group names, compared
            element-wise against each requested sensor.

    Returns:
        ``X_train`` restricted to the columns whose group is in ``used_sensors``.
    """
    # Start with no column selected, then switch on the columns of each sensor.
    keep_column=np.zeros(len(summarized_feature_names),dtype=bool)
    for sensor in used_sensors:
        belongs_to_sensor=(sensor==summarized_feature_names)
        keep_column=keep_column | belongs_to_sensor
    return X_train[:,keep_column]

In [None]:
# Returns a standardized (0 mean, 1 variance) dataset
def standardize(X_train):
    """Standardize every column to zero mean and unit variance, ignoring NaNs.

    Args:
        X_train: Feature matrix; statistics are taken along axis 0.

    Returns:
        ``(X, mean, standard_dev_nonzero)`` where ``mean`` and
        ``standard_dev_nonzero`` are row vectors of shape (1, n_columns) that
        can be reused to transform other data with the same statistics.
    """
    # NaN-aware column statistics
    column_mean=np.nanmean(X_train,axis=0)
    column_std=np.nanstd(X_train,axis=0)

    mean=column_mean.reshape((1,-1))
    # Columns whose deviation is not positive are scaled by 1 to avoid dividing by zero
    safe_std=np.where(column_std>0,column_std,1.)
    standard_dev_nonzero=safe_std.reshape((1,-1))

    return (X_train-mean)/standard_dev_nonzero,mean,standard_dev_nonzero

In [None]:
# Sensor Types, Label Possibilities variables
# sensor_types: the sensor-group names (as produced by summarize_features) whose
# feature columns are kept; the two watch groups (watch_acc, watch_dir) are not listed.
sensor_types=['phone_acc','phone_gyro','phone_mag','phone_loc','phone_audio',
'phone_app','phone_battery','phone_use','phone_callstat','phone_wifi','phone_lf','phone_time']
# label_possibilities: 51 raw label names.
# NOTE(review): presumably the label columns of the dataset; the order is not
# checked against the CSV header anywhere in this cell - confirm before indexing by position.
label_possibilities=['LOC_home','OR_indoors','PHONE_ON_TABLE','SITTING','WITH_FRIENDS',
 'LYING_DOWN','SLEEPING','WATCHING_TV','EATING','PHONE_IN_POCKET',
 'TALKING','DRIVE_-_I_M_A_PASSENGER','OR_standing','IN_A_CAR',
 'OR_exercise','AT_THE_GYM','SINGING','FIX_walking','OR_outside',
 'SHOPPING','AT_SCHOOL','BATHING_-_SHOWER','DRESSING','DRINKING__ALCOHOL_',
 'PHONE_IN_HAND','FIX_restaurant','IN_CLASS','PHONE_IN_BAG','IN_A_MEETING',
 'TOILET','COOKING','ELEVATOR','FIX_running','BICYCLING','LAB_WORK',
 'LOC_main_workplace','ON_A_BUS','DRIVE_-_I_M_THE_DRIVER','STROLLING',
 'CLEANING','DOING_LAUNDRY','WASHING_DISHES','SURFING_THE_INTERNET',
 'AT_A_PARTY','AT_A_BAR','LOC_beach','COMPUTER_WORK','GROOMING','STAIRS_-_GOING_UP',
 'STAIRS_-_GOING_DOWN','WITH_CO-WORKERS']

## Creating a new data structure for all valid data and pickling it

Rows with missing labels (np.nan) are kept: a missing label is treated as negative (0) rather than removing the row. Missing feature entries are zero-imputed. Standardization is done at train time.

In [None]:
# Reading data in the directory (Stacked)
# Per-user splits are collected in lists and stacked once after the loop;
# calling np.vstack inside the loop re-copies everything gathered so far for
# every user. Each list starts with an empty float array so the stacked result
# is a float array of the expected width even when no file is found.
x_train_parts=[np.empty((0,168))]
y_train_parts=[np.empty((0,51))]
x_test_parts=[np.empty((0,168))]
y_test_parts=[np.empty((0,51))]

# sorted(): glob returns files in arbitrary order; a fixed order keeps the
# stacked arrays reproducible between runs.
for u_file in sorted(glob.glob('{}/*.csv.gz'.format(prefix))):
    x_user,y_user,missed_label_user,tstamp_user,featurename_user,labelname_user=read_user_data(u_file)
    x_sh=x_user.shape
    y_sh=y_user.shape

    # Missing labels need no handling here: parse_body_of_csv already returns
    # y_user as a boolean matrix in which a missing label is False.
    # Assuming that if the user hasn't bothered with that label, it means that
    # it wasn't too applicable.
    # Missing feature values are imputed with zero.
    x_user=np.nan_to_num(x_user)

    print('X_shape before removing invalid labels:{}, after:{}'.format(x_sh,x_user.shape))
    print('Y_shape before removing invalid labels:{}, after:{}'.format(y_sh,y_user.shape))

    # Split each user data into train-test splits .70-.30 as in literature
    x_train_u,x_test_u,y_train_u,y_test_u=TT_split(x_user,y_user,test_size=0.30,random_state=random_state)

    # Removing smart watch features. The sensor group of every column is
    # derived from this user's own header instead of relying on a variable
    # left behind by the sample-user cell.
    user_feature_groups=summarize_features(featurename_user)
    x_train_u=choose_sensors(x_train_u,used_sensors=sensor_types,summarized_feature_names=user_feature_groups)
    x_test_u=choose_sensors(x_test_u,used_sensors=sensor_types,summarized_feature_names=user_feature_groups)

    # Stacking data. Will be changed for K-Fold cross-validation
    x_train_parts.append(x_train_u)
    y_train_parts.append(y_train_u)
    x_test_parts.append(x_test_u)
    y_test_parts.append(y_test_u)

    print('\t Per User Training examples:{}, Testing examples:{}'.
          format(y_train_u.shape[0],y_test_u.shape[0]))

X_train_t=np.vstack(x_train_parts)
Y_train_t=np.vstack(y_train_parts)
X_test_t=np.vstack(x_test_parts)
Y_test_t=np.vstack(y_test_parts)

assert len(X_train_t)==len(Y_train_t)
assert len(X_test_t)==len(Y_test_t)

print('\nTraining: X::{} ,Y::{}'.format(X_train_t.shape,Y_train_t.shape))
print('Testing: X::{} ,Y::{}'.format(X_test_t.shape,Y_test_t.shape))

print("Pickling data files")
# Create the output directory on a fresh checkout instead of failing in open().
os.makedirs('dataset/pickled',exist_ok=True)
with open('dataset/pickled/x_train.pkl','wb') as f:
    pickle.dump(X_train_t,f)
with open('dataset/pickled/y_train.pkl','wb') as f:
    pickle.dump(Y_train_t,f)
with open('dataset/pickled/x_test.pkl','wb') as f:
    pickle.dump(X_test_t,f)
with open('dataset/pickled/y_test.pkl','wb') as f:
    pickle.dump(Y_test_t,f)
print("Done")

# Multi Class Classifier: Train/Test Functions

 Using the saved pickle files for this

In [None]:
# Defining sizes for neural networks and other hyperparameters
# NOTE(review): the layer sizes are read from X_train_t / Y_train_t, i.e. the
# in-memory arrays built by the stacking cell, not from the pickle files - on a
# fresh kernel that cell has to run first.
input_size=X_train_t.shape[-1] # Number of feature columns
hidden_size=16 # Width of every hidden layer
output_size=Y_train_t.shape[-1] # Number of label columns
n_epoch=40 # Passes over the training loader
bs=300 # Batch size of the data loaders (also used as divisor for the printed running loss)
lr_init=0.1 # Initial learning rate handed to the SGD optimizer
momentum=0.5 # SGD momentum
#torch.set_default_tensor_type('torch.cuda.FloatTensor')

In [None]:
# Simple function to run using GPU when available
def C(structure):
    """Move a tensor or module to the GPU when one is available.

    Args:
        structure: Any object with a ``.to(device)`` method (tensor, module).

    Returns:
        ``structure.to("cuda")`` when CUDA is available, otherwise
        ``structure`` itself.
    """
    if torch.cuda.is_available():
        return structure.to(torch.device("cuda"))
    # Without a GPU the object is handed back unchanged; falling off the end
    # would return None and silently replace the caller's data with None.
    return structure

In [None]:
# Report the compute device. get_device_name(0) raises when no CUDA device
# exists, so it is only queried when CUDA is actually available.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print('CUDA not available: running on CPU')

In [None]:
# Load pickle file datasets and normalize (and normalize the test set using same values)
# Step 1: read the four arrays back from disk.
with open('dataset/pickled/x_train.pkl','rb') as f:
    x_train_raw=pickle.load(f)
with open('dataset/pickled/y_train.pkl','rb') as f:
    y_train_raw=pickle.load(f)
with open('dataset/pickled/x_test.pkl','rb') as f:
    x_test_raw=pickle.load(f)
with open('dataset/pickled/y_test.pkl','rb') as f:
    y_test_raw=pickle.load(f)

# Step 2: standardize the training features; the resulting mean and deviation
# are reused for the test features so both sets share the same scaling.
X_train,mean,standard_dev_nonzero=standardize(x_train_raw) # Standardizing X_train
x_test_scaled=(x_test_raw-mean)/standard_dev_nonzero

# Step 3: convert to double tensors and hand them to C() for device placement.
X_train=C(torch.from_numpy(X_train).double())
Y_train=C(torch.from_numpy(y_train_raw).double())
X_test=C(torch.from_numpy(x_test_scaled).double())
Y_test=C(torch.from_numpy(y_test_raw).double())

In [None]:
# Dataloader creation
# Both loaders serve batches of `bs` rows in stored order and keep the last,
# possibly smaller, batch (drop_last=False).
# NOTE(review): shuffle=False on the training loader means SGD sees the rows in
# the order they were stacked - confirm this is intended for training.
train_dataset=utils.TensorDataset(X_train,Y_train)
train_loader=utils.DataLoader(dataset=train_dataset,batch_size=bs,shuffle=False,drop_last=False)

test_dataset=utils.TensorDataset(X_test,Y_test)
test_loader=utils.DataLoader(dataset=test_dataset,batch_size=bs,shuffle=False,drop_last=False)

In [None]:
# Linear decreasing LR scheduler
def linear_lr_scheduler(optimizer, epoch, decay=0.94):
    """Set every parameter group's learning rate to ``initial_lr * decay**epoch``.

    With the defaults used in this notebook (LR_init=0.1, n_epochs=40) the rate
    falls from 0.1 to roughly 0.01 over the run. Despite the name the schedule
    is exponential in ``epoch``, not linear.

    The rate of each group at the first call is remembered in the group under
    the key ``'initial_lr'`` and used as the base for all later calls, so the
    decay no longer compounds on an already-decayed rate.

    Args:
        optimizer: Object exposing ``param_groups`` (list of dicts with 'lr').
        epoch: Current epoch index; call with 0 first so the untouched rate is
            recorded as the base.
        decay: Multiplicative factor applied once per epoch.
    """
    factor=decay**(epoch//1)
    for param_group in optimizer.param_groups:
        # setdefault only stores the rate the first time this group is seen
        base_lr=param_group.setdefault('initial_lr',param_group['lr'])
        param_group['lr']=base_lr*factor

In [None]:
# Adds euclidean regularization to weight matrices
def frobenius_norm(model,loss,coefficient=0.001):
    """Return ``loss`` plus a weight penalty over all linear layers of ``model``.

    For every ``nn.Linear`` module the penalty is the sum over weight-matrix
    rows of each row's Euclidean norm, scaled by ``coefficient``. Biases are
    not penalised.

    Args:
        model: Module whose ``nn.Linear`` sub-modules are regularised.
        loss: Data loss to which the penalty is added.
        coefficient: Scale of the penalty term.

    Returns:
        ``loss + penalty``. The data loss used to be dropped, so callers
        back-propagated the penalty alone.
    """
    regularizer_loss=0

    for m in model.modules():
        if isinstance(m,nn.Linear): # Linear layer
            # Euclidean norm of every row of the weight matrix, summed over rows
            row_norms=torch.sum(m.weight**2,1)**0.5
            regularizer_loss=regularizer_loss+coefficient*torch.sum(row_norms,0)
    return loss+regularizer_loss

In [None]:
# Debug helper: print the weight matrix of every linear layer of `model`.
# NOTE(review): `model` is first assigned in a later cell of this notebook, so
# this cell fails on a fresh kernel when the cells are run top to bottom.
for m in model.modules():
    if isinstance(m,nn.Linear):
        print(m.weight)

# Multi Class Classifier: SOTA Comparison

## Multi-Layer Perceptron (0 Hidden Layers)

In [None]:
class LinearMLP(nn.Module):
    """Perceptron without hidden layers: one linear map from inputs to label logits.

    Layer sizes come from the notebook-level ``input_size`` and ``output_size``.
    """
    def __init__(self):
        super().__init__()
        self.fc1=nn.Linear(input_size,output_size)

    def forward(self,x):
        """Return the raw (un-squashed) output of the linear layer for ``x``."""
        logits=self.fc1(x)
        return logits

In [None]:

model=LinearMLP()
# Train on the GPU when one is available, otherwise on the CPU
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer=optim.SGD(model.parameters(),lr=lr_init,momentum=momentum)
# BCEWithLogitsLoss applies the sigmoid internally and is evaluated on the raw
# model outputs. The loss must not be computed on thresholded (0/1) outputs:
# thresholding is not differentiable and re-wrapping the result as a new
# tensor cuts it off from the model, so no gradient would reach the weights.
criterion=nn.BCEWithLogitsLoss()

for epoch in range(n_epoch):
    sum_total=0.
    linear_lr_scheduler(optimizer,epoch) # Reduce LR once every epoch
    epoch_lr=optimizer.param_groups[-1]['lr']
    for i,data in enumerate(train_loader,0):
        inputs,labels=data
        inputs=inputs.to(device).float()
        labels=labels.to(device).float()

        optimizer.zero_grad() # Zero gradients

        output=model(inputs) # Raw scores (logits), one per label
        loss=criterion(output,labels)
        # frobenius_norm is given a zero base loss so that only the weight
        # penalty is obtained from it; the data loss is then added explicitly.
        penalty=frobenius_norm(model,torch.zeros((),device=device))
        regularized_loss=loss+penalty

        regularized_loss.backward()
        optimizer.step()

        sum_total+=regularized_loss.item()
        # NOTE(review): the running sum is divided by the batch size `bs`, not
        # by the number of batches accumulated - confirm this is intended.
        if i%300==0: # Every 300 minibatches
            print("Epoch {}::Minibatch {}::LR {} --> Loss {}".format(epoch+1,i+1,epoch_lr,sum_total/bs))
            sum_total=0.
print('\n Finished training')

<span style="color:red">
    ISSUE: Doesn't seem to train well.
</span>

In [None]:
# Saving trained models
root='saved_models/multilabel_classifier/'
model_path=root+'mlp_0hidden'
checkpoint_path=root+'mlp_0hidden_checkpoint'

# Create the target directory if needed; torch.save does not create it.
os.makedirs(root,exist_ok=True)

torch.save(model,model_path) # Saving the whole model

# Saving checkpoint model: weights, optimizer state and the last loss values
torch.save({'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss':loss.item(),
            'sumloss':sum_total/bs},checkpoint_path)

In [None]:
# Test dataset model performance

concat_predictions=torch.zeros(0,output_size)
concat_truelabels=torch.zeros(0,output_size)

# Feed the inputs on whichever device the model lives on (works with or without CUDA)
model_device=next(model.parameters()).device

with torch.no_grad(): # Evaluation only: no autograd graph is needed
    for data in test_loader:
        inputs,labels=data
        inputs=inputs.to(model_device).float()

        outputs=model(inputs).cpu() # Collect the results on the CPU

        # Concat test set into one tensor
        concat_predictions=torch.cat((concat_predictions,outputs),0)
        concat_truelabels=torch.cat((concat_truelabels,labels.cpu().float()),0)

concat_predictions=torch.sigmoid(concat_predictions) # Squash the raw scores to between 0 and 1
concat_predictions=concat_predictions>=0.50 # Binarize outputs using a threshold

# Convert tensor to numpy float array (the np.float alias no longer exists in
# current NumPy; the builtin float is the same type)
concat_predictions=concat_predictions.numpy().astype(float)
concat_truelabels=concat_truelabels.numpy().astype(float)

# Precision, Recall, F-1, support
mlp_0H_clfreport=classification_report(y_true=concat_truelabels,y_pred=concat_predictions,
                                       target_names=labelname_user,output_dict=True)
print('Test Set')
for i in range(output_size):
    true_perlabel=concat_truelabels[:,i]
    prediction_perlabel=concat_predictions[:,i]
    bal_acc=balanced_accuracy_score(y_true=true_perlabel,y_pred=prediction_perlabel)

    print('Label {} :::-> Balanced Accuracy {}'.format(labelname_user[i],round(bal_acc,5)))

# Iterate over the report entries directly; wrapping items() in enumerate()
# printed a running index in place of the entry name.
for key,value in mlp_0H_clfreport.items():
    print(key,"\n")
    print("\t",value)

In [None]:
# Train dataset model performance

concat_predictions=torch.zeros(0,output_size)
concat_truelabels=torch.zeros(0,output_size)

# Feed the inputs on whichever device the model lives on (works with or without CUDA)
model_device=next(model.parameters()).device

with torch.no_grad(): # Evaluation only: no autograd graph is needed
    for data in train_loader:
        inputs,labels=data
        inputs=inputs.to(model_device).float()

        outputs=model(inputs).cpu() # Collect the results on the CPU

        # Concat train set into one tensor
        concat_predictions=torch.cat((concat_predictions,outputs),0)
        concat_truelabels=torch.cat((concat_truelabels,labels.cpu().float()),0)

concat_predictions=torch.sigmoid(concat_predictions) # Squash the raw scores to between 0 and 1
concat_predictions=concat_predictions>=0.50 # Binarize outputs using a threshold

# Convert tensor to numpy float array (the np.float alias no longer exists in
# current NumPy; the builtin float is the same type)
concat_predictions=concat_predictions.numpy().astype(float)
concat_truelabels=concat_truelabels.numpy().astype(float)

# Precision, Recall, F-1, support
mlp_0H_clfreport_train=classification_report(y_true=concat_truelabels,y_pred=concat_predictions,
                                       target_names=labelname_user,output_dict=True)
print('Train Set')
for i in range(output_size):
    true_perlabel=concat_truelabels[:,i]
    prediction_perlabel=concat_predictions[:,i]
    bal_acc=balanced_accuracy_score(y_true=true_perlabel,y_pred=prediction_perlabel)

    print('Label {} :::-> Balanced Accuracy {}'.format(labelname_user[i],round(bal_acc,5)))

# Iterate over the report entries directly; wrapping items() in enumerate()
# printed a running index in place of the entry name.
for key,value in mlp_0H_clfreport_train.items():
    print(key,"\n")
    print("\t",value)

## Multi-Layer Perceptron (1 Hidden Layer)

In [None]:
class MLP_1H(nn.Module):
    """Perceptron with one hidden layer (LeakyReLU, slope 0.1) and a linear output.

    Layer sizes come from the notebook-level ``input_size``, ``hidden_size``
    and ``output_size``.
    """
    def __init__(self):
        super().__init__()
        hidden_layer=nn.Linear(input_size,hidden_size)
        self.hidden0=nn.Sequential(hidden_layer,nn.LeakyReLU(negative_slope=0.1))
        self.out=nn.Sequential(nn.Linear(hidden_size,output_size))

    def forward(self,x):
        """Return the raw (un-squashed) label scores for ``x``."""
        return self.out(self.hidden0(x))
    
model=MLP_1H()
# Train on the GPU when one is available, otherwise on the CPU. Casting with
# .float() on the chosen device replaces the hard-coded torch.cuda.FloatTensor
# casts, which fail on machines without CUDA.
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer=optim.SGD(model.parameters(),lr=lr_init,momentum=momentum)
# MultiLabelSoftMarginLoss is evaluated on the raw model outputs
criterion=nn.MultiLabelSoftMarginLoss()

for epoch in range(n_epoch):
    sum_total=0.
    linear_lr_scheduler(optimizer,epoch) # Reduce LR once every epoch
    epoch_lr=optimizer.param_groups[-1]['lr']
    for i,data in enumerate(train_loader,0):
        inputs,labels=data
        inputs=inputs.to(device).float()
        labels=labels.to(device).float()

        optimizer.zero_grad() # Zero gradients

        output=model(inputs) # Raw scores (logits), one per label

        loss=criterion(output,labels)
        loss.backward()
        optimizer.step()

        sum_total+=loss.item()
        # NOTE(review): the running sum is divided by the batch size `bs`, not
        # by the number of batches accumulated - confirm this is intended.
        if i%300==0: # Every 300 minibatches
            print("Epoch {}::Minibatch {}::LR {} --> Loss {}".format(epoch+1,i+1,epoch_lr,sum_total/bs))
            sum_total=0.
print('\n Finished training')

In [None]:
# Saving trained models
root='saved_models/multilabel_classifier/'
model_path=root+'mlp_1hidden'
checkpoint_path=root+'mlp_1hidden_checkpoint'

# Create the target directory if needed; torch.save does not create it.
os.makedirs(root,exist_ok=True)

torch.save(model,model_path) # Saving the whole model

# Saving checkpoint model: weights, optimizer state and the last loss values
torch.save({'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss':loss.item(),
            'sumloss':sum_total/bs},checkpoint_path)

In [None]:
# Test dataset model performance

concat_predictions=torch.zeros(0,output_size)
concat_truelabels=torch.zeros(0,output_size)

# Feed the inputs on whichever device the model lives on (works with or without CUDA)
model_device=next(model.parameters()).device

with torch.no_grad(): # Evaluation only: no autograd graph is needed
    for data in test_loader:
        inputs,labels=data
        inputs=inputs.to(model_device).float()

        outputs=model(inputs).cpu() # Collect the results on the CPU

        # Concat test set into one tensor
        concat_predictions=torch.cat((concat_predictions,outputs),0)
        concat_truelabels=torch.cat((concat_truelabels,labels.cpu().float()),0)

concat_predictions=torch.sigmoid(concat_predictions) # Squash the raw scores to between 0 and 1
concat_predictions=concat_predictions>=0.50 # Binarize outputs using a threshold

# Convert tensor to numpy float array (the np.float alias no longer exists in
# current NumPy; the builtin float is the same type)
concat_predictions=concat_predictions.numpy().astype(float)
concat_truelabels=concat_truelabels.numpy().astype(float)

# Precision, Recall, F-1, support
# NOTE(review): the report is stored under the name mlp_0H_clfreport although
# the model in scope was trained in the 1-hidden-layer section; the name is
# kept so existing references continue to work, but it overwrites the
# 0-hidden-layer report.
mlp_0H_clfreport=classification_report(y_true=concat_truelabels,y_pred=concat_predictions,
                                       target_names=labelname_user,output_dict=True)
print('Test Set')
for i in range(output_size):
    true_perlabel=concat_truelabels[:,i]
    prediction_perlabel=concat_predictions[:,i]
    bal_acc=balanced_accuracy_score(y_true=true_perlabel,y_pred=prediction_perlabel)

    print('Label {} :::-> Balanced Accuracy {}'.format(labelname_user[i],round(bal_acc,5)))

# Iterate over the report entries directly; wrapping items() in enumerate()
# printed a running index in place of the entry name.
for key,value in mlp_0H_clfreport.items():
    print(key,"\n")
    print("\t",value)

In [None]:
# Train dataset model performance

concat_predictions=torch.zeros(0,output_size)
concat_truelabels=torch.zeros(0,output_size)

# Feed the inputs on whichever device the model lives on (works with or without CUDA)
model_device=next(model.parameters()).device

with torch.no_grad(): # Evaluation only: no autograd graph is needed
    for data in train_loader:
        inputs,labels=data
        inputs=inputs.to(model_device).float()

        outputs=model(inputs).cpu() # Collect the results on the CPU

        # Concat train set into one tensor
        concat_predictions=torch.cat((concat_predictions,outputs),0)
        concat_truelabels=torch.cat((concat_truelabels,labels.cpu().float()),0)

concat_predictions=torch.sigmoid(concat_predictions) # Squash the raw scores to between 0 and 1
concat_predictions=concat_predictions>=0.50 # Binarize outputs using a threshold

# Convert tensor to numpy float array (the np.float alias no longer exists in
# current NumPy; the builtin float is the same type)
concat_predictions=concat_predictions.numpy().astype(float)
concat_truelabels=concat_truelabels.numpy().astype(float)

# Precision, Recall, F-1, support
# NOTE(review): the report is stored under the name mlp_0H_clfreport_train
# although the model in scope was trained in the 1-hidden-layer section; the
# name is kept so existing references continue to work, but it overwrites the
# 0-hidden-layer report.
mlp_0H_clfreport_train=classification_report(y_true=concat_truelabels,y_pred=concat_predictions,
                                       target_names=labelname_user,output_dict=True)
print('Train Set')
for i in range(output_size):
    true_perlabel=concat_truelabels[:,i]
    prediction_perlabel=concat_predictions[:,i]
    bal_acc=balanced_accuracy_score(y_true=true_perlabel,y_pred=prediction_perlabel)

    print('Label {} :::-> Balanced Accuracy {}'.format(labelname_user[i],round(bal_acc,5)))

# Iterate over the report entries directly; wrapping items() in enumerate()
# printed a running index in place of the entry name.
for key,value in mlp_0H_clfreport_train.items():
    print(key,"\n")
    print("\t",value)

## Multi-Layer Perceptron (2 Hidden Layers)

In [None]:
class MLP_2H(nn.Module):
    """Perceptron with two hidden layers (LeakyReLU, slope 0.1) and a linear output.

    Layer sizes come from the notebook-level ``input_size``, ``hidden_size``
    and ``output_size``.
    """
    def __init__(self):
        super().__init__()
        first_hidden=nn.Linear(input_size,hidden_size)
        self.hidden0=nn.Sequential(first_hidden,nn.LeakyReLU(negative_slope=0.1))
        second_hidden=nn.Linear(hidden_size,hidden_size)
        self.hidden1=nn.Sequential(second_hidden,nn.LeakyReLU(negative_slope=0.1))
        self.out=nn.Sequential(nn.Linear(hidden_size,output_size))

    def forward(self,x):
        """Return the raw (un-squashed) label scores for ``x``."""
        hidden=self.hidden1(self.hidden0(x))
        return self.out(hidden)
    
model=MLP_2H()
# Train on the GPU when one is available, otherwise on the CPU. Casting with
# .float() on the chosen device replaces the hard-coded torch.cuda.FloatTensor
# casts, which fail on machines without CUDA.
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer=optim.SGD(model.parameters(),lr=lr_init,momentum=momentum)
# MultiLabelSoftMarginLoss is evaluated on the raw model outputs
criterion=nn.MultiLabelSoftMarginLoss()

for epoch in range(n_epoch):
    sum_total=0.
    linear_lr_scheduler(optimizer,epoch) # Reduce LR once every epoch
    epoch_lr=optimizer.param_groups[-1]['lr']
    for i,data in enumerate(train_loader,0):
        inputs,labels=data
        inputs=inputs.to(device).float()
        labels=labels.to(device).float()

        optimizer.zero_grad() # Zero gradients

        output=model(inputs) # Raw scores (logits), one per label

        loss=criterion(output,labels)
        loss.backward()
        optimizer.step()

        sum_total+=loss.item()
        # NOTE(review): the running sum is divided by the batch size `bs`, not
        # by the number of batches accumulated - confirm this is intended.
        if i%300==0: # Every 300 minibatches
            print("Epoch {}::Minibatch {}::LR {} --> Loss {}".format(epoch+1,i+1,epoch_lr,sum_total/bs))
            sum_total=0.
print('\n Finished training')

In [None]:
# Saving trained models
import os # Local import so this cell does not depend on `os` being imported elsewhere

root='saved_models/multilabel_classifier/'
model_path=root+'mlp_2hidden'
checkpoint_path=root+'mlp_2hidden_checkpoint'

# torch.save does not create missing parent directories; create them first so
# a finished training run is not lost to a missing-folder error.
os.makedirs(root,exist_ok=True)

# Saving the whole model. This pickles the module object, so loading it later
# requires the model class definition to be importable.
torch.save(model,model_path)

# Saving checkpoint model (state dicts only, plus bookkeeping values)
# NOTE(review): `sum_total` is whatever the training loop accumulated since it
# last reset the counter, so 'sumloss' is not an epoch average - confirm this
# is the intended value.
torch.save({'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss':loss.item(),
            'sumloss':sum_total/bs},checkpoint_path)

In [None]:
# Test dataset model performance

# Evaluate in inference mode (train-only layer behaviour switched off) and
# remember the previous mode so it can be restored afterwards.
was_training=model.training
model.eval()

# Start each list with an empty (0, output_size) tensor so that an empty loader
# still produces correctly shaped results; concatenate once after the loop.
prediction_batches=[torch.zeros(0,output_size)]
truelabel_batches=[torch.zeros(0,output_size)]

with torch.no_grad(): # No autograd graph is needed for evaluation
    for data in test_loader:
        inputs,labels=data
        inputs=V(C(inputs)).float()
        labels=V(C(labels)).type(torch.cuda.FloatTensor)

        outputs=model(inputs).cpu() # Move the outputs to the CPU for accumulation

        prediction_batches.append(outputs)
        truelabel_batches.append(labels.cpu())

model.train(was_training) # Restore the mode the model was in before this cell

# Concat test set into one tensor
concat_predictions=torch.cat(prediction_batches,0)
concat_truelabels=torch.cat(truelabel_batches,0)

concat_predictions=torch.sigmoid(concat_predictions) # Squash raw model outputs into (0, 1)
concat_predictions=concat_predictions>=0.50 # Binarize outputs using a 0.5 threshold

# Convert tensors to float64 numpy arrays. `np.float` was only an alias of the
# builtin float (float64) and was removed in NumPy 1.24.
concat_predictions=concat_predictions.numpy().astype(np.float64)
concat_truelabels=concat_truelabels.numpy().astype(np.float64)

# Precision, Recall, F-1, support
# NOTE(review): the variable name says "0H", but this cell evaluates whichever
# `model` is currently bound and overwrites any earlier report of that name.
mlp_0H_clfreport=classification_report(y_true=concat_truelabels,y_pred=concat_predictions,
                                       target_names=labelname_user,output_dict=True)
print('Test Set')
# Balanced accuracy is computed one label (column) at a time
for i in range(output_size):
    true_perlabel=concat_truelabels[:,i]
    prediction_perlabel=concat_predictions[:,i]
    bal_acc=balanced_accuracy_score(y_true=true_perlabel,y_pred=prediction_perlabel)

    print('Label {} :::-> Balanced Accuracy {}'.format(labelname_user[i],round(bal_acc,5)))

# Iterate the report dict directly: `key` is the report entry name and `value`
# its metrics. (Wrapping .items() in enumerate() printed an integer index and a
# (name, metrics) tuple instead.)
for key,value in mlp_0H_clfreport.items():
    print(key,"\n")
    print("\t",value)

In [None]:
# Train dataset model performance

# Evaluate in inference mode (train-only layer behaviour switched off) and
# remember the previous mode so it can be restored afterwards.
was_training=model.training
model.eval()

# Start each list with an empty (0, output_size) tensor so that an empty loader
# still produces correctly shaped results; concatenate once after the loop.
prediction_batches=[torch.zeros(0,output_size)]
truelabel_batches=[torch.zeros(0,output_size)]

with torch.no_grad(): # No autograd graph is needed for evaluation
    for data in train_loader:
        inputs,labels=data
        inputs=V(C(inputs)).float()
        labels=V(C(labels)).type(torch.cuda.FloatTensor)

        outputs=model(inputs).cpu() # Move the outputs to the CPU for accumulation

        prediction_batches.append(outputs)
        truelabel_batches.append(labels.cpu())

model.train(was_training) # Restore the mode the model was in before this cell

# Concat train set into one tensor
concat_predictions=torch.cat(prediction_batches,0)
concat_truelabels=torch.cat(truelabel_batches,0)

concat_predictions=torch.sigmoid(concat_predictions) # Squash raw model outputs into (0, 1)
concat_predictions=concat_predictions>=0.50 # Binarize outputs using a 0.5 threshold

# Convert tensors to float64 numpy arrays. `np.float` was only an alias of the
# builtin float (float64) and was removed in NumPy 1.24.
concat_predictions=concat_predictions.numpy().astype(np.float64)
concat_truelabels=concat_truelabels.numpy().astype(np.float64)

# Precision, Recall, F-1, support
# NOTE(review): the variable name says "0H", but this cell evaluates whichever
# `model` is currently bound and overwrites any earlier report of that name.
mlp_0H_clfreport_train=classification_report(y_true=concat_truelabels,y_pred=concat_predictions,
                                       target_names=labelname_user,output_dict=True)
print('Train Set')
# Balanced accuracy is computed one label (column) at a time
for i in range(output_size):
    true_perlabel=concat_truelabels[:,i]
    prediction_perlabel=concat_predictions[:,i]
    bal_acc=balanced_accuracy_score(y_true=true_perlabel,y_pred=prediction_perlabel)

    print('Label {} :::-> Balanced Accuracy {}'.format(labelname_user[i],round(bal_acc,5)))

# Iterate the report dict directly: `key` is the report entry name and `value`
# its metrics. (Wrapping .items() in enumerate() printed an integer index and a
# (name, metrics) tuple instead.)
for key,value in mlp_0H_clfreport_train.items():
    print(key,"\n")
    print("\t",value)

## Multi-Layer Perceptron (2 Hidden Layers, with Dropout)

In [None]:
class MLP_2HDrop(nn.Module):
    """Fully connected network with two LeakyReLU hidden layers and dropout.

    Each hidden stage is Linear -> LeakyReLU -> Dropout. Layer widths are read
    from the notebook-level `input_size`, `hidden_size` and `output_size` when
    the model is constructed. The output layer has no activation, so `forward`
    returns one raw score per output unit.
    """
    def __init__(self):
        super().__init__()
        leak=0.1 # Negative slope shared by both hidden activations
        drop_p=0.20 # Dropout probability used after each hidden activation
        # The attribute names and the Sequential wrappers determine the
        # state_dict keys (e.g. `hidden0.0.weight`), so they are kept stable.
        self.hidden0=nn.Sequential(nn.Linear(input_size,hidden_size),
                                   nn.LeakyReLU(negative_slope=leak),
                                   nn.Dropout(drop_p))
        self.hidden1=nn.Sequential(nn.Linear(hidden_size,hidden_size),
                                   nn.LeakyReLU(negative_slope=leak),
                                   nn.Dropout(drop_p))
        self.out=nn.Sequential(nn.Linear(hidden_size,output_size))

    def forward(self,x):
        """Pass `x` through hidden0, hidden1 and the output layer in turn."""
        for stage in (self.hidden0,self.hidden1,self.out):
            x=stage(x)
        return x
    
model=MLP_2HDrop()
C(model) # Train model with CUDA

optimizer=optim.SGD(model.parameters(),lr=lr_init,momentum=momentum)
# MultiLabelSoftMarginLoss is applied directly to the raw model outputs; with
# default reductions it is equivalent to BCEWithLogitsLoss. The outputs are
# deliberately NOT thresholded before the loss: a hard threshold has no useful
# gradient to train on.
criterion=C(nn.MultiLabelSoftMarginLoss())

report_every=300 # Print the running loss once every this many minibatches

for epoch in range(n_epoch):
    # Update the learning rate once, at the start of every epoch
    # (linear_lr_scheduler is defined elsewhere in the notebook).
    linear_lr_scheduler(optimizer,epoch)
    sum_total=0.
    batches_since_report=0
    for i,data in enumerate(train_loader,0):
        inputs,labels=data
        inputs=V(C(inputs)).float()
        labels=V(C(labels),requires_grad=False).type(torch.cuda.FloatTensor)

        optimizer.zero_grad() # Zero gradients

        output=model(inputs) # Raw per-label scores (the model has no output activation)

        loss=criterion(output.type(torch.cuda.FloatTensor),labels)
        loss.backward()
        optimizer.step()

        sum_total+=loss.item()
        batches_since_report+=1
        if i%report_every==0:
            epoch_lr=optimizer.param_groups[-1]['lr']
            # Average over the batches actually accumulated since the last
            # report (1 at i==0, report_every afterwards). Dividing by `bs`
            # did not give a mean loss.
            print("Epoch {}::Minibatch {}::LR {} --> Loss {}".format(epoch+1,i+1,epoch_lr,sum_total/batches_since_report))
            sum_total=0.
            batches_since_report=0
print('\n Finished training')

In [None]:
# Saving trained models
import os # Local import so this cell does not depend on `os` being imported elsewhere

root='saved_models/multilabel_classifier/'
model_path=root+'mlp_2hiddendrop'
checkpoint_path=root+'mlp_2hiddendrop_checkpoint'

# torch.save does not create missing parent directories; create them first so
# a finished training run is not lost to a missing-folder error.
os.makedirs(root,exist_ok=True)

# Saving the whole model. This pickles the module object, so loading it later
# requires the model class definition to be importable.
torch.save(model,model_path)

# Saving checkpoint model (state dicts only, plus bookkeeping values)
# NOTE(review): `sum_total` is whatever the training loop accumulated since it
# last reset the counter, so 'sumloss' is not an epoch average - confirm this
# is the intended value.
torch.save({'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss':loss.item(),
            'sumloss':sum_total/bs},checkpoint_path)

In [None]:
# Test dataset model performance

# Evaluate in inference mode. Any nn.Dropout layers in `model` keep randomly
# zeroing activations unless eval() is called, which would make the reported
# metrics noisy. The previous mode is remembered and restored afterwards.
was_training=model.training
model.eval()

# Start each list with an empty (0, output_size) tensor so that an empty loader
# still produces correctly shaped results; concatenate once after the loop.
prediction_batches=[torch.zeros(0,output_size)]
truelabel_batches=[torch.zeros(0,output_size)]

with torch.no_grad(): # No autograd graph is needed for evaluation
    for data in test_loader:
        inputs,labels=data
        inputs=V(C(inputs)).float()
        labels=V(C(labels)).type(torch.cuda.FloatTensor)

        outputs=model(inputs).cpu() # Move the outputs to the CPU for accumulation

        prediction_batches.append(outputs)
        truelabel_batches.append(labels.cpu())

model.train(was_training) # Restore the mode the model was in before this cell

# Concat test set into one tensor
concat_predictions=torch.cat(prediction_batches,0)
concat_truelabels=torch.cat(truelabel_batches,0)

concat_predictions=torch.sigmoid(concat_predictions) # Squash raw model outputs into (0, 1)
concat_predictions=concat_predictions>=0.50 # Binarize outputs using a 0.5 threshold

# Convert tensors to float64 numpy arrays. `np.float` was only an alias of the
# builtin float (float64) and was removed in NumPy 1.24.
concat_predictions=concat_predictions.numpy().astype(np.float64)
concat_truelabels=concat_truelabels.numpy().astype(np.float64)

# Precision, Recall, F-1, support
# NOTE(review): the variable name says "0H", but this cell evaluates whichever
# `model` is currently bound and overwrites any earlier report of that name.
mlp_0H_clfreport=classification_report(y_true=concat_truelabels,y_pred=concat_predictions,
                                       target_names=labelname_user,output_dict=True)
print('Test Set')
# Balanced accuracy is computed one label (column) at a time
for i in range(output_size):
    true_perlabel=concat_truelabels[:,i]
    prediction_perlabel=concat_predictions[:,i]
    bal_acc=balanced_accuracy_score(y_true=true_perlabel,y_pred=prediction_perlabel)

    print('Label {} :::-> Balanced Accuracy {}'.format(labelname_user[i],round(bal_acc,5)))

# Iterate the report dict directly: `key` is the report entry name and `value`
# its metrics. (Wrapping .items() in enumerate() printed an integer index and a
# (name, metrics) tuple instead.)
for key,value in mlp_0H_clfreport.items():
    print(key,"\n")
    print("\t",value)

In [None]:
# Train dataset model performance

# Evaluate in inference mode. Any nn.Dropout layers in `model` keep randomly
# zeroing activations unless eval() is called, which would make the reported
# metrics noisy. The previous mode is remembered and restored afterwards.
was_training=model.training
model.eval()

# Start each list with an empty (0, output_size) tensor so that an empty loader
# still produces correctly shaped results; concatenate once after the loop.
prediction_batches=[torch.zeros(0,output_size)]
truelabel_batches=[torch.zeros(0,output_size)]

with torch.no_grad(): # No autograd graph is needed for evaluation
    for data in train_loader:
        inputs,labels=data
        inputs=V(C(inputs)).float()
        labels=V(C(labels)).type(torch.cuda.FloatTensor)

        outputs=model(inputs).cpu() # Move the outputs to the CPU for accumulation

        prediction_batches.append(outputs)
        truelabel_batches.append(labels.cpu())

model.train(was_training) # Restore the mode the model was in before this cell

# Concat train set into one tensor
concat_predictions=torch.cat(prediction_batches,0)
concat_truelabels=torch.cat(truelabel_batches,0)

concat_predictions=torch.sigmoid(concat_predictions) # Squash raw model outputs into (0, 1)
concat_predictions=concat_predictions>=0.50 # Binarize outputs using a 0.5 threshold

# Convert tensors to float64 numpy arrays. `np.float` was only an alias of the
# builtin float (float64) and was removed in NumPy 1.24.
concat_predictions=concat_predictions.numpy().astype(np.float64)
concat_truelabels=concat_truelabels.numpy().astype(np.float64)

# Precision, Recall, F-1, support
# NOTE(review): the variable name says "0H", but this cell evaluates whichever
# `model` is currently bound and overwrites any earlier report of that name.
mlp_0H_clfreport_train=classification_report(y_true=concat_truelabels,y_pred=concat_predictions,
                                       target_names=labelname_user,output_dict=True)
print('Train Set')
# Balanced accuracy is computed one label (column) at a time
for i in range(output_size):
    true_perlabel=concat_truelabels[:,i]
    prediction_perlabel=concat_predictions[:,i]
    bal_acc=balanced_accuracy_score(y_true=true_perlabel,y_pred=prediction_perlabel)

    print('Label {} :::-> Balanced Accuracy {}'.format(labelname_user[i],round(bal_acc,5)))

# Iterate the report dict directly: `key` is the report entry name and `value`
# its metrics. (Wrapping .items() in enumerate() printed an integer index and a
# (name, metrics) tuple instead.)
for key,value in mlp_0H_clfreport_train.items():
    print(key,"\n")
    print("\t",value)