<a href="https://colab.research.google.com/github/mickvanhulst/long-short-term-dependencies-recommender-systems/blob/master/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Instructions

This notebook contains the code for the work performed by J.M. van Hulst for his thesis. Using this notebook is fairly easy: one only has to set the two variables below. `HOME_FOLDER` is the **Google Drive** folder where the experiments should be stored (**NOTE**: make sure that the path does not end with a slash), and `LASTFM_STORAGE_FOLDER` is the path of the **file** in which the LastFM dataset is stored (see the example next to the variable). The LastFM dataset is freely available [here](https://www.dtic.upf.edu/~ocelma/MusicRecommendationDataset/lastfm-1K.html). We encourage users to run this notebook on Google Colab as the models are computationally expensive.

To obtain results for this work, one only has to run all the cells, which can be done by pressing CTRL+F9 (or click on Runtime above). After which, you may freely grab a cup of coffee and enjoy the ride.

The code for the DNC is largely based on the work of Csordas et al.; the original code can be found [here](https://github.com/xdever/dnc).

In [0]:
# Folder in which the experiment folders (exp1..exp4) are created; sub-paths
# are appended as HOME_FOLDER + '/expN/', so do not end it with a slash.
HOME_FOLDER = '' # Example: /content/gdrive/My Drive/projects/rec_system
# Path of the LastFM TSV file; it is passed directly to pd.read_csv.
LASTFM_STORAGE_FOLDER = '' # Example: '/content/gdrive/My Drive/raw_data/lastfm_1k/main.tsv'

# Packages & Static Variables

In [0]:
# NOTE(review): presumably the maximum number of items per input sequence —
# confirm against the preprocessing code.
MAX_LENGTH_SEQUENCE = 20
# NOTE(review): presumably a minimum-count threshold used when filtering the
# dataset — confirm against the preprocessing code.
MIN_THRESH = 20

from google.colab import drive
from IPython.display import clear_output
import os, sys

# Mount Google Drive at /content/gdrive/ (Colab only).
drive.mount('/content/gdrive/')

# Clear the cell output produced so far (e.g. by the mount step).
clear_output()

In [0]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import time
import sys
import signal
import math
import numpy as np
import pandas as pd
import csv
import copy
import functools
import os

import torch
from torch.nn import Module, Parameter
import torch.nn.functional as F
from torch.autograd import Variable
import torch.utils.data
import torch.nn.functional as F
import torch.nn.init as init

# Base seed. Only NumPy's global RNG is seeded here.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
# Ten consecutive seeds starting at RANDOM_SEED (42..51).
RANDOM_SEEDS = list(range(RANDOM_SEED, RANDOM_SEED + 10))

In [0]:
# Create the output layout HOME_FOLDER/expN/models for N = 1..4.
# os.makedirs(..., exist_ok=True) also creates missing parent folders and
# restores a missing "models" sub-folder when the experiment folder already
# exists; the previous isdir/mkdir combination skipped both cases.
os.makedirs(HOME_FOLDER, exist_ok=True)

for exp in ['/exp1/', '/exp2/', '/exp3/', '/exp4/']:
    os.makedirs(HOME_FOLDER + exp + 'models', exist_ok=True)


# Methods

## DNC

### Remainder

In [0]:
# Copyright 2017 Robert Csordas. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================


def oneplus(t):
    """Return ``1 + softplus(t)`` element-wise.

    Softplus is evaluated with ``beta=1`` and ``threshold=20``.
    """
    return 1.0 + F.softplus(t, beta=1, threshold=20)

def get_next_tensor_part(src, dims, prev_pos=0):
    """Cut the next chunk out of the last dimension of ``src``.

    Args:
        src: tensor to slice along its last dimension.
        dims: an int or a list of ints; the chunk holds ``prod(dims)``
            elements of the last dimension.
        prev_pos: offset in the last dimension at which the chunk starts.

    Returns:
        ``(chunk, next_pos)``. When ``dims`` has more than one entry the
        chunk's last dimension is reshaped to ``dims``; otherwise the narrowed
        view is returned as is. ``next_pos`` is the offset right after the
        chunk.
    """
    shape = dims if isinstance(dims, list) else [dims]
    count = functools.reduce(lambda acc, d: acc * d, shape)
    chunk = src.narrow(-1, prev_pos, count)
    next_pos = prev_pos + count

    if len(shape) > 1:
        leading = list(chunk.size())[:-1]
        chunk = chunk.contiguous().view(leading + shape)
    return chunk, next_pos

def split_tensor(src, shapes):
    """Split the last dimension of ``src`` into consecutive chunks.

    Each entry of ``shapes`` is handed to ``get_next_tensor_part``; the chunks
    are taken back to back, starting at offset 0, and returned as a list in
    the same order as ``shapes``.
    """
    parts = []
    offset = 0
    for shape in shapes:
        piece, offset = get_next_tensor_part(src, shape, offset)
        parts.append(piece)
    return parts

def dict_get(dict, name):
    """Return ``dict.get(name)``, or ``None`` when ``dict`` itself is ``None``."""
    if dict is None:
        return None
    return dict.get(name)


def dict_append(dict, name, val):
    """Append ``val`` to the list stored under ``dict[name]``.

    Does nothing when ``dict`` is ``None``. A missing (or falsy) entry is
    replaced by a fresh list before appending.
    """
    if dict is None:
        return

    bucket = dict.get(name)
    if not bucket:
        bucket = []
        dict[name] = bucket
    bucket.append(val)


def init_debug(debug, initial):
    """Fill an empty debug dict with ``initial``.

    Nothing happens when ``debug`` is ``None`` or already contains entries.
    """
    if debug is None or debug:
        return
    debug.update(initial)

def merge_debug_tensors(d, dim):
    """Stack every list found in the (possibly nested) dict ``d`` in place.

    Lists are replaced by ``torch.stack(list, dim)``; nested dicts are
    processed recursively; all other values are left untouched. ``None`` is
    accepted and ignored.
    """
    if d is None:
        return

    for key in list(d):
        entry = d[key]
        if isinstance(entry, dict):
            merge_debug_tensors(entry, dim)
        elif isinstance(entry, list):
            d[key] = torch.stack(entry, dim)


def linear_reset(module, gain=1.0):
    """Re-initialise a ``torch.nn.Linear`` layer in place.

    The weight is drawn with Xavier-uniform initialisation using ``gain``;
    the bias, when present, is set to zero.

    Args:
        module: the ``torch.nn.Linear`` layer to reset.
        gain: scaling factor forwarded to ``xavier_uniform_``.
    """
    assert isinstance(module, torch.nn.Linear)
    init.xavier_uniform_(module.weight, gain=gain)
    if module.bias is not None:
        module.bias.data.zero_()

# Small constant: added to denominators and distributions, and used to keep
# memory usages strictly positive before the cumulative product.
_EPS = 1e-6

class AllocationManager(torch.nn.Module):
    """Tracks per-cell memory usage and derives the allocation distribution.

    State (``usages``) is kept between calls and must be reset with
    ``new_sequence()`` before a new sequence is processed.
    """

    def __init__(self):
        super(AllocationManager, self).__init__()
        self.usages = None
        self.zero_usages = None
        # When True, a tiny increasing ramp is added to the initial usages.
        self.debug_sequ_init = False
        self.one = None

    def _init_sequence(self, prev_read_distributions):
        """Set ``usages`` to the (cached) all-zero initial usage tensor."""
        # prev_read_distributions size is [batch, n_heads, cell count]
        s = prev_read_distributions.size()
        device = prev_read_distributions.device

        # Rebuild the cached tensor when the shape OR the device changed.
        stale = (self.zero_usages is None
                 or list(self.zero_usages.size()) != [s[0], s[-1]]
                 or self.zero_usages.device != device)
        if stale:
            self.zero_usages = torch.zeros(s[0], s[-1], device=device)
            if self.debug_sequ_init:
                # Build the ramp on the same device/dtype as the usages so the
                # in-place add neither fails on GPU nor truncates to integers.
                ramp = torch.arange(0, s[-1], device=device, dtype=self.zero_usages.dtype)
                self.zero_usages += ramp.unsqueeze(0) * 1e-10

        # Safe to alias: ``usages`` is always re-assigned, never edited in place.
        self.usages = self.zero_usages

    def _init_consts(self, device):
        """Create the cached constant ``1`` tensor on ``device`` if needed."""
        if self.one is None or self.one.device != device:
            self.one = torch.ones(1, device=device)

    def new_sequence(self):
        """Forget the usages of the previous sequence."""
        self.usages = None

    def update_usages(self, prev_write_distribution, prev_read_distributions, free_gates):
        """Update ``self.usages`` and return the retention tensor ``phi``.

        Args:
            prev_write_distribution: [batch, cell count]; unused on the first
                step of a sequence.
            prev_read_distributions: [batch, n_heads, cell count].
            free_gates: [batch, n_heads].

        Returns:
            ``phi`` of shape [batch, cell count]:
            ``prod_over_heads(1 - free_gate * read_distribution)``.
        """
        self._init_consts(prev_read_distributions.device)
        # ``value`` is passed by keyword: the positional-scalar overload of
        # torch.addcmul is deprecated.
        phi = torch.addcmul(self.one, free_gates.unsqueeze(-1), prev_read_distributions,
                            value=-1).prod(-2)
        # Phi is the free tensor, sized [batch, cell count]

        # If the memory usage counter doesn't exist yet
        if self.usages is None:
            self._init_sequence(prev_read_distributions)
            # in first timestep nothing is written or read yet, so we don't need any further processing
        else:
            # usages <- (usages + write * (1 - usages)) * phi; the write
            # distribution is detached, so no gradient flows through it here.
            self.usages = torch.addcmul(self.usages, prev_write_distribution.detach(),
                                        (1 - self.usages), value=1) * phi

        return phi

    def forward(self, prev_write_distribution, prev_read_distributions, free_gates):
        """Return ``(allocation_distribution, phi)``.

        The allocation score of a cell is ``(1 - usage)`` times the product of
        the usages of all cells that are less used than it; scores are
        computed in sorted order and scattered back to the original cell
        order.
        """
        phi = self.update_usages(prev_write_distribution, prev_read_distributions, free_gates)
        # Squash usages into [_EPS, 1] so the cumulative product never hits 0.
        sorted_usage, free_list = (self.usages*(1.0-_EPS)+_EPS).sort(-1)

        u_prod = sorted_usage.cumprod(-1)
        one_minus_usage = 1.0 - sorted_usage
        sorted_scores = torch.cat([one_minus_usage[..., 0:1], one_minus_usage[..., 1:] * u_prod[..., :-1]], dim=-1)

        return sorted_scores.clone().scatter_(-1, free_list, sorted_scores), phi


class ContentAddressGenerator(torch.nn.Module):
    """Content-based addressing: softmax over scaled key/memory similarity."""

    def __init__(self, disable_content_norm=False, mask_min=0.0, disable_key_masking=False):
        super(ContentAddressGenerator, self).__init__()
        self.disable_content_norm = disable_content_norm
        self.mask_min = mask_min
        self.disable_key_masking = disable_key_masking

    def forward(self, memory, keys, betas, mask=None):
        """Compute the content-based address distribution.

        Args:
            memory: [batch, cell count, word length].
            keys: [batch, n heads, word length], or [batch, word length] for a
                single head.
            betas: [batch, n heads]; multiplies the similarity scores.
            mask: optional tensor applied element-wise to the memory and,
                unless ``disable_key_masking`` is set, to the keys.

        Returns:
            Softmax over the cells; the head dimension is squeezed away again
            when ``keys`` was given without one.
        """
        has_mask = mask is not None
        if has_mask and self.mask_min != 0:
            # Rescale the mask from [0, 1] into [mask_min, 1].
            mask = mask * (1.0 - self.mask_min) + self.mask_min

        single_head = keys.dim() == 2
        if single_head:
            # Insert the missing head dimension.
            keys = keys.unsqueeze(1)
            if has_mask:
                mask = mask.unsqueeze(1)

        # Make memory and keys broadcastable as [batch, heads, cells, word].
        memory = memory.unsqueeze(1)
        keys = keys.unsqueeze(-2)

        if has_mask:
            mask = mask.unsqueeze(-2)
            memory = memory * mask
            if not self.disable_key_masking:
                keys = keys * mask

        # Denominator of the similarity, shape [batch, n heads, cell count]
        denom = keys.norm(dim=-1)
        if not self.disable_content_norm:
            denom = denom * memory.norm(dim=-1)

        scores = (memory * keys).sum(-1) / (denom + _EPS)
        scores *= betas.unsqueeze(-1)

        weights = F.softmax(scores, scores.dim() - 1)
        if single_head:
            return weights.squeeze(1)
        return weights


class WriteHead(torch.nn.Module):
    """Single write head: mixes allocation- and content-based addressing and
    applies the resulting erase/write update to the memory."""

    @staticmethod
    def create_write_archive(write_dist, erase_vector, write_vector, phi):
        """Bundle the quantities of one write step into a dict."""
        return {
            "write_dist": write_dist,
            "erase_vector": erase_vector,
            "write_vector": write_vector,
            "phi": phi,
        }

    def __init__(self, dealloc_content=True, disable_content_norm=False, mask_min=0.0, disable_key_masking=False):
        super(WriteHead, self).__init__()
        self.write_content_generator = ContentAddressGenerator(
            disable_content_norm, mask_min=mask_min, disable_key_masking=disable_key_masking)
        self.allocation_manager = AllocationManager()
        self.last_write = None
        self.dealloc_content = dealloc_content
        self.new_sequence()

    def new_sequence(self):
        """Drop the archived write and reset the allocation manager."""
        self.last_write = None
        self.allocation_manager.new_sequence()

    @staticmethod
    def mem_update(memory, write_dist, erase_vector, write_vector, phi):
        """Return ``memory * erase_matrix + write_dist * write_vector``.

        In the original paper the memory content is NOT deallocated, which
        makes content based addressing basically unusable when multiple
        similar steps should be done: the old contents are still there, so a
        lookup will find them unless an allocation happens to clear them
        first. Therefore the erase matrix is additionally multiplied with
        ``phi`` (the free-gate based retention) when ``phi`` is given.
        """
        spread = write_dist.unsqueeze(-1)

        keep = 1.0 - spread * erase_vector.unsqueeze(-2)
        if phi is not None:
            keep = keep * phi.unsqueeze(-1)

        return memory * keep + spread * write_vector.unsqueeze(-2)

    def forward(self, memory, write_content_key, write_beta, erase_vector, write_vector, alloc_gate, write_gate,
                free_gates, prev_read_dist, write_mask=None, debug=None):
        """Perform one write step and return the updated memory.

        The write distribution (shape [batch, cell count]) is
        ``write_gate * (alloc_gate * alloc_dist + (1 - alloc_gate) * content_dist)``.
        The step is archived in ``self.last_write``; ``phi`` is only archived
        (and thus only used for erasing) when ``dealloc_content`` is set.
        """
        previous = self.last_write
        last_w_dist = None if previous is None else previous["write_dist"]

        content_dist = self.write_content_generator(memory, write_content_key, write_beta, mask=write_mask)
        alloc_dist, phi = self.allocation_manager(last_w_dist, prev_read_dist, free_gates)

        # Shape [batch, cell count]
        write_dist = write_gate * (alloc_gate * alloc_dist + (1-alloc_gate)*content_dist)
        archived_phi = phi if self.dealloc_content else None
        self.last_write = WriteHead.create_write_archive(write_dist, erase_vector, write_vector, archived_phi)

        debug_entries = (
            ("alloc_dist", alloc_dist),
            ("write_dist", write_dist),
            ("mem_usages", self.allocation_manager.usages),
            ("free_gates", free_gates),
            ("write_betas", write_beta),
            ("write_gate", write_gate),
            ("write_vector", write_vector),
            ("alloc_gate", alloc_gate),
            ("erase_vector", erase_vector),
        )
        for label, tensor in debug_entries:
            dict_append(debug, label, tensor)
        if write_mask is not None:
            dict_append(debug, "write_mask", write_mask)

        return WriteHead.mem_update(memory, write_dist, erase_vector, write_vector, archived_phi)

class RawWriteHead(torch.nn.Module):
    """Wraps ``WriteHead``: slices the raw control vector into the individual
    write parameters and applies their activation functions."""

    def __init__(self, n_read_heads, word_length, use_mask=False, dealloc_content=True, disable_content_norm=False,
                 mask_min=0.0, disable_key_masking=False):
        super(RawWriteHead, self).__init__()
        self.write_head = WriteHead(dealloc_content=dealloc_content, disable_content_norm=disable_content_norm,
                                    mask_min=mask_min, disable_key_masking=disable_key_masking)
        self.word_length = word_length
        self.n_read_heads = n_read_heads
        self.use_mask = use_mask

        # key + erase vector + write vector (+ mask), one free gate per read
        # head, and three scalars (beta, allocation gate, write gate).
        mask_channels = self.word_length if use_mask else 0
        self.input_size = 3*self.word_length + self.n_read_heads + 3 + mask_channels

    def new_sequence(self):
        """Reset the wrapped write head."""
        self.write_head.new_sequence()

    def get_prev_write(self):
        """Return the archive of the most recent write (``None`` if none yet)."""
        return self.write_head.last_write

    def forward(self, memory, nn_output, prev_read_dist, debug):
        """Decode ``nn_output`` and run one write step; returns the new memory.

        Layout of ``nn_output`` along the last dimension: [mask (only with
        ``use_mask``)], content key, erase vector, write vector, free gates,
        beta, allocation gate, write gate.
        """
        n_word_chunks = 4 if self.use_mask else 3
        shapes = [[self.word_length]] * n_word_chunks + [[self.n_read_heads]] + [[1]] * 3
        parts = split_tensor(nn_output, shapes)

        write_mask = None
        if self.use_mask:
            write_mask = torch.sigmoid(parts[0])
            parts = parts[1:]

        key, erase, write_vec, free, beta, alloc, gate = parts

        # Gates and the erase vector live in (0, 1); beta goes through oneplus.
        erase = torch.sigmoid(erase)
        free = torch.sigmoid(free)
        beta = oneplus(beta)
        alloc = torch.sigmoid(alloc)
        gate = torch.sigmoid(gate)

        return self.write_head(memory, key, beta, erase, write_vec,
                               alloc, gate, free, prev_read_dist, debug=debug, write_mask=write_mask)

    def get_neural_input_size(self):
        """Number of control channels this head consumes."""
        return self.input_size


class ReadHead(torch.nn.Module):
    """Read heads: blend backward, content-based and forward addressing and
    read the weighted memory content.

    The last read distribution and read data are kept in ``read_dist`` and
    ``read_data`` until ``new_sequence()`` is called.
    """

    def __init__(self, disable_content_norm=False, mask_min=0.0, disable_key_masking=False):
        super(ReadHead, self).__init__()
        self.content_addr_generator = ContentAddressGenerator(disable_content_norm=disable_content_norm,
                                                              mask_min=mask_min,
                                                              disable_key_masking=disable_key_masking)
        self.read_dist = None
        self.read_data = None
        self.new_sequence()

    def new_sequence(self):
        """Forget the read distribution and data of the previous sequence."""
        self.read_dist = None
        self.read_data = None

    def forward(self, memory, read_content_keys, read_betas, forward_dist, backward_dist, gates, read_mask=None, debug=None):
        """Perform one read step.

        Args:
            memory: [batch, cell count, word_length].
            read_content_keys, read_betas, read_mask: forwarded to the
                content address generator.
            forward_dist, backward_dist: temporal-link based distributions.
            gates: last dimension holds the three mixing weights in the order
                (backward, content, forward).
            debug: optional dict that receives intermediate tensors.

        Returns:
            The read data, shape [batch, n_heads, word_length].
        """
        content_dist = self.content_addr_generator(memory, read_content_keys, read_betas, mask=read_mask)

        self.read_dist = backward_dist * gates[..., 0:1] + content_dist * gates[...,1:2] + forward_dist * gates[..., 2:]

        # memory shape: [ batch, cell count, word_length ]
        # read_dist shape: [ batch, n heads, cell count ]
        # result shape: [ batch, n_heads, word_length ]
        self.read_data = (memory.unsqueeze(1) * self.read_dist.unsqueeze(-1)).sum(-2)

        dict_append(debug, "content_dist", content_dist)
        dict_append(debug, "balance", gates)
        dict_append(debug, "read_dist", self.read_dist)
        dict_append(debug, "read_content_keys", read_content_keys)
        # The mask is recorded exactly once per step so that its debug list
        # has the same length as all the other per-step entries.
        if read_mask is not None:
            dict_append(debug, "read_mask", read_mask)
        dict_append(debug, "read_betas", read_betas.unsqueeze(-2))

        return self.read_data


class RawReadHead(torch.nn.Module):
    """Wraps ``ReadHead``: slices the raw control vector into keys, betas and
    mixing gates and applies their activation functions."""

    def __init__(self, n_heads, word_length, use_mask=False, disable_content_norm=False, mask_min=0.0,
                 disable_key_masking=False):
        super(RawReadHead, self).__init__()
        self.read_head = ReadHead(disable_content_norm=disable_content_norm, mask_min=mask_min,
                                  disable_key_masking=disable_key_masking)
        self.n_heads = n_heads
        self.word_length = word_length
        self.use_mask = use_mask

        # Per head: key (+ mask), three mixing gates and one beta.
        words_per_head = 2 if use_mask else 1
        self.input_size = self.n_heads * (self.word_length*words_per_head + 3 + 1)

    def get_prev_dist(self, memory):
        """Last read distribution, or zeros [batch, n_heads, cell count]
        matching ``memory``'s dtype and device when nothing was read yet."""
        previous = self.read_head.read_dist
        if previous is not None:
            return previous
        batch, cells = memory.size()[0], memory.size()[1]
        return memory.new_zeros(batch, self.n_heads, cells)

    def get_prev_data(self, memory):
        """Last read data, or zeros [batch, n_heads, word length] matching
        ``memory``'s dtype and device when nothing was read yet."""
        previous = self.read_head.read_data
        if previous is not None:
            return previous
        dims = memory.size()
        return memory.new_zeros(dims[0], self.n_heads, dims[-1])

    def new_sequence(self):
        """Reset the wrapped read head."""
        self.read_head.new_sequence()

    def forward(self, memory, nn_output, forward_dist, backward_dist, debug):
        """Decode ``nn_output`` and run one read step; returns the read data.

        Layout of ``nn_output`` along the last dimension: [mask (only with
        ``use_mask``)], keys, betas, gates (three per head).
        """
        n_word_chunks = 2 if self.use_mask else 1
        shapes = [[self.n_heads, self.word_length]] * n_word_chunks + [[self.n_heads], [self.n_heads, 3]]
        parts = split_tensor(nn_output, shapes)

        read_mask = None
        if self.use_mask:
            read_mask = torch.sigmoid(parts[0])
            parts = parts[1:]

        keys, betas, gates = parts

        betas = oneplus(betas)
        # The three gates of each head form a distribution.
        gates = F.softmax(gates, gates.dim()-1)

        return self.read_head(memory, keys, betas, forward_dist, backward_dist, gates,
                              debug=debug, read_mask=read_mask)

    def get_neural_input_size(self):
        """Number of control channels this head consumes."""
        return self.input_size

class DistSharpnessEnhancer(torch.nn.Module):
    """Sharpens distributions by raising them to a learned power >= 1.

    ``n_heads`` is an int or a list of ints: one entry per distribution that
    will be passed to ``forward``, giving the number of heads (and therefore
    the number of exponents) of that distribution.
    """

    def __init__(self, n_heads):
        super(DistSharpnessEnhancer, self).__init__()
        self.n_heads = n_heads if isinstance(n_heads, list) else [n_heads]
        self.n_data = sum(self.n_heads)

    def forward(self, nn_input, *dists):
        """Sharpen each distribution in ``dists``.

        Args:
            nn_input: raw control values; the first ``n_data`` entries of the
                last dimension are turned into exponents with ``oneplus``.
            *dists: one 2-D (single head) or 3-D ([batch, heads, cells])
                tensor per entry of ``n_heads``.

        Returns:
            A list with the sharpened, re-normalised distributions. The input
            tensors are left unmodified.
        """
        assert len(dists) == len(self.n_heads)
        nn_input = oneplus(nn_input[..., :self.n_data])
        factors = split_tensor(nn_input, self.n_heads)

        res = []
        for i, d in enumerate(dists):
            ndim = d.dim()
            f = factors[i]
            if ndim == 2:
                assert self.n_heads[i] == 1
            elif ndim == 3:
                # One exponent per head, broadcast over the cells.
                f = f.unsqueeze(-1)
            else:
                # Raised explicitly so the check survives ``python -O``.
                raise AssertionError("distributions must be 2-D or 3-D, got %d-D" % ndim)

            # Out-of-place add: an in-place ``+=`` would silently modify the
            # caller's tensor (e.g. the copy stored in a debug dict).
            d = d + _EPS
            # Normalise by the maximum first so that pow() stays well-scaled.
            d = d / d.max(dim=-1, keepdim=True)[0]
            d = d.pow(f)
            d = d / d.sum(dim=-1, keepdim=True)
            res.append(d)
        return res

    def get_neural_input_size(self):
        """Number of control channels consumed (one exponent per head)."""
        return self.n_data

### Temporal Linkage Matrix

In [0]:
class TemporalMemoryLinkage(torch.nn.Module):
    """Maintains the temporal link matrix and precedence weighting and turns
    previous read distributions into forward/backward distributions.

    State is kept between calls; call ``new_sequence()`` before processing a
    new sequence.
    """

    def __init__(self):
        super(TemporalMemoryLinkage, self).__init__()
        self.temp_link_mat = None
        self.precedence_weighting = None
        self.diag_mask = None

        # Cached initial tensors, reused while shape/device/dtype stay equal.
        self.initial_temp_link_mat = None
        self.initial_precedence_weighting = None
        self.initial_diag_mask = None
        self.initial_shape = None

    def new_sequence(self):
        """Drop the per-sequence state (the cached initial tensors are kept)."""
        self.temp_link_mat = None
        self.precedence_weighting = None
        self.diag_mask = None

    def _init_link(self, w_dist):
        """Start a sequence from all-zero links and precedence weights."""
        s = list(w_dist.size())
        stale = (self.initial_shape is None
                 or s != self.initial_shape
                 or self.initial_temp_link_mat.device != w_dist.device
                 or self.initial_diag_mask.dtype != w_dist.dtype)
        if stale:
            self.initial_temp_link_mat = torch.zeros(s[0], s[-1], s[-1]).to(w_dist.device)
            self.initial_precedence_weighting = torch.zeros(s[0], s[-1]).to(w_dist.device)
            self.initial_diag_mask = (1.0 - torch.eye(s[-1]).unsqueeze(0).to(w_dist)).detach()
            # Remember what the cache was built for; without this the cache
            # was rebuilt on every sequence.
            self.initial_shape = s

        # Safe to alias: the state tensors are always re-assigned by the
        # update methods, never modified in place.
        self.temp_link_mat = self.initial_temp_link_mat
        self.precedence_weighting = self.initial_precedence_weighting
        self.diag_mask = self.initial_diag_mask

    def _update_precedence(self, w_dist):
        """p <- (1 - sum(w)) * p + w."""
        # w_dist shape: [ batch, cell count ]
        self.precedence_weighting = (1.0 - w_dist.sum(-1, keepdim=True)) * self.precedence_weighting + w_dist

    def _update_links(self, w_dist):
        """L[i, j] <- (1 - w[i] - w[j]) * L[i, j] + w[i] * p[j], zero diagonal."""
        if self.temp_link_mat is None:
            self._init_link(w_dist)

        wt_i = w_dist.unsqueeze(-1)
        wt_j = w_dist.unsqueeze(-2)
        pt_j = self.precedence_weighting.unsqueeze(-2)

        self.temp_link_mat = ((1 - wt_i - wt_j) * self.temp_link_mat + wt_i * pt_j) * self.diag_mask

    def forward(self, w_dist, prev_r_dists, debug = None):
        """Update the links with ``w_dist`` and follow them from ``prev_r_dists``.

        Args:
            w_dist: write distribution, [batch, cell count].
            prev_r_dists: previous read distributions, [batch, n_heads, cell count].
            debug: optional dict that receives intermediate tensors.

        Returns:
            ``(forward_dist, backward_dist)``, each [batch, n_heads, cell_count].
        """
        # Links must be updated with the OLD precedence weights, so the order
        # of these two calls matters.
        self._update_links(w_dist)
        self._update_precedence(w_dist)

        # Emulate matrix-vector multiplication by broadcast and sum. This way we don't need to transpose the matrix
        tlm_multi_head = self.temp_link_mat.unsqueeze(1)

        forward_dist = (tlm_multi_head * prev_r_dists.unsqueeze(-2)).sum(-1)
        backward_dist = (tlm_multi_head * prev_r_dists.unsqueeze(-1)).sum(-2)

        dict_append(debug, "forward_dists", forward_dist)
        dict_append(debug, "backward_dists", backward_dist)
        dict_append(debug, "precedence_weights", self.precedence_weighting)

        # output shapes [ batch, n_heads, cell_count ]
        return forward_dist, backward_dist

### DNC

In [0]:
class DNC(torch.nn.Module):
    """Differentiable Neural Computer with a final classification layer.

    A controller is run once per time step on the current input concatenated
    with the previously read memory words. Its output drives one write head,
    ``n_read_heads`` read heads and (optionally) the link sharpness control.
    The per-step output is ``controller_to_out(controller) + read_to_out(read)``
    and ``last_layer`` maps it (or, with ``return_sequences``, the
    concatenation of all steps) to ``output_classes`` values.

    Args:
        input_size: number of input channels per time step.
        output_size: size of the per-step output.
        word_length: width of one memory cell.
        cell_count: number of memory cells.
        n_read_heads: number of read heads.
        controller: object providing ``init(in_size)``, ``get_output_size()``,
            ``reset_parameters()``, ``new_sequence()`` and a call operator.
        batch_first: only used to pick the batch dimension when the memory is
            initialised (see the review note in ``forward``).
        clip_controller: control values are clamped to
            ``[-clip_controller, clip_controller]``; ``None`` disables it.
        bias: whether the linear layers created here use a bias.
        mask, dealloc_content, disable_content_norm, mask_min,
            disable_key_masking: forwarded to the read/write heads.
        link_sharpness_control: enable ``DistSharpnessEnhancer`` on the
            forward/backward distributions.
        output_classes: size of the final output.
        use_temp_link_gradient: stored only; not used inside this class.
        seq_len: sequence length assumed by ``last_layer`` when
            ``return_sequences`` is set.
        return_sequences: feed the outputs of all time steps (concatenated)
            to ``last_layer`` instead of only the last one.
    """

    def __init__(self, input_size, output_size, word_length, cell_count, n_read_heads, controller, batch_first=False, clip_controller=20,
                 bias=True, mask=False, dealloc_content=True, link_sharpness_control=True, disable_content_norm=False,
                 mask_min=0.0, disable_key_masking=False, output_classes=43, use_temp_link_gradient=False, seq_len=20,
                return_sequences=False):
        super(DNC, self).__init__()

        self.clip_controller = clip_controller

        self.read_head = RawReadHead(n_read_heads, word_length, use_mask=mask, disable_content_norm=disable_content_norm,
                                     mask_min=mask_min, disable_key_masking=disable_key_masking)
        self.write_head = RawWriteHead(n_read_heads, word_length, use_mask=mask, dealloc_content=dealloc_content,
                                       disable_content_norm=disable_content_norm, mask_min=mask_min,
                                       disable_key_masking=disable_key_masking)
        self.temporal_link = TemporalMemoryLinkage()
        #NOTE: added myself
        self.use_temp_link_gradient = use_temp_link_gradient
        self.sharpness_control = DistSharpnessEnhancer([n_read_heads, n_read_heads]) if link_sharpness_control else None

        # The controller sees the input plus everything read in the last step.
        in_size = input_size + n_read_heads * word_length
        control_channels = self.read_head.get_neural_input_size() + self.write_head.get_neural_input_size() +\
                           (self.sharpness_control.get_neural_input_size() if self.sharpness_control is not None else 0)

        self.controller = controller
        controller.init(in_size)

        #NOTE: Original paper has shared biases, this does not.
        self.controller_to_controls = torch.nn.Linear(controller.get_output_size(), control_channels, bias=bias)
        self.controller_to_out = torch.nn.Linear(controller.get_output_size(), output_size, bias=bias)
        self.read_to_out = torch.nn.Linear(word_length * n_read_heads, output_size, bias=bias)

        #NOTE: Added last FCN. Its input is either the concatenation of all
        # seq_len step outputs (return_sequences) or the last step output.
        self.return_sequences = return_sequences
        if self.return_sequences:
            self.last_layer = torch.nn.Linear(output_size * seq_len, output_classes, bias=bias)
        else:
            self.last_layer = torch.nn.Linear(output_size, output_classes, bias=bias)

        self.cell_count = cell_count
        self.word_length = word_length

        self.memory = None
        self.reset_parameters()

        self.batch_first = batch_first
        self.zero_mem_tensor = None

    def reset_parameters(self):
        """Re-initialise all linear layers created here and the controller."""
        linear_reset(self.controller_to_controls)
        linear_reset(self.controller_to_out)
        linear_reset(self.read_to_out)
        linear_reset(self.last_layer)
        self.controller.reset_parameters()

    def _step(self, in_data, user_data=None, debug=None):
        """Process one time step and return its output [batch, output_size].

        ``in_data`` is [batch, channels]. When ``user_data`` is a tensor it is
        passed (concatenated with the previous read data) as a second
        argument to the controller.
        """
        init_debug(debug, {
            "read_head": {},
            "write_head": {},
            "temporal_links": {}
        })

        # input shape: [ batch, channels ]
        batch_size = in_data.size(0)

        # run the controller
        prev_read_data = self.read_head.get_prev_data(self.memory).view([batch_size, -1])
        if isinstance(user_data, torch.Tensor):
            control_data = self.controller(torch.cat([in_data, prev_read_data], -1), torch.cat([user_data, prev_read_data], -1))
        else:
            control_data = self.controller(torch.cat([in_data, prev_read_data], -1))

        # memory ops NOTE: this layer also takes care of the fact that the controller output is now the same shape as the word length.
        controls = self.controller_to_controls(control_data).contiguous()
        controls = controls.clamp(-self.clip_controller, self.clip_controller) if self.clip_controller is not None else controls

        # Channel order inside ``controls``: write head, read head, sharpness.
        shapes = [[self.write_head.get_neural_input_size()], [self.read_head.get_neural_input_size()]]

        if self.sharpness_control is not None:
            shapes.append(self.sharpness_control.get_neural_input_size())

        tensors = split_tensor(controls, shapes)

        write_head_control, read_head_control = tensors[:2]
        tensors = tensors[2:]

        prev_read_dist = self.read_head.get_prev_dist(self.memory)

        # Write first, then update the temporal links, then read.
        self.memory = self.write_head(self.memory, write_head_control, prev_read_dist, debug=dict_get(debug,"write_head"))

        prev_write = self.write_head.get_prev_write()
        forward_dist, backward_dist = self.temporal_link(prev_write["write_dist"] if prev_write is not None else None, prev_read_dist, debug=dict_get(debug, "temporal_links"))

        if self.sharpness_control is not None:
            forward_dist, backward_dist = self.sharpness_control(tensors[0], forward_dist, backward_dist)

        read_data = self.read_head(self.memory, read_head_control, forward_dist, backward_dist, debug=dict_get(debug,"read_head"))

        return self.controller_to_out(control_data) + self.read_to_out(read_data.view(batch_size,-1))

    def _mem_init(self, batch_size, device):
        """Point ``self.memory`` at a (cached) all-zero memory tensor."""
        if self.zero_mem_tensor is None or self.zero_mem_tensor.size(0)!=batch_size:
            self.zero_mem_tensor = torch.zeros(batch_size, self.cell_count, self.word_length).to(device)

        self.memory = self.zero_mem_tensor

    def forward(self, in_data, user_data=None, debug=None):
        """Run the DNC over a whole sequence.

        Args:
            in_data: input tensor; dimension 1 is iterated as time and each
                ``in_data[:, t]`` is treated as [batch, channels].
            user_data: optional tensor indexed the same way as ``in_data``.
            debug: optional dict; when given, every step appends its
                intermediate tensors to it.

        Returns:
            ``last_layer`` applied to the last step output, or to the
            concatenation of all step outputs when ``return_sequences`` is set.

        Raises:
            ValueError: if the sequence contains no time steps.
        """
        self.write_head.new_sequence()
        self.read_head.new_sequence()
        self.temporal_link.new_sequence()
        self.controller.new_sequence()

        # NOTE(review): the time loop below always indexes dimension 1 as
        # time (i.e. batch-first data), while the memory batch size is taken
        # from dimension 1 when batch_first is False. With batch_first=False
        # this is only consistent if both dimensions have the same size —
        # confirm that all callers pass batch_first=True.
        self._mem_init(in_data.size(0 if self.batch_first else 1), in_data.device)

        n_steps = in_data.size(1)
        if n_steps == 0:
            raise ValueError("DNC.forward needs at least one time step")

        has_user = isinstance(user_data, torch.Tensor)
        step_outputs = []
        out_tsteps = None
        for t in range(n_steps):
            step_user = user_data[:, t] if has_user else None
            out_tsteps = self._step(in_data[:, t], step_user, debug=debug)
            if self.return_sequences:
                step_outputs.append(out_tsteps)

        if self.return_sequences:
            # last_layer was built for output_size * seq_len features, so it
            # needs the outputs of all steps, not just the last one.
            features = torch.cat(step_outputs, -1)
        else:
            features = out_tsteps

        return self.last_layer(features)


## LSTM/GRU

In [0]:
class GRU_LSTM_Model(torch.nn.Module):
    """Recurrent baseline: a step-wise controller followed by a linear layer.

    Args:
        input_dim: number of input channels per time step (given to
            ``controller.init``).
        output_dim: size of the output of the final linear layer.
        controller: object providing ``init(input_dim)``,
            ``get_output_size()``, ``new_sequence()`` and a call operator.
        clip_controller: the controller output fed to the final layer is
            clamped to ``[-clip_controller, clip_controller]``; ``None``
            disables clamping.
    """

    def init_layer(self):
        """Initialise weight and bias of the final layer uniformly in [-0.1, 0.1]."""
        torch.nn.init.uniform_(self.fc.weight.data, -0.1, 0.1)
        torch.nn.init.uniform_(self.fc.bias.data, -0.1, 0.1)

    def __init__(self, input_dim, output_dim, controller, clip_controller):
        super(GRU_LSTM_Model, self).__init__()

        # Recurrent cell that is stepped manually in forward().
        self.controller = controller
        controller.init(input_dim)
        self.clip_controller = clip_controller

        self.fc = torch.nn.Linear(controller.get_output_size(), output_dim)
        self.init_layer()

    def forward(self, item_data, user_data=None):
        """Run the controller over the sequence and classify its last output.

        Args:
            item_data: input tensor; dimension 1 is iterated as time.
            user_data: optional tensor indexed the same way; when given it is
                passed as second argument to the controller.

        Returns:
            ``fc`` applied to the (clamped) controller output of the last step.

        Raises:
            ValueError: if the sequence contains no time steps.
        """
        self.controller.new_sequence()

        n_steps = item_data.size(1)
        if n_steps == 0:
            raise ValueError("GRU_LSTM_Model.forward needs at least one time step")

        has_user = isinstance(user_data, torch.Tensor)
        for t in range(n_steps):
            if has_user:
                out_tsteps = self.controller(item_data[:, t], user_data[:, t])
            else:
                out_tsteps = self.controller(item_data[:, t])

        # Tensor.clamp is out-of-place, so its result has to be kept; the
        # previous code discarded it, which made the clamp a no-op. Only the
        # last step reaches ``fc``, hence clamping once here is sufficient.
        if self.clip_controller is not None:
            out_tsteps = out_tsteps.clamp(-self.clip_controller, self.clip_controller)

        return self.fc(out_tsteps)

## Controllers

In [0]:
class GRUController(torch.nn.Module):
    '''
    Single-layer GRU cell that is stepped manually, one time step per call.

    Based on formula https://pytorch.org/docs/stable/nn.html

    The hidden state is kept in ``self.outputs`` between calls; call
    ``new_sequence()`` to reset it. ``init(input_size)`` must be called before
    the first forward pass because it creates the layers.

    Args:
        layer_sizes: size of the hidden state (used as a single int).
        out_from_all_layers: stored only; not used inside this class.
        grad_clip: stored only; not used inside this class.
    '''

    def __init__(self, layer_sizes, out_from_all_layers=False, grad_clip=5.0):
        super(GRUController, self).__init__()
        self.out_from_all_layers = out_from_all_layers
        self.grad_clip = grad_clip
        self.hidden_size = layer_sizes
        self.outputs = None

    def new_sequence(self):
        '''Forget the hidden state of the previous sequence.'''
        self.outputs = None

    def reset_parameters(self):
        '''Initialise the weights (not the biases) uniformly in [-0.1, 0.1].'''
        def init_layer(l):
            torch.nn.init.uniform_(l.weight.data, -0.1, 0.1)

        init_layer(self.layer_item)
        init_layer(self.layer_hidden)

    def _add_modules(self, name, m):
        '''Register ``m`` under the additional name ``<name>_0``.'''
        self.add_module("%s_%d" % (name, 0), m)

    def init(self, input_size):
        '''Create the layers for inputs with ``input_size`` channels.'''
        self.input_size = input_size

        # Each layer produces the three gate pre-activations (r, z, n) at once.
        self.layer_item = torch.nn.Linear(self.input_size, 3 * self.hidden_size, bias=True)
        self.layer_hidden = torch.nn.Linear(self.hidden_size, 3 * self.hidden_size, bias=True)

        self.drop_item = torch.nn.Dropout(0.2)

        # Kept as is: these extra registrations add the "layer_in_0" and
        # "layer_hidden_0" names to the state dict.
        self._add_modules("layer_in", self.layer_item)
        self._add_modules("layer_hidden", self.layer_hidden)

        self.reset_parameters()

    def get_output_size(self):
        '''Size of the hidden state returned by ``forward``.'''
        return self.hidden_size

    def forward(self, input_item):
        '''Advance the GRU by one step.

        Args:
            input_item: [batch, input_size].

        Returns:
            The new hidden state, [batch, hidden_size].
        '''
        if self.outputs is not None:
            h = self.outputs
        else:
            # Zero initial state on the same device and dtype as the input, so
            # the controller also works on CPU (torch.cuda.FloatTensor did not).
            h = input_item.new_zeros(input_item.size(0), self.hidden_size)

        gi = self.drop_item(self.layer_item(input_item))
        gh = self.layer_hidden(h)

        i_r, i_i, i_n = gi.chunk(3, 1)
        h_r, h_i, h_n = gh.chunk(3, 1)

        r = torch.sigmoid(i_r + h_r)
        z = torch.sigmoid(i_i + h_i)

        n = torch.tanh(i_n + r * h_n)
        self.outputs = (1 - z) * n + z * h

        return self.outputs

In [0]:
def clip_grad(v, min, max):
    """Return a view of ``v`` whose incoming gradient is clamped to [min, max].

    The forward values are unchanged; a backward hook on the returned tensor
    clamps the gradient before it flows on to ``v``.
    """
    def _clamp(grad):
        return grad.clamp(min, max)

    proxy = v.expand_as(v)
    proxy.register_hook(_clamp)
    return proxy

class UserBasedGRUController(torch.nn.Module):
    '''
    GRU cell with an additional user input, stepped one time step per call.

    Based on formula https://pytorch.org/docs/stable/nn.html and work of Donkers et al.,
    https://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/p152-donkers.pdf

    An attention gate ``a`` (computed from hidden state, item input and user
    input) splits the step between the item input, weighted ``1 - a``, and the
    user input, weighted ``a``. The hidden state is kept in ``self.outputs``
    between calls; call ``new_sequence()`` to reset it. ``init(input_size)``
    must be called before the first forward pass because it creates the layers.

    Args:
        hidden_size: size of the hidden state.
        out_from_all_layers: stored only; not used inside this class.
    '''
    def __init__(self, hidden_size, out_from_all_layers=False):
        super(UserBasedGRUController, self).__init__()
        self.out_from_all_layers = out_from_all_layers
        self.hidden_size = hidden_size
        self.outputs = None

    def new_sequence(self):
        '''Forget the hidden state of the previous sequence.'''
        self.outputs = None

    def reset_parameters(self):
        '''Initialise all weights and biases uniformly in [-0.1, 0.1].'''
        def init_layer(l):
            torch.nn.init.uniform_(l.weight.data, -0.1, 0.1)
            torch.nn.init.uniform_(l.bias.data, -0.1, 0.1)

        init_layer(self.layer_attention_item)
        init_layer(self.layer_item)
        init_layer(self.layer_user)
        init_layer(self.layer_hidden)

    def _add_modules(self, name, m):
        '''Register ``m`` under the additional name ``<name>_0``.'''
        self.add_module("%s_%d" % (name, 0), m)

    def init(self, input_size):
        '''Create the layers; item and user inputs both have ``input_size`` channels.'''
        self.input_size = input_size

        # Attention input is cat(hidden, item, user); output gates the inputs.
        self.layer_attention_item = torch.nn.Linear(self.input_size*2 + self.hidden_size, self.input_size, bias=True)
        self.layer_item = torch.nn.Linear(self.input_size, 3 * self.hidden_size, bias=True)
        self.layer_user = torch.nn.Linear(self.input_size, self.hidden_size, bias=True)
        self.layer_hidden = torch.nn.Linear(self.hidden_size, 3 * self.hidden_size, bias=True)

        self.drop_item = torch.nn.Dropout(0.2)
        self.drop_user = torch.nn.Dropout(0.5)

        # Kept as is: these extra registrations add the "<name>_0" entries to
        # the state dict.
        self._add_modules("layer_attention_item", self.layer_attention_item)
        self._add_modules("layer_item", self.layer_item)
        self._add_modules("layer_user", self.layer_user)
        self._add_modules("layer_hidden", self.layer_hidden)

        self.reset_parameters()

    def get_output_size(self):
        '''Size of the hidden state returned by ``forward``.'''
        return self.hidden_size

    def forward(self, input_item, input_user):
        '''Advance the cell by one step.

        Args:
            input_item: [batch, input_size].
            input_user: [batch, input_size].

        Returns:
            The new hidden state, [batch, hidden_size].
        '''
        if self.outputs is not None:
            h = self.outputs
        else:
            # Zero initial state on the same device and dtype as the input, so
            # the controller also works on CPU (torch.cuda.FloatTensor did not).
            h = input_item.new_zeros(input_item.size(0), self.hidden_size)

        i_a = self.layer_attention_item(torch.cat((h, input_item, input_user), 1))

        a = torch.sigmoid(i_a)
        gh = self.layer_hidden(h)
        gi = self.drop_item(self.layer_item((1 - a)*input_item))
        u_n = self.drop_user(self.layer_user(a*input_user))

        i_r, i_i, i_n = gi.chunk(3, 1)
        h_r, h_i, h_n = gh.chunk(3, 1)

        r = torch.sigmoid(i_r + h_r)
        u = torch.sigmoid(i_i + h_i)

        # The user contribution enters only the candidate state.
        k = torch.tanh((r*h_n) + i_n + u_n)
        self.outputs = (1 - u)*h + u*k
        return self.outputs

## Early stopping

In [0]:
class EarlyStopping:
    """
    Early stops the training if validation loss doesn't improve after a given patience.
    CREDITS GO TO: https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py
    """
    def __init__(self, patience=7, verbose=True, save_model=True):
        """
        Args:
            patience (int): How many consecutive non-improving calls are
                            tolerated before ``early_stop`` is set.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: True
            save_model (bool): Accepted for backward compatibility; it is not
                            used, no model is written by this class.
                            Default: True
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # `np.inf` instead of the alias `np.Inf`, which NumPy 2.0 removed.
        self.val_loss_min = np.inf

    def __call__(self, val_loss, model):
        """Record ``val_loss`` and update ``counter`` / ``early_stop``.

        The score is ``-val_loss``. A score strictly below the best one counts
        as "no improvement"; anything else (including the very first call)
        becomes the new best score and resets the counter.
        """
        score = -val_loss

        if self.best_score is not None and score < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # First observation or an improvement. Going through save_checkpoint
        # on the first call as well keeps `val_loss_min == -best_score`, so a
        # later "decreased" message does not report `inf` as the old value.
        self.best_score = score
        self.save_checkpoint(val_loss, model)
        self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Record the new minimum validation loss; ``model`` is not saved here.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).')
        self.val_loss_min = val_loss

# Experiments

## Load raw dataset

In [0]:
def load_raw_dataset(subset_perc=0.1, head=False):
    """Load the LastFM listening log and return a time-ordered subset.

    Reads the tab-separated file at ``LASTFM_STORAGE_FOLDER``, replaces user,
    artist and song names by integer group ids, converts the timestamp to
    seconds since the UNIX epoch and drops the textual columns.

    Args:
        subset_perc: fraction of the (time-sorted) rows to return.
        head: if True return the oldest ``subset_perc`` rows, otherwise the
            most recent ones.

    Returns:
        DataFrame sorted by ``timestamp`` with columns ``userId``,
        ``timestamp``, ``artistId`` and ``songId``.
    """
    read_kwargs = dict(sep='\t', quoting=csv.QUOTE_NONE, header=None)
    try:
        # pandas >= 1.3: `error_bad_lines` was deprecated and later removed
        # (2.0). 'warn' keeps the old behaviour: skip malformed lines, report them.
        df = pd.read_csv(LASTFM_STORAGE_FOLDER, on_bad_lines='warn', **read_kwargs)
    except TypeError:
        # Older pandas does not know `on_bad_lines`.
        df = pd.read_csv(LASTFM_STORAGE_FOLDER, error_bad_lines=False, **read_kwargs)

    # Preprocessing (timestamp is converted to UNIX as this makes ordering easier.)
    df.columns = ['userId', 'timestamp', 'artist_mbid', 'artist_name', 'song_mbid', 'song_title']
    df['userId'] = df.groupby(['userId']).ngroup()
    df['artistId'] = df.groupby(['artist_name']).ngroup()
    df['songId'] = df.groupby(['song_title']).ngroup()
    df['timestamp'] = pd.to_datetime(df['timestamp']).astype(np.int64) // 10**9
    df = df.drop(columns=['artist_mbid', 'artist_name', 'song_title', 'song_mbid'])

    df = df.sort_values(by=['timestamp'])
    subset_len = int(len(df) * subset_perc)

    return df.head(subset_len) if head else df.tail(subset_len)

## Load dataloaders

In [0]:
def _sequences_to_dataset(sequences):
    """Split each sequence into (all steps but the last, last step) as a TensorDataset."""
    inputs = torch.from_numpy(sequences[:, :-1]).long()
    targets = torch.from_numpy(sequences[:, -1]).long()
    return torch.utils.data.TensorDataset(inputs, targets)


def _build_loader(dataset, shuffle):
    """Create a DataLoader using the notebook-level ``batch_size``."""
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=4,
                                       pin_memory=True, drop_last=True, shuffle=shuffle)


def load_data(location_files, merge_train_val=False, exp_var_seq=False):
    """Load saved train/val/test sequences and wrap them in DataLoaders.

    The file is expected to hold three arrays (train, val, test) that are
    indexed as ``[sequence, step, (userId, songId)]``. The last step of each
    sequence is the prediction target, all earlier steps are the input.

    Items are also ranked by how often they occur in the training data (plus
    validation data when ``merge_train_val`` is set) and split into three
    equally sized groups: not popular, semi-popular and popular.

    Args:
        location_files: path of the ``.npy`` file to load.
        merge_train_val: if True, train on train + val and return no
            validation loader.
        exp_var_seq: unused; kept for backward compatibility.

    Returns:
        If ``merge_train_val``:
            ``(train_loader, test_loader, size_voca, size_users,
            items_not_popular, items_semi_popular, items_popular)``
        otherwise:
            ``(train_loader, val_loader, test_loader, size_voca, size_users)``
    """
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load files that
    # were generated by this notebook.
    sequences = np.load(location_files, allow_pickle=True)
    sequences_train, sequences_val, sequences_test = sequences[0], sequences[1], sequences[2]

    if merge_train_val:
        merge = np.vstack((sequences_train, sequences_val))
    else:
        merge = sequences_train

    # Count item ids only (last axis, index 1). Counting over the whole array
    # would also count the user ids in index 0 as if they were items.
    unique, counts = np.unique(merge[:, :, 1], return_counts=True)
    class_weights = dict(zip(unique, counts))

    # Sorts low to high.
    sorted_list = sorted(class_weights, key=class_weights.__getitem__)
    pos_not_popular = int((1/3)*len(sorted_list))
    pos_popular = int((2/3)*len(sorted_list))

    # Items for calculation.
    items_not_popular = sorted_list[:pos_not_popular]
    items_semi_popular = sorted_list[pos_not_popular:pos_popular]
    items_popular = sorted_list[pos_popular:]

    print('N of items not popular: {}, N items semi-popular: {}, N items popular: {}'.format(len(items_not_popular), len(items_semi_popular), len(items_popular)))

    # Sanity check: the three groups must be pairwise disjoint.
    group_pairs = ((items_not_popular, items_semi_popular),
                   (items_semi_popular, items_popular),
                   (items_not_popular, items_popular))
    if any(set(left) & set(right) for left, right in group_pairs):
        print('THERE IS OVERLAP BETWEEN THE SETS!!')

    # Embedding/output sizes follow from the largest ids seen in training.
    size_voca = sequences_train[:, :, 1].max() + 1
    size_users = sequences_train[:, :, 0].max() + 1

    dataset = _sequences_to_dataset(sequences_train)
    val = _sequences_to_dataset(sequences_val)
    test = _sequences_to_dataset(sequences_test)

    if merge_train_val:
        dataset = torch.utils.data.ConcatDataset((dataset, val))
        train_loader = _build_loader(dataset, shuffle=True)
        test_loader = _build_loader(test, shuffle=False)
        return train_loader, test_loader, size_voca, size_users, items_not_popular, items_semi_popular, items_popular

    train_loader = _build_loader(dataset, shuffle=True)
    test_loader = _build_loader(test, shuffle=False)
    val_loader = _build_loader(val, shuffle=False)
    return train_loader, val_loader, test_loader, size_voca, size_users

## Train model

In [0]:
#!/usr/bin/env python3
#
# Copyright 2017 Robert Csordas. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================

#!export CUDA_VISIBLE_DEVICES=1
def train(n_epochs, size_voca, train_loader, test_loader, val_loader=None, size_users=None, items_not_popular=None, items_semi_popular=None, items_popular=None):
    """Train a GRU/DNC recommender and either tune or evaluate it.

    Besides its arguments the function reads its configuration from
    notebook-level variables: ``controller_type``, ``model_type``,
    ``hidden_size``, ``embedding_size``, ``clip_controller``, ``wd``,
    ``wd_user``, ``lr``, ``batch_size``, ``max_input_count_per_batch``,
    ``recall_k``, ``patience_early_stopping``, ``model_file`` and, for the DNC,
    ``data_word_size``, ``mem_count``, ``n_read_heads``, ``masked_lookup``,
    ``dealloc_content``, ``sharpness_control``, ``mask_min`` and
    ``return_sequences``.

    Args:
        n_epochs: maximum number of epochs.
        size_voca: number of items (item-embedding rows / output classes).
        train_loader: loader yielding ``(data, labels)`` batches; index 0 of
            the last axis is the user id and index 1 the item id.
        test_loader: loader used for the final evaluation.
        val_loader: if given, the validation loss drives early stopping and
            the test set is not evaluated.
        size_users: number of users; enables the user embedding when truthy.
        items_not_popular, items_popular: item ids used to report the recall
            metrics per popularity group. When either is None the per-group
            metrics are reported as NaN.
        items_semi_popular: unused; every item in neither of the two groups
            above is counted as semi-popular.

    Returns:
        * with ``val_loader`` and early stopping triggered: dict with
          ``n_epochs`` and ``val_loss``;
        * with ``val_loader`` and no early stop within ``n_epochs``: ``-1``;
        * without ``val_loader``: list ``[recall20, recall1, mrr20,
          recall20_p, recall20_sp, recall20_np, recall1_p, recall1_sp,
          recall1_np, loss]`` measured on ``test_loader``.
    """
    score_names = ('recall20', 'recall1', 'mrr20',
                   'recall20_p', 'recall20_sp', 'recall20_np',
                   'recall1_p', 'recall1_sp', 'recall1_np')

    # Sets give O(1) membership tests in the per-sample evaluation loop.
    track_popularity = items_not_popular is not None and items_popular is not None
    not_popular_set = set(items_not_popular) if track_popularity else set()
    popular_set = set(items_popular) if track_popularity else set()

    def run_model(model_input):
        """Embed the (user, item) ids and feed them to the model."""
        if size_users:
            user_input = embedding_user(model_input[:, :, 0])
            item_input = embedding_item(model_input[:, :, 1])
            return model(item_input, user_input)

        item_input = embedding_item(model_input[:, :, 1])
        return model(item_input)

    def multiply_grads(params, mul):
        """Scale the gradients of every parameter in ``params`` by ``mul``."""
        if mul == 1:
            return

        for group in params:
            for p in group["params"]:
                # Parameters that took no part in the forward pass have no gradient.
                if p.grad is not None:
                    p.grad.data *= mul

    def evaluate_set(data_loader, loss_only=True):
        """Return the mean loss, or all metrics followed by the mean loss."""
        model.eval()
        losses = []
        scores = {name: [] for name in score_names}
        with torch.no_grad():
            for batch, labels in data_loader:
                labels = labels.to(device)
                batch = batch.to(device)
                outputs = run_model(batch)
                losses.append(loss_func(outputs, labels[:, 1]).item())

                if loss_only:
                    continue

                targets = labels[:, 1].cpu().detach().numpy()
                _, output_indices = torch.topk(outputs, recall_k)
                output_indices = output_indices.cpu().detach().numpy()
                for target, top_k in zip(targets, output_indices):
                    hit_at_1 = 1 if target == top_k[0] else 0
                    positions = np.flatnonzero(top_k == target)
                    hit_at_k = 1 if positions.size else 0

                    scores['recall1'].append(hit_at_1)
                    scores['recall20'].append(hit_at_k)
                    # Reciprocal of the 1-based rank, 0 when the target is not in the top-k.
                    scores['mrr20'].append(1 / (positions[0] + 1) if hit_at_k else 0)

                    if not track_popularity:
                        continue
                    if target in not_popular_set:
                        group = 'np'
                    elif target in popular_set:
                        group = 'p'
                    else:
                        group = 'sp'
                    scores['recall1_' + group].append(hit_at_1)
                    scores['recall20_' + group].append(hit_at_k)

        model.train()
        if loss_only:
            return np.mean(losses)

        # Metrics in `score_names` order, then the loss. A group without any
        # sample yields NaN.
        return [np.mean(x) if x else float('nan') for x in scores.values()] + [np.mean(losses)]

    def load_checkpoint(model, optimizer, filename, embedding_item, embedding_user=None):
        """Restore model, optimizer and embedding state from ``filename``."""
        # Note: Input model & optimizer should be pre-defined.  This routine only updates their states.
        checkpoint = torch.load(filename, map_location=device)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})"
                  .format(filename, checkpoint['epoch']))
        best_score = checkpoint['best_score']
        embedding_item.load_state_dict(checkpoint['embedding_item'])

        if embedding_user:
            embedding_user.load_state_dict(checkpoint['embedding_user'])

        # now individually transfer the optimizer parts...
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

        return model, optimizer, start_epoch, best_score, embedding_item, embedding_user

    # ------------------------------------------------------------------
    # Setup & train model.
    # ------------------------------------------------------------------
    if controller_type.lower() == 'gru':
        print('Using GRU as the controller.')
        controller = GRUController(hidden_size)
    else:
        controller = UserBasedGRUController(hidden_size)
        print('Using a user-based GRU as the controller')

    if model_type.lower() == 'gru':
        model = GRU_LSTM_Model(embedding_size, size_voca, controller, clip_controller)
    else:
        model = DNC(embedding_size, embedding_size, data_word_size, mem_count, n_read_heads, controller,
                     batch_first=True, mask=masked_lookup, dealloc_content=dealloc_content, link_sharpness_control=sharpness_control,
                     mask_min=mask_min, clip_controller=clip_controller, output_classes=size_voca,
                     return_sequences=return_sequences)

    device = torch.device('cuda:0')
    embedding_item = torch.nn.Embedding(size_voca, embedding_size).to(device)

    early_stopping = EarlyStopping(patience=patience_early_stopping,
                                  verbose=True)

    # Weight decay on non-bias weights only; the user layer gets its own group below.
    params = [
        {'params': [p for n, p in model.named_parameters() if not n.endswith(".bias") and (n != 'controller.layer_user.weight')], 'weight_decay': wd},
        {'params': [p for n, p in model.named_parameters() if n.endswith(".bias")], 'weight_decay': 0}
    ]

    torch.nn.init.uniform_(embedding_item.weight.data, -0.1, 0.1)
    # `parameters()` returns a one-shot generator. It has to be materialised,
    # otherwise the parameter count below exhausts it and the optimizer
    # receives an empty group, i.e. the embedding would never be trained.
    params.append({'params': list(embedding_item.parameters()), 'weight_decay': 0})

    if size_users:
        # Add l2-norm to user-specific layer.
        params.append({'params': [controller.layer_user.weight], 'weight_decay': wd_user})
        embedding_user = torch.nn.Embedding(size_users, embedding_size).to(device)
        torch.nn.init.uniform_(embedding_user.weight.data, -0.1, 0.1)
        params.append({'params': list(embedding_user.parameters()), 'weight_decay': 0})
    else:
        embedding_user = None

    n_params = sum(t.numel() for group in params for t in group['params'])
    print('Number of params: {}'.format(n_params))

    optimizer = torch.optim.Adam(params, lr=lr)

    start_epoch = 0
    if model_file is not None and os.path.isfile(model_file):
        print('Model loaded.')
        model, optimizer, start_epoch, early_stop_bs, embedding_item, embedding_user = load_checkpoint(model, optimizer, model_file, embedding_item, embedding_user)
        if early_stop_bs is not None:
            early_stopping.best_score = early_stop_bs
            early_stopping.val_loss_min = early_stop_bs*(-1)

    # Model and layers to device.
    loss_func = torch.nn.CrossEntropyLoss()
    model = model.to(device)

    embedding_item = embedding_item.to(device)
    if size_users:
        embedding_user = embedding_user.to(device)

    # Train model.
    i = 0
    loss_sum = 0
    iter_start_time = time.time()
    data_load_total_time = 0
    hyperparams = {}

    # Report the running loss roughly 15 times per epoch; at least every
    # iteration so short loaders do not cause a modulo by zero.
    log_every = max(1, len(train_loader) // 15)

    # Sanity.
    model.train()
    for epoch in range(start_epoch, n_epochs):
        print('Starting new epoch.')
        start = time.time()
        data_load_timer = time.time()
        for data, labels in train_loader:
            labels = labels.to(device)
            data = data.to(device)
            data_load_total_time += time.time() - data_load_timer

            optimizer.zero_grad()

            # Split the batch into sub-batches of at most
            # `max_input_count_per_batch` input elements; gradients are
            # accumulated over the sub-batches and rescaled afterwards.
            n_subbatch = math.ceil(data.numel() / max_input_count_per_batch)
            real_batch = max(math.floor(batch_size/n_subbatch), 1)
            n_subbatch = math.ceil(batch_size/real_batch)
            remaining_batch = batch_size % real_batch

            for subbatch in range(n_subbatch):
                sub_input, sub_target = data, labels

                if n_subbatch != 1:
                    window = slice(subbatch * real_batch, (subbatch + 1) * real_batch)
                    sub_input = data[window]
                    sub_target = labels[window]

                output = run_model(sub_input)

                loss = loss_func(output, sub_target[:, 1])
                loss.backward()

                # Before the smaller last sub-batch is added, re-weight what
                # has been accumulated so far relative to its size.
                if remaining_batch != 0 and subbatch == n_subbatch-2:
                    multiply_grads(params, real_batch/remaining_batch)

            if n_subbatch != 1:
                if remaining_batch == 0:
                    multiply_grads(params, 1/n_subbatch)
                else:
                    # The call itself was missing here: the line was a bare
                    # tuple expression that left the gradients unscaled.
                    multiply_grads(params, remaining_batch / batch_size)

            optimizer.step()

            i += 1
            curr_loss = loss.item()
            loss_sum += curr_loss
            if i % log_every == 0:
                tim = time.time()
                loss_avg = loss_sum / i

                message = "Iteration %d, loss: %.4f" % (i, loss_avg)
                message += " (%.2f ms/iter, load time %.2g ms/iter)" % (
                            (tim - iter_start_time) / i * 1000.0,
                            data_load_total_time / i * 1000.0)
                print(message)

                iter_start_time = tim
                loss_sum = 0
                data_load_total_time = 0
            data_load_timer = time.time()

        # End of epoch, save model.
        if model_file is not None:
            state = {'epoch': epoch + 1, 'state_dict': model.state_dict(),
                 'optimizer': optimizer.state_dict(), 'best_score': early_stopping.best_score,
                    'embedding_item': embedding_item.state_dict()}
            if size_users:
                state['embedding_user'] = embedding_user.state_dict()
            torch.save(state, model_file)

        # End of epoch evaluation.
        if val_loader:
            val_loss = evaluate_set(val_loader)

            print('End of Epoch: {}, val loss {}'.format(time.time() - start, val_loss))
            early_stopping(val_loss, model)
            if early_stopping.early_stop:
                print('Early stoppage triggered.')

                print('Training ended after {} and {} epochs, best validation score: {}'.format(time.time() - iter_start_time, epoch+1, early_stopping.best_score*(-1)))
                print('-----------------------------------------------------------------')
                # Number of epochs up to the best validation loss (at least 1).
                hyperparams['n_epochs'] = np.max([epoch + 1 - early_stopping.patience, 1])
                hyperparams['val_loss'] = early_stopping.best_score * (-1)
                # Return parameters.
                return hyperparams
        else:
            print('End of Epoch: {}'.format(time.time() - start))

    if not val_loader:
        results_test = evaluate_set(test_loader, loss_only=False)
        print('End of training, test results {}'.format(results_test))
        return results_test

    print('Amount of epochs exceeded, but training has not finished! RESTART.')
    return -1

## Exp 1: Fold over entire dataset

This section contains the code for the first experiment, which concerns the data sample used in the previous work by Donkers et al. The experiment determines whether the item-removal threshold leads to a decrease in Recall@K scores.

### Generate data

In [0]:
def _sliding_windows(seq, window):
    """Return every contiguous slice of length ``window`` (none if ``seq`` is shorter)."""
    return [seq[start:start + window] for start in range(len(seq) - window + 1)]


def gen_sequences_exp_1(df, threshold_removal=20):
    """Create and save the train/val/test sequences for the ten folds of experiment 1.

    ``df`` is split into ten consecutive parts. Per part, items occurring at
    most ``threshold_removal`` times are removed, song ids are re-numbered and
    each user's history is split 90% / 5% / 5% into train / val / test. Val
    and test interactions with items that never occur in any train split are
    dropped. Every split is cut into sliding windows of
    ``MAX_LENGTH_SEQUENCE`` ``(userId, songId)`` pairs.

    The three arrays of a fold are saved together as
    ``{HOME_FOLDER}/exp1/sequences_{fold}.npy`` (``..._{fold}_2.npy`` when
    ``threshold_removal == 2``).

    Args:
        df: DataFrame with at least ``userId``, ``songId`` and ``timestamp``.
        threshold_removal: items with at most this many occurrences in a
            fold are removed.
    """
    import os  # local import keeps this cell runnable on its own

    os.makedirs('{}/exp1'.format(HOME_FOLDER), exist_ok=True)
    file_suffix = '_2' if threshold_removal == 2 else ''

    for idx_df, df_sub in enumerate(np.array_split(df, 10)):
        item_ids, item_counts = np.unique(df_sub['songId'], return_counts=True)
        items_to_replace = item_ids[item_counts <= threshold_removal]
        # Copy so that re-assigning `songId` below works on an independent
        # frame instead of a slice of `df`.
        df_sub = df_sub[~df_sub['songId'].isin(items_to_replace)].copy()

        df_sub = df_sub.sort_values(by=['songId'])
        df_sub['songId'] = df_sub.groupby(['songId']).ngroup()
        df_sub = df_sub.sort_values(by=['timestamp'])

        # Pass 1: chronological 90/5/5 split per user. `sort=False` keeps the
        # users in order of first appearance.
        splits_per_user = []
        unique_items_train = set()
        for _, df_usr in df_sub.groupby('userId', sort=False):
            seq_usr = df_usr.sort_values(by=['timestamp'])[['userId', 'songId']].values

            seq_train_len = int(len(seq_usr) * 0.9)
            seq_val_len = seq_train_len + int((len(seq_usr) - seq_train_len)/2)
            seq_train = seq_usr[:seq_train_len]
            seq_val = seq_usr[seq_train_len:seq_val_len]
            seq_test = seq_usr[seq_val_len:]

            splits_per_user.append((seq_train, seq_val, seq_test))
            unique_items_train.update(seq_train[:, 1])

        train_items = np.array(list(unique_items_train))

        # Pass 2: drop val/test items unseen in training, then window.
        sequences_test, sequences_val, sequences_train = [], [], []
        for seq_train, seq_val, seq_test in splits_per_user:
            seq_val = seq_val[np.isin(seq_val[:, 1], train_items)]
            seq_test = seq_test[np.isin(seq_test[:, 1], train_items)]

            sequences_test.extend(_sliding_windows(seq_test, MAX_LENGTH_SEQUENCE))
            sequences_val.extend(_sliding_windows(seq_val, MAX_LENGTH_SEQUENCE))
            sequences_train.extend(_sliding_windows(seq_train, MAX_LENGTH_SEQUENCE))

        # Create and save arrays
        sequences_test = np.array(sequences_test)
        sequences_train = np.array(sequences_train)
        sequences_val = np.array(sequences_val)

        # The three arrays differ in length, so they are stored in an explicit
        # object array; `np.array([...])` on ragged input raises on recent NumPy.
        bundle = np.empty(3, dtype=object)
        bundle[0], bundle[1], bundle[2] = sequences_train, sequences_val, sequences_test
        np.save('{}/exp1/sequences_{}{}.npy'.format(HOME_FOLDER, idx_df, file_suffix), bundle)

        print('Shape train sequences: {}, shape val sequences: {}, shape test sequences: {}'.format(sequences_train.shape, sequences_val.shape, sequences_test.shape))

In [0]:
# df = load_raw_dataset(subset_perc=1)

In [0]:
# for threshold_removal in [20, 2]:
#     gen_sequences_exp_1(df, threshold_removal)

### Experiment

In [0]:
# Variable settings

# Optimisation: `wd` / `wd_user` are the weight-decay values used by train().
batch_size = 1000
lr = 0.001
wd = 0
wd_user = 0.01
patience_early_stopping = 1

# Model
controller_types = ['gru', 'gru_user']
embedding_size = 1000
hidden_size = 1000
clip_controller = 5.0

# Evaluation: k of the top-k used for the recall/MRR metrics.
recall_k = 20

# Run bookkeeping
name = 'random_train'
gpu = 0

# Upper bound on input elements per sub-batch (see the sub-batch split in train()).
max_input_count_per_batch = batch_size * MAX_LENGTH_SEQUENCE

####  1.1: Removal threshold equal to 20

In [0]:
# Experiment 1.1: per fold, first tune the number of epochs with early stopping
# on the validation set, then retrain on train + val and evaluate on test.
n_epochs = 25
n_folds = 10
model_file = None

for user_based in [0, 1]:
    controller_type = controller_types[user_based]
    file_results = '{}/exp1/results_user_based_{}.npy'.format(HOME_FOLDER, user_based)
    if os.path.isfile(file_results):
        # The results are stored as an object array, which needs allow_pickle.
        results = list(np.load(file_results, allow_pickle=True))
        if len(results) == n_folds:
            continue
        print('Result file loaded {}, starting at {}.'.format(user_based, len(results)))
    else:
        results = []

    for idx_f in range(len(results), n_folds):
        file_url = '{}/exp1/sequences_{}.npy'.format(HOME_FOLDER, idx_f)
        print(file_url)

        train_loader, val_loader, test_loader, size_voca, size_users = load_data(location_files=file_url)

        if not bool(user_based):
            size_users = None

        hyperparams = train(n_epochs, size_voca, train_loader, test_loader, val_loader, size_users)

        if hyperparams == -1:
            break

        print('Following hyperparameters were found: {}'.format(hyperparams))

        # With merge_train_val=True load_data also returns the three item
        # popularity groups, which train() needs for its per-group metrics.
        (train_loader, test_loader, size_voca, size_users,
         items_not_popular, items_semi_popular, items_popular) = load_data(location_files=file_url, merge_train_val=True)
        if not bool(user_based):
            size_users = None

        fold_results = train(hyperparams['n_epochs'], size_voca, train_loader, test_loader, val_loader=None, size_users=size_users,
                             items_not_popular=items_not_popular, items_semi_popular=items_semi_popular, items_popular=items_popular)
        results.append([fold_results, hyperparams])

        # One row per fold: [metrics list, hyperparameter dict]. An explicit
        # object array avoids NumPy's ragged-array error.
        results_arr = np.empty((len(results), 2), dtype=object)
        for row, (row_results, row_hyperparams) in enumerate(results):
            results_arr[row, 0] = row_results
            results_arr[row, 1] = row_hyperparams
        np.save(file_results, results_arr)

        print('Fold {}, Following results were found: {}'.format(idx_f, fold_results))
        print('----------------------------------------')

####  1.2: Removal threshold equal to 2

In [0]:
# Experiment 1.2: same procedure as 1.1, on the sequences generated with a
# removal threshold of 2 (files ending in `_2.npy`).
n_epochs = 25
n_folds = 10
model_file = None

for user_based in [0, 1]:
    controller_type = controller_types[user_based]
    file_results = '{}/exp1/results_user_based_{}_2.npy'.format(HOME_FOLDER, user_based)
    if os.path.isfile(file_results):
        # The results are stored as an object array, which needs allow_pickle.
        results = list(np.load(file_results, allow_pickle=True))
        if len(results) == n_folds:
            continue
        print('Result file loaded {}, starting at {}.'.format(user_based, len(results)))
    else:
        results = []

    for idx_f in range(len(results), n_folds):
        file_url = '{}/exp1/sequences_{}_2.npy'.format(HOME_FOLDER, idx_f)
        print(file_url)

        train_loader, val_loader, test_loader, size_voca, size_users = load_data(location_files=file_url)

        if not bool(user_based):
            size_users = None
        print(size_voca, size_users)
        hyperparams = train(n_epochs, size_voca, train_loader, test_loader, val_loader, size_users)

        if hyperparams == -1:
            break

        print('Following hyperparameters were found: {}'.format(hyperparams))

        # With merge_train_val=True load_data also returns the three item
        # popularity groups, which train() needs for its per-group metrics.
        (train_loader, test_loader, size_voca, size_users,
         items_not_popular, items_semi_popular, items_popular) = load_data(location_files=file_url, merge_train_val=True)
        if not bool(user_based):
            size_users = None

        fold_results = train(hyperparams['n_epochs'], size_voca, train_loader, test_loader, val_loader=None, size_users=size_users,
                             items_not_popular=items_not_popular, items_semi_popular=items_semi_popular, items_popular=items_popular)
        results.append([fold_results, hyperparams])

        # One row per fold: [metrics list, hyperparameter dict]. An explicit
        # object array avoids NumPy's ragged-array error.
        results_arr = np.empty((len(results), 2), dtype=object)
        for row, (row_results, row_hyperparams) in enumerate(results):
            results_arr[row, 0] = row_results
            results_arr[row, 1] = row_hyperparams
        np.save(file_results, results_arr)

        print('Fold {}, Following results were found: {}'.format(idx_f, fold_results))
        print('----------------------------------------')

### Results

#### Recall@20

In [0]:
# Each stored row is [metrics, hyperparams]; train() returns the metrics as
# [recall20, recall1, mrr20, ...], so Recall@20 is at index 0.
metric_idx = 0

# Keep both placeholders in the template and fill them per file. The template
# used to be formatted up front with an undefined name, so the user-based flag
# was never substituted.
file_path = '{}/exp1/results_user_based_{}.npy'
results_n_ub = [x[0][metric_idx] for x in np.load(file_path.format(HOME_FOLDER, 0), allow_pickle=True)]
results_ub = [x[0][metric_idx] for x in np.load(file_path.format(HOME_FOLDER, 1), allow_pickle=True)]

file_path = '{}/exp1/results_user_based_{}_2.npy'
results_n_ub_2 = [x[0][metric_idx] for x in np.load(file_path.format(HOME_FOLDER, 0), allow_pickle=True)]
results_ub_2 = [x[0][metric_idx] for x in np.load(file_path.format(HOME_FOLDER, 1), allow_pickle=True)]

In [0]:
# Per-fold results table (rounded to 3 decimals), printed as LaTeX.
df = pd.DataFrame(columns=['Fold', 'GRU user-based r=20', 'Vanilla GRU r=20', 'GRU user-based r=2', 'Vanilla GRU r=2'])
for fold in range(10):
    # Column order: user-based r=20, vanilla r=20, user-based r=2, vanilla r=2.
    rounded = [np.round(series[fold], 3) for series in (results_ub, results_n_ub, results_ub_2, results_n_ub_2)]
    df.loc[fold] = [fold + 1] + rounded
print(df.to_latex(index=False))

In [0]:
# Scatter plot of the per-fold scores of the four model/threshold combinations.
fig = plt.figure()
ax = plt.subplot(111)
fold_numbers = list(range(1, len(results_ub)+1))
for series, series_label in ((results_ub, 'User-based GRU, r=20'),
                             (results_ub_2, 'User-based GRU, r=2'),
                             (results_n_ub, 'Vanilla GRU, r=20'),
                             (results_n_ub_2, 'Vanilla GRU, r=2')):
    ax.scatter(fold_numbers, series, label=series_label)
plt.ylabel('Recall@20')
plt.xlabel('Subsets')
plt.ylim(0.2, 0.45)
plt.xticks(list(range(1, 11)))
ax.legend()
plt.savefig('{}/exp1/recall@20_plot_exp1.png'.format(HOME_FOLDER))

#### Recall@1

In [0]:
# Each stored row is [metrics, hyperparams]; train() returns the metrics as
# [recall20, recall1, mrr20, ...], so Recall@1 is at index 1.
metric_idx = 1

# Keep both placeholders in the template and fill them per file. The template
# used to be formatted up front with an undefined name, so the user-based flag
# was never substituted.
file_path = '{}/exp1/results_user_based_{}.npy'
results_n_ub = [x[0][metric_idx] for x in np.load(file_path.format(HOME_FOLDER, 0), allow_pickle=True)]
results_ub = [x[0][metric_idx] for x in np.load(file_path.format(HOME_FOLDER, 1), allow_pickle=True)]

file_path = '{}/exp1/results_user_based_{}_2.npy'
results_n_ub_2 = [x[0][metric_idx] for x in np.load(file_path.format(HOME_FOLDER, 0), allow_pickle=True)]
results_ub_2 = [x[0][metric_idx] for x in np.load(file_path.format(HOME_FOLDER, 1), allow_pickle=True)]

In [0]:
# Per-fold results table (rounded to 3 decimals), printed as LaTeX.
df = pd.DataFrame(columns=['Fold', 'GRU user-based r=20', 'Vanilla GRU r=20', 'GRU user-based r=2', 'Vanilla GRU r=2'])
for fold in range(10):
    # Column order: user-based r=20, vanilla r=20, user-based r=2, vanilla r=2.
    rounded = [np.round(series[fold], 3) for series in (results_ub, results_n_ub, results_ub_2, results_n_ub_2)]
    df.loc[fold] = [fold + 1] + rounded

print(df.to_latex(index=False))

In [0]:
# Scatter plot of the per-fold Recall@1 of the four model/threshold combinations.
fig = plt.figure()
ax = plt.subplot(111)
ax.scatter(list(range(1, len(results_ub)+1)), results_ub, label='User-based GRU, r=20')
ax.scatter(list(range(1, len(results_ub)+1)), results_ub_2, label='User-based GRU, r=2')
ax.scatter(list(range(1, len(results_ub)+1)), results_n_ub, label='Vanilla GRU, r=20')
ax.scatter(list(range(1, len(results_ub)+1)), results_n_ub_2, label='Vanilla GRU, r=2')
plt.ylabel('Recall@1')
plt.ylim(0.2, 0.3)
plt.xlabel('Subsets')
plt.xticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
ax.legend()
# Save next to the other experiment-1 artefacts under HOME_FOLDER instead of a
# hard-coded personal Drive path, matching the Recall@20 plot.
plt.savefig('{}/exp1/recall@1_plot_exp1.png'.format(HOME_FOLDER))

## Exp 2: Varying training data size

### Generate data

In [0]:
def gen_sequences_exp_2(df1, df2):
    '''
    Generate the train/val/test sequence files for experiment 2.

    NOTE: Preprocess frames of 10% each separately, then merge them. This makes for a fair comparison
    with the other experiments.

    Songs occurring at most MIN_THRESH times within a frame are dropped from that frame.
    After merging, song ids are re-assigned to consecutive integers. For every removal
    fraction in 0.0, 0.1, ..., 0.9 the first `fraction` part of each user's history is
    cut from the training split, while the validation/test splits (the last 5% of the
    history, halved) stay the same. Fixed-length sliding windows of MAX_LENGTH_SEQUENCE
    interactions are then written to '{HOME_FOLDER}/exp2/sequences_{idx}.npy'.

    Parameters:
        df1, df2: DataFrames with at least the columns 'userId', 'songId' and 'timestamp'.

    Returns:
        None. One .npy file is written per removal fraction and the shapes are printed.
    '''

    def _drop_rare_songs(frame):
        # Remove songs that occur at most MIN_THRESH times within this frame.
        song_ids, counts = np.unique(frame['songId'], return_counts=True)
        return frame[~frame['songId'].isin(song_ids[counts <= MIN_THRESH])]

    def _windows(seq):
        # All sliding windows of MAX_LENGTH_SEQUENCE rows; none if seq is too short.
        if len(seq) < MAX_LENGTH_SEQUENCE:
            return []
        return [seq[n:n+MAX_LENGTH_SEQUENCE] for n in range(len(seq)-MAX_LENGTH_SEQUENCE+1)]

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df = pd.concat([_drop_rare_songs(df1), _drop_rare_songs(df2)])

    # Sort and re-assign songIds AFTER merging, else we would get mismatches.
    df = df.sort_values(by=['songId'])
    df['songId'] = df.groupby(['songId']).ngroup()
    df = df.sort_values(by=['timestamp'])

    # The per-user histories do not depend on the removal fraction, so they are
    # extracted once instead of once per fraction.
    user_histories = {}
    for usr in df['userId'].unique():
        df_usr = df[df['userId'] == usr].sort_values(by=['timestamp'])
        user_histories[usr] = df_usr[['userId', 'songId']].values

    for idx_df, remove_perc in enumerate([0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]):
        user_splits = {}
        train_items = set()
        for usr, seq_usr in user_histories.items():
            seq_train_start = int(len(seq_usr) * remove_perc)
            seq_val_len = int(len(seq_usr) * 0.95)
            seq_test_len = seq_val_len + int((len(seq_usr) - seq_val_len)/2)

            seq_train = seq_usr[seq_train_start:seq_val_len]
            user_splits[usr] = (seq_train, seq_usr[seq_val_len:seq_test_len], seq_usr[seq_test_len:])
            train_items.update(seq_train[:,1])

        train_items = np.array(list(train_items))
        sequences_test, sequences_val, sequences_train = [], [], []
        for seq_train, seq_val, seq_test in user_splits.values():
            # Keep only validation/test interactions whose item was seen during training.
            seq_val = seq_val[np.isin(seq_val[:,1], train_items)]
            seq_test = seq_test[np.isin(seq_test[:,1], train_items)]

            sequences_test.extend(_windows(seq_test))
            sequences_val.extend(_windows(seq_val))
            sequences_train.extend(_windows(seq_train))

        # Create and save arrays
        sequences_test = np.array(sequences_test)
        sequences_train = np.array(sequences_train)
        sequences_val = np.array(sequences_val)

        # The three splits have different lengths, so they are stored in an explicit
        # object array; np.array([...]) on ragged input raises on NumPy >= 1.24.
        bundle = np.empty(3, dtype=object)
        bundle[0] = sequences_train
        bundle[1] = sequences_val
        bundle[2] = sequences_test

        np.save('{}/exp2/sequences_{}.npy'.format(HOME_FOLDER, idx_df), bundle)
        print('Shape train sequences: {}, shape val sequences: {}, shape test sequences: {}'.format(sequences_train.shape, sequences_val.shape, sequences_test.shape))

In [0]:
# Raw data for experiment 2: load with subset_perc=0.5 / head=True, keep only
# the last 2/5 of the loaded rows and split that part into two equal frames.
df = load_raw_dataset(subset_perc=0.5, head=True)
n_rows_kept = int(len(df) * (2/5))
df = df.tail(n_rows_kept)
df1, df2 = np.array_split(df, 2)

In [0]:
# Write the experiment-2 sequence files ('{HOME_FOLDER}/exp2/sequences_{idx}.npy')
# from the two frames prepared in the previous cell.
gen_sequences_exp_2(df1, df2)

### Experiment

In [0]:
# Settings for experiment 2 (module-level variables set for the experiment cell below)

# Controller name per user-based flag (index 0 = vanilla, 1 = user-based).
controller_types = ['gru', 'gru_user']

# Model dimensions
embedding_size = 1000
hidden_size = 1000

# Training
batch_size = 1000
lr = 0.001
wd, wd_user = 0, 0.01
clip_controller = 5.0
patience_early_stopping = 1

# Run / evaluation
name = 'random_train'
gpu = 0
recall_k = 20

# For sequence lengths:
max_input_count_per_batch = MAX_LENGTH_SEQUENCE * batch_size

In [0]:
# Experiment 2: for the vanilla (0) and the user-based (1) GRU, run a
# hyperparameter search and a final training run on each of the n_folds
# sequence files. Results are saved after every fold so that an interrupted
# run resumes at the first fold that is still missing.
n_epochs = 25
n_folds = 10

model_file = None

for user_based in [0, 1]:
    model_type = 'gru'
    controller_type = controller_types[user_based]
    file_results = '{}/exp2/results_user_based_{}.npy'.format(HOME_FOLDER, user_based)
    if os.path.isfile(file_results):
        # The result file is a pickled object array, which np.load only reads
        # with allow_pickle=True (as the other result loaders in this notebook do).
        results = list(np.load(file_results, allow_pickle=True))
        if len(results) == n_folds:
            continue
        print('Result file loaded {}, starting at {}.'.format(user_based, len(results)))
    else:
        results = []

    for idx_fold in range(len(results), n_folds):
        file_url = '{}/exp2/sequences_{}.npy'.format(HOME_FOLDER, idx_fold)
        train_loader, val_loader, test_loader, size_voca, size_users = load_data(location_files=file_url)
        if not bool(user_based):
            size_users = None

        # First pass: train with a validation set to find the hyperparameters.
        hyperparams = train(n_epochs, size_voca, train_loader, test_loader, val_loader, size_users)

        # train() signals with -1 that the remaining folds should not be run.
        if hyperparams == -1:
            break

        print('Following hyperparameters were found: {}'.format(hyperparams))

        # NOTE(review): with merge_train_val=True four values are unpacked here,
        # whereas the experiment-4 cell unpacks seven -- confirm against load_data.
        train_loader, test_loader, size_voca, size_users = load_data(location_files=file_url, merge_train_val=True)
        if not bool(user_based):
            size_users = None

        # Second pass: retrain on train+val for the number of epochs found above.
        fold_results = train(hyperparams['n_epochs'], size_voca, train_loader, test_loader, val_loader=None, size_users=size_users)
        results.append([fold_results, hyperparams])
        np.save(file_results, np.array(results))
        print('Fold {}, Following results were found: {}'.format(idx_fold, fold_results))
        print('----------------------------------------')

### Results

#### Recall@20

In [0]:
# Load the experiment-2 results and keep the Recall@20 entry (index 1 of each
# fold's metric vector).
# The second placeholder is escaped ('{{}}') so that only HOME_FOLDER is filled
# in here; with a plain '{}' this .format() call raises an IndexError because
# two placeholders would receive a single argument.
file_path = '{}/exp2/results_user_based_{{}}.npy'.format(HOME_FOLDER)
results_n_ub = [x[0][1] for x in np.load(file_path.format(0), allow_pickle=True)]
results_ub = [x[0][1] for x in np.load(file_path.format(1), allow_pickle=True)]
# Number of training sequences per subset, converted to thousands for the plot/table.
data_sizes = [1851095, 1665046, 1478678, 1292372, 1106009, 919600, 733415, 547202, 360959, 174996]
data_sizes = [x // 1000 for x in data_sizes]

In [0]:
# One row per training-set size: both scores rounded to three decimals,
# followed by the size itself. The table is printed as LaTeX.
df = pd.DataFrame(columns=['GRU user-based', 'Vanilla GRU', 'Size training set in thousands'])
for idx, (score_ub, score_n_ub, size) in enumerate(zip(results_ub, results_n_ub, data_sizes)):
    df.loc[idx] = [np.round(score_ub, 3), np.round(score_n_ub, 3), size]

print(df.to_latex(index=False))

In [0]:
# Line plot of Recall@20 against the training-set size for both models.
fig = plt.figure()
ax = plt.subplot(111)
for series, series_label in ((results_ub, 'User-based GRU'), (results_n_ub, 'Vanilla GRU')):
    ax.plot(data_sizes, series, label=series_label, marker='o')
plt.xlabel('Number of training sequences in thousands')
plt.ylabel('Recall@20')
ax.legend()
plt.savefig('{}/exp2/recall@20_plot_exp2.png'.format(HOME_FOLDER))

#### Recall@1

In [0]:
# Load the experiment-2 results and keep the Recall@1 entry (index 2 of each
# fold's metric vector).
# The second placeholder is escaped ('{{}}') so that only HOME_FOLDER is filled
# in here; with a plain '{}' this .format() call raises an IndexError because
# two placeholders would receive a single argument.
file_path = '{}/exp2/results_user_based_{{}}.npy'.format(HOME_FOLDER)
results_n_ub = [x[0][2] for x in np.load(file_path.format(0), allow_pickle=True)]
results_ub = [x[0][2] for x in np.load(file_path.format(1), allow_pickle=True)]
# Number of training sequences per subset, converted to thousands for the plot/table.
data_sizes = [1851095, 1665046, 1478678, 1292372, 1106009, 919600, 733415, 547202, 360959, 174996]
data_sizes = [x // 1000 for x in data_sizes]

In [0]:
# One row per training-set size: both scores rounded to three decimals,
# followed by the size itself. The table is printed as LaTeX.
df = pd.DataFrame(columns=['GRU user-based', 'Vanilla GRU', 'Size training set in thousands'])
for idx, (score_ub, score_n_ub, size) in enumerate(zip(results_ub, results_n_ub, data_sizes)):
    df.loc[idx] = [np.round(score_ub, 3), np.round(score_n_ub, 3), size]

print(df.to_latex(index=False))

In [0]:
# Line plot of Recall@1 against the training-set size for both models.
fig = plt.figure()
ax = plt.subplot(111)
# results_ub holds the user-based runs and results_n_ub the vanilla runs; the
# legend labels must follow that mapping (as in the Recall@20 plot).
ax.plot(data_sizes, results_ub, label='User-based GRU',marker='o')
ax.plot(data_sizes, results_n_ub, label='Vanilla GRU',marker='o')
plt.ylabel('Recall@1')
plt.xlabel('Number of training sequences in thousands')
# Reversed limits: the x-axis runs from the largest to the smallest training set.
plt.xlim(np.max(data_sizes) + 50, np.min(data_sizes) - 50)
ax.legend()
plt.savefig('{}/exp2/recall@1_plot_exp2.png'.format(HOME_FOLDER))

## Exp 3: Embedding size

### Experiment

In [0]:
# Settings for experiment 3 (module-level variables set for the experiment cell below).
# The embedding size is not set here: the experiment cell iterates over it.

# Controller name per user-based flag (index 0 = vanilla, 1 = user-based).
controller_types = ['gru', 'gru_user']

# Model dimensions
hidden_size = 1000

# Training
batch_size = 1000
lr = 0.001
wd, wd_user = 0, 0.01
clip_controller = 5.0
patience_early_stopping = 1

# Run / evaluation
name = 'random_train'
gpu = 0
recall_k = 20

# For sequence lengths:
max_input_count_per_batch = MAX_LENGTH_SEQUENCE * batch_size

In [0]:
# Experiment 3: hyperparameter search for a range of embedding sizes on the
# experiment-1 sequence files. Results are saved after every embedding size so
# that an interrupted run resumes at the first size that is still missing.
n_epochs = 25

# -1 is a sentinel: the vocabulary size is used as embedding size. That
# configuration is only run for the vanilla (non user-based) model.
embedding_sizes = [32, 64, 128, 256, 512, 1000, 2048, 4096, 8192, -1]
file_set = '{}/exp1/sequences_{}.npy'
model_type = 'gru'

for idx_fold in [0, 1]:
    for user_based in [0, 1]:
        file_url = file_set.format(HOME_FOLDER, idx_fold)
        controller_type = controller_types[user_based]
        file_results = '{}/exp3/results_user_based_{}_f{}.npy'.format(HOME_FOLDER, user_based, idx_fold)

        if os.path.isfile(file_results):
            results = list(np.load(file_results, allow_pickle=True))
            if len(results) == len(embedding_sizes):
                continue
            print('Result file loaded {}, starting at {}.'.format(user_based, len(results)+1))
        else:
            results = []

        for embedding_size in embedding_sizes[len(results):]:
            model_file = '{}/exp3/models/model_type_{}_ub_{}_f{}_emb_{}_hp.pth'.format(HOME_FOLDER, model_type, user_based, idx_fold, embedding_size)

            use_vocabulary_size = embedding_size == -1
            if use_vocabulary_size and user_based:
                # The vocabulary-sized embedding is skipped for the user-based model.
                continue

            train_loader, val_loader, test_loader, size_voca, size_users = load_data(location_files=file_url)
            if use_vocabulary_size:
                embedding_size = size_voca
            if not bool(user_based):
                size_users = None

            hyperparams = train(n_epochs, size_voca, train_loader, test_loader, val_loader, size_users)

            # train() signals with -1 that the remaining sizes should not be run.
            if hyperparams == -1:
                break

            print('Following hyperparameters were found: {}'.format(hyperparams))

            hyperparams['embedding_size'] = embedding_size

            results.append([hyperparams])
            np.save(file_results, np.array(results))
            # Print the results collected in this cell; `fold_results` is never
            # assigned here and would be undefined or stale from experiment 2.
            print('Following results were found: {}'.format(results))
            print('----------------------------------------')

### Results

In [0]:
# Average the validation loss per embedding size over the folds, separately
# for the vanilla (key 0) and the user-based (key 1) model.
embed_sizes = [32, 64, 128, 256, 512, 1000, 2048, 4096, 8192]
# NOTE(review): three folds are averaged here, while the experiment cell above
# iterates over folds [0, 1] only -- confirm which folds exist on disk.
exp3_folds = range(3)

# Only HOME_FOLDER is filled in now; the escaped placeholders ('{{}}') are
# filled per model/fold below. (The variable is HOME_FOLDER, not HOMEFOLDER.)
file_path = '{}/exp3/results_user_based_{{}}_f{{}}.npy'.format(HOME_FOLDER)
results = {}
for ub in [0, 1]:
    results[ub] = np.zeros(len(embed_sizes))
    for idx_fold in exp3_folds:
        temp = np.load(file_path.format(ub, idx_fold), allow_pickle=True)
        temp = np.array([np.array(x[0]['val_loss']) for x in temp])
        # The vanilla runs may hold one extra entry (vocabulary-sized embedding);
        # only the sizes listed in embed_sizes are reported.
        results[ub] += temp[:len(embed_sizes)]

    results[ub] /= len(exp3_folds)

results

In [0]:
# One row per embedding size: averaged loss of the user-based model (results[1])
# and of the vanilla model (results[0]), rounded to three decimals.
df = pd.DataFrame(columns=['GRU user-based', 'Vanilla GRU', 'Embedding size'])
for idx, size in enumerate(embed_sizes):
    loss_ub = np.round(results[1][idx], 3)
    loss_n_ub = np.round(results[0][idx], 3)
    df.loc[idx] = [loss_ub, loss_n_ub, size]
print(df.to_latex(index=False))

In [0]:
# Line plot of the averaged loss against the embedding size (log2 x-axis).
fig = plt.figure()
ax = plt.subplot(111)
ax.plot(embed_sizes, results[0], label='Vanilla GRU',marker='o')
ax.plot(embed_sizes, results[1], label='User-based GRU',marker='o')
plt.ylabel('Loss')
plt.xlabel('Embedding size')

ticks = [32, 64, 128, 256, 512, 1000, 2048, 4096, 8192]
xm = [1000, 2048, 4096, 8192]

# Matplotlib >= 3.3 names the keyword `base`; the old `basex` spelling was
# removed in 3.5. Older releases reject `base`, so fall back to `basex` there.
try:
    ax.set_xscale('log', base=2)
except (TypeError, ValueError):
    ax.set_xscale('log', basex=2)
# Label the major ticks with the plain embedding sizes and blank the minor ones.
ax.set_xticks(embed_sizes)
ax.set_xticks(xm, minor=True)
ax.set_xticklabels(ticks)
ax.set_xticklabels([""]*len(xm), minor=True)

plt.xticks(embed_sizes)
ax.legend()
plt.savefig('{}/exp3/loss_plot_exp3.png'.format(HOME_FOLDER))

## Exp 4: Various sequence lengths DNC

### Generate data

In [0]:
def gen_sequences_exp_4(df, visualize=False):
    '''
    Generate the train/val/test sequence files for experiment 4.

    The frame is split into three equal parts (stored as folds 7, 8 and 9). Within each
    part, songs occurring at most MIN_THRESH times are dropped and song ids are re-assigned
    to consecutive integers. Every user's history is split 90% / 5% / 5% into
    train / val / test, and validation/test interactions with items that never occur in
    training are removed. Sliding windows are then written for the sequence lengths
    50, 20, 10 and 5 to '{HOME_FOLDER}/exp4/sequences_{fold}_len_{length}.npy'.

    Parameters:
        df: DataFrame with at least the columns 'userId', 'songId' and 'timestamp'.
        visualize: if True, additionally save a histogram of the training item ids
            per fold to '{HOME_FOLDER}/exp4/freq_dist_f{fold}'.

    Returns:
        None. The .npy files are written and the resulting shapes are printed.
    '''

    def _windows(seq, length):
        # All sliding windows of `length` rows; none if seq is too short.
        if len(seq) < length:
            return []
        return [seq[n:n+length] for n in range(len(seq)-length+1)]

    for idx_df, df_sub in enumerate(np.array_split(df, 3)):
        # Drop songs that are too rare within this part.
        song_ids, counts = np.unique(df_sub['songId'], return_counts=True)
        df_sub = df_sub[~df_sub['songId'].isin(song_ids[counts <= MIN_THRESH])]

        # Re-assign song ids to consecutive integers, then restore time order.
        df_sub = df_sub.sort_values(by=['songId'])
        df_sub['songId'] = df_sub.groupby(['songId']).ngroup()
        df_sub = df_sub.sort_values(by=['timestamp'])

        user_splits = {}
        train_items = set()

        for usr in df_sub['userId'].unique():
            df_usr = df_sub[df_sub['userId'] == usr].sort_values(by=['timestamp'])
            seq_usr = df_usr[['userId', 'songId']].values

            seq_train_len = int(len(seq_usr) * 0.9)
            seq_val_len = seq_train_len + int((len(seq_usr) - seq_train_len)/2)

            seq_train = seq_usr[:seq_train_len]
            user_splits[usr] = [seq_train, seq_usr[seq_train_len:seq_val_len], seq_usr[seq_val_len:]]
            train_items.update(seq_train[:,1])

        if visualize and user_splits:
            # Histogram over the item ids only. The per-user arrays have different
            # lengths and also carry the user id, so they are not flattened as a whole.
            seq_visualize = np.concatenate([split[0][:,1] for split in user_splits.values()]).astype(np.int64)
            plt.figure()  # fresh figure per fold, otherwise the histograms pile up
            plt.hist(x=seq_visualize, bins=10)
            plt.xlabel('Item identification numbers')
            plt.ylabel('Number of occurrences')
            plt.savefig('{}/exp4/freq_dist_f{}'.format(HOME_FOLDER, idx_df+7))

        # Keep only validation/test interactions whose item was seen during training.
        # This does not depend on the sequence length, so it is done once per user.
        train_items = np.array(list(train_items))
        for split in user_splits.values():
            split[1] = split[1][np.isin(split[1][:,1], train_items)]
            split[2] = split[2][np.isin(split[2][:,1], train_items)]

        # A local name is used for the length so that the module-level
        # MAX_LENGTH_SEQUENCE constant is not shadowed.
        for seq_len in [50, 20, 10, 5]:
            sequences_test, sequences_val, sequences_train = [], [], []
            for seq_train, seq_val, seq_test in user_splits.values():
                sequences_test.extend(_windows(seq_test, seq_len))
                sequences_val.extend(_windows(seq_val, seq_len))
                sequences_train.extend(_windows(seq_train, seq_len))

            # Create and save arrays
            sequences_test = np.array(sequences_test)
            sequences_train = np.array(sequences_train)
            sequences_val = np.array(sequences_val)
            FOLDER = '{}/exp4/sequences_{}_len_{}.npy'.format(HOME_FOLDER, idx_df+7, seq_len)

            # The three splits have different lengths, so they are stored in an explicit
            # object array; np.array([...]) on ragged input raises on NumPy >= 1.24.
            bundle = np.empty(3, dtype=object)
            bundle[0] = sequences_train
            bundle[1] = sequences_val
            bundle[2] = sequences_test

            np.save(FOLDER, bundle)
            print('Shape train sequences: {}, shape val sequences: {}, shape test sequences: {}'.format(sequences_train.shape, sequences_val.shape, sequences_test.shape))

In [0]:
# Raw data for experiment 4, loaded with subset_perc=0.3.
df = load_raw_dataset(subset_perc=0.3)

In [0]:
# Write the experiment-4 sequence files (folds 7-9, lengths 50/20/10/5)
# without saving the item-frequency histograms.
gen_sequences_exp_4(df, visualize=False)

### Experiment

In [0]:
# Settings for experiment 4 (module-level variables set for the experiment cell below)

# Models compared, and the controller name per user-based flag
# (index 0 = vanilla, 1 = user-based).
model_types = ['dnc', 'gru']
controller_types = ['gru', 'gru_user']

# Model dimensions
embedding_size = 128
hidden_size = 128

# Training
batch_size = 1000
lr = 0.001
optimizer_type = 'adam'
wd, wd_user = 0, 0.01
clip_controller = 5.0
patience_early_stopping = 1
gpu = 0

# For sequence lengths:
max_input_count_per_batch = 20 * batch_size

# DNC SPECIFIC
layer_sizes = [hidden_size]
lstm_use_all_outputs = False
return_sequences = False

mask_min = 0.0
grad_clip = 10

dealloc_content = True
sharpness_control = True
masked_lookup = True

In [0]:
# Experiment 4: for every sequence length, model type, user-based flag and fold,
# run a hyperparameter search followed by a final training run on train+val.
# Each result file holds [hyperparams, fold_results]; the results cell reads the
# metrics from index 1. A file with a single entry means that only the
# hyperparameter search has finished, so the run resumes with the final training.
file_set = '{}/exp4/sequences_{}_len_{}.npy'
n_epochs = 25
max_input_count_per_batch = 80000

for seq_len in [5, 10, 20, 50]:
    for model_type in model_types:
        print('---------')
        print('Using model type {}'.format(model_type))
        for user_based in [0, 1]:
            print('User based {}'.format(user_based))
            for idx_fold in [7, 8, 9]:
                file_url = file_set.format(HOME_FOLDER, idx_fold, seq_len)
                controller_type = controller_types[user_based]
                file_results = '{}/exp4/results_model_type_{}_ub_{}_f{}_len_{}.npy'.format(HOME_FOLDER, model_type, user_based, idx_fold, seq_len)

                # Reset per configuration so that hyperparameters found for a
                # previous configuration are never reused by accident.
                hyperparams = None
                results = []
                if os.path.isfile(file_results):
                    # Converted to a list: an ndarray has no .append().
                    results = list(np.load(file_results, allow_pickle=True))
                    if len(results) > 1:
                        continue
                    if results:
                        # Only the hyperparameter search was stored; reuse it.
                        hyperparams = results[0]
                model_file = '{}/exp4/models/model_type_{}_ub_{}_f{}_len_{}_hp.pth'.format(HOME_FOLDER, model_type, user_based, idx_fold, seq_len)

                n_read_heads, mem_count, data_word_size = 1, 128, 64
                print(file_url)
                train_loader, val_loader, test_loader, size_voca, size_users = load_data(location_files=file_url)

                if not bool(user_based):
                    size_users = None

                if hyperparams is None:
                    hyperparams = train(n_epochs, size_voca, train_loader, test_loader, val_loader, size_users)
                    results = [hyperparams]
                    # dtype=object: the entries are heterogeneous Python objects.
                    np.save(file_results, np.array(results, dtype=object))
                print(hyperparams)
                model_file = '{}/exp4/models/model_type_{}_ub_{}_f{}_len_{}_fin.pth'.format(HOME_FOLDER, model_type, user_based, idx_fold, seq_len)

                train_loader, test_loader, size_voca, size_users, items_not_popular, items_semi_popular, items_popular = load_data(location_files=file_url, merge_train_val=True)

                if not bool(user_based):
                    size_users = None

                print('Following hyperparameters were found: {}'.format(hyperparams))

                fold_results = train(hyperparams['n_epochs'], size_voca, train_loader, test_loader, val_loader=None, size_users=size_users, items_not_popular=items_not_popular, items_semi_popular=items_semi_popular, items_popular=items_popular)

                results.append(fold_results)
                np.save(file_results, np.array(results, dtype=object))
                print('----------------------------------------')

### Results

In [0]:
# Collect the experiment-4 metrics: for every sequence length, model type and
# user-based flag, average each metric over folds 7-9 and append the mean to
# results['<model>_ub_<flag>_<metric>'] (one entry per sequence length, in the
# order 5, 10, 20, 50).
controller_types = ['gru', 'gru_user']
model_types = ['dnc', 'gru']

# Column of the per-fold metric vector that feeds each result key.
# NOTE(review): the original inline comment lists the vector as
# "Recall@1, Recall@20, MRR@20, recall20_p/sp/np, recall_1_p/sp/np, loss",
# which would put Recall@1 in column 0 -- confirm against train().
metric_columns = {
    'r20': 0,
    'r1': 1,
    'r20_p': 3,
    'r20_sp': 4,
    'r20_np': 5,
    'r1_p': 6,
    'r1_sp': 7,
    'r1_np': 8,
}

results = {}
for seq_len in [5, 10, 20, 50]:
    for model_type in model_types:
        for user_based in [0, 1]:
            temp = []
            for idx_fold in [7, 8, 9]:
                file_results = '{}/exp4/results_model_type_{}_ub_{}_f{}_len_{}.npy'.format(HOME_FOLDER, model_type, user_based, idx_fold, seq_len)
                res = np.load(file_results, allow_pickle=True)

                temp.append(res[1])

            fold_scores = np.array(temp)
            for metric_name, column in metric_columns.items():
                result_key = '{}_ub_{}_{}'.format(model_type, user_based, metric_name)
                results.setdefault(result_key, []).append(np.mean(fold_scores[:, column]))

In [0]:
def exp_4_results(res, visualize=False):
    for i, key in enumerate(['r20', 'r20_p', 'r20_sp', 'r20_np', 'r1', 'r1_p', 'r1_sp', 'r1_np']):
        seq_lens = [5, 10, 20, 50]#, 50, 100]
        if visualize:
            fig = plt.figure()
            ax = plt.subplot(111)
            ax.plot(seq_lens, results['gru_ub_1_{}'.format(key)], label='User-based GRU', marker='o')
            ax.plot(seq_lens, results['dnc_ub_1_{}'.format(key)], label='User-based DNC', marker='o')
            ax.plot(seq_lens, results['dnc_ub_0_{}'.format(key)], label='Vanilla GRU', marker='o')
            ax.plot(seq_lens, results['gru_ub_0_{}'.format(key)], label='Vanilla DNC', marker='o')
            # plt.title('Recall@1 for various sequence lengths.')
            if i < 4:
                plt.ylabel('Recall@20')
                plt.ylim(0.2, 0.3)
            else:
                plt.ylabel('Recall@1')
                plt.ylim(0.15, 0.21)

            plt.xlabel('Sequence lengths')
            #plt.ylim(0.15, 0.2) #0.2, 0.3  0.15 - 0.2
            plt.xticks([5, 10, 20, 50])

            ticks = [5, 10, 20, 50]
            xm = [20, 50]

            ax.set_xscale('log', basex=2)
            ax.set_xticks(seq_lens)
            ax.set_xticks(xm, minor=True)
            ax.set_xticklabels(ticks)
            ax.set_xticklabels([""]*len(xm), minor=True)

            plt.xticks(seq_lens)
            ax.legend()
            plt.savefig('{}/exp4/{}_plot_exp4.png'.format(HOME_FOLDER, key))
        else:
            df = pd.DataFrame(columns=['Model type', 'Sequence length 5', 'Sequence length 10', 'Sequence length 20', 'Sequence length 50'])
            idx = 0
            for model_type in ['gru', 'dnc']:
                for ub in [0, 1]:
                    if (model_type == 'gru'):
                        if bool(ub):
                            model_type_text = 'User-based GRU'
                        else:
                            model_type_text = 'Vanilla GRU'
                    else:
                        if bool(ub):
                            model_type_text = 'User-based DNC'
                        else:
                            model_type_text = 'Vanilla DNC'
                    temp_res = [float(np.round(x, 3)) for x in results['{}_ub_{}_{}'.format(model_type, ub, key)]]
                    df.loc[idx] = [model_type_text] + temp_res
                    idx +=1

            # To latex
            print(df.to_latex(index=False))
            print('---')

In [0]:
# Latex Tables: print one table per metric for the aggregated experiment-4 results.
exp_4_results(results)

In [0]:
# Plots: save one figure per metric for the aggregated experiment-4 results.
exp_4_results(results, visualize=True)