### 가즈아!

#### metrics

In [13]:
import math

def compute_metrics(pred_u, target_u, top_k):
    """Compute precision@k, recall@k and NDCG@k for a single user.

    Args:
        pred_u: Item indices sorted by predicted score, best first.
        target_u: Item indices of the user's held-out (test) items.
        top_k: Cut-off ``k`` of the ranking metrics; must be positive.

    Returns:
        Tuple ``(prec_k, recall_k, ndcg_k)``. Recall is normalised by
        ``min(len(target_u), top_k)``. All three values are ``0.0`` when
        ``target_u`` is empty, because nothing can be hit in that case.
    """
    num_target_items = len(target_u)
    if num_target_items == 0:
        # Without relevant items the recall / NDCG denominators would be zero.
        return 0.0, 0.0, 0.0

    # A set gives O(1) membership tests instead of scanning the targets
    # once per recommended item.
    targets = set(target_u)
    hit_ranks = [
        rank
        for rank, item in enumerate(pred_u[:top_k], start=1)
        if item in targets
    ]

    # Ideal DCG: every one of the first min(#targets, k) positions is a hit.
    ideal_len = min(num_target_items, top_k)
    idcg_k = sum(1 / math.log2(rank + 1) for rank in range(1, ideal_len + 1))
    dcg_k = sum(1 / math.log2(rank + 1) for rank in hit_ranks)

    num_hits = len(hit_ranks)
    prec_k = num_hits / top_k
    recall_k = num_hits / ideal_len
    ndcg_k = dcg_k / idcg_k

    return prec_k, recall_k, ndcg_k

#### Load data

In [14]:
import numpy as np
import pandas as pd
import torch

from scipy import sparse
from tqdm import tqdm

from torch.utils.data import DataLoader

from sklearn.model_selection import train_test_split

def get_csr_matrix(data: pd.DataFrame, shape: tuple):
    """Build a sparse user-by-item CSR matrix from an interaction table.

    Args:
        data: Frame with ``user_id`` (row index), ``item_id`` (column index)
            and ``rating`` (stored value) columns.
        shape: ``(num_rows, num_cols)`` of the resulting matrix.

    Returns:
        A ``scipy.sparse.csr_matrix`` of the requested shape.
    """
    users, items, ratings = (
        data[column].tolist() for column in ('user_id', 'item_id', 'rating')
    )
    return sparse.csr_matrix((ratings, (users, items)), shape=shape)

def load_data(data_path, implicit=True):
    """Load the train/valid/test CSV files and turn them into dense matrices.

    Reads ``train_data.csv``, ``valid_data.csv`` and ``test_data.csv`` from
    ``data_path``. Validation and test interactions are split 80/20 per user
    (stratified on ``user_id``) into an input part and a target part.

    Args:
        data_path: Directory containing the three CSV files.
        implicit: When true, every stored rating is replaced by 1.

    Returns:
        Tuple of ``(train, valid_input, valid_target, test_input, test_target,
        train_data, valid_data, test_data, train_valid, rating_cnt,
        num_items)`` where the first five entries are dense arrays,
        ``train_valid`` stacks train and validation ratings (taken before any
        binarisation) and ``rating_cnt`` is its per-item column sum.
    """
    train_data = pd.read_csv(f'{data_path}/train_data.csv')
    valid_data = pd.read_csv(f'{data_path}/valid_data.csv')
    test_data = pd.read_csv(f'{data_path}/test_data.csv')

    # Shuffle rows; train is shuffled before valid so the global RNG is
    # consumed in the same order as before.
    train_data = train_data.sample(frac=1).reset_index(drop=True)
    valid_data = valid_data.sample(frac=1).reset_index(drop=True)

    # Item ids are used as column indices, so the width is the largest id + 1.
    num_items = max(
        frame['item_id'].max() for frame in (train_data, test_data, valid_data)
    ) + 1

    def count_users(frame):
        # Number of distinct users; used as the row count of the matrices,
        # which presumes user ids are 0..n-1 within each file.
        return frame['user_id'].unique().shape[0]

    def split_input_target(frame):
        # 80% of each user's interactions become model input, 20% the target.
        matrix_shape = (count_users(frame), num_items)
        observed, held_out = train_test_split(
            frame, test_size=0.2, stratify=frame['user_id'], random_state=506
        )
        return (
            get_csr_matrix(observed, shape=matrix_shape),
            get_csr_matrix(held_out, shape=matrix_shape),
        )

    train_matrix = get_csr_matrix(train_data, shape=(count_users(train_data), num_items))
    valid_matrix_input, valid_matrix_target = split_input_target(valid_data)
    test_matrix_input, test_matrix_target = split_input_target(test_data)

    num_train_users = train_matrix.shape[0]
    num_valid_users = valid_matrix_input.shape[0]
    num_test_users = test_matrix_input.shape[0]
    num_total_users = num_train_users + num_valid_users + num_test_users

    # Train rows stacked on top of the full validation rows, still holding
    # the raw ratings.
    valid_matrix_full = valid_matrix_input + valid_matrix_target
    train_valid = sparse.vstack([train_matrix, valid_matrix_full]).toarray()
    rating_cnt = train_valid.sum(axis=0)

    print("############################################")
    print(f"# of users: {num_total_users}")
    print(f"# of items: {num_items}")
    print(f"# of train users (ratings): {num_train_users} ({train_matrix.nnz})")
    print(f"# of valid users (# of input ratings, # of target ratings): {num_valid_users} ({valid_matrix_input.nnz}, {valid_matrix_target.nnz})")
    print(f"# of test users (# of input ratings, # of target ratings): {num_test_users} ({test_matrix_input.nnz}, {test_matrix_target.nnz})")
    print("############################################")

    matrices = (
        train_matrix,
        valid_matrix_input,
        valid_matrix_target,
        test_matrix_input,
        test_matrix_target,
    )
    if implicit:
        # Binarise: keep only the fact that an interaction happened.
        for matrix in matrices:
            matrix.data[:] = 1

    dense = tuple(matrix.toarray() for matrix in matrices)
    return (*dense, train_data, valid_data, test_data, train_valid, rating_cnt, num_items)

def eval_implicit(model, eval_input, eval_target, top_k):
    """Evaluate a model with precision@k, recall@k and NDCG@k.

    For every user the items that are zero in ``eval_input`` are ranked by
    predicted score (descending) and compared with the items whose value in
    ``eval_target`` is at least 0.5.

    Args:
        model: Module exposing ``batch_size``, ``device`` and ``forward``;
            it is switched to eval mode.
        eval_input: Dense user-by-item array fed to the model.
        eval_target: Dense user-by-item array holding the held-out items.
        top_k: Cut-off of the ranking metrics.

    Returns:
        Tuple ``(mean precision, mean recall, mean NDCG)`` over all users.
    """
    per_user = []  # one (prec, recall, ndcg) triple per evaluated user

    with torch.no_grad():
        model.eval()

        user_ids = np.arange(eval_input.shape[0])
        for batch_ids in DataLoader(user_ids, batch_size=model.batch_size):
            inputs = torch.FloatTensor(eval_input[batch_ids]).to(model.device)
            scores = model.forward(inputs).cpu().numpy()

            for row, user in enumerate(batch_ids):
                # Only items the user has not interacted with are candidates.
                candidates = np.where(eval_input[user] == 0)[0]
                order = np.argsort(scores[row][candidates])[::-1]  # descending
                ranked_items = candidates[order]

                relevant_items = np.where(eval_target[user] >= 0.5)[0]
                per_user.append(compute_metrics(ranked_items, relevant_items, top_k))

    mean_prec = np.mean([triple[0] for triple in per_user])
    mean_recall = np.mean([triple[1] for triple in per_user])
    mean_ndcg = np.mean([triple[2] for triple in per_user])
    return mean_prec, mean_recall, mean_ndcg

#### Model
1. AE
2. DAE
3. MultiVAE

In [22]:
import numpy as np
import torch
import os
import math
import torch.nn.functional as F
import torch.nn as nn
import random
from IPython import embed

from time import time
from torch.utils.data import DataLoader

import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch

from IPython import embed

from time import time
from torch.utils.data import DataLoader

import numpy as np
import torch
from IPython import embed

from time import time
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

class DAE_implicit(torch.nn.Module):
    """Denoising autoencoder with one hidden layer for implicit feedback.

    Input rows are corrupted with dropout, encoded by a ReLU layer and
    decoded by a sigmoid layer. Training minimises the binary cross-entropy
    between the reconstruction and the uncorrupted rows.

    Args:
        train_matrix: Dense user-by-item training array.
        valid_matrix_input: Validation rows fed to the model.
        valid_matrix_target: Held-out validation rows used for scoring.
        batch_size: Mini-batch size for training and evaluation.
        max_epochs: Upper bound on the number of training epochs.
        hidden_dim: Width of the hidden layer.
        learning_rate: Adam learning rate.
        reg_lambda: Adam weight decay.
        dropout: Probability of dropping an input entry during training.
        eval_topk: ``k`` used for the validation metrics.
        seed: Seed applied at the start of ``fit``.
        device: Torch device the parameters and batches live on.
    """

    def __init__(self, train_matrix, valid_matrix_input, valid_matrix_target, batch_size, max_epochs, hidden_dim, learning_rate, reg_lambda, dropout, eval_topk,seed, device='cpu'):
        super().__init__()
        self.train_matrix = train_matrix
        self.valid_matrix_input = valid_matrix_input
        self.valid_matrix_target = valid_matrix_target

        self.num_items = train_matrix.shape[1]

        self.batch_size = batch_size
        self.max_epochs = max_epochs
        self.hidden_dim = hidden_dim
        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda
        self.dropout = dropout
        self.seed = seed
        self.patience = 5
        self.best_metric = 0
        # Number of validation rounds since the last improvement. Must exist
        # before the first check_early_stop call, which may take the
        # "no improvement" branch. (Attribute name kept for compatibility.)
        self.best_epoch = 0
        self.eval_topk = eval_topk

        self.device = device

        self.build_graph()

    def build_graph(self):
        """Create the parameters W, b, W', b' and the Adam optimizer."""
        self.W = nn.Parameter(torch.empty(self.num_items, self.hidden_dim))
        self.W_prime = nn.Parameter(torch.empty(self.hidden_dim, self.num_items))
        self.b = nn.Parameter(torch.empty(self.hidden_dim))
        self.b_prime = nn.Parameter(torch.empty(self.num_items))
        nn.init.xavier_uniform_(self.W)
        nn.init.normal_(self.b, 0, 0.001)
        nn.init.xavier_uniform_(self.W_prime)
        nn.init.normal_(self.b_prime, 0, 0.001)

        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=self.reg_lambda)

        self.to(self.device)

    def forward(self, x):
        """Return reconstruction probabilities in (0, 1) for the rows of ``x``."""
        # Corrupt the input; F.dropout is a no-op in eval mode.
        x = F.dropout(x, p=self.dropout, training=self.training)

        # Encode, then decode.
        h = F.relu(torch.matmul(x, self.W) + self.b)
        output = torch.sigmoid(torch.matmul(h, self.W_prime) + self.b_prime)

        return output

    def fit(self):
        """Train until ``max_epochs``, a NaN loss, or early stopping.

        Validation (and the early-stopping check on NDCG) runs every 10 epochs.
        """
        np.random.seed(self.seed)
        random.seed(self.seed)
        torch.manual_seed(self.seed)
        train_matrix = torch.FloatTensor(self.train_matrix).to(self.device)
        for epoch in range(0, self.max_epochs):
            self.train()
            loss = self.train_model_per_batch(train_matrix)
            if torch.isnan(loss):
                print('Loss NAN. Train finish.')
                break

            if epoch % 10 == 0:
                print("[DAE] epoch %d, loss: %f"%(epoch, loss))
                prec, recall, ndcg = eval_implicit(self, self.valid_matrix_input, self.valid_matrix_target, self.eval_topk)
                print(f"(DAE VALID) prec@{self.eval_topk} {prec}, recall@{self.eval_topk} {recall}, ndcg@{self.eval_topk} {ndcg}")
                if self.check_early_stop(ndcg):
                    print(f"Early stopping at epoch {epoch}")
                    break

    def train_model_per_batch(self, train_matrix):
        """Run one epoch of mini-batch updates; return the last batch's loss."""
        train_loader = DataLoader(train_matrix, batch_size=self.batch_size)
        for batch_data in train_loader:
            batch_data = batch_data.to(self.device).float()
            self.optimizer.zero_grad()

            output = self.forward(batch_data)

            # Per-user sum of item-wise BCE, averaged over the batch.
            loss = F.binary_cross_entropy(output, batch_data, reduction='none').sum(1).mean()

            loss.backward()
            self.optimizer.step()
        return loss

    def _checkpoint_path(self):
        """Return the file the best weights are stored in."""
        return f'checkpoints/{self.__class__.__name__}_best_model.pt'

    def check_early_stop(self, metric):
        """Track the best validation metric and decide whether to stop.

        On improvement the weights are checkpointed. After more than
        ``patience`` consecutive rounds without improvement the best
        checkpoint (if one was written) is restored and ``True`` is returned.
        """
        checkpoint = self._checkpoint_path()
        if metric > self.best_metric:
            self.best_metric = metric
            self.best_epoch = 0
            # torch.save does not create missing directories.
            os.makedirs(os.path.dirname(checkpoint), exist_ok=True)
            torch.save(self.state_dict(), checkpoint)
            return False

        self.best_epoch += 1
        if self.best_epoch > self.patience:
            # The metric may never have improved, in which case there is
            # nothing to restore.
            if os.path.exists(checkpoint):
                state_dict = torch.load(checkpoint, map_location=self.device)
                self.load_state_dict(state_dict)
            return True
        return False

class MultVAE_implicit(torch.nn.Module):
    """Variational autoencoder with a multinomial likelihood (Mult-VAE).

    A single linear encoder produces the mean and log-variance of the latent
    Gaussian; a single linear decoder produces item logits. The loss is the
    multinomial cross-entropy plus an annealed KL term.

    Args:
        train_matrix: Dense user-by-item training array.
        valid_matrix_input: Validation rows fed to the model.
        valid_matrix_target: Held-out validation rows used for scoring.
        batch_size: Mini-batch size for training and evaluation.
        max_epochs: Upper bound on the number of training epochs.
        hidden_dim: Dimension of the latent variable ``z``.
        learning_rate: Adam learning rate.
        reg_lambda: Adam weight decay.
        dropout: Probability of dropping an input entry during training.
        eval_topk: ``k`` used for the validation metrics.
        seed: Seed applied at the start of ``fit``.
        device: Torch device the parameters and batches live on.
    """

    def __init__(self, train_matrix, valid_matrix_input, valid_matrix_target, batch_size, max_epochs, hidden_dim, learning_rate, reg_lambda, dropout, eval_topk,seed, device='cpu'):
        super().__init__()
        self.train_matrix = train_matrix
        self.valid_matrix_input = valid_matrix_input
        self.valid_matrix_target = valid_matrix_target

        self.num_items = train_matrix.shape[1]

        self.batch_size = batch_size
        self.max_epochs = max_epochs
        self.hidden_dim = hidden_dim
        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda
        self.dropout = dropout

        # KL weight grows linearly from 0 to anneal_cap over this many updates.
        self.total_anneal_steps = 200000
        self.anneal_cap = 0.2
        self.seed = seed
        self.patience = 5
        # Best validation metric seen so far (attribute name kept for
        # compatibility; fit() passes NDCG).
        self.best_recall = 0
        # Validation rounds without improvement; must exist before the first
        # check_early_stop call.
        self.num_patience = 0
        self.eval_topk = eval_topk

        self.update_count = 0
        self.device = device

        self.build_graph()

    def build_graph(self):
        """Create the encoder/decoder parameters and the Adam optimizer."""
        # The encoder emits mean and log-variance side by side, hence 2 * hidden_dim.
        self.W = nn.Parameter(torch.empty(self.num_items, 2 * self.hidden_dim))
        self.b = nn.Parameter(torch.empty(2 * self.hidden_dim))
        self.W_prime = nn.Parameter(torch.empty(self.hidden_dim, self.num_items))
        self.b_prime = nn.Parameter(torch.empty(self.num_items))

        nn.init.xavier_uniform_(self.W)
        nn.init.normal_(self.b, 0, 0.001)
        nn.init.xavier_uniform_(self.W_prime)
        nn.init.normal_(self.b_prime, 0, 0.001)

        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=self.reg_lambda)

        self.to(self.device)

    def forward(self, x):
        """Return item logits, plus the KL term while training.

        Args:
            x: Interaction row(s); the last dimension is the item axis.

        Returns:
            ``(logits, kl_loss)`` in training mode, ``logits`` in eval mode.
        """
        # Corrupt the input; F.dropout is a no-op in eval mode.
        x_denoised = F.dropout(x, p=self.dropout, training=self.training)

        # Encode into the parameters of q(z | x).
        mu, z_log_var = torch.chunk(x_denoised @ self.W + self.b, 2, dim=-1)

        if not self.training:
            # Use the posterior mean so evaluation is deterministic.
            return torch.matmul(mu, self.W_prime) + self.b_prime

        # Reparameterisation trick: z = mu + sigma * eps with eps ~ N(0, I).
        eps = torch.randn_like(mu)
        z = mu + eps * torch.exp(0.5 * z_log_var)

        output = torch.matmul(z, self.W_prime) + self.b_prime

        # KL(q(z | x) || N(0, I)), summed over latent dims, averaged over rows.
        kl_loss = -0.5 * torch.mean(
            torch.sum(1 + z_log_var - mu.pow(2) - z_log_var.exp(), dim=-1)
        )
        return output, kl_loss

    def fit(self):
        """Train until ``max_epochs``, a NaN loss, or early stopping.

        Validation (and the early-stopping check on NDCG) runs every 10 epochs.
        """
        np.random.seed(self.seed)
        random.seed(self.seed)
        torch.manual_seed(self.seed)
        train_matrix = torch.FloatTensor(self.train_matrix).to(self.device)
        for epoch in range(0, self.max_epochs):
            self.train()

            loss = self.train_model_per_batch(train_matrix)

            if torch.isnan(loss):
                print('Loss NAN. Stop training')
                break

            if epoch % 10 == 0:
                print("[MultVAE CF] epoch %d, loss: %f"%(epoch, loss))
                prec, recall, ndcg = eval_implicit(self, self.valid_matrix_input, self.valid_matrix_target, self.eval_topk)
                print(f"(MultVAE VALID) prec@{self.eval_topk} {prec}, recall@{self.eval_topk} {recall}, ndcg@{self.eval_topk} {ndcg}")
                if self.check_early_stop(ndcg):
                    print(f"Early stopping at epoch {epoch}")
                    break

    def train_model_per_batch(self, train_matrix):
        """Run one epoch of mini-batch updates; return the last batch's loss."""
        train_loader = DataLoader(train_matrix, batch_size=self.batch_size)
        for batch_data in train_loader:
            batch_data = batch_data.to(self.device).float()
            self.optimizer.zero_grad()

            output, kl_loss = self.forward(batch_data)

            # Multinomial log-likelihood of the observed interactions.
            ce_loss = -(F.log_softmax(output, 1) * batch_data).sum(1).mean()

            if self.total_anneal_steps > 0:
                self.anneal = min(self.anneal_cap, 1. * self.update_count / self.total_anneal_steps)
            else:
                self.anneal = self.anneal_cap
            loss = ce_loss + kl_loss * self.anneal

            loss.backward()
            self.optimizer.step()

            self.update_count += 1

        return loss

    def check_early_stop(self, metric):
        """Return ``True`` after ``patience`` rounds without improvement."""
        if metric > self.best_recall:
            self.best_recall = metric
            self.num_patience = 0
        else:
            self.num_patience += 1
        return self.num_patience >= self.patience

class AE_implicit(torch.nn.Module):
    """Autoencoder with one sigmoid hidden layer for implicit feedback.

    Training minimises the binary cross-entropy between the sigmoid
    reconstruction and the input rows.

    Args:
        train_matrix: Dense user-by-item training array.
        valid_matrix_input: Validation rows fed to the model.
        valid_matrix_target: Held-out validation rows used for scoring.
        batch_size: Mini-batch size for training and evaluation.
        max_epochs: Upper bound on the number of training epochs.
        hidden_dim: Width of the hidden layer.
        learning_rate: Adam learning rate.
        reg_lambda: Adam weight decay.
        eval_topk: ``k`` used for the validation metrics.
        seed: Seed applied at the start of ``fit``.
        device: Torch device the parameters and batches live on.
    """

    def __init__(self, train_matrix, valid_matrix_input, valid_matrix_target, batch_size, max_epochs, hidden_dim, learning_rate, reg_lambda, eval_topk,seed, device='cpu'):
        super().__init__()
        self.train_matrix = train_matrix
        self.valid_matrix_input = valid_matrix_input
        self.valid_matrix_target = valid_matrix_target

        self.num_items = train_matrix.shape[1]

        self.batch_size = batch_size
        self.max_epochs = max_epochs
        self.hidden_dim = hidden_dim
        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda

        self.patience = 5
        self.best_metric = 0
        # Number of validation rounds since the last improvement. Must exist
        # before the first check_early_stop call, which may take the
        # "no improvement" branch. (Attribute name kept for compatibility.)
        self.best_epoch = 0
        self.eval_topk = eval_topk

        self.device = device
        self.seed = seed
        self.build_graph()

    def build_graph(self):
        """Create the encoder/decoder parameters and the Adam optimizer."""
        # Encoder weights and bias.
        self.enc_w = nn.Parameter(torch.empty(self.num_items, self.hidden_dim))
        self.enc_b = nn.Parameter(torch.empty(self.hidden_dim))
        nn.init.xavier_uniform_(self.enc_w)
        nn.init.normal_(self.enc_b, 0, 0.001)

        # Decoder weights and bias.
        self.dec_w = nn.Parameter(torch.empty(self.hidden_dim, self.num_items))
        self.dec_b = nn.Parameter(torch.empty(self.num_items))
        nn.init.xavier_uniform_(self.dec_w)
        nn.init.normal_(self.dec_b, 0, 0.001)

        self.optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=self.reg_lambda)

        # Move the parameters to the target device.
        self.to(self.device)

    def forward(self, x):
        """Return reconstruction probabilities in (0, 1) for the rows of ``x``."""
        h = torch.sigmoid(x @ self.enc_w + self.enc_b)
        output = torch.sigmoid(h @ self.dec_w + self.dec_b)
        return output

    def fit(self):
        """Train until ``max_epochs``, a NaN loss, or early stopping.

        Validation (and the early-stopping check on NDCG) runs every 10 epochs.
        """
        np.random.seed(self.seed)
        random.seed(self.seed)
        torch.manual_seed(self.seed)
        train_matrix = torch.FloatTensor(self.train_matrix).to(self.device)

        for epoch in range(0, self.max_epochs):
            self.train()
            loss = self.train_model_per_batch(train_matrix)
            if torch.isnan(loss):
                print('Loss NAN. Train finish.')
                break

            if epoch % 10 == 0:
                print("[AE] epoch %d, loss: %f"%(epoch, loss))
                prec, recall, ndcg = eval_implicit(self, self.valid_matrix_input, self.valid_matrix_target, self.eval_topk)
                print(f"(AE VALID) prec@{self.eval_topk} {prec:.5f}, recall@{self.eval_topk} {recall:.5f}, ndcg@{self.eval_topk} {ndcg:.5f}")
                if self.check_early_stop(ndcg):
                    print(f"Early stopping at epoch {epoch}")
                    break

    def train_model_per_batch(self, train_matrix):
        """Run one epoch of mini-batch updates; return the last batch's loss."""
        train_loader = DataLoader(train_matrix, batch_size=self.batch_size)
        for batch_data in train_loader:
            batch_data = batch_data.to(self.device).float()
            self.optimizer.zero_grad()

            output = self.forward(batch_data)

            # Per-user sum of item-wise BCE, averaged over the batch.
            loss = F.binary_cross_entropy(output, batch_data, reduction='none').sum(1).mean()

            loss.backward()
            self.optimizer.step()

        return loss

    def _checkpoint_path(self):
        """Return the file the best weights are stored in."""
        return f'checkpoints/{self.__class__.__name__}_best_model.pt'

    def check_early_stop(self, metric):
        """Track the best validation metric and decide whether to stop.

        On improvement the weights are checkpointed. After more than
        ``patience`` consecutive rounds without improvement the best
        checkpoint (if one was written) is restored and ``True`` is returned.
        """
        checkpoint = self._checkpoint_path()
        if metric > self.best_metric:
            self.best_metric = metric
            self.best_epoch = 0
            # torch.save does not create missing directories.
            os.makedirs(os.path.dirname(checkpoint), exist_ok=True)
            torch.save(self.state_dict(), checkpoint)
            return False

        self.best_epoch += 1
        if self.best_epoch > self.patience:
            # The metric may never have improved, in which case there is
            # nothing to restore.
            if os.path.exists(checkpoint):
                state_dict = torch.load(checkpoint, map_location=self.device)
                self.load_state_dict(state_dict)
            return True
        return False



#### train model

In [16]:
# Basic package imports
from time import time
import numpy as np

import warnings
import random
import warnings
import torch

import numpy as np
import random

# Silence library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# No global seeding here: every model seeds numpy / random / torch itself at
# the start of fit() with the `seed` passed to its constructor. Parameter
# initialisation in the constructors runs before that seeding.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# ---- dataset loading ----
data_path = 'data'
(
    train_matrix,
    valid_matrix_input,
    valid_matrix_target,
    test_matrix_input,
    test_matrix_target,
    train_data,
    valid_data,
    test_data,
    train_valid,
    rating_cnt,
    num_items,
) = load_data(data_path, implicit=True)
topk = 20

# ---- model construction (training itself happens in the evaluation cell) ----
print("Training the model...")
time_start = time()

# Settings shared by all three models.
common_hparams = dict(
    batch_size=2048,
    max_epochs=1000,
    hidden_dim=512,
    learning_rate=0.001,
    reg_lambda=0.001,
    eval_topk=topk,
    seed=506,
    device=device,
)
ae = AE_implicit(train_matrix, valid_matrix_input, valid_matrix_target, **common_hparams)
dae = DAE_implicit(train_matrix, valid_matrix_input, valid_matrix_target, dropout=0.2, **common_hparams)
multvae = MultVAE_implicit(train_matrix, valid_matrix_input, valid_matrix_target, dropout=0.2, **common_hparams)

############################################
# of users: 17017
# of items: 7813
# of train users (ratings): 13613 (984359)
# of valid users (# of input ratings, # of target ratings): 1703 (101464, 25367)
# of test users (# of input ratings, # of target ratings): 1701 (78141, 19536)
############################################
Training the model...


#### Grid search

In [None]:
from sklearn.model_selection import ParameterGrid

# Define hyperparameters and their possible values.
# NOTE(review): `eval_topk` only changes the metric cut-off, so NDCG values
# obtained with different `eval_topk` are not directly comparable — consider
# fixing it to a single value.
hyperparams = {
    'batch_size': [2048, 4096],
    'max_epochs': [1000, 2000],
    'hidden_dim': [128,256, 512],
    'learning_rate': [0.001, 0.01],
    'reg_lambda': [0.001, 0.01],
    # 'dropout': [0.3,0.5, 0.7],
    'eval_topk': [20,50,100],
    'seed': [42, 506, 123]
}

# Generate all possible combinations of hyperparameters
param_grid = ParameterGrid(hyperparams)

# Train one model per combination and keep the best one. Selection uses the
# validation split: picking hyperparameters on the test split would leak test
# information into the reported test score.
best_model = None
best_params = None
best_metric = float('-inf')
for params in param_grid:
    print(f"Training model with hyperparameters: {params}")
    model = AE_implicit(train_matrix, valid_matrix_input, valid_matrix_target, device=device, **params)
    model.fit()
    _, _, ndcg = eval_implicit(model, valid_matrix_input, valid_matrix_target, params['eval_topk'])
    print(f"Evaluated model with ndcg@{params['eval_topk']}: {ndcg}")
    if ndcg > best_metric:
        best_metric = ndcg
        best_model = model
        best_params = params

# Print best performing model and its hyperparameters. Report the parameter
# dict rather than the model's __dict__, which also holds the data matrices
# and torch module internals.
if best_model is None:
    print("No model produced a valid ndcg; nothing to report.")
else:
    print(f"Best performing model with ndcg@{best_model.eval_topk}: {best_metric}")
    print(f"Hyperparameters: {best_params}")

#### evaluation

In [18]:
# Train the three models one after another (AE, DAE, MultVAE).
for net in (ae, dae, multvae):
    net.fit()
print("training time: ", time()-time_start)
time_start = time()

# ---- model evaluation on the test split ----
print("model evaluation")
(
    (ae_prec, ae_recall, ae_ndcg),
    (dae_prec, dae_recall, dae_ndcg),
    (multvae_prec, multvae_recall, multvae_ndcg),
) = (
    eval_implicit(net, test_matrix_input, test_matrix_target, topk)
    for net in (ae, dae, multvae)
)
print("evaluation time: ", time()-time_start)

print(f"AE: prec@{topk} {ae_prec:.5}, recall@{topk} {ae_recall:.5f}, ndcg@{topk} {ae_ndcg:.5}")
print(f"DAE: prec@{topk} {dae_prec:.5}, recall@{topk} {dae_recall:.5}, ndcg@{topk} {dae_ndcg:.5}")
print(f"MultVAE: prec@{topk} {multvae_prec:.5}, recall@{topk} {multvae_recall:.5}, ndcg@{topk} {multvae_ndcg:.5}")

[AE] epoch 0, loss: 1657.729614
(AE VALID) prec@20 0.00232, recall@20 0.00347, ndcg@20 0.00244
[AE] epoch 10, loss: 1778.862305
(AE VALID) prec@20 0.02023, recall@20 0.03424, ndcg@20 0.02825
[AE] epoch 20, loss: 1631.737549
(AE VALID) prec@20 0.02043, recall@20 0.03434, ndcg@20 0.02858
[AE] epoch 30, loss: 1435.873169
(AE VALID) prec@20 0.02240, recall@20 0.03799, ndcg@20 0.03069
[AE] epoch 40, loss: 1290.915771
(AE VALID) prec@20 0.02408, recall@20 0.04085, ndcg@20 0.03268
[AE] epoch 50, loss: 1204.558716
(AE VALID) prec@20 0.02504, recall@20 0.04213, ndcg@20 0.03552
[AE] epoch 60, loss: 1150.796265
(AE VALID) prec@20 0.02686, recall@20 0.04450, ndcg@20 0.03768
[AE] epoch 70, loss: 1112.673340
(AE VALID) prec@20 0.02954, recall@20 0.04962, ndcg@20 0.04242
[AE] epoch 80, loss: 1082.592773
(AE VALID) prec@20 0.03371, recall@20 0.05864, ndcg@20 0.04976
[AE] epoch 90, loss: 1058.761841
(AE VALID) prec@20 0.03644, recall@20 0.06442, ndcg@20 0.05438
[AE] epoch 100, loss: 1038.859131
(AE VAL

#### Submit Kaggle

In [19]:
# autoencoder: write the top-20 unseen items per test user to a CSV file.
num_users_test = test_data['user_id'].unique().shape[0]
with torch.no_grad():
    ae.eval()

    # Collect one frame per user and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    user_frames = []
    for user_id in tqdm(range(num_users_test)):
        input_by_user = torch.FloatTensor(test_matrix_input[user_id]).to(ae.device)
        preds = ae(input_by_user).cpu().numpy()

        # Rank only the items the user has not interacted with, best first.
        missing_item_ids = np.where(test_matrix_input[user_id] == 0)[0]
        pred_u_score = preds[missing_item_ids]
        pred_u_idx = np.argsort(pred_u_score)[::-1]
        pred_u = missing_item_ids[pred_u_idx]

        # Size the user_id column from the actual slice so users with fewer
        # than 20 candidates do not cause a length mismatch.
        top_items = pred_u[:20]
        user_frames.append(pd.DataFrame({'user_id': [user_id] * len(top_items), 'item_id': top_items}))

    if user_frames:
        submission = pd.concat(user_frames)
    else:
        submission = pd.DataFrame(columns=['user_id', 'item_id'])
    submission.to_csv('submission_AE_model2.csv', index=False)

100%|██████████| 1701/1701 [00:02<00:00, 719.23it/s]


In [20]:
# DAE: write the top-20 unseen items per test user to a CSV file.
num_users_test = test_data['user_id'].unique().shape[0]
with torch.no_grad():
    dae.eval()

    # Collect one frame per user and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    user_frames = []
    for user_id in tqdm(range(num_users_test)):
        input_by_user = torch.FloatTensor(test_matrix_input[user_id]).to(dae.device)
        preds = dae(input_by_user).cpu().numpy()

        # Rank only the items the user has not interacted with, best first.
        missing_item_ids = np.where(test_matrix_input[user_id] == 0)[0]
        pred_u_score = preds[missing_item_ids]
        pred_u_idx = np.argsort(pred_u_score)[::-1]
        pred_u = missing_item_ids[pred_u_idx]

        # Size the user_id column from the actual slice so users with fewer
        # than 20 candidates do not cause a length mismatch.
        top_items = pred_u[:20]
        user_frames.append(pd.DataFrame({'user_id': [user_id] * len(top_items), 'item_id': top_items}))

    if user_frames:
        submission = pd.concat(user_frames)
    else:
        submission = pd.DataFrame(columns=['user_id', 'item_id'])
    submission.to_csv('submission_DAE_mode2.csv', index=False)

100%|██████████| 1701/1701 [00:02<00:00, 722.87it/s]


##### Ensemble

In [24]:

# Ensemble: average the AE and DAE scores and write the top-20 unseen items
# per test user to a CSV file.
num_users_test = test_data['user_id'].unique().shape[0]


with torch.no_grad():
    ae.eval()
    dae.eval()
    # multvae.eval()

    # Collect one frame per user and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    user_frames = []
    for user_id in tqdm(range(num_users_test)):
        input_by_user = torch.FloatTensor(test_matrix_input[user_id]).to(device)

        # Get predictions from each model
        ae_preds = ae(input_by_user).cpu().numpy()
        dae_preds = dae(input_by_user).cpu().numpy()
        # multvae_preds = multvae(input_by_user).cpu().numpy()

        # Ensemble the predictions (both models output sigmoid probabilities,
        # so a plain average keeps them on the same scale).
        preds = (ae_preds + dae_preds) / 2

        # Rank only the items the user has not interacted with, best first.
        missing_item_ids = np.where(test_matrix_input[user_id] == 0)[0]
        pred_u_score = preds[missing_item_ids]
        pred_u_idx = np.argsort(pred_u_score)[::-1]
        pred_u = missing_item_ids[pred_u_idx]

        # Size the user_id column from the actual slice so users with fewer
        # than 20 candidates do not cause a length mismatch.
        top_items = pred_u[:20]
        user_frames.append(pd.DataFrame({'user_id': [user_id] * len(top_items), 'item_id': top_items}))

if user_frames:
    submission = pd.concat(user_frames)
else:
    submission = pd.DataFrame(columns=['user_id', 'item_id'])
submission.to_csv('submission_ensemble2.csv', index=False)

100%|██████████| 1701/1701 [00:02<00:00, 638.96it/s]
