In [None]:
import numpy as np
import pandas as pd
import os, sys
from collections import defaultdict

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

In [None]:
!wget -q --show-progress https://github.com/sparsh-ai/stanza/raw/S629908/rec/CDL/data/ml_100k_train.npy
!wget -q --show-progress https://github.com/sparsh-ai/stanza/raw/S629908/rec/CDL/data/ml_100k_test.npy



In [None]:
class AutoRecData(Dataset):
    """Dataset that serves one row or one column of a rating matrix per sample.

    Args:
        train: 2-D array whose shape is unpacked as ``(n_user, n_item)``.
        based_on: ``'item'`` to yield columns (one vector per item, indexed
            over users) or ``'user'`` to yield rows (one vector per user,
            indexed over items).

    Raises:
        ValueError: if ``based_on`` is neither ``'item'`` nor ``'user'``.
    """

    def __init__(self, train, based_on):
        super(AutoRecData, self).__init__()
        # Fail fast: without this check an unknown mode makes __len__ and
        # __getitem__ silently return None, which only surfaces later as an
        # opaque error inside the DataLoader.
        if based_on not in ('item', 'user'):
            raise ValueError(
                f"based_on must be 'item' or 'user', got {based_on!r}"
            )
        self.train = train
        self.based_on = based_on
        self.n_user, self.n_item = train.shape

    def __len__(self):
        """Return the number of samples: items in item mode, users in user mode."""
        return self.n_item if self.based_on == 'item' else self.n_user

    def __getitem__(self, idx):
        """Return the ``idx``-th column (item mode) or row (user mode) as a float32 tensor."""
        if self.based_on == 'item':
            vector = self.train[:, idx]
        else:
            vector = self.train[idx, :]
        return torch.tensor(vector).float()

In [None]:
class Config:
    """Hyperparameters and runtime settings for the AutoRec experiment."""

    lr = 0.01              # Adam learning rate
    weight_decay = 5e-4    # weight decay passed to the optimizer
    based_on = 'item'      # 'item' -> samples are columns, 'user' -> samples are rows
    batch_size = 64
    hidden_dim = 15        # width of the autoencoder bottleneck
    epochs = 30
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    @property
    def input_dim(self):
        """Length of one input vector, derived from the global ``train`` matrix.

        Evaluated lazily on access rather than while the class body executes,
        so this cell can run before the cell that loads ``train`` (the
        notebook defines ``Config`` first). In item mode each sample spans
        ``train.shape[0]`` entries; otherwise it spans ``train.shape[1]``.
        """
        return train.shape[0] if self.based_on == 'item' else train.shape[1]


config = Config()

In [None]:
class AutoRec(nn.Module):
    """Single-hidden-layer autoencoder: linear encoder, sigmoid, linear decoder.

    Args:
        input_dim: size of each input vector.
        hidden_dim: size of the hidden (bottleneck) representation.
        output_dim: size of each reconstructed vector.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(AutoRec, self).__init__()
        self.enc = nn.Linear(input_dim, hidden_dim)
        self.dec = nn.Linear(hidden_dim, output_dim)
        # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid.
        self.activate = torch.sigmoid

    def forward(self, x):
        """Encode ``x``, apply the sigmoid, and decode; the output has no activation."""
        hidden = self.activate(self.enc(x))
        return self.dec(hidden)

In [None]:
# Load the two rating matrices fetched by the wget cell. AutoRecData unpacks
# each matrix's shape as (n_user, n_item).
train, test = (np.load(path) for path in ('ml_100k_train.npy', 'ml_100k_test.npy'))

In [None]:
train.shape, test.shape

((943, 1682), (943, 1682))

In [None]:
# Print only the first batch yielded by the training loader, then stop.
# NOTE(review): `trainloader` is created in the training cell further down,
# so on a fresh kernel this cell only works after that cell has run.
for x in trainloader:
    print(x)
    break

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [3., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [5., 0., 0.,  ..., 0., 0., 0.]])


In [None]:
# Build datasets and loaders. The test loader uses a much larger batch size
# (batch_size * 100) than the training loader.
trainset = AutoRecData(train, config.based_on)
testset = AutoRecData(test, config.based_on)
trainloader = DataLoader(trainset, batch_size=config.batch_size, shuffle=False, drop_last=False)
testloader = DataLoader(testset, batch_size=config.batch_size*100, shuffle=False, drop_last=False)

model = AutoRec(input_dim=config.input_dim, hidden_dim=config.hidden_dim, output_dim=config.input_dim)
model = model.to(config.device)
optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)

# history['tr'] / history['test'] hold one RMSE per epoch, computed over
# observed ratings only (entries > 0).
history = defaultdict(list)
for epoch in range(config.epochs):
    # ---- training pass ----
    model.train()
    train_sse, train_obs = 0.0, 0
    for x in trainloader:
        x = x.to(config.device)
        mask = x > 0
        n_obs = int(mask.sum().item())
        if n_obs == 0:
            # A batch with no observed ratings would give mean(empty) == NaN
            # and poison the whole epoch metric; it carries no signal, so skip it.
            continue
        optimizer.zero_grad()
        pred = model(x)
        loss = torch.mean(((x - pred)[mask])**2)
        loss.backward()
        optimizer.step()
        # Accumulate the summed squared error so the epoch RMSE weights every
        # observed rating equally instead of averaging per-batch RMSEs.
        train_sse += loss.item() * n_obs
        train_obs += n_obs
    history['tr'].append(np.sqrt(train_sse / train_obs) if train_obs else float('nan'))

    # ---- evaluation pass ----
    # Uses its own accumulators: reusing the training list here previously
    # mixed training losses (and their NaNs) into the reported test metric.
    # NOTE(review): the model is fed the test matrix itself as input; confirm
    # whether predictions should instead be produced from the training
    # ratings and scored on the test entries.
    model.eval()
    test_sse, test_obs = 0.0, 0
    with torch.no_grad():
        for x in testloader:
            x = x.to(config.device)
            mask = x > 0
            n_obs = int(mask.sum().item())
            if n_obs == 0:
                continue
            pred = model(x)
            test_sse += torch.sum(((x - pred)[mask])**2).item()
            test_obs += n_obs
    history['test'].append(np.sqrt(test_sse / test_obs) if test_obs else float('nan'))
    print(f'EPOCH {epoch+1}: TRAINING loss {history["tr"][-1]} VALID loss {history["test"][-1]}')

EPOCH 1: TRAINING loss nan VALID loss nan
EPOCH 2: TRAINING loss nan VALID loss nan
EPOCH 3: TRAINING loss nan VALID loss nan
EPOCH 4: TRAINING loss nan VALID loss nan
EPOCH 5: TRAINING loss nan VALID loss nan
EPOCH 6: TRAINING loss nan VALID loss nan
EPOCH 7: TRAINING loss nan VALID loss nan
EPOCH 8: TRAINING loss nan VALID loss nan
EPOCH 9: TRAINING loss nan VALID loss nan
EPOCH 10: TRAINING loss nan VALID loss nan
EPOCH 11: TRAINING loss nan VALID loss nan
EPOCH 12: TRAINING loss nan VALID loss nan
EPOCH 13: TRAINING loss nan VALID loss nan
EPOCH 14: TRAINING loss nan VALID loss nan
EPOCH 15: TRAINING loss nan VALID loss nan
EPOCH 16: TRAINING loss nan VALID loss nan
EPOCH 17: TRAINING loss nan VALID loss nan
EPOCH 18: TRAINING loss nan VALID loss nan
EPOCH 19: TRAINING loss nan VALID loss nan
EPOCH 20: TRAINING loss nan VALID loss nan
EPOCH 21: TRAINING loss nan VALID loss nan
EPOCH 22: TRAINING loss nan VALID loss nan
EPOCH 23: TRAINING loss nan VALID loss nan
EPOCH 24: TRAINING l

---

In [None]:
!pip install -q watermark
%reload_ext watermark
%watermark -a "Sparsh A." -m -iv -u -t -d

Author: Sparsh A.

Last updated: 2021-11-28 16:07:35

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.4.104+
Machine     : x86_64
Processor   : x86_64
CPU cores   : 2
Architecture: 64bit

torch     : 1.10.0+cu111
IPython   : 5.5.0
sys       : 3.7.12 (default, Sep 10 2021, 00:21:48) 
[GCC 7.5.0]
numpy     : 1.19.5
pandas    : 1.1.5
matplotlib: 3.2.2



---

**END**