<a href="https://colab.research.google.com/github/musicjae/recommender-system/blob/main/NCF/pytorch_NCF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Import

In [80]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torch.autograd import Variable

In [81]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#Preprocessing

In [82]:
dataset = pd.read_csv('/content/drive/My Drive/datasets/Movie_Dataset/ratings_small.csv', header=0)


user_id = dataset['userId'].unique().tolist() # ndarray -> list
user2i = {u:i for i,u in enumerate(user_id)}
i2user = {i:u for i,u in enumerate(user_id)}

dataset['user'] = dataset['userId'].map(user2i) # new row 'user'


movie_id = dataset['movieId'].unique().tolist()
movie2i = {m:i for i,m in enumerate(movie_id)}
i2movie = {i:m for i,m in enumerate(movie_id)}

dataset['movie'] = dataset['movieId'].map(movie2i)


dataset['rating'] = dataset['rating'].values

#train/test

pytorch에서는 dataloader에 들어가기 위해 dataframe이 Tensor로 바뀌어야 한다.

In [83]:
X_data = dataset[['user','movie']]
y_data = dataset['rating']

X_data = X_data.values
y_data =y_data.values

print(X_data.shape)
print(y_data.shape)

(100004, 2)
(100004,)


In [84]:
X_data = torch.from_numpy(X_data)
y_data = torch.from_numpy(y_data)

In [73]:
print(X_data[:3])
print(X_data.shape)
print(y_data[:3])

tensor([[0, 0],
        [0, 1],
        [0, 2]])
torch.Size([100004, 2])
tensor([2.5000, 3.0000, 3.0000], dtype=torch.float64)


##DataLoader

In [85]:
train_loader = DataLoader(X_data, batch_size=128,shuffle=True, num_workers=16)
total_batch = len(train_loader)

In [75]:
cnt=0
for i, data in enumerate(train_loader.dataset):
    if cnt <5:
        a,b = data
        print(b)
        cnt+=1
        print(b.dim())

tensor(0)
0
tensor(1)
0
tensor(2)
0
tensor(3)
0
tensor(4)
0


#GMF

In [86]:
EMBEDDING_SIZE = 32
NUM_USERS = len(user2i) # user명에서 i로 변환된것을 사용
NUM_ITEMS = len(i2movie) # 인덱스에서 movie로 변환된 것을 사용할 것

class GMF(nn.Module):
    def __init__(self, NUM_USERS, NUM_ITEMS, Embedding_size=32):
        super(GMF,self).__init__()
        self.num_users = NUM_USERS
        self.num_movies = NUM_ITEMS
        self.embedding_size = EMBEDDING_SIZE

        self.user_embedding = nn.Embedding(self.num_users, self.embedding_size)
        self.user_bias = nn.Embedding(self.num_users, 1)

        self.item_embedding = nn.Embedding(self.num_movies, self.embedding_size)
        self.item_bias = nn.Embedding(self.num_movies, 1)

    def forward(self,inputs):
        user_vec = self.user_embedding(inputs)
        user_vec = user_vec.view(-1, self.embedding_size) # # n>2 dim -> 2dim
        user_bias = self.user_bias(inputs)

        item_vec = self.item_embedding(inputs)
        item_vec = item_vec.view(-1, self.embedding_size)
        item_bias = self.item_bias(inputs)

        #dot =  keras.layers.Dot(axes=1)([user_vec, movie_vec])
        mat = user_vec * item_vec
        x = mat + user_bias + item_bias

        return x

In [87]:
model = GMF(NUM_USERS,NUM_ITEMS,Embedding_size).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
criterion = nn.BCELoss()

In [78]:
print(model)

GMF(
  (user_embedding): Embedding(671, 32)
  (user_bias): Embedding(671, 1)
  (item_embedding): Embedding(9066, 32)
  (item_bias): Embedding(9066, 1)
)


In [89]:
for epoch in range(10):
    print(f'epoch: {epoch}')
    for i, data in enumerate(train_loader.dataset):

        inputs,labels = data
        inputs = torch.as_tensor((inputs,)).cuda()
        labels= torch.as_tensor((labels,)).cuda()
        print(inputs.dim())
        #inputs,labels = Variable(inputs), Variable(labels)

        prediction = model(inputs)
        loss = criterion(prediction, labels)

        print(f'step: {i}\t loss:{loss.data[0]}')

        optimizer.zero_grad()
        loss.backward()


epoch: 0
1


ValueError: ignored

In [None]:
def evaluate(model, X_test, y_test):
    model.eval()

    test_loss = 0
    correct = 0

    with torch.no_grad():
        
        Test_data = X_test
        Label = y_test

        output = model(Test_data)
        test_loss += criterion(output, Label).item()
        prediction = output.max(1,keepdim=True)[1] 

        correct += prediction.eq(Label.view_as(prediction)).sum().item()

    test_loss /= len(X_test)
    test_acc = 100*correct/len(X_test)

    return test_loss, test_acc

In [None]:
EPOCHS = 10
for Epoch in range(1, EPOCHS+1):
    train(model,X_train, y_train, optimizer)
    test_loss, test_accuracy = evaluate(model, X_test, y_test)

    print('\n[EPOCH: {}], \t TEST LOSS: {:.4f}, \tTest Accuracy: {:.2f} %\n'.format(Epoch, test_loss, test_accuracy))

#MLP

In [None]:
class MLP(nn.Module):