In [85]:
import os 
import pandas as pd
import numpy as np

import torch
import torch.nn as nn 

from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split

In [95]:
ratings_df = pd.read_csv('../epinions/rating.txt', '\t', header=None, names=['item','user','rating','status',
                                                                'creation','last_modified','type','vertical_id'])
social_df = pd.read_csv('../epinions/user_rating.txt', '\t', header=None, names=['user','target_user','trust_value','creation'])

  ratings_df = pd.read_csv('../epinions/rating.txt', '\t', header=None, names=['item','user','rating','status',
  social_df = pd.read_csv('../epinions/user_rating.txt', '\t', header=None, names=['user','target_user','trust_value','creation'])


In [96]:
ratings_df = ratings_df[['item','user','rating']]
user2idx = {user:idx for idx, user in enumerate(ratings_df['user'].unique())}
idx2user = {idx:user for idx, user in enumerate(ratings_df['user'].unique())}
item2idx = {item:idx for idx, item in enumerate(ratings_df['item'].unique())}
idx2item = {idx:item for idx, item in enumerate(ratings_df['item'].unique())}
ratings_df['user'] = ratings_df['user'].map(user2idx)
ratings_df['item'] = ratings_df['item'].map(item2idx)
ratings_df['rating'] = ratings_df['rating'].apply(lambda x: (x-1)/4)
ratings_df.head()

Unnamed: 0,item,user,rating
0,0,0,1.0
1,0,1,1.0
2,0,2,1.0
3,0,3,1.0
4,0,4,1.0


In [97]:
ratings_df = ratings_df.loc[(ratings_df['user'] < 5000) & (ratings_df['item'] < 5000)]
len(ratings_df)

99012

In [98]:
social_df = social_df[['user','target_user','trust_value']]
social_df['user'] = social_df['user'].map(user2idx)
social_df['target_user'] = social_df['target_user'].map(user2idx)
social_df['trust_value'] = social_df['trust_value'].apply(lambda x: 1 if x > 0 else 0)
social_df = social_df.loc[(social_df['user'] < 5000) & (social_df['target_user'] < 5000)]
len(social_df)
social_df.head()

Unnamed: 0,user,target_user,trust_value
91,4953.0,21.0,1
103,2140.0,148.0,1
134,2787.0,2785.0,1
156,2788.0,2785.0,1
170,3493.0,3492.0,1


In [90]:
class Rating(Dataset):
    def __init__(self, df):
        self.df = df
        self.users = self.df['user'].to_numpy(dtype=np.int64)
        self.items = self.df['item'].to_numpy(dtype=np.int64)
        self.ratings = self.df['rating'].to_numpy(dtype=np.int64)
        
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        user = self.users[index]
        item = self.items[index]
        rating = self.ratings[index]
        
        return user, item, rating
    
class Social(Dataset):
    def __init__(self, df):
        self.df = df
        self.users = self.df['user'].to_numpy(dtype=np.int64)
        self.target_users = self.df['target_user'].to_numpy(dtype=np.int64)
        self.trust_values = self.df['trust_value'].to_numpy(dtype=np.int64)
        
        
    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        user = self.users[index]
        target_user = self.target_users[index]
        trust_value = self.trust_values[index]
        
        return user, target_user, trust_value

In [91]:
class SoRec(nn.Module):
    def __init__(self, num_users, num_items, num_factors):
        super(SoRec, self).__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.num_factors = num_factors
        
        self.user_embeddings = nn.Embedding(num_users, num_factors)
        self.item_embeddings = nn.Embedding(num_items, num_factors)
        self.social_embeddings = nn.Embedding(num_users, num_factors)
        
        self.user_embeddings.weight.data.normal_(0,1)
        self.item_embeddings.weight.data.normal_(0,1)
        
    def rating(self, user, item):
        user_embedding = self.user_embeddings(user)
        item_embedding = self.item_embeddings(item)
        
        return torch.sigmoid(torch.sum(user_embedding * item_embedding, dim=1))
    
    def social(self, user, target_user):
        user_embedding = self.user_embeddings(user)
        target_user_embedding = self.social_embeddings(target_user)
        
        return torch.sigmoid(torch.sum(user_embedding * target_user_embedding, dim=1))

In [92]:
def train(model, rating_loader, social_loader, test_loader, criterion, optimizer, device, epochs, alpha):
    model.train()
    
    for epoch in range(epochs):
        
        rating_loss = 0
        social_loss = 0
        test_loss = 0

        print(f'Epoch {epoch+1}')
        
        for user, item, rating in rating_loader:
            user, item, rating = user.to(device), item.to(device), rating.to(device)
            pred = model.rating(user, item)
            loss = criterion(pred, rating)
            optimizer.zero_grad()
            
            loss.backward()
            optimizer.step()
            
            rating_loss += loss.item()
        
        rating_loss = rating_loss / len(rating_loader)
        
        
        for user, target_user, trust_value in social_loader:
            user, target_user, trust_value = user.to(device), target_user.to(device), trust_value.to(device)
            pred = model.social(user, target_user)
            loss = criterion(pred, trust_value)
            loss = loss * alpha
            optimizer.zero_grad()

            loss.backward()
            optimizer.step()
            
            social_loss += loss.item()
            
        social_loss = social_loss / len(social_loader)

        
        with torch.no_grad():
            for user, item, rating in test_loader:
                user, item, rating = user.to(device), item.to(device), rating.to(device)
                pred = model.rating(user, item)
                loss = criterion(pred, rating)
                
                test_loss += loss.item()
                
        test_loss = test_loss / len(test_loader)

        print(f'Epoch: {epoch}, Rating Loss: {rating_loss:.4f}, Social Loss: {social_loss:.4f}, Test Loss: {test_loss:.4f}')
            

In [99]:
train_rating, test_rating = train_test_split(ratings_df, test_size=0.2, random_state=42)
train_dataset, test_dataset = Rating(train_rating), Rating(test_rating)
social_dataset = Social(social_df)

train_dataloader, test_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True), DataLoader(test_dataset, batch_size=64, shuffle=True)
social_dataloader = DataLoader(social_dataset, batch_size=64, shuffle=True)

In [82]:
def RMSELoss(yhat,y):
    return torch.sqrt(torch.mean((yhat-y)**2) + 1e-6)

In [83]:
model = SoRec(5000, 5000, 32)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = RMSELoss

In [None]:
train(model, train_dataloader, social_dataloader, test_dataloader, criterion, optimizer, device, 10, 0.3)

In [84]:
model.train()

for epoch in range(10):
    
    rating_loss = 0
    social_loss = 0
    test_loss = 0

    print(f'Epoch {epoch+1}')
    
    for user, item, rating in train_dataloader:
        user, item, rating = user.to(device), item.to(device), rating.to(device)
        pred = model.rating(user, item)
        loss = criterion(pred, rating)
        optimizer.zero_grad()
        
        loss.backward()
        optimizer.step()
        
        rating_loss += loss.item()
    
    rating_loss = rating_loss / len(train_dataloader)
    
    
    for user, target_user, trust_value in social_dataloader:
        user, target_user, trust_value = user.to(device), target_user.to(device), trust_value.to(device)
        pred = model.social(user, target_user)
        loss = criterion(pred, trust_value)
        loss = loss * 0.3
        optimizer.zero_grad()

        loss.backward()
        optimizer.step()
        
        social_loss += loss.item()
        
    social_loss = social_loss / len(social_dataloader)

    
    with torch.no_grad():
        for user, item, rating in test_dataloader:
            user, item, rating = user.to(device), item.to(device), rating.to(device)
            pred = model.rating(user, item)
            loss = criterion(pred, rating)
            
            test_loss += loss.item()
            
    test_loss = test_loss / len(test_dataloader)

    print(f'Epoch: {epoch}, Rating Loss: {rating_loss:.4f}, Social Loss: {social_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 1
Epoch: 0, Rating Loss: 0.6566, Social Loss: 0.1965, Test Loss: 0.6582
Epoch 2
Epoch: 1, Rating Loss: 0.6304, Social Loss: 0.1891, Test Loss: 0.6571
Epoch 3
Epoch: 2, Rating Loss: 0.5942, Social Loss: 0.1814, Test Loss: 0.6560
Epoch 4
Epoch: 3, Rating Loss: 0.5625, Social Loss: 0.1746, Test Loss: 0.6553
Epoch 5
Epoch: 4, Rating Loss: 0.5347, Social Loss: 0.1682, Test Loss: 0.6531
Epoch 6
Epoch: 5, Rating Loss: 0.5097, Social Loss: 0.1621, Test Loss: 0.6498
Epoch 7
Epoch: 6, Rating Loss: 0.4863, Social Loss: 0.1562, Test Loss: 0.6453
Epoch 8
Epoch: 7, Rating Loss: 0.4641, Social Loss: 0.1501, Test Loss: 0.6386
Epoch 9
Epoch: 8, Rating Loss: 0.4423, Social Loss: 0.1437, Test Loss: 0.6296
Epoch 10
Epoch: 9, Rating Loss: 0.4199, Social Loss: 0.1366, Test Loss: 0.6186
