In [1]:
# We have already stored this in a separate file.
import numpy as np
import torch
from torch.utils.data import Dataset


class RatingsTrainDataset(Dataset):
    """Implicit-feedback training set of (user, item, label) triples.

    Every distinct (user, item) pair found in ``ratings`` becomes a positive
    example (label 1) and is followed by ``num_negatives`` randomly sampled
    items that the user has no recorded interaction with (label 0).

    Args:
        ratings: Object whose ``'user_id_int'`` and ``'product_id_int'``
            entries can be iterated in parallel (presumably a pandas
            DataFrame -- confirm against the caller).
        all_product_ids: Sequence of item ids negatives are drawn from.
        num_negatives: Number of negatives sampled per positive pair.
    """

    def __init__(self, ratings, all_product_ids, num_negatives=4):
        self.users, self.items, self.labels = self.get_dataset(
            ratings, all_product_ids, num_negatives
        )

    def __len__(self):
        """Return the total number of (positive + negative) examples."""
        return len(self.users)

    def __getitem__(self, idx):
        """Return the ``(user, item, label)`` tensors at position ``idx``."""
        return self.users[idx], self.items[idx], self.labels[idx]

    def get_dataset(self, ratings, all_product_ids, num_negatives=4):
        """Build the user, item and label tensors.

        Negatives are drawn with ``np.random.choice`` and re-drawn until the
        (user, item) pair is not a known positive. Users who have already
        interacted with every candidate item get no negatives, because that
        rejection loop could never terminate for them.

        Returns:
            Tuple ``(users, items, labels)`` of equally long 1-D tensors.
        """
        positive_pairs = set(zip(ratings['user_id_int'], ratings['product_id_int']))

        # Items each user has interacted with, used to detect users for whom
        # no valid negative exists.
        seen_by_user = {}
        for user, item in positive_pairs:
            seen_by_user.setdefault(user, set()).add(item)
        candidate_items = set(np.asarray(all_product_ids).tolist())

        users, items, labels = [], [], []
        for user, item in positive_pairs:
            users.append(user)
            items.append(item)
            labels.append(1)

            # Nothing left to sample (also covers an empty candidate list).
            if candidate_items <= seen_by_user[user]:
                continue

            for _ in range(num_negatives):
                negative_item = np.random.choice(all_product_ids)
                while (user, negative_item) in positive_pairs:
                    negative_item = np.random.choice(all_product_ids)
                users.append(user)
                items.append(negative_item)
                labels.append(0)
        return torch.tensor(users), torch.tensor(items), torch.tensor(labels)

In [2]:
# We have already stored this in a separate file.
import torch.nn as nn
import pytorch_lightning as pl
from torch.utils.data import DataLoader

class NCF(pl.LightningModule):
    """Neural collaborative filtering network trained on implicit feedback.

    A user id and an item id are each embedded into 8 dimensions, the two
    embeddings are concatenated, passed through two ReLU-activated linear
    layers (16 -> 64 -> 32) and squashed by a sigmoid into one score.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        ratings: Ratings data handed to ``RatingsTrainDataset`` for training.
        all_product_ids: Item ids handed to ``RatingsTrainDataset``.
    """

    def __init__(self, num_users, num_items, ratings, all_product_ids):
        super().__init__()
        # Embedding tables: one 8-dimensional vector per user / per item.
        self.user_embedding = nn.Embedding(num_users, 8)
        self.item_embedding = nn.Embedding(num_items, 8)
        # MLP over the concatenated (8 + 8)-dimensional pair representation.
        self.fc1 = nn.Linear(16, 64)
        self.fc2 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 1)
        # Kept on the module so train_dataloader() can build its dataset.
        self.ratings = ratings
        self.all_product_ids = all_product_ids

    def forward(self, user_input, item_input):
        """Return the sigmoid interaction score for each (user, item) pair."""
        pair_embedding = torch.cat(
            (self.user_embedding(user_input), self.item_embedding(item_input)),
            dim=-1,
        )
        hidden = torch.relu(self.fc1(pair_embedding))
        hidden = torch.relu(self.fc2(hidden))
        return torch.sigmoid(self.output(hidden))

    def training_step(self, batch, batch_idx):
        """Compute the binary cross-entropy loss for one batch."""
        batch_users, batch_items, batch_labels = batch
        scores = self(batch_users, batch_items)
        # Labels arrive as a flat integer tensor; match the (N, 1) float scores.
        targets = batch_labels.view(-1, 1).float()
        criterion = nn.BCELoss()
        return criterion(scores, targets)

    def configure_optimizers(self):
        """Use Adam with its default hyper-parameters."""
        optimizer = torch.optim.Adam(self.parameters())
        return optimizer

    def train_dataloader(self):
        """Build the training loader (batches of 512, loaded in-process)."""
        train_set = RatingsTrainDataset(self.ratings, self.all_product_ids)
        return DataLoader(train_set, batch_size=512, num_workers=0)

In [3]:
# We have already stored this in a separate file; alternatively, we can create a separate class that contains all 3 of these methods.

import os
import pickle
import pandas as pd
def get_content_based_recommendations(product_id, company_id, offset, limit):
    """Return the products most similar to ``product_id`` by content.

    Artifacts are read from the directory named ``company_id``:
    ``product_mapping.pkl``, ``content_based_model.pkl`` (a mapping with
    ``'series'`` and ``'cosine_sim'`` entries) and
    ``reverse_product_mapping.pkl``.

    Args:
        product_id: External product id to find neighbours for.
        company_id: Path of the directory holding the pickled artifacts.
        offset: Number of leading recommendations to skip (paging).
            Negative values are treated as 0.
        limit: Maximum number of recommendations to return.

    Returns:
        List of product ids as stored in the reverse product mapping,
        ordered by decreasing similarity. Empty when the directory is
        missing, the product is unknown, ``limit`` is not positive or
        ``offset`` lies beyond the available results.
    """
    if not os.path.isdir(company_id):
        return []

    def _load(filename):
        # SECURITY: pickle.load can execute arbitrary code; these files must
        # come from a trusted source and company_id must not be attacker
        # controlled.
        with open(os.path.join(company_id, filename), 'rb') as handle:
            return pickle.load(handle)

    product_mapping = _load('product_mapping.pkl')
    if product_id not in product_mapping:
        return []
    if limit <= 0:
        return []
    offset = max(offset, 0)

    model = _load('content_based_model.pkl')
    reverse_product_mapping = _load('reverse_product_mapping.pkl')

    internal_id = product_mapping[product_id]
    row_index = model['series'][internal_id]
    similarity_row = model['cosine_sim'][row_index]

    # Positions in the similarity row, most similar first (stable for ties).
    ranked_positions = [
        position
        for position, _ in sorted(
            enumerate(similarity_row), key=lambda pair: pair[1], reverse=True
        )
    ]
    if len(ranked_positions) < offset + 1:
        return []

    # The top-ranked entry is skipped -- presumably it is the query product
    # itself (self-similarity); confirm against how cosine_sim was built.
    # Slicing clamps to the end of the list on its own.
    page = ranked_positions[offset + 1:offset + 1 + limit]

    # Row positions index into the keys of the reverse mapping, in order.
    internal_ids = list(reverse_product_mapping)
    return [reverse_product_mapping.get(internal_ids[position]) for position in page]

In [6]:
import pandas as pd
import numpy as np
import pickle
import torch
import os

def get_hybrid_recommendations(user_id, product_id, company_id, offset, limit,
                               max_candidates=1000):
    """Recommend products by re-ranking content-based candidates per user.

    The ``max_candidates`` products most similar to ``product_id`` (by the
    content-based cosine-similarity model) are filtered down to those the
    user has not interacted with, then ordered by the score the
    collaborative-filtering model assigns to each (user, product) pair.
    Users absent from ``user_mapping.pkl`` fall back to
    ``get_content_based_recommendations``.

    Args:
        user_id: External user id.
        product_id: External id of the product currently in context.
        company_id: Path of the directory holding the pickled artifacts.
        offset: Number of leading recommendations to skip (paging).
            Negative values are treated as 0.
        limit: Maximum number of recommendations to return.
        max_candidates: Size of the content-based candidate pool.

    Returns:
        List of product ids as stored in the reverse product mapping, best
        first. Empty when the directory is missing, the product is unknown,
        nothing is left to recommend, ``limit`` is not positive or
        ``offset`` lies beyond the available results.
    """
    if not os.path.isdir(company_id):
        return []

    def _load(filename):
        # SECURITY: pickle.load can execute arbitrary code; these files must
        # come from a trusted source and company_id must not be attacker
        # controlled.
        with open(os.path.join(company_id, filename), 'rb') as handle:
            return pickle.load(handle)

    original_user_ids = _load('user_mapping.pkl')
    if user_id not in original_user_ids:
        return get_content_based_recommendations(product_id, company_id, offset, limit)

    product_mapping = _load('product_mapping.pkl')
    # Unknown product: return nothing, as the content-based path does,
    # instead of raising KeyError.
    if product_id not in product_mapping:
        return []
    if limit <= 0:
        return []
    offset = max(offset, 0)

    # --- Stage 1: content-based candidate pool -------------------------
    content_model = _load('content_based_model.pkl')
    reverse_product_mapping = _load('reverse_product_mapping.pkl')

    row_index = content_model['series'][product_mapping[product_id]]
    similarity_row = content_model['cosine_sim'][row_index]
    ranked_positions = [
        position
        for position, _ in sorted(
            enumerate(similarity_row), key=lambda pair: pair[1], reverse=True
        )
    ]
    # Row positions index into the keys of the reverse mapping, in order.
    internal_ids = list(reverse_product_mapping)
    candidates = [internal_ids[position] for position in ranked_positions[:max_candidates]]

    # --- Stage 2: collaborative-filtering re-ranking -------------------
    cf_data = _load('collaborative_filtering_model.pkl')
    internal_user = original_user_ids[user_id]
    model = cf_data['model']
    interacted_items = set(cf_data['interacted_items'][internal_user])

    # Keep content-similarity order so the final ranking is deterministic.
    unseen_items = [item for item in candidates if item not in interacted_items]
    if not unseen_items:
        # An empty list would become a float tensor, which is not a valid
        # index input for an embedding-based model.
        return []

    with torch.no_grad():
        scores = model(
            torch.tensor([internal_user] * len(unseen_items)),
            torch.tensor(unseen_items),
        )
    # reshape(-1) keeps a 1-D array even when only one item is scored.
    scores = scores.detach().cpu().numpy().reshape(-1)

    # Highest score first; the stable sort breaks ties by content similarity.
    order = np.argsort(-scores, kind='stable').tolist()
    ranked_items = [unseen_items[i] for i in order]
    if len(ranked_items) < offset + 1:
        return []
    return [reverse_product_mapping.get(item) for item in ranked_items[offset:offset + limit]]

In [7]:
# Example call: first page (offset 0) of up to 100 hybrid recommendations
# for one user / product pair, read from the 'xjkgkjshl' artifact directory.
get_hybrid_recommendations(
    user_id='A3J3BRHTDRFJ2G',
    product_id='B00005TQ09',
    company_id='xjkgkjshl',
    offset=0,
    limit=100,
)

['B00BGGDVOO',
 'B0002L5R78',
 'B007BJHETS',
 'B0079UAT0A',
 'B002BH3I9U',
 'B00007M1TZ',
 'B00825BZUY',
 'B009D79VH4',
 'B00884WH74',
 'B003ES5ZR8',
 'B005U0M9B8',
 'B005NGKR54',
 'B001196H3S',
 'B0097CZHAU',
 'B00066FH1U',
 'B0007Y794O',
 'B005H3Q57M',
 'B000UMX7FI',
 'B001WM73P0',
 'B004R7A9NU',
 'B000067O5G',
 'B005KSAG3S',
 'B00FNPD1OY',
 'B008U3038I',
 'B002W3IXZW',
 'B004RFBIUU',
 'B003MQWN40',
 'B0066636AS',
 'B00F3SOHNU',
 'B008X9ZBVI',
 'B0008D76L0',
 'B00ASLSQHK',
 'B008LURQ76',
 'B000BV8604',
 'B003STVG80',
 'B001H0BA24',
 'B001NS828K',
 'B0056YNA1Q',
 'B0019CSVMW',
 'B005QFH86S',
 'B00HVT27B8',
 'B0036WTDHK',
 'B005OFFH5Y',
 'B000EMWBV0',
 'B0001D3K8A',
 'B001TICH08',
 'B00568BV68',
 'B00BCGRX9M',
 'B0009JB7GI',
 'B002XVYZ82',
 'B007B5WHTE',
 'B004QK8FBG',
 'B000EXR0SI',
 'B001A5FH9S',
 'B008X9Z7N0',
 'B007B6YPAM',
 'B0002SQ0A4',
 'B001F6TXME',
 'B00IVPU7DG',
 'B000NB05MO',
 'B009924TSY',
 'B000VW2QRM',
 'B007RESFYK',
 'B00HQ883QW',
 'B002J46IYW',
 'B001FN3ZRQ',
 'B004J8HW