<a href="https://colab.research.google.com/github/meysamvarasteh92/hybrid_recsys/blob/main/hybrid_recsys.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from time import time
from time import time
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from google.colab import drive
drive.mount('/content/drive')
#tf.compat.v1.disable_eager_execution()
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization


In [None]:
%%capture
# Cross-entropy is left unreduced (Reduction.NONE); the custom train step
# below passes the per-element values on to the tape and the loss tracker.
loss_fn = keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
# Running mean of the loss, shared by every MaskedLanguageModel instance.
loss_tracker = tf.keras.metrics.Mean(name="loss")
class MaskedLanguageModel(tf.keras.Model):
    """Keras model whose training step uses the module-level ``loss_fn``
    and reports the module-level ``loss_tracker`` as its only metric."""

    def train_step(self, inputs):
        """Run one optimisation step.

        Args:
          inputs: either ``(features, labels)`` or
            ``(features, labels, sample_weight)``.

        Returns:
          dict: ``{"loss": <current value of the running mean loss>}``.
        """
        sample_weight = None
        if len(inputs) == 3:
            features, labels, sample_weight = inputs
        else:
            features, labels = inputs

        # Forward pass recorded on the tape so it can be differentiated.
        with tf.GradientTape() as tape:
            predictions = self(features, training=True)
            loss = loss_fn(labels, predictions, sample_weight=sample_weight)

        # Backward pass followed by the weight update.
        variables = self.trainable_variables
        grads = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(grads, variables))

        # Fold this batch into the running mean and report it.
        loss_tracker.update_state(loss, sample_weight=sample_weight)
        return {"loss": loss_tracker.result()}

    @property
    def metrics(self):
        # Listing the tracker here lets Keras reset it automatically at the
        # start of each epoch and of `evaluate()`; without this property the
        # reset would have to be done by hand.
        return [loss_tracker]


In [None]:
%%capture
# Read the raw corpus: one list entry per line of the text file, with
# undecodable bytes dropped.
all_data = []
with open("/content/drive/My Drive/dataset.txt", "r", errors="ignore") as f:
    all_data.extend(f)


def encode(texts):
    """Run ``texts`` through the module-level ``vectorize_layer`` and return
    the result converted with ``.numpy()``."""
    return vectorize_layer(texts).numpy()

def get_vectorize_layer(texts, vocab_size, max_seq, special_tokens=("[MASK]",)):
    """Build Text vectorization layer

    Args:
      texts (list): List of string i.e input texts
      vocab_size (int): vocab size
      max_seq (int): Maximum sequence length.
      special_tokens (sequence, optional): Special tokens appended at the end
        of the vocabulary. Defaults to ('[MASK]',).

    Returns:
        layers.Layer: Return TextVectorization Keras Layer
    """
    # Work on a private list so the caller's sequence is never touched.
    special_tokens = list(special_tokens)

    vectorize_layer = TextVectorization(
        max_tokens=vocab_size,
        output_mode="int",
        standardize=None,
        output_sequence_length=max_seq,
    )
    vectorize_layer.adapt(texts)

    # Rebuild the vocabulary with the special tokens at the end.  The first
    # two entries returned by get_vocabulary() are skipped here, as in the
    # original slice (presumably the padding/OOV entries that
    # set_vocabulary() re-inserts -- confirm against the Keras docs).
    # A special token that also occurs in the corpus is dropped from the
    # corpus part, since set_vocabulary() rejects duplicate terms.
    corpus_budget = max(0, vocab_size - 2 - len(special_tokens))
    corpus_tokens = [
        token for token in vectorize_layer.get_vocabulary()[2:]
        if token not in special_tokens
    ][:corpus_budget]
    vectorize_layer.set_vocabulary(corpus_tokens + special_tokens)
    return vectorize_layer


# Vectorizer fitted on the whole corpus: 3500-entry vocabulary, sequences
# padded/truncated to 100 tokens, "[mask]" as the only special token.
vectorize_layer = get_vectorize_layer(all_data, vocab_size=3500, max_seq=100, special_tokens=["[mask]"])

In [None]:
class parse_args():
    """Static experiment configuration, read as plain attributes."""

    # --- data locations -------------------------------------------------
    path = '/content/drive/My Drive/DATASET/'      # input directory prefix
    save_dir = '/content/drive/My Drive/DATASET/'  # where the split files go

    # --- train / validation / test proportions --------------------------
    train_ratio = 8
    valid_ratio = 1
    test_ratio = 1

    # --- run control ----------------------------------------------------
    verbose = 10            # losses are reported every `verbose` epochs
    batch_size = 256
    num_epochs = 301
    lr_net = 1e-4           # learning rate handed to the optimizer
    train_loss = 1          # truthy -> compute losses on reporting epochs
    trainable_flag = 1      # not read anywhere in this file
    num_negatives = 4       # not read anywhere in this file

    # --- text / embedding sizes -----------------------------------------
    max_length = 300        # plot texts are cut to this many words
    vocab_size = 8000
    embed_size_item = 200
    embed_size_user = 100

    # --- network shape --------------------------------------------------
    filter_sizes = [3, 4, 5]
    filter_num = 100
    middle_dim = 200
    dropout_keep_prob = 0.6
    output_dim = 50
    weight_size = 50

In [None]:
import numpy as np
import random
import sys
import os
from sklearn.feature_extraction.text import TfidfVectorizer
import tensorflow as tf

             
class Dataset():
    """Prepare rating and plot-text data for the hybrid recommender.

    Constructing an instance runs the whole pipeline: split the raw rating
    file into train/valid/test files, index users and items, turn item plots
    into padded word-id sequences, load GloVe vectors for the plot
    vocabulary, collect per-user rating histories and pre-compute the
    fixed-size batches exposed as ``feed_list_train``, ``feed_list_valid``
    and ``feed_list_test``.
    """

    # Every user history is padded or truncated to this many entries.
    USER_HISTORY_LENGTH = 100

    def __init__(self, args):
        """Run the full data pipeline.

        :param args: configuration object exposing ``path``, ``save_dir``,
            ``train_ratio``, ``valid_ratio``, ``test_ratio``, ``max_length``,
            ``vocab_size``, ``embed_size_item`` and ``batch_size``
        """
        self.max_length = args.max_length
        self.vocab_size = args.vocab_size
        self.embed_size_item = args.embed_size_item
        self.batch_size = args.batch_size
        self.split_raw_data(args.path + "ml-1m_ratings.dat", args.save_dir,
                            args.train_ratio, args.valid_ratio, args.test_ratio)
        # NOTE: the split files are written under args.save_dir but read back
        # from args.path, so both must point at the same directory.
        (self.train_tuple_list, self.valid_tuple_list, self.test_tuple_list,
         self.X_sequence, self.max_len_item, self.max_len_user,
         self.user_list, self.item_list, self.history, self.history_lab,
         self.pre_init) = self.preprocessing_data(args.path)
        self.feed_list_train, self.feed_list_valid, self.feed_list_test = self.generate_batch()

    def split_raw_data(self, data_path, save_dir, train_ratio, valid_ratio, test_ratio):
        """Split the raw rating file into training, validation and test files.

        The input is plain text with one ``userID::itemID::rating[::...]``
        record per line.  Each user and each item gets at least one record in
        the training data.  The output files contain
        ``userID::itemID::rating`` lines.

        :param data_path: path of the raw rating file; the process exits via
            ``sys.exit`` when it does not exist
        :param save_dir: directory prefix for the processed data; the file
            names ``train``, ``valid`` and ``test`` are appended verbatim, so
            it has to end with a path separator
        :param train_ratio: relative size of the training split
        :param valid_ratio: relative size of the validation split
        :param test_ratio: relative size of the test split
        """
        if not os.path.exists(data_path):
            sys.exit('invalid path for loading data')
        print('start processing raw data')

        ratings = {}     # (user, item) -> rating text; the last record wins
        user2item = {}
        item2user = {}
        with open(data_path, 'r', errors='ignore') as f:
            for line in f:
                content = line.strip().split('::')
                if len(content) < 3:
                    # Blank or malformed record: nothing usable to keep.
                    continue
                u, i, rating = content[0], content[1], content[2]
                if (u, i) not in ratings:
                    user2item.setdefault(u, []).append(i)
                    item2user.setdefault(i, []).append(u)
                ratings[(u, i)] = rating

        # Seed the training set with one random record per user and per item.
        train_set = set()
        for u, items in user2item.items():
            train_set.add((u, random.choice(items)))
        for i, users in item2user.items():
            train_set.add((random.choice(users), i))

        total_num = len(ratings)
        ratio_sum = train_ratio + valid_ratio + test_ratio
        train_num = int(train_ratio / ratio_sum * total_num)
        valid_num = int(valid_ratio / ratio_sum * total_num)

        # Shuffle the remaining records once and slice them.  Unlike drawing
        # with replacement until a target size is reached, this always
        # terminates, even when fewer records remain than were asked for.
        remaining = [pair for pair in ratings if pair not in train_set]
        random.shuffle(remaining)
        extra_train = max(0, train_num - len(train_set))
        train_set.update(remaining[:extra_train])
        remaining = remaining[extra_train:]
        valid_count = min(valid_num, len(remaining))
        valid_set = set(remaining[:valid_count])
        test_set = set(remaining[valid_count:])

        def write_to_file(save_path, data_set):
            with open(save_path, 'w', encoding='utf-8', errors='ignore') as f:
                f.writelines(u + '::' + i + '::' + ratings[(u, i)] + '\n'
                             for (u, i) in data_set)

        # save data
        os.makedirs(save_dir, exist_ok=True)
        print('writing rating data to ' + save_dir)
        write_to_file(save_dir + 'train', train_set)
        write_to_file(save_dir + 'valid', valid_set)
        write_to_file(save_dir + 'test', test_set)

    def preprocessing_data(self, path):
        """Load the split files and build every structure the model needs.

        Reads ``ml_plot.dat``, ``ml-1m_ratings.dat``, ``train``, ``valid``,
        ``test`` and ``glove.6B.200d.txt`` from ``path`` (a directory prefix
        that is concatenated verbatim with those names).

        :param path: directory prefix of the input files
        :return: the 11-tuple ``(train_tuple_list, valid_tuple_list,
            test_tuple_list, X_sequence, max_len_item, max_len_user,
            user_list, item_list, history, history_lab, pre_init)``
        """
        self.item_plot_id = self._read_plot_ids(path + "ml_plot.dat")
        (self.user_dict, self.item_dict,
         user_list, self.item_list) = self._build_id_maps(path + "ml-1m_ratings.dat")

        train_tuple_list = self._read_ratings(path + "train")
        valid_tuple_list = self._read_ratings(path + "valid")
        test_tuple_list = self._read_ratings(path + "test")

        # NOTE(review): histories come from the full rating file, i.e. they
        # also contain the records that ended up in valid/test.
        self.history_, self.history_lab_ = self._read_history(path + "ml-1m_ratings.dat")

        X_sequence, vocab = self._build_plot_sequences(path + "ml_plot.dat")
        pre_init = self._load_glove(vocab, path + "glove.6B.200d.txt")

        # Pad in place so that every sequence of a kind has the same length.
        X_sequence_ = self._pad_sequences(X_sequence, self.vocab_size, 'item')
        self.history = self._pad_sequences(self.history_, int(len(self.item_list)), 'user')
        self.history_lab = self._pad_sequences(self.history_lab_, 5, 'user')
        return (train_tuple_list, valid_tuple_list, test_tuple_list, X_sequence_,
                self.max_len_item, self.max_len_user, user_list, self.item_list,
                self.history, self.history_lab, pre_init)

    def generate_batch(self):
        """Pre-compute the feed dictionaries of the three splits.

        Only full batches are produced: a trailing remainder smaller than
        ``batch_size`` is dropped, so a split with fewer than ``batch_size``
        records yields an empty list.

        :return: ``(feed_list_train, feed_list_valid, feed_list_test)``, each
            a list of dicts with the keys ``user_input``, ``item_input``,
            ``label_input`` and ``label``
        """
        return (self._make_batches(self.train_tuple_list),
                self._make_batches(self.valid_tuple_list),
                self._make_batches(self.test_tuple_list))

    def _make_batches(self, tuple_list):
        """Cut ``tuple_list`` into consecutive full batches of feed dicts."""
        num_batches = len(tuple_list) // self.batch_size
        return [self._make_single_batch(tuple_list, index) for index in range(num_batches)]

    def _make_single_batch(self, tuple_list, index):
        """Build the feed dict for batch number ``index`` of ``tuple_list``."""
        begin = index * self.batch_size
        chunk = tuple_list[begin:begin + self.batch_size]
        users = [u for (u, _, _) in chunk]
        items = [i for (_, i, _) in chunk]
        ratings = [r for (_, _, r) in chunk]
        return {
            # item ids previously rated by each user, fixed length
            'user_input': np.array([self.history[u] for u in users], dtype=np.int32),
            # padded word ids of each item's plot
            'item_input': np.array([self.X_sequence[i] for i in items], dtype=np.int32),
            # ratings aligned with user_input (0 marks padding)
            'label_input': np.array([self.history_lab[u] for u in users], dtype=np.int32),
            # target rating as a column vector
            'label': np.array(ratings, dtype=np.float32).reshape(-1, 1),
        }

    def _read_plot_ids(self, document_name):
        """Return the set of item ids (first ``::`` field) in the plot file."""
        with open(document_name, "r") as f:
            return {line.split('::')[0] for line in f.read().splitlines()}

    def _build_id_maps(self, filename):
        """Index the users and the items that have both ratings and a plot.

        :return: ``(user_dict, item_dict, user_list, item_list)`` where the
            dicts map a raw id to its position in the matching list
        """
        user_set = set()
        item_set = set()
        with open(filename, 'r', errors='ignore') as f:
            for line in f:
                content = line.strip().split('::')
                if len(content) < 2:
                    continue
                user_set.add(content[0])
                if content[1] in self.item_plot_id:
                    item_set.add(content[1])
        # Sorted so the id assignment is the same on every run (plain set
        # iteration order of strings changes between interpreter runs).
        user_list = sorted(user_set)
        item_list = sorted(item_set)
        user_dict = {old: new for new, old in enumerate(user_list)}
        item_dict = {old: new for new, old in enumerate(item_list)}
        return user_dict, item_dict, user_list, item_list

    def _read_ratings(self, filename):
        """Read ``(user_index, item_index, rating)`` tuples from a split file,
        keeping only items that have a plot."""
        tuple_list = []
        with open(filename, 'r', errors='ignore') as f:
            for line in f:
                content = line.strip().split('::')
                if len(content) < 3:
                    continue
                u, i, r = content[0], content[1], float(content[2])
                if i in self.item_plot_id:
                    tuple_list.append((self.user_dict[u], self.item_dict[i], r))
        return tuple_list

    def _read_history(self, filename):
        """Collect, per user index, the rated item indices and their ratings.

        :return: ``(hist, hist_lab)``: two dicts keyed by user index whose
            list values are aligned position by position
        """
        hist = {}
        hist_lab = {}
        with open(filename, 'r', errors='ignore') as f:
            for line in f:
                content = line.strip().split('::')
                if len(content) < 3:
                    continue
                u, i, r = content[0], content[1], float(content[2])
                if i in self.item_plot_id:
                    user = self.user_dict[u]
                    hist.setdefault(user, []).append(self.item_dict[i])
                    hist_lab.setdefault(user, []).append(r)
        return hist, hist_lab

    def _build_plot_sequences(self, document_name):
        """Turn each item's plot into a list of vocabulary word ids.

        :return: ``(X_sequence, vocab)``: item index -> word-id list, and the
            fitted ``TfidfVectorizer.vocabulary_`` (term -> id)
        """
        plots = {}
        with open(document_name, "r") as f:
            for line in f.read().splitlines():
                fields = line.split('::')
                if len(fields) < 2 or fields[0] not in self.item_dict:
                    continue
                words = ' '.join(fields[1].split('|')).split()[:self.max_length]
                plots[self.item_dict[fields[0]]] = ' '.join(words)

        # 'english' (the string) selects the built-in stop-word list; a set
        # containing that word is not an accepted value for the option.
        vectorizer = TfidfVectorizer(max_df=0.5, stop_words='english',
                                     max_features=self.vocab_size)
        vectorizer.fit(list(plots.values()))
        vocab = vectorizer.vocabulary_
        X_sequence = {
            item: [int(vocab[word]) for word in text.split() if word in vocab]
            for item, text in plots.items()
        }
        return X_sequence, vocab

    def _load_glove(self, vocab, filename):
        """Build the initial embedding matrix from a GloVe text file.

        Rows of vocabulary words missing from the file stay zero.

        :return: array of shape ``(vocab_size, embed_size_item)``
        """
        embedding_matrix = np.zeros((self.vocab_size, self.embed_size_item))
        with open(filename, "r", errors='ignore', encoding='utf8') as f:
            for line in f:
                values = line.split()
                if not values:
                    continue
                # Only vocabulary words are parsed, so the (large) file never
                # has to be held in memory as a whole.
                row = vocab.get(values[0])
                if row is not None:
                    embedding_matrix[row] = np.asarray(values[1:], dtype='float32')
        return embedding_matrix

    def _pad_sequences(self, flows, max_size, choice):
        """Give every sequence in ``flows`` the same length, in place.

        :param flows: dict mapping an index to a list
        :param max_size: for ``'item'``, ``max_size + 1`` is the padding id
        :param choice: ``'item'`` pads to the longest sequence and records it
            in ``self.max_len_item``; ``'user'`` pads with 0 or truncates to
            ``USER_HISTORY_LENGTH`` and records it in ``self.max_len_user``
        :return: ``flows`` itself
        :raises ValueError: for any other ``choice``
        """
        if choice == 'item':
            self.max_len_item = max((len(seq) for seq in flows.values()), default=0)
            # NOTE(review): this id lies outside the (vocab_size, dim) matrix
            # built by _load_glove when max_size is vocab_size -- confirm the
            # embedding lookup is meant to receive an out-of-range id here.
            pad_value = max_size + 1
            for key, seq in flows.items():
                flows[key] = seq + [pad_value] * (self.max_len_item - len(seq))
            return flows

        if choice == 'user':
            self.max_len_user = self.USER_HISTORY_LENGTH
            for key, seq in flows.items():
                missing = self.max_len_user - len(seq)
                if missing > 0:
                    # NOTE(review): 0 is also a valid item index.
                    flows[key] = seq + [0] * missing
                else:
                    flows[key] = seq[:self.max_len_user]
            return flows

        raise ValueError("choice must be 'item' or 'user', got %r" % (choice,))

In [None]:
%%capture
# Build the dataset once from the default configuration; the constructor
# performs the split and all preprocessing.
args = parse_args()
dataset = Dataset(args=args)

In [None]:

class CMF:
    """Rating predictor built as a TF1-style graph.

    The item side is a multi-width text CNN over the item's word ids; the
    user side feeds the user's input sequence through a pretrained encoder
    loaded from disk.  Both vectors are concatenated and passed through a
    small MLP that outputs one rating per example.

    Call :meth:`build_graph` (with eager execution disabled) before using
    ``user_input``, ``item_input``, ``label_input``, ``label``, ``loss`` or
    ``optimizer``.
    """

    def __init__(self, args, dataset):
        """Copy every hyper-parameter the graph-building methods read.

        :param args: configuration exposing ``vocab_size``, ``filter_num``,
            ``filter_sizes``, ``embed_size_item``, ``embed_size_user``,
            ``weight_size``, ``middle_dim``, ``dropout_keep_prob``,
            ``output_dim``, ``lr_net``, ``verbose`` and ``train_loss``
        :param dataset: object exposing ``user_list``, ``item_list``,
            ``pre_init`` (initial document embedding matrix) and
            ``max_len_item`` (length of every item word-id sequence)
        """
        self.num_users = len(dataset.user_list)
        self.num_items = len(dataset.item_list)
        self.vocab_size = args.vocab_size
        self.filter_num = args.filter_num
        self.filter_sizes = args.filter_sizes
        self.embedding_size_item = args.embed_size_item
        self.embedding_size_user = args.embed_size_user
        self.weight_size = args.weight_size
        self.middle_dim = args.middle_dim
        self.dropout_keep_prob = args.dropout_keep_prob
        self.output_dim = args.output_dim
        self.lr_net = args.lr_net
        # Read by the training loop: reporting period and loss switch.
        self.verbose = args.verbose
        self.train_loss = args.train_loss
        self.pre_init = dataset.pre_init
        # The max-pool window below spans the whole convolved item sequence,
        # so it has to match the padded item length.
        self.max_seq = dataset.max_len_item

    def _create_placeholders(self):
        """Create the four input placeholders."""
        with tf.name_scope("input_data"):
            # per-example user sequence (fed to the pretrained encoder)
            self.user_input = tf.compat.v1.placeholder(tf.int32, shape=[None, None], name='user_input')
            # per-example word ids of the item document
            self.item_input = tf.compat.v1.placeholder(tf.int32, shape=[None, None], name='item_input')
            # per-example label ids (looked up in label_embedding)
            self.label_input = tf.compat.v1.placeholder(tf.int32, shape=[None, None], name='label_input')
            # target rating, one column
            self.label = tf.compat.v1.placeholder(tf.float32, shape=[None, 1], name='label')

    def _create_variables(self):
        """Create the embedding tables and the attention parameters."""
        with tf.name_scope("embedding"):
            # Document word embeddings start from the pre-computed matrix.
            self.document_embedding = tf.Variable(self.pre_init, name='document_embedding', dtype=tf.float32, trainable=True)
            self.label_embedding = tf.Variable(tf.random.truncated_normal(shape=[6, self.embedding_size_user], mean=0.0, stddev=0.1), name='label_embedding1', dtype=tf.float32, trainable=True)

            ####  Variables for attention
            att_stddev = tf.sqrt(tf.math.truediv(2.0, self.weight_size + self.embedding_size_user))
            self.W_att = tf.Variable(tf.random.truncated_normal(shape=[self.embedding_size_user * 2, self.weight_size], mean=0.0, stddev=att_stddev), name='Weights_for_MLP', dtype=tf.float32, trainable=True)
            self.b_att = tf.Variable(tf.random.truncated_normal(shape=[1, self.weight_size], mean=0.0, stddev=att_stddev), name='Bias_for_MLP', dtype=tf.float32, trainable=True)
            self.h_att = tf.Variable(tf.ones([self.weight_size, 1]), name='H_for_MLP', dtype=tf.float32, trainable=True)

    def _create_inference(self):
        """Build the forward pass and store the prediction in ``self.output``."""

        def relu_dense(inputs, units):
            # All dense layers of the graph share the same initialisation.
            return tf.compat.v1.layers.dense(
                inputs=inputs, units=units, activation=tf.nn.relu,
                kernel_initializer=tf.compat.v1.truncated_normal_initializer(mean=0.0, stddev=0.1),
                bias_initializer=tf.constant_initializer(0.1))

        with tf.compat.v1.variable_scope("inference", reuse=tf.compat.v1.AUTO_REUSE):
            # (batch_size, length_of_the_words, embedding_size)
            self.item_look_up = tf.nn.embedding_lookup(self.document_embedding, self.item_input)
            self.label_look_up = tf.nn.embedding_lookup(self.label_embedding, self.label_input)
            # (batch_size, length_of_the_words, embedding_size, 1)
            self.item_look_up = tf.expand_dims(self.item_look_up, -1)

            # convolution + max-pool over the whole sequence, one per width
            pooled_outputs = []
            for filter_size in self.filter_sizes:
                with tf.name_scope(name='conv-maxpool-{}'.format(filter_size)):
                    filter_shape = [filter_size, self.embedding_size_item, 1, self.filter_num]
                    self.W = tf.compat.v1.get_variable(name='conv-W{}'.format(filter_size), shape=filter_shape)
                    self.b = tf.compat.v1.get_variable(name='conv-b{}'.format(filter_size), shape=[self.filter_num], initializer=tf.constant_initializer(0.1))
                    conv = tf.nn.conv2d(input=self.item_look_up, filters=self.W, strides=[1, 1, 1, 1], padding='VALID')
                    h = tf.nn.relu(conv + self.b)
                    # (batch_size, 1, 1, filter_num)
                    pooled = tf.nn.max_pool(input=h, ksize=[1, self.max_seq - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID')
                    pooled_outputs.append(pooled)

            # concatenate results of max pooling
            filter_total_num = self.filter_num * len(self.filter_sizes)
            h_pool = tf.concat(values=pooled_outputs, axis=-1)
            h_pool_flat = tf.reshape(tensor=h_pool, shape=[-1, filter_total_num])

            # fully connected layers for the item section
            hidden_item = relu_dense(h_pool_flat, self.middle_dim)
            # tf.nn.dropout takes the probability of DROPPING a unit, so the
            # configured keep probability has to be converted.
            # NOTE(review): dropout is part of the graph unconditionally, so
            # it is also active when the loss is evaluated.
            dropped_item = tf.nn.dropout(hidden_item, rate=1.0 - self.dropout_keep_prob)
            self.output_vec_item = relu_dense(dropped_item, self.output_dim)

            ### fully connected layers for user section
            self.output_vec_user = self.bert_end_to_end()
            input_MLP = tf.concat([self.output_vec_item, self.output_vec_user], axis=-1)
            hidden1 = relu_dense(input_MLP, 100)
            hidden2 = relu_dense(hidden1, 50)
            self.output = relu_dense(hidden2, 1)

    def _bert(self):
        """Load the saved masked-language model and return a model that maps
        its input to the output of layer ``encoder_0/ffn_layernormalization``."""
        mlm_model = keras.models.load_model("/content/drive/My Drive/bert_mlm_v2.h5", custom_objects={"MaskedLanguageModel": MaskedLanguageModel})
        pretrained_bert_model = tf.keras.Model(mlm_model.input, mlm_model.get_layer("encoder_0/ffn_layernormalization").output)
        return pretrained_bert_model

    def bert_end_to_end(self):
        """Encode ``self.user_input`` into a 50-dimensional user vector:
        pretrained encoder -> global max pool -> Dense(64, relu) ->
        Dense(50, sigmoid)."""
        inputs = self.user_input
        sequence_output = self._bert()(inputs)
        pooled_output = layers.GlobalMaxPooling1D()(sequence_output)
        hidden_layer = layers.Dense(64, activation="relu")(pooled_output)
        outputs = layers.Dense(50, activation="sigmoid")(hidden_layer)
        return outputs

    #### define attention layer
    def _attention_MLP(self, q):
        """Softmax-weighted sum of ``self.user_look_up`` along axis 1, with
        the weights computed from ``q`` by a one-hidden-layer MLP.

        NOTE(review): no method of this class assigns ``self.user_look_up``
        and nothing in this class calls this method.
        """
        with tf.name_scope("attention_MLP"):
            b = tf.shape(q)[0]
            n = tf.shape(q)[1]
            MLP_output = tf.matmul(q, self.W_att) + self.b_att
            MLP_output = tf.nn.relu(MLP_output)
            A_ = tf.reshape(tf.matmul(MLP_output, self.h_att), [b, n])
            exp_A_ = tf.math.exp(A_)
            exp_sum = tf.math.reduce_sum(exp_A_, 1, keepdims=True)
            A = tf.expand_dims(tf.math.truediv(exp_A_, exp_sum), 2)  # (b, n, 1)
            return tf.math.reduce_sum(A * self.user_look_up, 1)

    ### define loss (RMSE)
    def _create_loss(self):
        """Define ``self.loss`` as the root-mean-square error of the batch."""
        with tf.name_scope("loss"):
            # keras MSE only averages over the last axis and returns one
            # value per example; average over the batch BEFORE the square
            # root, otherwise the result is a vector of absolute errors.
            mse = tf.math.reduce_mean(tf.keras.losses.MSE(self.label, self.output))
            # The floor keeps the square root away from exactly zero.
            self.loss = tf.math.sqrt(tf.math.maximum(mse, 1e-8))

    ### define optimizer (Adam)
    def _create_optimizer(self):
        """Define ``self.optimizer`` as the Adam step minimising ``self.loss``."""
        with tf.name_scope("optimizer"):
            self.optimizer = tf.compat.v1.train.AdamOptimizer(self.lr_net).minimize(self.loss)

    def build_graph(self):
        """Build placeholders, variables, forward pass, loss and optimizer,
        in that order."""
        self._create_placeholders()
        self._create_variables()
        self._create_inference()
        self._create_loss()
        self._create_optimizer()
        


#### training network
def training(flag, model, data, num_epochs):
    """Train ``model`` and periodically print train/test losses.

    :param flag: accepted for interface compatibility; not read
    :param model: built model exposing ``verbose`` (reporting period in
        epochs), ``train_loss`` (truthy -> compute the losses to report) and
        the tensors used by ``training_batch`` / ``training_loss``
    :param data: object exposing ``feed_list_train`` and ``feed_list_test``
    :param num_epochs: number of passes over the training batches
    """
    session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    with tf.compat.v1.Session(config=session_config) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        feed_list = data.feed_list_train
        feed_list_test = data.feed_list_test
        batch_index_train = range(len(feed_list))
        batch_index_test = range(len(feed_list_test))

        #### training loop
        for epoch_count in range(num_epochs):
            training_batch(batch_index_train, model, sess, feed_list)

            # Only every `verbose`-th epoch is reported.
            if epoch_count % model.verbose != 0:
                continue

            if model.train_loss:
                train_loss = training_loss(batch_index_train, model, sess, feed_list)
                test_loss = testing_loss(batch_index_test, model, sess, feed_list_test)
            else:
                # Both values are printed below, so both need a placeholder
                # when the loss computation is switched off.
                train_loss, test_loss = 0, 0

            print('epoch:', epoch_count, 'loss_test:', np.mean(test_loss), 'loss_train:', np.mean(train_loss))
                 
### train the model on each batch for one epoch
def training_batch(batch_index,model,sess,feed_list):
    """Run one optimisation step per batch index.

    For every index in ``batch_index`` the matching entry of ``feed_list``
    is mapped onto the model's four input tensors and ``model.loss`` and
    ``model.optimizer`` are evaluated in ``sess``.
    """
    input_names = ('user_input', 'item_input', 'label_input', 'label')
    for position in batch_index:
        batch = feed_list[position]
        feed_dict = {getattr(model, name): batch[name] for name in input_names}
        sess.run([model.loss, model.optimizer], feed_dict)

### compute loss of training data
def training_loss(batch_index,model,sess,feed_list):
    """Return ``model.loss`` averaged over the batches in ``batch_index``.

    Each batch of ``feed_list`` is fed to the model's four input tensors;
    the evaluated losses are summed and divided by ``len(batch_index)``.
    """
    input_names = ('user_input', 'item_input', 'label_input', 'label')
    total = sum(
        (
            sess.run(
                model.loss,
                {getattr(model, name): feed_list[position][name] for name in input_names},
            )
            for position in batch_index
        ),
        0.0,
    )
    return total / len(batch_index)


### compute loss of test data
def testing_loss(batch_index_test,model,sess,feed_list):
    test_loss=0.0
    for idx in batch_index_test:
        feed_dict_=feed_list[idx]
        feed_dict={model.user_input:feed_dict_['user_input'],model.item_input:feed_dict_['item_input'],model.label_input:feed_dict_['label_input'],model.label:feed_dict_['label']}
        test_loss+=sess.run(model.loss,feed_dict)
    return test_loss/len(batch_index_test)


In [None]:
# Entry point: build the data and the model, then train.
if __name__=='__main__':
    with tf.device('/GPU:0'):
        args = parse_args()
        dataset=Dataset(args)
        model=CMF(args,dataset)
        # The model is a TF1-style graph (placeholders + Session), so eager
        # execution has to be off before the graph is built.
        tf.compat.v1.disable_eager_execution()
        model.build_graph()
        # All arguments by keyword: a positional argument may not follow a
        # keyword argument in a call.
        training(flag=1, model=model, data=dataset, num_epochs=args.num_epochs)