In [55]:
import tensorflow as tf

In [56]:
import tensorflow.keras as keras

In [57]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import tensorflow as tf
# import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

# Utils

In [58]:
import array
import collections

from typing import Dict, List, Optional, Text, Tuple

def _create_feature_dict() -> Dict[Text, List[tf.Tensor]]:
  return {"STOCKCODE": [], "RATING": [], "GICS": [], "STOCKNAME": [], "UNIX_TS": []}

def _sample_list(
    feature_lists: Dict[Text, List[tf.Tensor]],
    num_examples_per_list: int,
    random_state: Optional[np.random.RandomState] = None,
) -> Tuple[tf.Tensor, tf.Tensor]:
  """Function for sampling a list example from given feature lists."""
  if random_state is None:
    random_state = np.random.RandomState()

  sampled_indices = random_state.choice(
      range(len(feature_lists["STOCKCODE"])),
      size=num_examples_per_list,
      replace=False,
  )
  sampled_STOCKCODE = [
      feature_lists["STOCKCODE"][idx] for idx in sampled_indices
  ]
  sampled_RATING = [
      feature_lists["RATING"][idx]
      for idx in sampled_indices
  ]
  sampled_GICS = [
      feature_lists["GICS"][idx] for idx in sampled_indices
  ]
  sampled_STOCKNAME = [
      feature_lists["STOCKNAME"][idx]
      for idx in sampled_indices
  ]
  sampled_UNIX_TS = [
      feature_lists["UNIX_TS"][idx] for idx in sampled_indices
  ]

  return (
      tf.stack(sampled_STOCKCODE, 0),
      tf.stack(sampled_RATING, 0),
      tf.stack(sampled_GICS, 0),
      tf.stack(sampled_STOCKNAME, 0),
      tf.stack(sampled_UNIX_TS, 0)
  )


def sample_listwise(
    rating_dataset: tf.data.Dataset,
    num_list_per_user: int = 10,
    num_examples_per_list: int = 10,
    seed: Optional[int] = None,
) -> tf.data.Dataset:
  
  random_state = np.random.RandomState(seed)

  example_lists_by_user = collections.defaultdict(_create_feature_dict)

  movie_title_vocab = set()
  for example in rating_dataset:
    user_id = example["CDSACCNO"].numpy()
    example_lists_by_user[user_id]["STOCKCODE"].append(
        example["STOCKCODE"])
    example_lists_by_user[user_id]["RATING"].append(
        example["RATING"])
    example_lists_by_user[user_id]["GICS"].append(
        example["GICS"])
    example_lists_by_user[user_id]["STOCKNAME"].append(
        example["STOCKNAME"])
    example_lists_by_user[user_id]["UNIX_TS"].append(
        example["UNIX_TS"])
    
    movie_title_vocab.add(example["STOCKNAME"].numpy())

    

  tensor_slices = {"CDSACCNO": [], "STOCKCODE": [], "RATING": [], "GICS": [], "STOCKNAME": [], "UNIX_TS": []}

  for user_id, feature_lists in example_lists_by_user.items():
    for _ in range(num_list_per_user):

      # Drop the user if they don't have enough ratings.
      if len(feature_lists["STOCKNAME"]) < num_examples_per_list:
        continue

        '''sampled_STOCKCODE, 0),
      tf.stack(sampled_RATING, 0),
      tf.stack(sampled_GICS, 0),
      tf.stack(sampled_STOCKNAME, 0),
      tf.stack(sampled_UNIX_TS'''

      sampled_STOCKCODE, sampled_RATING, sampled_GICS, sampled_STOCKNAME, sampled_UNIX_TS  = _sample_list(
          feature_lists,
          num_examples_per_list,
          random_state=random_state,
      )
      tensor_slices["CDSACCNO"].append(user_id)
      tensor_slices["STOCKCODE"].append(sampled_STOCKCODE)
      tensor_slices["RATING"].append(sampled_RATING)
      tensor_slices["GICS"].append(sampled_GICS)
      tensor_slices["STOCKNAME"].append(sampled_STOCKNAME)
      tensor_slices["UNIX_TS"].append(sampled_UNIX_TS)

  return tf.data.Dataset.from_tensor_slices(tensor_slices)

# Work

In [59]:
portfolios = tf.data.Dataset.load("../../data/portfolios_tfds_lists")

In [60]:
train_ds = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/train_lists").cache() #data\ratings_train
test_ds = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/test_lists").cache()

In [61]:
train_v1 = sample_listwise(
    train_ds,
    num_list_per_user=50,
    num_examples_per_list=5,
    seed=42
)
test_v1 = sample_listwise(
    test_ds,
    num_list_per_user=1,
    num_examples_per_list=5,
    seed=42
)

In [62]:
next(iter(train_v1))

{'CDSACCNO': <tf.Tensor: shape=(), dtype=string, numpy=b'RPS-23479-LI/00'>,
 'STOCKCODE': <tf.Tensor: shape=(5,), dtype=string, numpy=array([b'CIC', b'AMSL', b'RAL', b'REG', b'AEL'], dtype=object)>,
 'RATING': <tf.Tensor: shape=(5,), dtype=float32, numpy=array([4., 2., 5., 5., 2.], dtype=float32)>,
 'GICS': <tf.Tensor: shape=(5,), dtype=string, numpy=
 array([b'Materials', b'Health Care Equipment & Services',
        b'Food Beverage & Tobacco', b'Consumer Durables & Apparel',
        b'Capital Goods'], dtype=object)>,
 'STOCKNAME': <tf.Tensor: shape=(5,), dtype=string, numpy=
 array([b'C I C HOLDINGS PLC', b'ASIRI SURGICAL HOSPITAL PLC',
        b'RENUKA AGRI FOODS PLC', b'REGNIS (LANKA) PLC',
        b'ACCESS ENGINEERING PLC'], dtype=object)>,
 'UNIX_TS': <tf.Tensor: shape=(5,), dtype=float32, numpy=
 array([1.6475418e+09, 1.6681050e+09, 1.7100954e+09, 1.7030970e+09,
        1.6655994e+09], dtype=float32)>}

In [63]:
next(iter(train_v1.take(1)))['STOCKCODE'].shape[0]

5

In [64]:
# train_v1.save("../../data/train_lists_ds")
# test_v1.save("../../data/test_lists_ds")

In [65]:
train_ds = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/train").cache() #data\ratings_train
test_ds = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/test").cache()
portfolios = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/portfolios_tfds").cache()

train_list_ds = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/train_lists_ds").batch(64).cache()
test_list_ds = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/test_lists_ds").batch(64).cache()

items_ids = portfolios.batch(10000).map(lambda x: x["STOCKCODE"])
item_names = portfolios.batch(10000).map(lambda x: x["STOCKNAME"])
item_GICS = portfolios.batch(10000).map(lambda x: x["GICS"])

user_ids = portfolios.batch(10000).map(lambda x: x["CDSACCNO"])

unique_item_ids = np.unique(np.concatenate(list(items_ids)))
unique_item_names = np.unique(np.concatenate(list(item_names)))
unique_item_gics = np.unique(np.concatenate(list(item_GICS)))

unique_user_ids = np.unique(np.concatenate(list(user_ids)))

In [66]:
    from item_embedding import ItemModel
    from user_embedding import UserModel
    
    item_model = ItemModel(
      unique_item_ids = unique_item_ids,
      unique_item_names = unique_item_names,
      unique_item_gics = unique_item_gics
    )

    user_model = UserModel(
      # use_timestamp = self.use_timestamp,
      unique_user_ids = unique_user_ids, 
      # timestamps = self.timestamps, 
      # timestamp_buckets = self.timestamp_buckets
    )

In [67]:
test_batch = next(iter(train_v1.batch(2)))
test_batch

{'CDSACCNO': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'RPS-23479-LI/00', b'RPS-23479-LI/00'], dtype=object)>,
 'STOCKCODE': <tf.Tensor: shape=(2, 5), dtype=string, numpy=
 array([[b'CIC', b'AMSL', b'RAL', b'REG', b'AEL'],
        [b'RICH', b'TAFL', b'ASIY', b'MHDL', b'HPWR']], dtype=object)>,
 'RATING': <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
 array([[4., 2., 5., 5., 2.],
        [3., 3., 2., 2., 3.]], dtype=float32)>,
 'GICS': <tf.Tensor: shape=(2, 5), dtype=string, numpy=
 array([[b'Materials', b'Health Care Equipment & Services',
         b'Food Beverage & Tobacco', b'Consumer Durables & Apparel',
         b'Capital Goods'],
        [b'Capital Goods', b'Food Beverage & Tobacco',
         b'Diversified Financials', b'Real Estate', b'Utilities']],
       dtype=object)>,
 'STOCKNAME': <tf.Tensor: shape=(2, 5), dtype=string, numpy=
 array([[b'C I C HOLDINGS PLC', b'ASIRI SURGICAL HOSPITAL PLC',
         b'RENUKA AGRI FOODS PLC', b'REGNIS (LANKA) PLC',
         b'ACCES

In [68]:
item_inp = (test_batch['STOCKCODE'], test_batch['GICS'])

In [69]:
item_ = item_model(item_inp)

item_

<tf.Tensor: shape=(2, 5, 16), dtype=float32, numpy=
array([[[ 0.02027139,  0.03421683,  0.02332293, -0.00800562,
          0.02147484, -0.04843592, -0.02278808, -0.0417578 ,
         -0.0095832 ,  0.03392471, -0.04290285, -0.00182943,
         -0.00256759, -0.01172252,  0.00864651,  0.01223781],
        [-0.01971546,  0.0445578 ,  0.00176143, -0.0092337 ,
         -0.0124269 , -0.04070895,  0.02197586, -0.03128083,
          0.00594584,  0.0028032 ,  0.00245892,  0.02495909,
         -0.01231412, -0.02232845,  0.01659507,  0.01471267],
        [ 0.04241122, -0.00896342, -0.01146943,  0.03083274,
         -0.03076559, -0.00294992, -0.04222196,  0.04735878,
         -0.0498374 ,  0.03867574,  0.01815366,  0.03661953,
          0.01398901,  0.03755877,  0.02769006, -0.02311521],
        [ 0.02542457,  0.03366946,  0.04267896, -0.00769471,
         -0.008756  , -0.02367683,  0.02326988, -0.00152274,
          0.04824715, -0.0225346 ,  0.01807832,  0.01734182,
         -0.03202378,  0.04491

In [70]:
user_inp = (test_batch['CDSACCNO'])
user_inp

<tf.Tensor: shape=(2,), dtype=string, numpy=array([b'RPS-23479-LI/00', b'RPS-23479-LI/00'], dtype=object)>

In [71]:
(x) = user_inp
x

<tf.Tensor: shape=(2,), dtype=string, numpy=array([b'RPS-23479-LI/00', b'RPS-23479-LI/00'], dtype=object)>

In [72]:
user_ = user_model(user_inp)
user_

<tf.Tensor: shape=(2, 32), dtype=float32, numpy=
array([[-0.01576972, -0.02980126,  0.03946653,  0.03699971,  0.0421758 ,
         0.01095795,  0.02795155, -0.04394399,  0.02154333,  0.02242194,
         0.04396934, -0.01271524,  0.01019721,  0.03456024, -0.02960328,
         0.04786277,  0.01620526,  0.02566824, -0.00571634, -0.02289718,
         0.02162603, -0.02583183, -0.04870712, -0.04304484,  0.04947359,
        -0.02751162,  0.01810508, -0.0017694 , -0.04887902, -0.01054118,
        -0.01974896,  0.02722856],
       [-0.01576972, -0.02980126,  0.03946653,  0.03699971,  0.0421758 ,
         0.01095795,  0.02795155, -0.04394399,  0.02154333,  0.02242194,
         0.04396934, -0.01271524,  0.01019721,  0.03456024, -0.02960328,
         0.04786277,  0.01620526,  0.02566824, -0.00571634, -0.02289718,
         0.02162603, -0.02583183, -0.04870712, -0.04304484,  0.04947359,
        -0.02751162,  0.01810508, -0.0017694 , -0.04887902, -0.01054118,
        -0.01974896,  0.02722856]], dtyp

In [73]:
list_length = test_batch['STOCKCODE'].shape[1]
list_length

5

In [74]:
tf.expand_dims(user_, 1)

<tf.Tensor: shape=(2, 1, 32), dtype=float32, numpy=
array([[[-0.01576972, -0.02980126,  0.03946653,  0.03699971,
          0.0421758 ,  0.01095795,  0.02795155, -0.04394399,
          0.02154333,  0.02242194,  0.04396934, -0.01271524,
          0.01019721,  0.03456024, -0.02960328,  0.04786277,
          0.01620526,  0.02566824, -0.00571634, -0.02289718,
          0.02162603, -0.02583183, -0.04870712, -0.04304484,
          0.04947359, -0.02751162,  0.01810508, -0.0017694 ,
         -0.04887902, -0.01054118, -0.01974896,  0.02722856]],

       [[-0.01576972, -0.02980126,  0.03946653,  0.03699971,
          0.0421758 ,  0.01095795,  0.02795155, -0.04394399,
          0.02154333,  0.02242194,  0.04396934, -0.01271524,
          0.01019721,  0.03456024, -0.02960328,  0.04786277,
          0.01620526,  0.02566824, -0.00571634, -0.02289718,
          0.02162603, -0.02583183, -0.04870712, -0.04304484,
          0.04947359, -0.02751162,  0.01810508, -0.0017694 ,
         -0.04887902, -0.01054

In [75]:
user_re = tf.repeat(
        tf.expand_dims(user_, 1), [list_length], axis=1)
user_re

<tf.Tensor: shape=(2, 5, 32), dtype=float32, numpy=
array([[[-0.01576972, -0.02980126,  0.03946653,  0.03699971,
          0.0421758 ,  0.01095795,  0.02795155, -0.04394399,
          0.02154333,  0.02242194,  0.04396934, -0.01271524,
          0.01019721,  0.03456024, -0.02960328,  0.04786277,
          0.01620526,  0.02566824, -0.00571634, -0.02289718,
          0.02162603, -0.02583183, -0.04870712, -0.04304484,
          0.04947359, -0.02751162,  0.01810508, -0.0017694 ,
         -0.04887902, -0.01054118, -0.01974896,  0.02722856],
        [-0.01576972, -0.02980126,  0.03946653,  0.03699971,
          0.0421758 ,  0.01095795,  0.02795155, -0.04394399,
          0.02154333,  0.02242194,  0.04396934, -0.01271524,
          0.01019721,  0.03456024, -0.02960328,  0.04786277,
          0.01620526,  0.02566824, -0.00571634, -0.02289718,
          0.02162603, -0.02583183, -0.04870712, -0.04304484,
          0.04947359, -0.02751162,  0.01810508, -0.0017694 ,
         -0.04887902, -0.0105411

In [76]:
user_re.shape, item_.shape

(TensorShape([2, 5, 32]), TensorShape([2, 5, 16]))

In [77]:
concatenated_embeddings = tf.concat([user_re, item_], 2)
concatenated_embeddings

<tf.Tensor: shape=(2, 5, 48), dtype=float32, numpy=
array([[[-0.01576972, -0.02980126,  0.03946653,  0.03699971,
          0.0421758 ,  0.01095795,  0.02795155, -0.04394399,
          0.02154333,  0.02242194,  0.04396934, -0.01271524,
          0.01019721,  0.03456024, -0.02960328,  0.04786277,
          0.01620526,  0.02566824, -0.00571634, -0.02289718,
          0.02162603, -0.02583183, -0.04870712, -0.04304484,
          0.04947359, -0.02751162,  0.01810508, -0.0017694 ,
         -0.04887902, -0.01054118, -0.01974896,  0.02722856,
          0.02027139,  0.03421683,  0.02332293, -0.00800562,
          0.02147484, -0.04843592, -0.02278808, -0.0417578 ,
         -0.0095832 ,  0.03392471, -0.04290285, -0.00182943,
         -0.00256759, -0.01172252,  0.00864651,  0.01223781],
        [-0.01576972, -0.02980126,  0.03946653,  0.03699971,
          0.0421758 ,  0.01095795,  0.02795155, -0.04394399,
          0.02154333,  0.02242194,  0.04396934, -0.01271524,
          0.01019721,  0.0345602

In [78]:
next(iter(train_v1))

{'CDSACCNO': <tf.Tensor: shape=(), dtype=string, numpy=b'RPS-23479-LI/00'>,
 'STOCKCODE': <tf.Tensor: shape=(5,), dtype=string, numpy=array([b'CIC', b'AMSL', b'RAL', b'REG', b'AEL'], dtype=object)>,
 'RATING': <tf.Tensor: shape=(5,), dtype=float32, numpy=array([4., 2., 5., 5., 2.], dtype=float32)>,
 'GICS': <tf.Tensor: shape=(5,), dtype=string, numpy=
 array([b'Materials', b'Health Care Equipment & Services',
        b'Food Beverage & Tobacco', b'Consumer Durables & Apparel',
        b'Capital Goods'], dtype=object)>,
 'STOCKNAME': <tf.Tensor: shape=(5,), dtype=string, numpy=
 array([b'C I C HOLDINGS PLC', b'ASIRI SURGICAL HOSPITAL PLC',
        b'RENUKA AGRI FOODS PLC', b'REGNIS (LANKA) PLC',
        b'ACCESS ENGINEERING PLC'], dtype=object)>,
 'UNIX_TS': <tf.Tensor: shape=(5,), dtype=float32, numpy=
 array([1.6475418e+09, 1.6681050e+09, 1.7100954e+09, 1.7030970e+09,
        1.6655994e+09], dtype=float32)>}

In [51]:
from recommender import Recommender

model = Recommender(
    # use_timestamp = True,
    portfolios = portfolios
    )

model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

train_v1 = train_v1.shuffle(10000).batch(16)

model.fit(
    train_v1, 
    epochs=20, 
    verbose = 1,
    )

Epoch 1/20
********** (1, None, None, None, None, 32) **********


TypeError: in user code:

    File "c:\Users\bpadmin\anaconda3\envs\atrad_cars_v2\lib\site-packages\keras\engine\training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\bpadmin\anaconda3\envs\atrad_cars_v2\lib\site-packages\keras\engine\training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\bpadmin\anaconda3\envs\atrad_cars_v2\lib\site-packages\keras\engine\training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\bpadmin\anaconda3\envs\atrad_cars_v2\lib\site-packages\tensorflow_recommenders\models\base.py", line 68, in train_step
        loss = self.compute_loss(inputs, training=True)
    File "d:\dev work\recommender systems\Atrad_CARS\code\v3_listwise\recommender.py", line 123, in compute_loss
        scores = self(features)
    File "c:\Users\bpadmin\anaconda3\envs\atrad_cars_v2\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_file6zchcepq.py", line 14, in tf__call
        user_embedding_repeated = ag__.converted_call(ag__.ld(tf).repeat, (ag__.converted_call(ag__.ld(tf).expand_dims, (ag__.ld(user_embeddings), 1), None, fscope), [ag__.ld(list_length)]), dict(axis=1), fscope)

    TypeError: Exception encountered when calling layer 'recommender_4' (type Recommender).
    
    in user code:
    
        File "d:\dev work\recommender systems\Atrad_CARS\code\v3_listwise\recommender.py", line 107, in call  *
            user_embedding_repeated = tf.repeat(
    
        TypeError: Failed to convert elements of [None] to Tensor. Consider casting elements to a supported type. See https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes.
    
    
    Call arguments received by layer 'recommender_4' (type Recommender):
      • features={'UNIX_TS': 'tf.Tensor(shape=(None, None, None, None, 5), dtype=float32)', 'CDSACCNO': 'tf.Tensor(shape=(None, None, None, None), dtype=string)', 'STOCKCODE': 'tf.Tensor(shape=(None, None, None, None, 5), dtype=string)', 'GICS': 'tf.Tensor(shape=(None, None, None, None, 5), dtype=string)', 'STOCKNAME': 'tf.Tensor(shape=(None, None, None, None, 5), dtype=string)'}
