In [1]:
import tensorflow as tf
import tensorflow_recommenders as tfrs

import numpy as np
import pandas as pd

In [2]:
def _select_feature(feature_name):
    """Build a mapper that returns a single feature from a dataset element.

    A named function is used instead of an inline lambda: the lambdas passed to
    `Dataset.map` in this cell appear to be what triggers TensorFlow's
    "Lambda fuctions will be no more assumed ..." deprecation warning
    (https://github.com/tensorflow/tensorflow/issues/56089).
    """
    def _select(element):
        return element[feature_name]
    return _select


# NOTE(review): absolute, machine-specific paths; consider a configurable data directory.
test_ds = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/test").cache()

portfolios = tf.data.Dataset.load("D:/dev work/recommender systems/Atrad_CARS/data/portfolios_tfds").cache()

# Batch once and reuse the batched view for every per-feature dataset.
portfolio_batches = portfolios.batch(10000)

items_ids = portfolio_batches.map(_select_feature("STOCKCODE"))
item_names = portfolio_batches.map(_select_feature("STOCKNAME"))
item_GICS = portfolio_batches.map(_select_feature("GICS"))

user_ids = portfolio_batches.map(_select_feature("CDSACCNO"))

# Distinct values of each feature, materialised as numpy arrays.
unique_item_ids = np.unique(np.concatenate(list(items_ids)))
unique_item_names = np.unique(np.concatenate(list(item_names)))
unique_item_gics = np.unique(np.concatenate(list(item_GICS)))

unique_user_ids = np.unique(np.concatenate(list(user_ids)))

# need these to initialize timestamp embedding layers in future steps
# (same batch size as the other features; the concatenated values are unchanged)
timestamps = np.concatenate(list(portfolio_batches.map(_select_feature("UNIX_TS"))))
max_timestamp = timestamps.max()
min_timestamp = timestamps.min()

# 1000 evenly spaced bucket boundaries spanning the observed timestamp range.
timestamp_buckets = np.linspace(
    min_timestamp, max_timestamp, num=1000,
)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [5]:
# Embedding widths for each item-side feature; passed to Retriever below.
item_embedding_dims = dict(
    item_id_dim=8,
    item_gics_dim=8,
    item_name_dim=16,
)

# Embedding widths for each user-side feature; passed to Retriever below.
user_embedding_dims = dict(
    user_id_dim=16,
    user_ts_dim=15,
)

In [6]:
from retrieval_recommender import Retriever

# Checkpoint holding the trained retrieval weights.
# NOTE(review): absolute, machine-specific path.
retriever_weights_path = "D:/dev work/recommender systems/ATRAD_CARS/model_weights/2024_04_29/tf_retrival_2024_04_29_13_31"

# Recreate the retrieval model with the embedding sizes defined above,
# then restore its weights from the checkpoint.
retriever = Retriever(
    use_timestamp=True,
    portfolios=portfolios,
    item_embedding_dims=item_embedding_dims,
    user_embedding_dims=user_embedding_dims,
)
retriever.load_weights(retriever_weights_path)

# Compile with Adagrad at learning rate 0.1.
retriever_optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.1)
retriever.compile(optimizer=retriever_optimizer)


In [8]:
# Restored from commented-out code: the "Ranker function" section calls
# `ranker(user)`, so on a fresh kernel (Restart & Run All) the notebook would
# raise NameError unless the ranker is built here.
from ranker_recommender import Ranker

ranker = Ranker(
    loss = tf.keras.losses.MeanSquaredError(),
    portfolios = portfolios
)

# NOTE(review): absolute, machine-specific checkpoint path.
ranker.load_weights(r"D:\dev work\recommender systems\Atrad_CARS\model_weights\2024_04_29\tf_listwise_ranking_2024_04_29_16_42")
ranker.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))


In [7]:
# Load the stock sheet, keep only the identifying columns under the feature
# names used elsewhere in this notebook (STOCKCODE / STOCKNAME / GICS), and
# drop rows that have no GICS value.
stock_info = (
    pd.read_excel('../../data/stock_data.xlsx')
    .drop(columns=['Unnamed: 0', 'buisnesssummary'])
    .rename(columns={
        'symbol': 'STOCKCODE',
        'name': 'STOCKNAME',
        'gics_code': 'GICS',
    })
    .dropna(subset=['GICS'])
)

print(f"items data shape :: {stock_info.shape}")

# Column-wise dict of lists -> dataset with one element per stock.
items_ds = tf.data.Dataset.from_tensor_slices(stock_info.to_dict(orient='list'))

items data shape :: (280, 3)


# Retriever function

In [8]:
# Collect every STOCKCODE in items_ds into one tensor by batching the whole
# dataset into a single element and taking that element.
stock_code_ds = items_ds.map(lambda row: row["STOCKCODE"])
n_stock_codes = len(stock_code_ds)
items_identiifiers = next(iter(stock_code_ds.batch(n_stock_codes)))
items_identiifiers.shape

TensorShape([280])

In [9]:
# Brute-force top-k index that uses the retriever's user model on the query side.
index = tfrs.layers.factorized_top_k.BruteForce(retriever.user_model)
retriever_item_model = retriever.item_model

# Put all items into a single batch and run it through the item model.
all_items_batch = items_ds.batch(len(items_ds))
mapped_items = all_items_batch.map(
    lambda item_features: retriever_item_model(item_features, map_=True)
)
mapped_items_tensor = next(iter(mapped_items))

# Register the item model outputs as candidates, identified by stock code.
index.index(mapped_items_tensor, items_identiifiers)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x1a568dd2d30>

In [10]:
# Example query inputs: one user ID and one timestamp.
# NOTE(review): the timestamp looks like Unix epoch seconds; confirm.
test_user = 'HNB-254-LC/00'
test_timestamp = 1_641_999_190.0

In [88]:
# Query the index with a single (user ID, timestamp) pair. The call returns a
# pair; only the second item (the identifiers registered with the index) is
# used here.
_scores, recommended_codes = index(
    (
        tf.constant([test_user]),
        tf.constant([test_timestamp]),
    )
)

# Identifiers come back as bytes; decode them to plain Python strings.
recommendations = [code.decode('utf-8') for code in recommended_codes.numpy().flatten()]

# One f-string only: the previous mix of f-string and %-formatting would break
# if any interpolated text contained a '%' character.
print(f"Recommendations for user {test_user}: {recommendations}")

Recommendations for user HNB-254-LC/00: ['ACL', 'KCAB', 'TYRE', 'CARE', 'HASU', 'RICH', 'CCS', 'EXPO', 'HAYC', 'PARQ']


# Ranker function

In [89]:
# Preview the first two rows of the stock table used to build the lookups.
stock_info.head(n=2)

Unnamed: 0,STOCKCODE,STOCKNAME,GICS
0,HBS,hSenid Business Solutions PLC,45103010 - Application Software
1,TYRE,KELANI TYRES PLC,Automobiles & Components


In [90]:
# Lookup tables keyed by stock code: code -> name and code -> GICS.
stock_codes = stock_info["STOCKCODE"]
code2name = dict(zip(stock_codes, stock_info["STOCKNAME"]))
code2gics = dict(zip(stock_codes, stock_info["GICS"]))

In [93]:
# Name and GICS value for each recommended stock code, in recommendation order.
names = np.array([code2name[stock_code] for stock_code in recommendations])
gics = np.array([code2gics[stock_code] for stock_code in recommendations])

In [105]:
# Display the stock names looked up for the recommended codes.
names

array(['ACL CABLES PLC', 'KELANI CABLES PLC', 'KELANI TYRES PLC',
       'PRINTCARE PLC', 'HNB ASSURANCE PLC',
       'RICHARD PIERIS AND COMPANY PLC', 'CEYLON COLD STORES PLC',
       'EXPOLANKA HOLDINGS PLC', 'HAYCARB PLC', 'SWISSTEK (CEYLON) PLC'],
      dtype='<U30')

In [104]:
# Ranker input for one user: a single CDSACCNO plus one row of candidates per
# feature. reshape(1, -1) keeps every candidate feature at exactly one row
# whatever the number of recommendations, instead of hard-coding 10 columns
# (which only works when the index returns exactly 10 results).
user = {
    'CDSACCNO': np.array([test_user]),
    'STOCKCODE': np.array(recommendations).reshape(1, -1),
    'GICS': gics.reshape(1, -1),
    'STOCKNAME': names.reshape(1, -1),
}

In [109]:
# Inspect the STOCKNAME feature as it will be passed to the ranker.
user['STOCKNAME']

array([['ACL CABLES PLC', 'KELANI CABLES PLC', 'KELANI TYRES PLC',
        'PRINTCARE PLC', 'HNB ASSURANCE PLC',
        'RICHARD PIERIS AND COMPANY PLC', 'CEYLON COLD STORES PLC',
        'EXPOLANKA HOLDINGS PLC', 'HAYCARB PLC', 'SWISSTEK (CEYLON) PLC']],
      dtype='<U30')

In [112]:
# Score the test user's candidate list with the ranking model.
pred_ratings = ranker(user)

# Show the predicted ratings as a flat 1-D array.
pred_ratings.numpy().reshape(-1)

(1, 10, 32)  |  (1, 10, 64)


array([2.1672614, 2.6092675, 1.1385504, 1.1015868, 1.8887022, 1.2175096,
       2.1446908, 5.282713 , 1.5482672, 1.1381485], dtype=float32)

In [117]:
# Pair each recommended stock code with its predicted rating and list the
# highest-rated first.
recommendations_w_ratings = pd.DataFrame(
    {
        'STOCKCODE': recommendations,
        'PRED_RATING': pred_ratings.numpy().flatten(),
    }
).sort_values(by=['PRED_RATING'], ascending=False)
recommendations_w_ratings

Unnamed: 0,STOCKCODE,PRED_RATING
7,EXPO,5.282713
1,KCAB,2.609267
0,ACL,2.167261
6,CCS,2.144691
4,HASU,1.888702
8,HAYC,1.548267
5,RICH,1.21751
2,TYRE,1.13855
9,PARQ,1.138149
3,CARE,1.101587
