# Making predictions with the trained RL agent

In [1]:
import torch
import pandas as pd
from src.recsys_rl import LogToEpisodeConverter, OfflineEnv, DQN, OfflineDQNAgent

In [2]:
# Temporary data loading: this cell can be removed once the
# data-preprocessing class is finished.
import polars as pl

# Read the sample event log with polars, then hand it over as a pandas
# DataFrame, which is what the following cells pass to the converter.
train_data = (
    pl.read_parquet('data/events_sample.parquet')
    .to_pandas()
)
train_data.head()

Unnamed: 0,timestamp,visitorid,event,itemid,transactionid,new_session_mark,session_id,session_start_time,session_length,gssid
0,2015-09-03 22:01:27.081,1532,0,303715,,False,2,2015-09-03 22:01:27.081,0 days 00:05:00.350000,0000001532201509032201270300
1,2015-09-03 22:06:27.431,1532,0,303715,,True,2,2015-09-03 22:01:27.081,0 days 00:05:00.350000,0000001532201509032206270300
2,2015-08-31 19:46:03.121,4248,0,281838,,False,3,2015-08-31 19:46:03.121,0 days 00:01:43.021000,0000004248201508311946030103
3,2015-08-31 19:47:46.142,4248,0,44977,,True,3,2015-08-31 19:46:03.121,0 days 00:01:43.021000,0000004248201508311947460103
4,2015-08-28 15:02:47.606,4531,0,1571,,False,4,2015-08-28 15:02:47.606,0 days 00:02:01.135000,0000004531201508281502470121


In [4]:
# Turn the raw event log into tensor episodes that the agent can consume.
n_history = 3  # history length handed to the episode builder below

data_converter = LogToEpisodeConverter()
data_converter.load_dataset(train_data)

# Reward assigned per key; the keys presumably correspond to the values of
# the `event` column in the log — TODO confirm against LogToEpisodeConverter.
data_converter.set_rewards({0: 5, 1: 8, 2: 10})

data_converter.create_ssar_tensor_episodes(
    n_history=n_history,
    mode='cpu_predicting',
)

Data loaded successfully.
Rewards set successfully.
Episodes created successfully.


In [6]:
# Rebuild the agent with dimensions derived from the converted episodes,
# then restore the previously trained weights from disk.

# State size: length of element 0 of the first transition of the first episode.
state_size = len(data_converter.tensor_episodes[0][0][0])

# Number of actions: largest value found at index 1 of any transition, plus one
# (index 1 is treated as the action id, so ids are assumed to start at 0).
action_size = 1 + max(
    step[1].item()
    for episode in data_converter.tensor_episodes
    for step in episode
)

agent = OfflineDQNAgent(state_size, action_size, mode='cpu_predicting')

# NOTE(review): the checkpoint file name ends in "n_hist1" while this notebook
# builds episodes with n_history = 3 — confirm the checkpoint matches the
# state_size/action_size computed above before trusting the predictions.
agent.load_model('trained agents/DQN trained agent 20240530_085334 n_hist1.pth')


In [18]:
# Ask the trained agent for its top-ranked actions on a few hand-written states.
n_predictions = 5  # how many top actions to request per state

# One row per state, each with three entries (same length as n_history above).
# The non-zero values are item ids that appear in the train_data.head() output;
# 0 presumably stands for "no item yet" (padding) — TODO confirm.
sample_states = [
    [0, 0, 0],
    [0, 0, 303715],
    [0, 281838, 44977],
]

# Query the agent for the top `n_predictions` actions of every sample state.
predicted_actions = agent.predict(sample_states, n_predictions)

# Show the result.
print(f"Predicted top {n_predictions} actions: {predicted_actions}")

Predicted top 5 actions: [[168055, 221042, 319895, 175798, 411722], [169029, 267648, 128657, 155628, 329188], [139659, 50957, 50419, 146254, 236193]]
