In [None]:
# run following commands when this script is executed in google colab
# takes care of cloning the repository and changing the working directory to the repository so objects can be imported from src folder in the repository.
!git clone https://github.com/meeslindhout/Master-Thesis-Project---Cold-Start-Recommender-System-Session-based.git
import os
os.chdir('Master-Thesis-Project---Cold-Start-Recommender-System-Session-based')

# Analysis of the cold start problem in session-based recommendation
A comparison of session item KNN vs. adding deep reinforcement learning to session-based recommendation to solve the cold start problem.

In [1]:
# TODO: disabled placeholder for loading the data through the DataPreprocessor class;
# until that class is ready, the notebook reads a temporary parquet sample instead.
# from datapreprocessing import DataPreprocessor

# data= DataPreprocessor()
# data.load_data('retailrocket')

## Training a Reinforcement Learning Agent for Session-based Recommendation

In [3]:
# Load temporary sample data (this cell can be removed once the
# datapreprocessing class is finished).
import polars as pl

# Read the parquet file with polars, then hand it over as a pandas DataFrame.
events_path = 'data/events_sample.parquet'
train_data = pl.read_parquet(events_path).to_pandas()

# Show the first rows as the cell output.
train_data.head()

Unnamed: 0,timestamp,visitorid,event,itemid,transactionid,new_session_mark,session_id,session_start_time,session_length,gssid
0,2015-09-03 22:01:27.081,1532,0,303715,,False,2,2015-09-03 22:01:27.081,0 days 00:05:00.350000,0000001532201509032201270300
1,2015-09-03 22:06:27.431,1532,0,303715,,True,2,2015-09-03 22:01:27.081,0 days 00:05:00.350000,0000001532201509032206270300
2,2015-08-31 19:46:03.121,4248,0,281838,,False,3,2015-08-31 19:46:03.121,0 days 00:01:43.021000,0000004248201508311946030103
3,2015-08-31 19:47:46.142,4248,0,44977,,True,3,2015-08-31 19:46:03.121,0 days 00:01:43.021000,0000004248201508311947460103
4,2015-08-28 15:02:47.606,4531,0,1571,,False,4,2015-08-28 15:02:47.606,0 days 00:02:01.135000,0000004531201508281502470121


In [1]:
from src.recsys_rl import LogToEpisodeConverter, OfflineEnv, DQN, OfflineDQNAgent

Loading and preparing the data for offline training of the reinforcement learning agent.

In [4]:
# History length handed to the episode converter (and reused when building the environment).
n_history = 3

# Reward assigned per key of the mapping.
# NOTE(review): keys presumably correspond to the codes in the `event` column — confirm.
event_rewards = {0: 5, 1: 8, 2: 10}

# Convert the interaction log into tensor episodes for offline training.
data_converter = LogToEpisodeConverter()
data_converter.load_dataset(train_data)
data_converter.set_rewards(event_rewards)
data_converter.create_ssar_tensor_episodes(
    n_history=n_history,
    mode='gpu_training',
)

Data loaded successfully.
Rewards set successfully.
Episodes created successfully.


Create an environment for the agent to interact with the data.

In [5]:
# Wrap the converted tensor episodes in an offline environment, using the same
# history length that was used to create the episodes.
tensor_episodes = data_converter.tensor_episodes
env = OfflineEnv(tensor_episodes, n_history)

Initialize the agent and connect it to the environment.

In [6]:
# Build the DQN agent; state and action sizes are read from the environment's spaces.
agent = OfflineDQNAgent(state_size=env.observation_space.shape[0],
                        action_size=env.action_space.n,
                        learning_rate=3e-4,
                        # Reuse the notebook-wide history length so the agent agrees with the
                        # episode converter and the environment instead of a hard-coded 1.
                        n_history=n_history,
                        mode='gpu_training'
                        )

In [7]:
# Settings consumed by the training loop.
target_update_freq = 10   # the target model is refreshed every this many episodes
batch_size = 512          # value passed to agent.train() after each episode
num_episodes = 1000       # number of iterations of the outer training loop

In [8]:
# Offline training: play one episode per iteration, train once on a batch,
# and periodically refresh the target model.
for episode in range(num_episodes):
    state, episode_reward = env.reset(), 0

    # Roll the episode forward until the environment reports it is finished.
    while True:
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        # Hand the transition to the agent before moving on to the next state.
        agent.step(state, action, reward, next_state, done)
        episode_reward += reward
        state = next_state
        if done:
            break

    agent.train(batch_size)

    # Refresh the target model on every `target_update_freq`-th episode (including the first).
    if not episode % target_update_freq:
        agent.update_target_model()

    # Record the episode's total reward and report progress.
    agent.kpi_tracker['episode_rewards'].append(episode_reward)
    print(f"Episode {episode + 1}/{num_episodes} completed with reward: {episode_reward}")

Episode 1/1000 completed with reward: 5
Episode 2/1000 completed with reward: 5
Episode 3/1000 completed with reward: 5
Episode 4/1000 completed with reward: 15
Episode 5/1000 completed with reward: 5
Episode 6/1000 completed with reward: 10
Episode 7/1000 completed with reward: 5
Episode 8/1000 completed with reward: 5
Episode 9/1000 completed with reward: 20
Episode 10/1000 completed with reward: 5
Episode 11/1000 completed with reward: 5
Episode 12/1000 completed with reward: 30
Episode 13/1000 completed with reward: 5
Episode 14/1000 completed with reward: 20
Episode 15/1000 completed with reward: 5
Episode 16/1000 completed with reward: 56
Episode 17/1000 completed with reward: 5
Episode 18/1000 completed with reward: 10
Episode 19/1000 completed with reward: 5
Episode 20/1000 completed with reward: 8
Episode 21/1000 completed with reward: 44
Episode 22/1000 completed with reward: 5
Episode 23/1000 completed with reward: 15
Episode 24/1000 completed with reward: 259
Episode 25/100

KeyboardInterrupt: 

## Saving the trained agent 
This section also includes extra scripts to save the model to Google Drive or to download it.

In [None]:
# Save the trained agent, passing 'sampled_retailrocket' to save_model.
# NOTE(review): presumably the file is written under 'trained agents/' (the cells
# below inspect that folder) — confirm against src.recsys_rl.
agent.save_model('sampled_retailrocket')

In [None]:
# List the contents of the 'trained agents/' directory.
!ls 'trained agents/'

In [None]:
!du --block-size=MB 'trained agents/DQN trained agent 20240530_085334 n_hist1.pth'