In [1]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

# Load the pre-built interaction dataset from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code, so this file must only
# ever come from a trusted source.
df: pd.DataFrame = pd.read_pickle('./data/dataset.pkl')

# Last expression of the cell: renders the frame as the cell output.
df

Unnamed: 0,id,mission,ID,history,negatives
5643,4122_2024-10-02,activity_1,2,"[[15, 0], [24, 0], [0, 0], [1, 0], [26, 0], [4...","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
5644,4122_2024-10-02,mobility_2,21,"[[15, 0], [24, 0], [0, 0], [1, 0], [26, 0], [4...","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14..."
6086,5027_2024-10-02,mobility_1,20,"[[23, 0], [22, 0], [24, 1], [24, 1], [1, 0], [...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14,..."
6087,5027_2024-10-02,activity_9,11,"[[23, 0], [22, 0], [24, 1], [24, 1], [1, 0], [...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14,..."
6088,5027_2024-10-02,exp_100,18,"[[23, 0], [22, 0], [24, 1], [24, 1], [1, 0], [...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14,..."
...,...,...,...,...,...
91346,7213_2024-10-31,activity_1,2,"[[4, 1], [26, 0], [17, 1], [20, 0], [6, 1], [2...","[0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
91347,7213_2024-10-31,action_2,1,"[[4, 1], [26, 0], [17, 1], [20, 0], [6, 1], [2...","[0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
91348,7213_2024-10-31,episode_5,16,"[[4, 1], [26, 0], [17, 1], [20, 0], [6, 1], [2...","[0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
91350,7219_2024-10-31,quiz_1,24,"[[16, 0], [4, 0], [1, 0], [13, 0], [22, 0], [4...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14,..."


In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class SequentialRecommendations(Dataset):
    """Dataset yielding ``(history, positive, negatives)`` triples, one per dataframe row.

    All tensors are created once in ``__init__`` and moved to ``DEVICE``; only the
    negative sampling happens per access.

    Args:
        df: frame with an ``'ID'`` column (the positive item of each row), a
            ``'history'`` column of numpy arrays and a ``'negatives'`` column of
            id sequences.
        n_negative_samples: upper bound on the number of negatives returned per item.
    """

    def __init__(self, df: pd.DataFrame, n_negative_samples=10):
        super().__init__()
        target_ids = torch.from_numpy(df['ID'].values)
        # One column per row so the ids can be fed straight into ``Tensor.gather``.
        self.positives = target_ids.view(-1, 1).to(DEVICE)

        self.histories = []
        for past in df['history']:
            self.histories.append(torch.from_numpy(past).to(DEVICE))

        self.negatives = []
        for candidates in df['negatives']:
            self.negatives.append(torch.tensor(candidates).to(DEVICE))

        self.n_negative_samples = n_negative_samples

    def __len__(self):
        """Number of rows (one positive per row)."""
        return len(self.positives)

    def __getitem__(self, idx):
        """Return ``(history, positive, negatives)`` for row ``idx``.

        The negatives are a fresh random subset (without replacement) of the row's
        candidate negatives; it holds fewer than ``n_negative_samples`` entries when
        the row has fewer candidates.
        """
        pos = self.positives[idx]
        history = self.histories[idx]

        pool = self.negatives[idx]
        # Shuffle the candidate positions and keep the leading ones.
        shuffled = torch.randperm(len(pool))
        picked = shuffled[:self.n_negative_samples]

        return history, pos, pool[picked]

# Wrap the dataframe in the Dataset (default of 10 sampled negatives per item).
dataset = SequentialRecommendations(df)
# Inspect one sample: (history, positive id, sampled negative ids).
dataset[0]

(tensor([[15,  0],
         [24,  0],
         [ 0,  0],
         [ 1,  0],
         [26,  0],
         [ 4,  1],
         [22,  1],
         [ 6,  1],
         [11,  0]], device='cuda:0'),
 tensor([2], device='cuda:0'),
 tensor([23, 20,  7, 16, 19, 29, 14, 26,  3, 24], device='cuda:0'))

In [3]:
import torch.nn as nn
import torch.nn.functional as F

class GRU4Rec(nn.Module):
    """GRU-based scorer that maps an interaction history to one score per item.

    Each history step is an ``(item_id, extra_feature)`` pair: the item id is
    embedded, the extra feature is appended to the embedding, and the resulting
    sequence is fed through a GRU. The final hidden state of the last GRU layer
    is projected to ``n_items`` scores.

    Args:
        n_items: size of the item vocabulary (embedding rows and output scores).
        hidden_size: embedding width and GRU hidden width.
        n_layers: number of stacked GRU layers.
    """

    def __init__(self, n_items, hidden_size=100, n_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(n_items, hidden_size)
        # +1 input feature: the extra per-step value concatenated to the embedding.
        self.gru = nn.GRU(hidden_size + 1, hidden_size, n_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, n_items)

    def forward(self, x):
        """Score every item for each history in the batch.

        Args:
            x: integer tensor of shape ``(batch, seq_len, 2)``; ``x[..., 0]`` holds
                item ids and ``x[..., 1]`` the extra per-step feature.

        Returns:
            Tensor of shape ``(batch, n_items)`` with unnormalised item scores.
        """
        items = self.embedding(x[:, :, 0])
        # Cast the integer feature explicitly instead of relying on implicit
        # dtype promotion inside ``torch.cat``.
        extras = x[:, :, 1:].to(items.dtype)
        features = torch.cat((items, extras), dim=-1)

        # No explicit h0: nn.GRU defaults to zeros on the input's device/dtype,
        # which removes the dependency on a module-level device constant.
        _, hidden = self.gru(features)

        # ``hidden`` is (n_layers, batch, hidden_size). Only the last layer's state
        # is used; flattening all layers would yield n_layers * batch rows.
        return self.fc(hidden[-1])

class BPRmax(nn.Module):
    """Pairwise ranking loss against the highest-scoring negative of each row.

    For every row the positive score is compared with the maximum negative
    score; the loss is the negated sum over the batch of
    ``logsigmoid(positive - max_negative)``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, positives: torch.Tensor, negatives: torch.Tensor):
        """Compute the loss.

        Args:
            positives: scores of the positive items, one column per row.
            negatives: scores of the sampled negatives, one row per example.

        Returns:
            Tensor holding the batch-summed loss (dimension 0 is kept with size 1).
        """
        hardest_negative, _ = torch.max(negatives, dim=1, keepdim=True)
        margins = positives - hardest_negative
        log_likelihood = F.logsigmoid(margins).sum(dim=0, keepdim=True)
        return -log_likelihood

def train(model: nn.Module, dataset: SequentialRecommendations, n_epochs=10, lr=0.01, batch_size=32):
    """Train ``model`` on ``dataset`` with Adam and the ``BPRmax`` loss.

    The model is moved to ``DEVICE`` and switched to training mode. Progress is
    reported with nested tqdm bars; the outer bar shows the mean batch loss of
    the epoch that just finished.

    Args:
        model: network mapping a batch of histories to per-item scores.
        dataset: dataset yielding ``(history, positive, negatives)`` triples.
        n_epochs: number of passes over the dataset.
        lr: Adam learning rate (previously hard-coded to 0.01).
        batch_size: mini-batch size (previously hard-coded to 32).

    Returns:
        The same ``model`` instance, trained in place.
    """
    model.to(DEVICE)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = BPRmax()
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    # Guard the per-epoch average against a loader that yields no batches.
    n_batches = max(len(dataloader), 1)

    for _ in (bar := tqdm(range(n_epochs))):
        epoch_loss = 0.0
        for history, pos, neg in tqdm(dataloader, leave=False):
            optimizer.zero_grad()
            out: torch.Tensor = model(history)
            # Pick the scores of the positive item and of the sampled negatives.
            pos_out = out.gather(1, pos)
            neg_out = out.gather(1, neg)
            loss = criterion(pos_out, neg_out)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        bar.set_postfix(loss=epoch_loss / n_batches)

    return model

In [4]:
import random

class MissionRecommender:
    """Epsilon-greedy mission recommender that assigns at most one mission per type.

    Each recommendation slot is filled either by a random mission (with
    probability ``epsilon``) or by the best model-ranked mission the user has
    not already completed with a positive outcome.
    """

    def __init__(self, missions, model, epsilon):
        """
        Initialize the recommender.

        Args:
        missions (list of dict): List of mission objects, each with 'ID', 'type', and 'target'.
        model (torch.nn.Module): PyTorch module to rank missions based on user history.
        epsilon (float): Probability of selecting a random mission.
        """
        self.missions = missions
        self.model = model
        self.epsilon = epsilon

    def recommend(self, user_history, num_recommendations):
        """
        Generate a set of mission recommendations based on the policy.

        The result may hold fewer than ``num_recommendations`` missions when no
        further mission can be added (for example when every mission type is
        already represented); the loop stops instead of spinning forever.

        Args:
        user_history (sequence of pairs): User's past interactions [(mission_id, outcome), ...].
        num_recommendations (int): Number of missions to recommend.

        Returns:
        list of dict: Recommended missions.
        """
        recommendations = []
        used_mission_ids = self._get_used_missions(user_history)
        assigned_types = set()

        while len(recommendations) < num_recommendations:
            added = False
            if not self._use_random_selection():
                added = self._add_ranked_mission(
                    recommendations, user_history, used_mission_ids, assigned_types
                )
                if not added and self.epsilon <= 0:
                    # Ranked selection is exhausted and random selection can
                    # never be drawn: nothing more can be added.
                    break
            if not added:
                # Either random selection was drawn, or ranking had nothing left.
                # In the latter case the state cannot change until a random pick
                # happens, so taking it right away yields the same outcome.
                added = self._add_random_mission(recommendations, assigned_types)
            if not added:
                # No mission of an unassigned type exists at all. Ranked
                # candidates are a subset of those, so stop.
                break

        return recommendations

    def _get_used_missions(self, user_history):
        """
        Extract the IDs of missions with positive outcomes from the user's history.

        Args:
        user_history (sequence of pairs): User's past interactions.

        Returns:
        set: Mission IDs with positive outcomes.
        """
        return {m[0] for m in user_history if m[1] > 0}

    def _use_random_selection(self):
        """
        Decide whether to select a mission randomly based on epsilon.

        Returns:
        bool: True if random selection is chosen, False otherwise.
        """
        return random.random() < self.epsilon

    def _add_random_mission(self, recommendations, assigned_types):
        """
        Add a random mission to the recommendations if possible.

        Args:
        recommendations (list): Current recommendation list.
        assigned_types (set): Set of already assigned types.

        Returns:
        bool: True if a mission was added, False if none was available.
        """
        random_mission = self._select_random_mission(assigned_types)
        if not random_mission:
            return False
        recommendations.append(random_mission)
        assigned_types.add(random_mission["type"])
        return True

    def _add_ranked_mission(self, recommendations, user_history, used_mission_ids, assigned_types):
        """
        Add the highest-ranked mission to the recommendations.

        Args:
        recommendations (list): Current recommendation list.
        user_history (sequence of pairs): User's past interactions.
        used_mission_ids (set): IDs of missions to exclude from recommendations.
        assigned_types (set): Set of already assigned types.

        Returns:
        bool: True if a mission was added, False if no ranked mission of an
        unassigned type was found.
        """
        ranked_missions = self._rank_missions(user_history, used_mission_ids)
        for mission in ranked_missions:
            if mission["type"] not in assigned_types:
                recommendations.append(mission)
                assigned_types.add(mission["type"])
                self._replace_existing_random_of_same_type(recommendations, mission)
                return True
        return False

    def _replace_existing_random_of_same_type(self, recommendations, mission):
        """
        Replace any random mission of the same type in recommendations with the ranked mission.

        Args:
        recommendations (list): Current recommendation list (modified in place).
        mission (dict): The ranked mission to be added.
        """
        recommendations[:] = [
            rec for rec in recommendations if rec["type"] != mission["type"] or rec == mission
        ]

    def _select_random_mission(self, assigned_types):
        """
        Select a random mission that is not of an already assigned type.

        Args:
        assigned_types (set): Types already assigned.

        Returns:
        dict or None: Random mission or None if no valid mission found.
        """
        available_missions = [m for m in self.missions if m["type"] not in assigned_types]
        return random.choice(available_missions) if available_missions else None

    def _model_device(self):
        """
        Find the device the model lives on.

        Returns:
        torch.device: Device of the model's first parameter, or CPU when the
        model exposes no parameters.
        """
        parameters = getattr(self.model, "parameters", None)
        first = next(iter(parameters()), None) if callable(parameters) else None
        return first.device if first is not None else torch.device("cpu")

    def _rank_missions(self, user_history, used_mission_ids):
        """
        Query the model for a ranked list of missions, excluding used ones.

        Args:
        user_history (sequence of pairs): User's past interactions; a numpy array
            or a list of (mission_id, outcome) tuples.
        used_mission_ids (set): Set of mission IDs to exclude.

        Returns:
        list of dict: Ranked list of missions.
        """
        # as_tensor accepts both numpy arrays and plain lists of tuples; the
        # tensor is placed on the model's own device rather than a global one.
        history_tensor = (
            torch.as_tensor(user_history).reshape(1, -1, 2).to(self._model_device())
        )

        # Get mission scores from the model
        with torch.no_grad():
            mission_scores = self.model(history_tensor).squeeze(0)  # Assume model outputs a (num_missions,) tensor

        # Rank missions by score
        mission_ranking = torch.argsort(mission_scores, descending=True).tolist()

        # Index missions by ID once (keeping list order for duplicate IDs)
        # instead of rescanning the whole mission list for every ranked index.
        missions_by_id = {}
        for mission in self.missions:
            missions_by_id.setdefault(mission["ID"], []).append(mission)

        # Exclude missions with positive outcomes
        return [
            mission
            for idx in mission_ranking
            if idx not in used_mission_ids
            for mission in missions_by_id.get(idx, ())
        ]


In [5]:
# Build the mission catalogue: one record per distinct (ID, mission) pair, with
# the mission name split on '_' into a 'type' and a 'target' field.
missions = (
    df[['ID', 'mission']]
    .drop_duplicates()
    .sort_index()
    .assign(
        type=lambda table: table['mission'].str.split('_').str[0],
        target=lambda table: table['mission'].str.split('_').str[1],
    )
    .drop(columns=['mission'])
    .to_dict(orient='records')
)

# Sample interaction history used to demo the recommender.
history = df.loc[91351, 'history']

In [11]:
# Small model: 16-dimensional embeddings/hidden state, single GRU layer.
# NOTE(review): n_items is the number of distinct (ID, mission) records; this
# presumably assumes mission IDs are contiguous in [0, n_items) and that every
# id appearing in `history`/`negatives` is below n_items — confirm.
model = GRU4Rec(n_items=len(missions), hidden_size=16, n_layers=1)
# Train with the defaults of `train` (the function returns the same model).
model = train(model, dataset)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

  0%|          | 0/454 [00:00<?, ?it/s]

In [22]:
# epsilon=0.3: each recommendation slot is filled by a random mission with
# probability 0.3, otherwise by the model's top-ranked eligible mission.
recommender = MissionRecommender(missions, model, epsilon=0.3)
# Request three missions for the sample history.
# NOTE(review): no random seed is set in the visible cells, so this output may
# differ between runs.
recommender.recommend(history, 3)

[{'ID': 11, 'type': 'activity', 'target': '9'},
 {'ID': 22, 'type': 'mobility', 'target': '3'},
 {'ID': 26, 'type': 'quiz', 'target': '3'}]