<a href="https://colab.research.google.com/github/p4zaa/OTTO-Multi-Objective-Recommender-System/blob/main/%5BDEVELOP1_4%5D_OTTO_%E2%80%93_Multi_Objective_Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Version Logs
* [View in my Github](https://github.com/p4zaa/OTTO-Multi-Objective-Recommender-System)

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import torch

def format_pytorch_version(version):
  return version.split('+')[0]

TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)

def format_cuda_version(version):
  return 'cu' + version.replace('.', '')

CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

!pip install -q torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install -q torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install -q torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install -q torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install -q torch-geometric

In [3]:
import pandas as pd, numpy as np
from itertools import product
import io, os, json

import time

from sklearn.metrics import mean_squared_error

import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pio.templates.default = "plotly_white"

import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens
from torch_geometric.nn import to_hetero
from torch_geometric.nn import HeteroConv, GCNConv, SAGEConv, GATConv, Linear, GraphConv
from torch_geometric.data import Data, HeteroData

!pip install -q sentence-transformers

%matplotlib inline

# Load Competition Dataset

In [4]:

from google.colab import files
files.upload() # expire any previous token(s) and upload recreated token
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets list


Saving kaggle.json to kaggle.json
rm: cannot remove '/root/.kaggle': No such file or directory
ref                                                             title                                           size  lastUpdated          downloadCount  voteCount  usabilityRating  
--------------------------------------------------------------  ---------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
meirnizri/covid19-dataset                                       COVID-19 Dataset                                 5MB  2022-11-13 15:47:17          13231        375  1.0              
thedevastator/analyzing-credit-card-spending-habits-in-india    Credit Card Spending Habits in India           319KB  2022-12-14 07:30:37            772         32  1.0              
michals22/coffee-dataset                                        Coffee dataset                                  24KB  2022-12-15 20:02:12           2884         72  1.0     

In [5]:
!kaggle datasets download 'radek1/otto-full-optimized-memory-footprint' -p /content/kaggle/ --unzip

Downloading otto-full-optimized-memory-footprint.zip to /content/kaggle
 99% 1.07G/1.09G [00:06<00:00, 203MB/s]
100% 1.09G/1.09G [00:06<00:00, 171MB/s]


## Files
- **train.jsonl** - the training data, which contains full session data
  * `session` - the unique session id
  * `events` - the time ordered sequence of events in the session
    * `aid` - the article id (product code) of the associated event
    * `ts` - the Unix timestamp of the event
    * `type` - the event type, i.e., whether a product was clicked, added to the user's cart, or ordered during the session
###### {'clicks': 0, 'carts': 1, 'orders': 2}
- **test.jsonl** - the test data, which contains truncated session data
  * your task is to predict the next `aid` clicked after the session truncation, as well as the the remaining `aids` that are added to `carts` and `orders`; you may predict up to 20 values for each session `type`
- **sample_submission.csv** - a sample submission file in the correct format

In [4]:
df = pd.read_parquet('/content/kaggle/train.parquet')

In [5]:
# sample 10%(frac=0.1) of data
df = df.sample(frac=0.001, replace=False)

In [6]:
df

Unnamed: 0,session,aid,ts,type
100186684,3479265,321317,1660051526,0
129433089,5104661,1053683,1659884541,0
143499463,6008018,795232,1660423516,0
85005190,2718783,247375,1659730899,0
11037391,216123,298495,1659992027,0
...,...,...,...,...
146768061,6247517,1560437,1660677217,0
182915720,9296892,1811814,1660656827,0
126702454,4944715,194379,1660224525,0
137465226,5607554,887081,1660395779,0


In [7]:
df.isna().sum()

session    0
aid        0
ts         0
type       0
dtype: int64

In [8]:
df['type'].unique()

array([0, 1, 2], dtype=uint8)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 216716 entries, 100186684 to 107956587
Data columns (total 4 columns):
 #   Column   Non-Null Count   Dtype
---  ------   --------------   -----
 0   session  216716 non-null  int32
 1   aid      216716 non-null  int32
 2   ts       216716 non-null  int32
 3   type     216716 non-null  uint8
dtypes: int32(3), uint8(1)
memory usage: 4.3 MB


# Construct Graph Data (memory-efficient optimized)

### [Use new code instead] Construct `edge_index`

In [10]:
#def to_tuple(row):
#    return tuple(row)

In [11]:
# also drop the ts and type column
#connectivity = df.drop(columns=['ts', 'type']).apply(to_tuple, axis=1)

In [12]:
#connectivity

In [13]:
# Old code
'''
# session index dict
#session = sorted(df['session'].unique())
session = df['session'].unique()
session_nodes_idx = {id:idx for idx, id in enumerate(session)}

# aid(article id) index dict
#aid = sorted(df['aid'].unique())
aid = df['aid'].unique()
aid_nodes_idx = {id:idx for idx, id in enumerate(aid)}
'''

"\n# session index dict\n#session = sorted(df['session'].unique())\nsession = df['session'].unique()\nsession_nodes_idx = {id:idx for idx, id in enumerate(session)}\n\n# aid(article id) index dict\n#aid = sorted(df['aid'].unique())\naid = df['aid'].unique()\naid_nodes_idx = {id:idx for idx, id in enumerate(aid)}\n"

In [14]:
# use this code if the above cause memory crash
# very slow but memory good
'''
def get_node_indices(data, key):
  for id in data[key].unique():
    yield id, next(i for i, v in enumerate(data[key]) if v == id)

session_nodes_idx = dict(get_node_indices(df, 'session'))
aid_nodes_idx = dict(get_node_indices(df, 'aid'))
'''

"\ndef get_node_indices(data, key):\n  for id in data[key].unique():\n    yield id, next(i for i, v in enumerate(data[key]) if v == id)\n\nsession_nodes_idx = dict(get_node_indices(df, 'session'))\naid_nodes_idx = dict(get_node_indices(df, 'aid'))\n"

In [15]:
# /!\ this cause memory crashed in a very large data
'''
i_session = []
i_aid = []
for session, aid in connectivity_list:
  i_session.append(user_nodes_idx[user])
  i_aid.append(item_nodes_idx[item])

indice = [i_session, i_aid]
edge_index = torch.Tensor(indice).type(torch.long)
'''

'\ni_session = []\ni_aid = []\nfor session, aid in connectivity_list:\n  i_session.append(user_nodes_idx[user])\n  i_aid.append(item_nodes_idx[item])\n\nindice = [i_session, i_aid]\nedge_index = torch.Tensor(indice).type(torch.long)\n'

In [16]:
# Work but still got memory crash in very large data
'''
def get_indices(connectivity_list, user_nodes_idx, item_nodes_idx):
  for user, item in connectivity_list:
    yield user_nodes_idx[user], item_nodes_idx[item]

edge_index = torch.Tensor(list(get_indices(connectivity, session_nodes_idx, aid_nodes_idx))).type(torch.int64).t()
'''

'\ndef get_indices(connectivity_list, user_nodes_idx, item_nodes_idx):\n  for user, item in connectivity_list:\n    yield user_nodes_idx[user], item_nodes_idx[item]\n\nedge_index = torch.Tensor(list(get_indices(connectivity, session_nodes_idx, aid_nodes_idx))).type(torch.int64).t()\n'

### `edge_index` new code construct

In [17]:
# session index dict
session = df['session'].unique()
source_idx = {id:idx for idx, id in enumerate(session)}

# aid(article id) index dict
aid = df['aid'].unique()
target_idx = {id:idx for idx, id in enumerate(aid)}

In [18]:
connected = df[['session', 'aid']]
connected['session'] = connected['session'].map(source_idx)
connected['aid'] = connected['aid'].map(target_idx)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  connected['session'] = connected['session'].map(source_idx)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  connected['aid'] = connected['aid'].map(target_idx)


In [19]:
source = connected['session']
target = connected['aid']
edge_index = torch.tensor((source.values, target.values))

  edge_index = torch.tensor((source.values, target.values))


### Nodes and Edges Attribute

In [20]:
## Nodes Atrributes
session_num_nodes = df['session'].nunique()
aid_num_nodes = df['aid'].nunique()
aid_features = torch.rand((aid_num_nodes, 300)) # Create (random) article features with shape [num_node_aid, dimensions]

## Edges Atrributes
edge_index = edge_index
edge_label = torch.tensor(df['type'].values).type(torch.int64)

In [21]:
edge_label

tensor([0, 0, 0,  ..., 0, 0, 0])

In [22]:
edge_index.shape

torch.Size([2, 216716])

In [23]:
aid_features.shape

torch.Size([127728, 300])

In [24]:
edge_label.shape

torch.Size([216716])

### Construct HeteroData

In [25]:
node_types = {
    'session': {
        'num_nodes': session_num_nodes
    },
    'aid': {
        'x': aid_features
    }
}

edge_types = {
    ('session', 'event', 'aid'): {
        'edge_index': edge_index,
        'edge_label': edge_label
    }#,
    #('session', 'cart', 'aid'): {
        
    #},
    #('session', 'buy', 'aid'): {
        
    #}
}

In [26]:
data = HeteroData({**node_types, **edge_types})

In [27]:
data

HeteroData(
  [1msession[0m={ num_nodes=208170 },
  [1maid[0m={ x=[127728, 300] },
  [1m(session, event, aid)[0m={
    edge_index=[2, 216716],
    edge_label=[216716]
  }
)

In [28]:
data.metadata()

(['session', 'aid'], [('session', 'event', 'aid')])

In [29]:
node_types, edge_types = data.metadata()
print('Node types:', node_types)
print('Edge types:',edge_types)

Node types: ['session', 'aid']
Edge types: [('session', 'event', 'aid')]


In [30]:
print('Isolated nodes?', data.has_isolated_nodes())
print('Self loops?', data.has_self_loops())
print('Undirected graph? ', data.is_undirected())

Isolated nodes? False
Self loops? False
Undirected graph?  False


# Graph-based Modeling [follow [this](https://github.com/pyg-team/pytorch_geometric/blob/master/examples/hetero/hetero_link_pred.py) sample]

In [31]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Construct Undirected Graph

In [32]:
# add sesion features for message passing:
data['session'].x = torch.rand(data['session'].num_nodes, 300)

In [33]:
# Add user node features for message passing:
#data['session'].x = torch.eye(data['session'].num_nodes, device=device)
#del data['session'].num_nodes

In [34]:
# Add a reverse ('movie', 'rev_rates', 'user') relation for message passing:
data = T.ToUndirected()(data)

In [35]:
del data['aid', 'rev_event', 'session'].edge_label  # Remove "reverse" label.

In [36]:
data

HeteroData(
  [1msession[0m={
    num_nodes=208170,
    x=[208170, 300]
  },
  [1maid[0m={ x=[127728, 300] },
  [1m(session, event, aid)[0m={
    edge_index=[2, 216716],
    edge_label=[216716]
  },
  [1m(aid, rev_event, session)[0m={ edge_index=[2, 216716] }
)

### Calculate Weight

In [37]:
data['session', 'aid'].edge_label.shape

torch.Size([216716])

In [38]:
# We have an unbalanced dataset with many labels for rating 3 and 4, and very
# few for 0 and 1. Therefore we use a weighted MSE loss.

counts = torch.bincount(data['session', 'aid'].edge_label)

# Set weights normalized by (max count/each count)
weight = counts.max() / counts

In [39]:
counts

tensor([194616,  16908,   5192])

In [40]:
weight

tensor([ 1.0000, 11.5103, 37.4838])

In [41]:
data_dict = {'type': (counts, '# rows','coral'), 'weights': (weight, 'weights','royalblue')}

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(x=np.arange(6), y=counts.detach().cpu().numpy(),
               name = 'nb rows', line_color= 'coral'))
fig.add_trace(
    go.Scatter(x=np.arange(6), y=weight.detach().cpu().numpy(),
               name = 'weights', line_color= 'royalblue'),  secondary_y=True)


fig.update_yaxes(title_text="# rows", secondary_y=False)
fig.update_yaxes(title_text="weights", secondary_y=True)
fig.update_xaxes(title_text="Type")
fig

### [Follow [this sample](https://github.com/pyg-team/pytorch_geometric/blob/master/examples/hetero/hetero_link_pred.py)] Train/Val/Test Link Level Splits

In [42]:
# Perform a link-level split into training, validation, and test edges:
train_data, val_data, test_data = T.RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    neg_sampling_ratio=0.0,
    edge_types=[('session', 'event', 'aid')],
    rev_edge_types=[('aid', 'rev_event', 'session')],
)(data)

In [43]:
train_data['session','aid']

{'edge_index': tensor([[  2964,  40252, 190900,  ...,  98245, 102531,  97505],
        [  2881,  17633, 119480,  ...,  49186,  72697,   6671]]), 'edge_label': tensor([0, 0, 0,  ..., 0, 0, 1]), 'edge_label_index': tensor([[  2964,  40252, 190900,  ...,  98245, 102531,  97505],
        [  2881,  17633, 119480,  ...,  49186,  72697,   6671]])}

In [44]:
train_data['session','aid'].edge_label

tensor([0, 0, 0,  ..., 0, 0, 1])

### [New weight calculation code]

In [45]:
# We have an unbalanced dataset with many labels for rating 3 and 4, and very
# few for 0 and 1. Therefore we use a weighted MSE loss.
weight = torch.bincount(train_data['session', 'aid'].edge_label)
weight = weight.max() / weight

In [46]:
weight

tensor([ 1.0000, 11.5025, 37.7202])

### Model and loss functions

#### Loss function

In [47]:
def weighted_mse_loss(pred, target, weight=None):
    weight = 1. if weight is None else weight[target].to(pred.dtype)
    return (weight * (pred - target.to(pred.dtype)).pow(2)).mean()

#### Model

In [48]:
class GNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['session'][row], z_dict['aid'][col]], dim=-1)

        z = self.lin1(z).relu()
        z = self.lin2(z)

        return z.view(-1)


class Model(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels)
        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels)

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict)
        return self.decoder(z_dict, edge_label_index)

In [49]:
model = Model(hidden_channels=32).to(device)

In [50]:
# Due to lazy initialization, we need to run one model step so the number
# of parameters can be inferred:
with torch.no_grad():
    model.encoder(train_data.x_dict, train_data.edge_index_dict)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [51]:
def train():
    model.train()
    optimizer.zero_grad()
    pred = model(train_data.x_dict, train_data.edge_index_dict,
                 train_data['session', 'aid'].edge_label_index)
    target = train_data['session', 'aid'].edge_label
    loss = weighted_mse_loss(pred, target, weight)
    loss.backward()
    optimizer.step()
    return float(loss)

In [52]:
@torch.no_grad()
def test(data):
    model.eval()
    pred = model(data.x_dict, data.edge_index_dict,
                 data['session', 'aid'].edge_label_index)
    pred = pred.clamp(min=0, max=2)
    target = data['session', 'aid'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    return float(rmse)

In [53]:
for epoch in range(0, 300):
    loss = train()
    train_rmse = test(train_data)
    val_rmse = test(val_data)
    test_rmse = test(test_data)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_rmse:.4f}, '
          f'Val: {val_rmse:.4f}, Test: {test_rmse:.4f}')

Epoch: 000, Loss: 4.2428, Train: 1.9153, Val: 1.1977, Test: 1.2167
Epoch: 001, Loss: 5.0976, Train: 0.4163, Val: 0.4136, Test: 0.4058
Epoch: 002, Loss: 5.1917, Train: 0.5954, Val: 0.4383, Test: 0.4336
Epoch: 003, Loss: 2.2964, Train: 1.2834, Val: 0.6717, Test: 0.6791
Epoch: 004, Loss: 2.1217, Train: 1.0288, Val: 0.5956, Test: 0.5992
Epoch: 005, Loss: 1.8127, Train: 0.8573, Val: 0.5334, Test: 0.5336
Epoch: 006, Loss: 1.8325, Train: 0.8676, Val: 0.5393, Test: 0.5395
Epoch: 007, Loss: 1.8255, Train: 1.0637, Val: 0.6053, Test: 0.6088
Epoch: 008, Loss: 1.8315, Train: 1.0163, Val: 0.5900, Test: 0.5926
Epoch: 009, Loss: 1.8075, Train: 0.9153, Val: 0.5512, Test: 0.5519
Epoch: 010, Loss: 1.8039, Train: 0.9032, Val: 0.5413, Test: 0.5417
Epoch: 011, Loss: 1.8078, Train: 0.9867, Val: 0.5644, Test: 0.5663
Epoch: 012, Loss: 1.7995, Train: 1.0181, Val: 0.5697, Test: 0.5722
Epoch: 013, Loss: 1.8080, Train: 0.9395, Val: 0.5377, Test: 0.5386
Epoch: 014, Loss: 1.7983, Train: 0.9049, Val: 0.5240, Test: 0.

In [54]:
model

Model(
  (encoder): GraphModule(
    (conv1): ModuleDict(
      (session__event__aid): SAGEConv((-1, -1), 32, aggr=mean)
      (aid__rev_event__session): SAGEConv((-1, -1), 32, aggr=mean)
    )
    (conv2): ModuleDict(
      (session__event__aid): SAGEConv((-1, -1), 32, aggr=mean)
      (aid__rev_event__session): SAGEConv((-1, -1), 32, aggr=mean)
    )
  )
  (decoder): EdgeDecoder(
    (lin1): Linear(64, 32, bias=True)
    (lin2): Linear(32, 1, bias=True)
  )
)

In [55]:
data

HeteroData(
  [1msession[0m={
    num_nodes=208170,
    x=[208170, 300]
  },
  [1maid[0m={ x=[127728, 300] },
  [1m(session, event, aid)[0m={
    edge_index=[2, 216716],
    edge_label=[216716]
  },
  [1m(aid, rev_event, session)[0m={ edge_index=[2, 216716] }
)

In [56]:
model.encoder(data.x_dict, data.edge_index_dict)

{'session': tensor([[-9.7412e-01,  2.4478e+00,  2.3534e-01,  ..., -4.6157e-01,
          -6.2345e-01,  4.3177e-01],
         [-9.3550e-01,  2.2810e+00, -9.0241e-02,  ..., -6.0263e-01,
          -6.2245e-01,  4.3918e-01],
         [-9.8137e-01,  2.4772e+00,  2.7850e-02,  ..., -6.1465e-01,
          -6.0377e-01,  4.9269e-01],
         ...,
         [-9.4631e-01,  2.3232e+00, -2.6920e-03,  ..., -6.7439e-01,
          -6.0488e-01,  4.4904e-01],
         [-1.0267e+00,  2.3541e+00,  5.4554e-02,  ..., -5.5205e-01,
          -5.4983e-01,  5.0213e-01],
         [-1.3512e+00,  2.7143e+00,  2.5843e-01,  ..., -5.5363e-01,
          -7.9118e-01,  5.5559e-01]], grad_fn=<AddBackward0>),
 'aid': tensor([[-1.1588, -0.2248,  0.1874,  ...,  2.2923, -0.3588,  1.6453],
         [-1.0812, -0.1628,  0.0266,  ...,  2.3042, -0.3995,  1.6671],
         [-1.0300, -0.0753,  0.0894,  ...,  2.2286, -0.4006,  1.6088],
         ...,
         [-1.0446, -0.2571, -0.0177,  ...,  2.5165, -0.5805,  1.7302],
         [-1.1

# Test dataset

In [57]:
df = pd.read_parquet('/content/kaggle/test.parquet')

In [58]:
df = df.sample(frac=0.01, replace=False)

In [59]:
df

Unnamed: 0,session,aid,ts,type
5465461,14190516,976033,1662203664,0
1438664,13217943,265219,1661860240,0
4078581,13850857,1374621,1662058660,0
3272684,13655975,1777026,1661970824,0
357851,12972164,644558,1661768631,1
...,...,...,...,...
5580349,14218711,1477811,1662210929,0
366675,12974024,1748111,1661768243,0
962819,13108220,386901,1661797473,0
1797244,13302176,439795,1661865215,0


## Construct heterogenous graph for test

In [60]:
# session index dict
session = df['session'].unique()
source_idx = {id:idx for idx, id in enumerate(session)}

# aid(article id) index dict
aid = df['aid'].unique()
target_idx = {id:idx for idx, id in enumerate(aid)}

In [61]:
connected = df[['session', 'aid']]
connected['session'] = connected['session'].map(source_idx)
connected['aid'] = connected['aid'].map(target_idx)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [62]:
source = connected['session']
target = connected['aid']
edge_index = torch.tensor((source.values, target.values)).type(torch.int64)

In [63]:
## Nodes Atrributes
session_num_nodes = df['session'].nunique()
aid_num_nodes = df['aid'].nunique()
aid_features = torch.rand((aid_num_nodes, 300)) # Create (random) article features with shape [num_node_aid, dimensions]

## Edges Atrributes
edge_index = edge_index
edge_label = torch.tensor(df['type'].values).type(torch.int64)

In [64]:
node_types = {
    'session': {
        'num_nodes': session_num_nodes
    },
    'aid': {
        'x': aid_features
    }
}

edge_types = {
    ('session', 'event', 'aid'): {
        'edge_index': edge_index,
        'edge_label': edge_label,
        'edge_label_index': edge_index
    }#,
    #('session', 'cart', 'aid'): {
        
    #},
    #('session', 'buy', 'aid'): {
        
    #}
}

In [65]:
Rtest_data = HeteroData({**node_types, **edge_types})

In [66]:
data

HeteroData(
  [1msession[0m={
    num_nodes=208170,
    x=[208170, 300]
  },
  [1maid[0m={ x=[127728, 300] },
  [1m(session, event, aid)[0m={
    edge_index=[2, 216716],
    edge_label=[216716]
  },
  [1m(aid, rev_event, session)[0m={ edge_index=[2, 216716] }
)

In [67]:
Rtest_data

HeteroData(
  [1msession[0m={ num_nodes=63734 },
  [1maid[0m={ x=[49176, 300] },
  [1m(session, event, aid)[0m={
    edge_index=[2, 69281],
    edge_label=[69281],
    edge_label_index=[2, 69281]
  }
)

In [68]:
# add sesion features for message passing:
Rtest_data['session'].x = torch.rand(Rtest_data['session'].num_nodes, 300)

Rtest_data = T.ToUndirected()(Rtest_data)
del Rtest_data['aid', 'rev_event', 'session'].edge_label  # Remove "reverse" label.

In [69]:
Rtest_data

HeteroData(
  [1msession[0m={
    num_nodes=63734,
    x=[63734, 300]
  },
  [1maid[0m={ x=[49176, 300] },
  [1m(session, event, aid)[0m={
    edge_index=[2, 69281],
    edge_label=[69281],
    edge_label_index=[2, 69281]
  },
  [1m(aid, rev_event, session)[0m={ edge_index=[2, 69281] }
)

## Testing

In [70]:
with torch.no_grad():
    model.eval()
    gg = model(Rtest_data.x_dict, Rtest_data.edge_index_dict, Rtest_data['session', 'aid'].edge_label_index)

In [71]:
data['session', 'aid'].edge_label

tensor([0, 0, 0,  ..., 0, 0, 0])

In [72]:
Rtest_data['session', 'aid'].edge_label

tensor([0, 0, 0,  ..., 0, 0, 0])

In [73]:
gg

tensor([0.7743, 1.0169, 0.9359,  ..., 1.2656, 1.1711, 0.8962])

In [74]:
data

HeteroData(
  [1msession[0m={
    num_nodes=208170,
    x=[208170, 300]
  },
  [1maid[0m={ x=[127728, 300] },
  [1m(session, event, aid)[0m={
    edge_index=[2, 216716],
    edge_label=[216716]
  },
  [1m(aid, rev_event, session)[0m={ edge_index=[2, 216716] }
)

# [TEMPORARY DROP]

In [75]:
# Temporary comment
'''
class GNNEncoder(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, conv):
        super().__init__()
        # conv(#in_channels, #out_channels)
        ''''''
        in_channels (int or tuple): 
            Size of each input sample, or :obj:`-1` to
            derive the size from the first input(s) to the forward method.
            A tuple corresponds to the sizes of source and target
            dimensionalities.
        ''''''
        self.conv1 = conv((-1, -1), hidden_channels)
        self.conv2 = conv((-1, -1), out_channels)
        self.linear1 = Linear(-1, out_channels)
        self.linear2 = Linear(-1, out_channels)

    def forward(self, x, edge_index):
        x0 = self.linear1(x)
        x2 = self.conv1(x0, edge_index).relu()
        x3 = self.conv2(x2, edge_index)
        x4 = self.linear2(x2 + x3)
        # Add combined layer to reduce over-smoothing
        return x4

class EdgeDecoder(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, 1)

    def forward(self, z_dict, edge_label_index):
        row, col = edge_label_index
        z = torch.cat([z_dict['session'][row], z_dict['aid'][col]], dim=-1)
        z = self.lin1(z).relu()
        z = self.lin2(z)
        return z.view(-1)

class Model(torch.nn.Module):
    def __init__(self, hidden_channels,  conv=SAGEConv):
        super().__init__()
        self.encoder = GNNEncoder(hidden_channels, hidden_channels,  conv) # Initialize GNNEncoder
        self.encoder = to_hetero(self.encoder, data.metadata(), aggr='sum')
        self.decoder = EdgeDecoder(hidden_channels) # Initialze EdgeDecoder

    def forward(self, x_dict, edge_index_dict, edge_label_index):
        z_dict = self.encoder(x_dict, edge_index_dict) # Here the call from model.encoder(...)
        return self.decoder(z_dict, edge_label_index)
'''

"\nclass GNNEncoder(torch.nn.Module):\n    def __init__(self, hidden_channels, out_channels, conv):\n        super().__init__()\n        # conv(#in_channels, #out_channels)\n        \n        in_channels (int or tuple): \n            Size of each input sample, or :obj:`-1` to\n            derive the size from the first input(s) to the forward method.\n            A tuple corresponds to the sizes of source and target\n            dimensionalities.\n        \n        self.conv1 = conv((-1, -1), hidden_channels)\n        self.conv2 = conv((-1, -1), out_channels)\n        self.linear1 = Linear(-1, out_channels)\n        self.linear2 = Linear(-1, out_channels)\n\n    def forward(self, x, edge_index):\n        x0 = self.linear1(x)\n        x2 = self.conv1(x0, edge_index).relu()\n        x3 = self.conv2(x2, edge_index)\n        x4 = self.linear2(x2 + x3)\n        # Add combined layer to reduce over-smoothing\n        return x4\n\nclass EdgeDecoder(torch.nn.Module):\n    def __init__(self, hid

In [76]:
'''
def train(train_data, model, optimizer, loss=weighted_mse_loss):
    model.train()
    optimizer.zero_grad()
    pred = model(train_data.x_dict, train_data.edge_index_dict,
                 train_data['session', 'aid'].edge_label_index)
    target = train_data['session', 'aid'].edge_label
    loss = weighted_mse_loss(pred, target, weight)
    loss.backward()
    optimizer.step()
    return float(loss.sqrt())
'''

"\ndef train(train_data, model, optimizer, loss=weighted_mse_loss):\n    model.train()\n    optimizer.zero_grad()\n    pred = model(train_data.x_dict, train_data.edge_index_dict,\n                 train_data['session', 'aid'].edge_label_index)\n    target = train_data['session', 'aid'].edge_label\n    loss = weighted_mse_loss(pred, target, weight)\n    loss.backward()\n    optimizer.step()\n    return float(loss.sqrt())\n"

In [77]:
## set pred.clamp
'''
@torch.no_grad()
def test(data, model, metric=F.mse_loss):
    model.eval()
    pred = model(data.x_dict, data.edge_index_dict,
                 data['session', 'aid'].edge_label_index)
    pred = pred.clamp(min=0, max=2)
    target = data['session', 'aid'].edge_label.float()
    rmse = F.mse_loss(pred, target).sqrt()
    return float(rmse) # Return RMSE loss
'''

"\n@torch.no_grad()\ndef test(data, model, metric=F.mse_loss):\n    model.eval()\n    pred = model(data.x_dict, data.edge_index_dict,\n                 data['session', 'aid'].edge_label_index)\n    pred = pred.clamp(min=0, max=2)\n    target = data['session', 'aid'].edge_label.float()\n    rmse = F.mse_loss(pred, target).sqrt()\n    return float(rmse) # Return RMSE loss\n"

In [78]:
#from tqdm import tqdm
#from IPython.display import clear_output

In [79]:
"""
def train_test(model, model_params, learning_rate=0.01, e_patience = 10, min_acc= 0.05, n_epochs=500):
    t0 = time.time()

    model = model(**model_params) # Define the model

    # Due to lazy initialization, we need to run one model step so the number
    # of parameters can be inferred:
    with torch.no_grad():
        model.encoder(train_data.x_dict, train_data.edge_index_dict) # Run once with torch.no_grad() to get parameter for optimizer below

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    k=0
    loss, train_rmse, val_rmse, test_rmse = [], [], [], []
    train_wrmse, val_wrmse, test_wrmse = [], [], []
    for epoch in tqdm(range(n_epochs)):
        # Call train fuction here >> return loss
        loss += [train(train_data, model, optimizer, loss=weighted_mse_loss)]
        
        # Call test function here >> return RMSE loss
        train_wrmse += [test(train_data, model, metric=weighted_mse_loss)]
        train_rmse += [test(train_data, model, metric=F.mse_loss)]
        
        val_wrmse += [test(val_data, model, metric=weighted_mse_loss)]
        val_rmse += [test(val_data, model, metric=F.mse_loss)]
        
        test_wrmse += [test(test_data, model, metric=weighted_mse_loss)]
        test_rmse += [test(test_data, model, metric=F.mse_loss)]

        if epoch+1 %10==0:
            print(f'Epoch: {epoch+1:03d}, Loss: {loss[-1]:.4f}, Train: {train_rmse[-1]:.4f}, '
                  f'Val: {val_rmse[-1]:.4f}, Test: {test_rmse[-1]:.4f}')

        results = pd.DataFrame({
            'loss': loss,
            'train_rmse': train_rmse, 'val_rmse': val_rmse, 'test_rmse': test_rmse,
            'train_wrmse': train_wrmse, 'val_wrmse': val_wrmse, 'test_wrmse': test_wrmse,
            'time':(time.time()-t0)/60
        })
        
        ## Debugging
        #clear_output()
        '''
        print('\nloss: ', loss, 
              '\ntrain_rmse: ', train_rmse, 
              '\nval_rmse: ', val_rmse, 
              '\ntest_rmse: ', test_rmse,
              '\ntrain_wrmse: ', train_wrmse, 
              '\nval_wrmse: ', val_wrmse, 
              '\ntest_wrmse: ', test_wrmse,
              '\ntime: ', (time.time()-t0)/60)
        '''
        #visualize_loss(results, metric='wrmse').show()
        #print(results.to_string())

        # enable early stopping
        if (epoch > 1) and abs(loss[-1]/loss[-2]-1) < min_acc :
            k += 1
        if k> e_patience:
            print('Early stopping')
            break

    return results, model
"""

"\ndef train_test(model, model_params, learning_rate=0.01, e_patience = 10, min_acc= 0.05, n_epochs=500):\n    t0 = time.time()\n\n    model = model(**model_params) # Define the model\n\n    # Due to lazy initialization, we need to run one model step so the number\n    # of parameters can be inferred:\n    with torch.no_grad():\n        model.encoder(train_data.x_dict, train_data.edge_index_dict) # Run once with torch.no_grad() to get parameter for optimizer below\n\n    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n\n    k=0\n    loss, train_rmse, val_rmse, test_rmse = [], [], [], []\n    train_wrmse, val_wrmse, test_wrmse = [], [], []\n    for epoch in tqdm(range(n_epochs)):\n        # Call train fuction here >> return loss\n        loss += [train(train_data, model, optimizer, loss=weighted_mse_loss)]\n        \n        # Call test function here >> return RMSE loss\n        train_wrmse += [test(train_data, model, metric=weighted_mse_loss)]\n        train_rmse +=

In [80]:
'''
def visualize_loss(results, metric='rmse'):
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=results.index, y=results['train_'+metric], name = 'train_'+metric))
    fig.add_trace(go.Scatter(x=results.index, y=results['val_'+metric], name = 'val_'+metric))
    fig.add_trace(go.Scatter(x=results.index, y=results['test_'+metric], name = 'test_'+metric))
    fig.add_trace(go.Scatter(x=results.index, y=results['loss'], name = 'loss'))

    fig.update_yaxes(title_text=metric.upper())
    fig.update_xaxes(title_text="Epoch")

    return fig
'''

'\ndef visualize_loss(results, metric=\'rmse\'):\n    fig = go.Figure()\n\n    fig.add_trace(go.Scatter(x=results.index, y=results[\'train_\'+metric], name = \'train_\'+metric))\n    fig.add_trace(go.Scatter(x=results.index, y=results[\'val_\'+metric], name = \'val_\'+metric))\n    fig.add_trace(go.Scatter(x=results.index, y=results[\'test_\'+metric], name = \'test_\'+metric))\n    fig.add_trace(go.Scatter(x=results.index, y=results[\'loss\'], name = \'loss\'))\n\n    fig.update_yaxes(title_text=metric.upper())\n    fig.update_xaxes(title_text="Epoch")\n\n    return fig\n'

In [81]:
'''
N_EPOCHS = 300
E_PATIENCE = 50
LEARNING_RATE = 0.01

model_params = {"hidden_channels":32, 'conv':SAGEConv}

results, trained_model = train_test(
    Model, model_params, learning_rate=LEARNING_RATE, e_patience = E_PATIENCE, n_epochs=N_EPOCHS)
'''

'\nN_EPOCHS = 300\nE_PATIENCE = 50\nLEARNING_RATE = 0.01\n\nmodel_params = {"hidden_channels":32, \'conv\':SAGEConv}\n\nresults, trained_model = train_test(\n    Model, model_params, learning_rate=LEARNING_RATE, e_patience = E_PATIENCE, n_epochs=N_EPOCHS)\n'

In [82]:
#visualize_loss(results, metric='wrmse')

In [83]:
#trained_model.encoder(test_data.x_dict, test_data.edge_index_dict)['session']

In [84]:
#trained_model.state_dict()['encoder.linear2.session.weight']

# Recommendation

In [85]:
@torch.no_grad()
def recommendation(user_id, model, x_dict, edge_index_dict):
  # Get model decoder
  #model = Model(**model_params)
  with torch.no_grad():
    encoder = model.encoder(data.x_dict, data.edge_index_dict)

  # Get node representations for users and movies
  user_representations = encoder['session']
  movie_representations = encoder['aid']

  # Compute the dot product between user and movie representations to get edge weights
  edge_weights = user_representations.mm(movie_representations.T)

  # Make predictions for each user by taking the top k largest edge weights
  k = 20  # number of recommendations to make
  _, top_k_indices = edge_weights.topk(k, dim=1)
  recommendations = top_k_indices.numpy()

  # Print recommendations for the first user
  print(f'Recommendations for user {user_id}: {recommendations[user_id]}')

In [86]:
#session = 1
#recommendation(session, model, Rtest_data.x_dict, Rtest_data.edge_index_dict)

In [87]:
#session = 2
#recommendation(session, model, data.x_dict, data.edge_index_dict)

In [88]:
#session = 999
#recommendation(session, model, data.x_dict, data.edge_index_dict)

# Submission

Submission File
For each `session` id and `type` combination in the test set, you must predict the `aid` values in the `label` column, which is space delimited. You can predict up to 20 `aid` values per row. The file should contain a header and have the following format:

```
session_type,labels
12906577_clicks,135193 129431 119318 ...
12906577_carts,135193 129431 119318 ...
12906577_orders,135193 129431 119318 ...
12906578_clicks, 135193 129431 119318 ...
etc.
```