# imports

In [None]:
import pickle
from srgnn_datasets import SRGNN_Map_Dataset, Augment_Matrix_Dataset, SRGNN_sampler, Clusters_Matrix_Dataset
from utils import load_model, get_dataset, load_model_gm
import os

from torch.utils.data import DataLoader

import numpy as np
import pandas as pd

from tqdm import tqdm

import torch
from pytorch_lightning import Trainer
from math import ceil

In [2]:
# Set PyTorch's float32 matmul precision mode to 'medium' for every cell that follows.
torch.set_float32_matmul_precision('medium')

In [3]:
def get_metrics_by_hand(model, dataloaders, device='cuda', total=None):
    """Compute hit rate and MRR (both in percent) of `model` on each dataloader.

    For every sample, the target shifted by -1 is looked up in that sample's
    row of `sub_scores`. A hit is counted when it is present; the reciprocal
    rank is 1 / (index of the first match + 1), or 0 on a miss.
    NOTE(review): the `target - 1` shift suggests 1-based targets versus
    0-based item indices in `sub_scores` - confirm against `predict_step`.

    Args:
        model: object whose `predict_step(batch)` returns `(sub_scores, targets)`.
        dataloaders: iterable of dataloaders, each yielding batches that are
            sequences of tensors.
        device: device every batch tensor is moved to before `predict_step`.
            Defaults to 'cuda', the value that used to be hard-coded.
        total: optional number of batches, used only for the progress bar.
            When None, tqdm falls back to `len()` of the dataloader if it has
            one. (Previously this was read from notebook globals.)

    Returns:
        A list with one `(dataloader_index, hit_percent, mrr_percent)` tuple
        per dataloader; both values are NaN for a dataloader with no samples.
    """
    m=[]
    for i, d in enumerate(dataloaders):
        hit,mrr=[],[]
        # Pure evaluation: no autograd graph is needed for the predictions.
        with torch.no_grad():
            for batch in tqdm(d, total=total):
                batch=[x.to(device) for x in batch]
                sub_scores, targets=model.predict_step(batch)
                # One device->host transfer per batch instead of one per sample.
                sub_scores=sub_scores.cpu()
                targets=targets.flatten().cpu()
                for score, target in zip(sub_scores, targets):
                    positions=torch.where(score == target - 1)[0]
                    found=positions.numel() > 0
                    hit.append(found)
                    mrr.append(1.0 / (positions[0].item() + 1) if found else 0.0)
        if hit:
            m.append((i, 100*np.average(hit),100*np.average(mrr)))
        else:
            # No samples were seen: report NaN explicitly rather than via a numpy warning.
            m.append((i, float('nan'), float('nan')))
    return m


# otto-recsys

In [4]:
# Run ids evaluated in the otto-recsys section (same ids, same order).
runs=[
    '7dt7uu2i', '31mccq76', 'f42h3b7i', 'z5gthlar', 'icirdid1',
    'q5ncdos3', '20x2q8ap', 'edk5gwaj', 'eyipcpfr', 'u1769vwh',
    '7toa2ybx', 'zrkfqp80', '0dqi0cx4', 'leop64a1', 'c4l0cw03',
    'rufekv8o', 'zja7utqy', 'lc8hf28y', 'pi1q0ni6', '78wwq4e4',
    '7oziz2pv', '77r7ocil', 'umy0p2nx', 'ch778pru', 'dn0mhesi',
    'qpljbobb', 's8wy4wxs',
]



In [None]:
# Build the shared test dataset, dataloader and Trainer from the options of the first run.
run_id=runs[0]

model,opt=load_model(run_id)
# Use a context manager so the file handle is closed (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code - only load trusted dataset files.
with open('../datasets/' + opt.dataset + '/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)
normal_test_dataset=SRGNN_Map_Dataset(test_data, shuffle=False)

normal_test_dataloader=DataLoader(normal_test_dataset, 
                            num_workers=os.cpu_count(),  
                            sampler=SRGNN_sampler(normal_test_dataset, opt.batchSize, shuffle=False, drop_last=False)
                            )
# Number of batches in one pass over the test set; used for both Trainer limits.
n_test_batches=ceil(normal_test_dataset.length/opt.batchSize)
trainer=Trainer(limit_test_batches=n_test_batches,
                limit_predict_batches=n_test_batches)

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue

    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [7]:
result_df=pd.DataFrame(res_df)

In [None]:
result_df[['test_loss', 'test_hit', 'test_mrr', 'augment_matrix','augment_noise_p','augment_alg']].groupby(['augment_matrix', 'augment_noise_p', 'augment_alg']).mean()

# yoochoose 1/64

## baseline models

In [None]:
# Load the baseline run and the pickled test split of the dataset it was trained on.
run_id='7zi9x9w8'

model,opt=load_model(run_id)
# Use a context manager so the file handle is closed (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code - only load trusted dataset files.
with open('../datasets/' + opt.dataset + '/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)

In [None]:
# Wrap the test split in a dataset and an unshuffled dataloader that keeps the last partial batch.
normal_test_dataset=SRGNN_Map_Dataset(test_data, shuffle=False)

test_sampler=SRGNN_sampler(normal_test_dataset, opt.batchSize, shuffle=False, drop_last=False)
normal_test_dataloader=DataLoader(
    normal_test_dataset,
    num_workers=os.cpu_count(),
    sampler=test_sampler,
)

In [None]:
# Cap test and predict loops at the number of batches in one pass over the test set.
n_test_batches=ceil(normal_test_dataset.length/opt.batchSize)
trainer=Trainer(limit_test_batches=n_test_batches, limit_predict_batches=n_test_batches)

In [9]:
runs=[
    '7zi9x9w8'
]

In [None]:
# Evaluate each baseline run on the plain test dataloader.
# The return value of trainer.test is discarded here; nothing is collected.
for run_id in runs:
    model,opt=load_model(run_id)
    print('Metrics on normal Adjacency matrix')
    print(run_id)
    trainer.test(model, normal_test_dataloader)

## augmented models

In [12]:
runs=[ # yoochoose1/64
    'qgkxyze7',
    'r1xr7g4v',
    'itxri54t',
    '7vvqd0ib',
    'jefoas5f',
    'ai6ytfw2',
    'xm3z645m',
    'zf87zj40',    
]

In [None]:
# Test every augmented run on the plain test dataloader, printing its augmentation
# settings and keeping the metrics together with the run id and `augment_nogmm`.
res_df=[]
for run_id in runs:
    model,opt=load_model(run_id)
    print(run_id)
    print(
        'Distnace Augmentation:', opt.augment_matrix,
        'Clusters:', opt.augment_clusters,
        'Categories:', opt.augment_categories,
        'Noise std: ', opt.augment_std,
        'base model', opt.augment_old_run_id,
    )
    test_results=trainer.test(model, {'normal':normal_test_dataloader})
    metrics=test_results[0]
    metrics['run_id']=run_id
    metrics['augment_nogmm']=opt.augment_nogmm
    res_df.append(metrics)

In [None]:
res_df=pd.DataFrame(res_df)
res_df.drop(columns='run_id').groupby('augment_nogmm').mean()

In [None]:
# Recompute hit/MRR with get_metrics_by_hand for every run, keyed by run id.
results={}
for run_id in runs:
    model,opt=load_model(run_id)
    print(run_id)
    print(
        'Distnace Augmentation:', opt.augment_matrix,
        'Clusters:', opt.augment_clusters,
        'Categories:', opt.augment_categories,
        'Noise std: ', opt.augment_std,
        'base model', opt.augment_old_run_id,
    )
    results[run_id]=get_metrics_by_hand(model, [normal_test_dataloader])

In [28]:
# Flatten `results` ({run_id: [(DataLoader_id, hit, mrr), ...]}) into one row per
# (run, dataloader). Building the frame from records avoids the transpose/inplace
# chain, works when `results` is empty, and keeps every dataloader entry.
manual_res_df=pd.DataFrame(
    [(rid, *row) for rid, rows in results.items() for row in rows],
    columns=['run_id','DataLoader_id','hit','mrr'],
)

In [None]:
# `fullres_df` is not defined anywhere in the visible notebook and raised NameError;
# display the manually computed metrics frame instead.
# NOTE(review): confirm this is the frame that was meant to be inspected here.
manual_res_df

In [None]:
# Attach the manually computed metrics to the Lightning metrics, matched on run_id.
# Dropping any previously merged columns first keeps the cell idempotent:
# re-running it no longer produces hit_x/hit_y style duplicate columns.
res_df=(
    res_df
    .drop(columns=['DataLoader_id','hit','mrr'], errors='ignore')
    .merge(manual_res_df, on='run_id')
)
res_df.drop(columns='run_id').groupby('augment_nogmm').mean()

## final comparison 

In [15]:
runs="""jcohtdpz
c7lixdzn
9r9e7dsx
lx4jtu4g
au07imlq
u337t04h
k7eoaxze
ln32zyo6
ud3mdfu3
zw2j8lcv
k598voxs
r5p9ms90
qhcln02h
1ttxe7ox
cqw7xz4i
gd66gitx
alh21te8
da4ptrb0
khflo2wo
uyyjj6l8
554tdh98
eoart48b
had1602b
0d2jc7tz
8r761av3
svgb60go
4627kuot
ye95z6m4""".split('\n')


In [27]:
runs=['ahsuiwog', 'e8j91dlm', 'sn0yz16r', 'zhy3fnyf', 'c31yywda']

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue

    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [29]:
result_df=pd.DataFrame(res_df)

In [None]:
# noise only
result_df[['test_loss', 'test_hit', 'test_mrr', 'augment_matrix','augment_alg']].groupby(['augment_matrix','augment_alg']).mean()

In [None]:
result_df[['test_loss', 'test_hit', 'test_mrr', 'augment_matrix','augment_alg']].groupby(['augment_matrix','augment_alg']).mean()

# yoochoose 1/4

In [21]:
runs="""3j5nkw4m
fxrwajhe
vmbjvdhs
6vpf6swz
hbuhmnxw
za9sjn76
bchj1eho""".split('\n')


In [None]:
# Evaluate every run on the currently defined `normal_test_dataloader` and collect
# one record per run: test metrics plus the run id and all of the run's options.
# NOTE(review): this section does not rebuild the dataloader/trainer, so it relies
# on the ones created earlier in the notebook - confirm they match this dataset.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue

    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [23]:
result_df=pd.DataFrame(res_df)

In [None]:
result_df[['test_loss', 'test_hit', 'test_mrr', 'augment_matrix','augment_alg']]

In [None]:
result_df[['test_loss', 'test_hit', 'test_mrr', 'augment_matrix','augment_alg']].groupby(['augment_matrix','augment_alg']).mean()

# diginetica

## baseline models

In [None]:
# Load the baseline run and the pickled test split of the dataset it was trained on.
run_id='qm2ur7o3'

model,opt=load_model(run_id)
# Use a context manager so the file handle is closed (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code - only load trusted dataset files.
with open('../datasets/' + opt.dataset + '/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)

In [None]:
# Wrap the test split in a dataset and an unshuffled dataloader that keeps the last partial batch.
normal_test_dataset=SRGNN_Map_Dataset(test_data, shuffle=False)

test_sampler=SRGNN_sampler(normal_test_dataset, opt.batchSize, shuffle=False, drop_last=False)
normal_test_dataloader=DataLoader(
    normal_test_dataset,
    num_workers=os.cpu_count(),
    sampler=test_sampler,
)

In [None]:
# Cap test and predict loops at the number of batches in one pass over the test set.
n_test_batches=ceil(normal_test_dataset.length/opt.batchSize)
trainer=Trainer(limit_test_batches=n_test_batches, limit_predict_batches=n_test_batches)

In [8]:
runs=[ ## default hparams, as in paper
    'run-20240916_203347-izcd5fci',
    'run-20240916_200756-q3fjvapa',
]

In [13]:
runs=[
    ## my hparams
    'qm2ur7o3',
    '3abge2uq',
    '4dm99qnd',
    'jxgwsuta',
]

In [None]:
# Evaluate each baseline run on the plain test dataloader and keep the first
# metrics dict returned by trainer.test, tagged with the run id.
res_df=[]
for run_id in runs:
    model,opt=load_model(run_id)
    print('Metrics on normal Adjacency matrix')
    print(run_id)
    metrics=trainer.test(model, normal_test_dataloader)[0]
    metrics['run_id']=run_id
    res_df.append(metrics)

In [None]:
pd.DataFrame(res_df)[['test_loss','test_hit','test_mrr']].mean()

## only noise

In [10]:
runs=['s4uxxqad', 'rbwxpsy6', 'ba46mnkl', 'h2a8ujnt']

In [None]:
# Evaluate every run on the plain test dataloader, collect metrics + run options
# per run, then average the test metrics per `augment_matrix` value.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)
res_df=pd.DataFrame(res_df)
res_df.groupby('augment_matrix')[['test_loss', 'test_hit', 'test_mrr',]].mean()

## augmented models

In [21]:
runs=[ 
    '9kf534bm',
    'nbhakjb7',
    'fmm07us9',
    'wtqp9kti',
    'dlnjkkym',
    'rrqcjbjh',
    '4kxamho0',
    'cx8cnx7m',    
    'seb1ybp1',    
    'bgqjpxh8',    
    'r8d4fcqv',    
    'ey3bjtz5',    
    '8npur3kf',  
    '21wtiveg',
    'run-20240916_191647-vyrrq40z',
    'run-20240916_185628-j8kqiv89',
]

In [12]:
runs=[
    'run-20240916_191647-vyrrq40z',
    'run-20240916_185628-j8kqiv89',
]

In [21]:
### 4 times per each # gmms
runs=['yp1n5fb3', 'naj1qrsy', 'pl1wvihn', 's7r181i0', 'fp4lprkj',
       'yrq35hmp', 'pxgponas', '31hc2s4t', 'v94f9jmw', '9wa54kna',
       'xbpg8ncx', 'kltkp2te', 'z7mdp78e', 'o9t9gt9v', 'qdxqwnrw',
       '0zl3ui7x', '7jkmaij6', 'ud4ap40l', '5k1ezsql', 'zkgt4s6x',
       'egqyw7yg', '6ixyzjk9', 'ub1h5qr3', '8iea3bxp', '8f9v8z85',
       'f80nrp4r', 'b60x4mtc', 'm8n6p80h', '26kyds0s', '628jlwv2',
       'dwgw5a3v', '18j6ce86', 'b5pfaja0', 'rwhslx2m', 'gt0rzzmz',
       'xscwi8xw', 'kkpuxgtn']+['c7pzwt2z', '17uejl8y', '57zrr3vh', 'op22qkq4']+['uxt9w9tv', '1qly30ga', 'y7yqfh84', '6i71w436']

In [31]:
runs=[
    'run-20241002_162623-kkpuxgtn',
    'run-20241002_160557-9ueod1xy',
]

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [23]:
res_df=pd.DataFrame(res_df)

In [None]:
res_df.drop(columns='run_id').groupby(['augment_nogmm','augment_old_run_id'])[['test_loss', 'test_hit', 'test_mrr',]].mean()

In [None]:
res_df.drop(columns='run_id').groupby(['augment_alg','augment_nogmm'])[['test_loss', 'test_hit', 'test_mrr',]].mean()

In [None]:
res_df.loc[res_df.augment_nogmm==8][['test_loss', 'test_hit', 'test_mrr','augment_alg', 'augment_old_run_id']]

In [None]:
display(res_df)

In [None]:
# Recompute hit/MRR with get_metrics_by_hand for every run, keyed by run id.
results={}
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the results.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    print('Distnace Augmentation:', opt.augment_matrix,
          'Clusters:', opt.augment_clusters,
          'Categories:', opt.augment_categories,
          'Noise std: ', opt.augment_std,
          'base model', opt.augment_old_run_id,
          )
    metrics=get_metrics_by_hand(model, [normal_test_dataloader])
    results[run_id]=metrics
    print(metrics)

In [16]:
# Flatten `results` ({run_id: [(DataLoader_id, hit, mrr), ...]}) into one row per
# (run, dataloader). Building the frame from records avoids the transpose/inplace
# chain, works when `results` is empty, and keeps every dataloader entry.
manual_res_df=pd.DataFrame(
    [(rid, *row) for rid, rows in results.items() for row in rows],
    columns=['run_id','DataLoader_id','hit','mrr'],
)

In [None]:
# Attach the manually computed metrics, matched on run_id. Dropping any previously
# merged columns first keeps the cell idempotent: re-running it no longer yields
# suffixed duplicates such as hit_x/hit_y.
res_df=(
    res_df
    .drop(columns=['DataLoader_id','hit','mrr'], errors='ignore')
    .merge(manual_res_df, on='run_id')
)

In [None]:
res_df.head()

In [None]:
# Compare Lightning metrics with the manually computed ones per `augment_nogmm`.
# The merged columns are named 'hit'/'mrr'; the former 'hit_x'/'mrr_x' only exist
# after the merge cell has been run twice and raise KeyError on a fresh run.
res_df.drop(columns='run_id').groupby('augment_nogmm')[['test_loss', 'test_hit', 'hit','test_mrr','mrr']].mean()

## categories runs for comparison

In [2]:
runs=['c7pzwt2z', '17uejl8y', '57zrr3vh', 'op22qkq4']

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [10]:
res_df=pd.DataFrame(res_df)

In [None]:
res_df.drop(columns='run_id').groupby(['augment_nogmm'])[['test_loss', 'test_hit', 'test_mrr',]].mean()

## i2i distance

In [18]:
runs=['yfd2pi3n', 'ss45a3b3', 'nwgcf1hh', '8llxhkxm']

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [20]:
res_df=pd.DataFrame(res_df)

In [None]:
res_df.drop(columns='run_id').groupby(['augment_nogmm'])[['test_loss', 'test_hit', 'test_mrr',]].mean()

In [None]:
# Recompute hit/MRR by hand for every run, flatten the results into a frame and
# merge them into `res_df` on run_id.
results={}
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the results.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    print('Distnace Augmentation:', opt.augment_matrix,
          'Clusters:', opt.augment_clusters,
          'Categories:', opt.augment_categories,
          'Noise std: ', opt.augment_std,
          'base model', opt.augment_old_run_id,
          )
    metrics=get_metrics_by_hand(model, [normal_test_dataloader])
    results[run_id]=metrics
    print(metrics)
# One row per (run, dataloader); built from records so an empty `results` still
# yields a frame with the expected columns.
manual_res_df=pd.DataFrame(
    [(rid, *row) for rid, rows in results.items() for row in rows],
    columns=['run_id','DataLoader_id','hit','mrr'],
)
# Drop previously merged columns first so re-running the cell stays idempotent
# (no hit_x/hit_y suffixes).
res_df=(
    res_df
    .drop(columns=['DataLoader_id','hit','mrr'], errors='ignore')
    .merge(manual_res_df, on='run_id')
)

In [None]:
res_df.drop(columns='run_id').groupby('augment_nogmm')[['test_loss', 'test_hit', 'hit','test_mrr','mrr']].mean()

## i2i no normalization aug_p=1

In [8]:
runs=['s2qhugqc', '1jvw1mc3', 'gpx6kyhh', 'vx4bvbxf']

In [None]:
# Evaluate every run on the plain test dataloader, collect metrics + run options
# per run, then average the test metrics per `augment_matrix` value.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)
res_df=pd.DataFrame(res_df)
res_df.groupby('augment_matrix')[['test_loss', 'test_hit', 'test_mrr',]].mean()

## i2i no normalization

In [7]:
runs=['o6eky4nz', '0todhcp5', 'czhmlmnc', 'cc0e54iw']

In [None]:
# Evaluate every run on the plain test dataloader, collect metrics + run options
# per run, then average the test metrics per `augment_matrix` value.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)
res_df=pd.DataFrame(res_df)
res_df.groupby('augment_matrix')[['test_loss', 'test_hit', 'test_mrr',]].mean()

## kmeans augmented runs

In [12]:
runs=['uxt9w9tv', '1qly30ga', 'y7yqfh84', '6i71w436']

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [14]:
res_df=pd.DataFrame(res_df)

In [None]:
res_df.drop(columns='run_id').groupby(['augment_nogmm'])[['test_loss', 'test_hit', 'test_mrr',]].mean()

In [None]:
# Recompute hit/MRR by hand for every run, flatten the results into a frame and
# merge them into `res_df` on run_id.
results={}
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the results.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue
    print(run_id)
    print('Distnace Augmentation:', opt.augment_matrix,
          'Clusters:', opt.augment_clusters,
          'Categories:', opt.augment_categories,
          'Noise std: ', opt.augment_std,
          'base model', opt.augment_old_run_id,
          )
    metrics=get_metrics_by_hand(model, [normal_test_dataloader])
    results[run_id]=metrics
    print(metrics)
# One row per (run, dataloader); built from records so an empty `results` still
# yields a frame with the expected columns.
manual_res_df=pd.DataFrame(
    [(rid, *row) for rid, rows in results.items() for row in rows],
    columns=['run_id','DataLoader_id','hit','mrr'],
)
# Drop previously merged columns first so re-running the cell stays idempotent
# (no hit_x/hit_y suffixes).
res_df=(
    res_df
    .drop(columns=['DataLoader_id','hit','mrr'], errors='ignore')
    .merge(manual_res_df, on='run_id')
)

In [None]:
res_df.drop(columns='run_id').groupby('augment_nogmm')[['test_loss', 'test_hit', 'hit','test_mrr','mrr']].mean()

## hidden size = 16

In [4]:
run_id='run-20240930_183501-kmnrqi6i'

aug_run_ids=[
    'run-20241002_140249-xros8ub6',
    'run-20241002_131153-wijus07w',
    'run-20241002_124004-pkv65nix',
    'run-20241002_121528-v3ukvj2b',
]

In [None]:
# Load the reference run and build the test dataset / dataloader from its options.
model,opt=load_model(run_id)
# Use a context manager so the file handle is closed (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code - only load trusted dataset files.
with open('../datasets/' + opt.dataset + '/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)
normal_test_dataset=SRGNN_Map_Dataset(test_data, shuffle=False)

normal_test_dataloader=DataLoader(normal_test_dataset, 
                            num_workers=os.cpu_count(),  
                            sampler=SRGNN_sampler(normal_test_dataset, opt.batchSize, shuffle=False, drop_last=False)
                            )

In [None]:
# Cap test and predict loops at the number of batches in one pass over the test set.
n_test_batches=ceil(normal_test_dataset.length/opt.batchSize)
trainer=Trainer(limit_test_batches=n_test_batches, limit_predict_batches=n_test_batches)

In [None]:
# Test the reference run followed by each augmented run on the plain test dataloader,
# printing the run id with its augmentation settings; trainer.test's result is not stored.
for test_run_id in [run_id]+aug_run_ids:
    model,opt=load_model(test_run_id)

    print(test_run_id, opt.augment_matrix, opt.augment_alg)
    trainer.test(model, normal_test_dataloader)

## augment + GM layer

In [None]:
# Evaluate the GM-layer runs (loaded via load_model_gm) on the plain test dataloader,
# collect metrics + run options per run, then average per `augment_matrix` value.
runs=['v9h49456', 'c7ihc3ll', 'jadnjsru', 'pbreuu0o']
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model_gm(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model_gm raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)
res_df=pd.DataFrame(res_df)
res_df.groupby('augment_matrix')[['test_loss', 'test_hit', 'test_mrr',]].mean()

In [11]:
runs=['zq77vmnb','fpu7ucbf']+['12bdl9rw', 'wsix33ma', '1lh9waxz', 'nqk6cfps']+['n02c6569', 'q9w5okna', 'ltt7vsmv', '7o8vwuxv']

In [None]:
# Evaluate every run (loaded via load_model_gm) on the plain test dataloader and
# collect one record per run: test metrics plus the run id and all run options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model_gm(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model_gm raised IndexError: {err}')
        continue
    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [15]:
res_df=pd.DataFrame(res_df)

In [None]:
res_df.groupby('augment_matrix')[['test_loss', 'test_hit', 'test_mrr',]].mean()

In [None]:
res_df

# TAGNN

## otto-recsys

In [6]:
runs = ['5f8cxkla', 'bqpaq7d1', 'onjpsmm2',
        '14oq6k8n',
  'uebl28zc',
  'tfeek0n2',
  '6u14u7kn',
  'gcvf1rcu',
  'ih8175d2',
  'l2w97z5v',
  '1uwq99zx',
  '4l1c4ls3']

In [None]:
# Build the shared test dataset, dataloader and Trainer from the options of the first run.
run_id=runs[0]

model,opt=load_model(run_id)
# Use a context manager so the file handle is closed (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code - only load trusted dataset files.
with open('../datasets/' + opt.dataset + '/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)
normal_test_dataset=SRGNN_Map_Dataset(test_data, shuffle=False)

normal_test_dataloader=DataLoader(normal_test_dataset, 
                            num_workers=os.cpu_count(),  
                            sampler=SRGNN_sampler(normal_test_dataset, opt.batchSize, shuffle=False, drop_last=False)
                            )
# Number of batches in one pass over the test set; used for both Trainer limits.
n_test_batches=ceil(normal_test_dataset.length/opt.batchSize)
trainer=Trainer(limit_test_batches=n_test_batches,
                limit_predict_batches=n_test_batches)

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue

    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [9]:
result_df=pd.DataFrame(res_df)

In [None]:
result_df[['test_loss', 'test_hit', 'test_mrr', 'augment_matrix','augment_noise_p','augment_alg']].groupby(['augment_matrix', 'augment_noise_p', 'augment_alg']).mean()

## yoochoose 1/64

In [4]:
runs = [
 'r7bs25i7',
 '2ssf7s2q',
 'hucuxl8g',
 '1iwv7ul8',
 'sqvv043i',
 '6hkj647n'   
]

In [None]:
# Build the shared test dataset, dataloader and Trainer from the options of the first run.
run_id=runs[0]

model,opt=load_model(run_id)
# Override the loaded batch size; the sampler and the Trainer limits below use this value.
opt.batchSize=32
# Use a context manager so the file handle is closed (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code - only load trusted dataset files.
with open('../datasets/' + opt.dataset + '/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)
normal_test_dataset=SRGNN_Map_Dataset(test_data, shuffle=False)

normal_test_dataloader=DataLoader(normal_test_dataset, 
                            num_workers=os.cpu_count(),  
                            sampler=SRGNN_sampler(normal_test_dataset, opt.batchSize, shuffle=False, drop_last=False)
                            )
# Number of batches in one pass over the test set; used for both Trainer limits.
n_test_batches=ceil(normal_test_dataset.length/opt.batchSize)
trainer=Trainer(limit_test_batches=n_test_batches,
                limit_predict_batches=n_test_batches)

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue

    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [7]:
result_df=pd.DataFrame(res_df)

In [None]:
result_df

In [None]:
result_df[['test_loss', 'test_hit', 'test_mrr', 'augment_matrix','augment_noise_p','augment_alg']].groupby(['augment_matrix', 'augment_noise_p', 'augment_alg']).mean()

## Diginetica

In [4]:
runs = ['uasx1mwm', 'kig9t38y', 'i0o5vrfp', 'keifx1eb','w0dp5onn','paiznfvc']

In [None]:
# Build the shared test dataset, dataloader and Trainer from the options of the first run.
run_id=runs[0]

model,opt=load_model(run_id)
# Override the loaded batch size; the sampler and the Trainer limits below use this value.
opt.batchSize=32
# Use a context manager so the file handle is closed (the bare open() leaked it).
# NOTE: pickle.load can execute arbitrary code - only load trusted dataset files.
with open('../datasets/' + opt.dataset + '/test.txt', 'rb') as test_file:
    test_data = pickle.load(test_file)
normal_test_dataset=SRGNN_Map_Dataset(test_data, shuffle=False)

normal_test_dataloader=DataLoader(normal_test_dataset, 
                            num_workers=os.cpu_count(),  
                            sampler=SRGNN_sampler(normal_test_dataset, opt.batchSize, shuffle=False, drop_last=False)
                            )
# Number of batches in one pass over the test set; used for both Trainer limits.
n_test_batches=ceil(normal_test_dataset.length/opt.batchSize)
trainer=Trainer(limit_test_batches=n_test_batches,
                limit_predict_batches=n_test_batches)

In [None]:
# Evaluate every run on the plain test dataloader and collect one record per run:
# the Lightning test metrics plus the run id and all of the run's options.
res_df=[]
for run_id in runs:
    try:
        model,opt=load_model(run_id)
    except IndexError as err:
        # Report the run instead of silently dropping it from the averages below.
        print(f'skipping {run_id}: load_model raised IndexError: {err}')
        continue

    print(run_id)
    # Only the 'normal' loader is passed, so the first result dict is its metrics.
    metrics=trainer.test(model, {'normal':normal_test_dataloader})[0]
    metrics['run_id']=run_id
    # Flatten the run options into the record so results can be grouped by them.
    metrics.update(vars(opt))
    res_df.append(metrics)

In [7]:
result_df=pd.DataFrame(res_df)

In [None]:
result_df[['test_loss', 'test_hit', 'test_mrr', 'run_id','augment_matrix','augment_alg']]

In [None]:
result_df[['test_loss', 'test_hit', 'test_mrr', 'augment_matrix','augment_noise_p','augment_alg']].groupby(['augment_matrix', 'augment_noise_p', 'augment_alg']).mean()