In [53]:
import sys
# Make the local t-recs checkout importable (machine-specific absolute path).
sys.path.insert(1, '/Users/madisonthantu/Desktop/DREAM/t-recs')
# NOTE(review): InteractionSpread is listed twice in this import; the repeat is redundant.
from trecs.metrics import MSEMeasurement, InteractionSpread, InteractionSpread, InteractionSimilarity, RecSimilarity, RMSEMeasurement, InteractionMeasurement
from trecs.components import Users

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
from collections import defaultdict

# Make the T-RECS-RS-research checkout importable (machine-specific absolute path).
sys.path.insert(1, '/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research')
from prelim_experiments.param_experiments.chaney_utils import (
    load_sim_results,
    graph_relative_to_ideal,
    merge_results,
    graph_metrics,
    graph_metrics_by_axis,
    graph_relative_to_global_by_axis,
    transform_relative_to_global,
    graph_histogram_metric_by_axis
)
from wrapper.models.bubble import BubbleBurster
from src.utils import compute_constrained_clusters, create_global_user_pairs, user_topic_mapping, create_cluster_user_pairs, load_and_process_movielens, compute_embeddings
from wrapper.metrics.clustering_metrics import MeanCosineSim, MeanDistanceFromCentroid, MeanCosineSimPerCluster, MeanDistanceFromCentroidPerCluster
# NOTE(review): wildcard import of chaney_utils under a second package path
# (param_experiments vs. prelim_experiments.param_experiments above). Names used
# later without an explicit import (e.g. process_measurement) presumably come
# from here -- confirm and import them explicitly.
from param_experiments.chaney_utils import *

import warnings
# Suppresses every warning for the rest of the kernel session.
warnings.simplefilter("ignore")

import itertools

In [54]:
# --- Experiment configuration ---
num_attributes, max_iter, num_clusters = 15, 1000, 10
data_path = '/Users/madisonthantu/Desktop/DREAM/data/ml-100k/u.data'

# --- Ratings matrix -> user / item embeddings ---
binary_ratings_matrix = load_and_process_movielens(file_path=data_path)
user_representation, item_representation = compute_embeddings(
    binary_ratings_matrix,
    n_attrs=num_attributes,
    max_iter=max_iter,
)

# --- Constrained clustering: items first (on the transposed item
#     representation), then users ---
item_cluster_ids, item_cluster_centers = compute_constrained_clusters(
    embeddings=item_representation.T,
    name='item_clusters',
    n_clusters=num_clusters,
)
user_cluster_ids, user_cluster_centers = compute_constrained_clusters(
    embeddings=user_representation,
    name='user_clusters',
    n_clusters=num_clusters,
)

# User pairs derived from the user cluster assignment; the intra-cluster pairs
# are passed to the pairwise metrics in a later cell.
inter_cluster_user_pairs, intra_cluster_user_pairs = create_cluster_user_pairs(
    user_cluster_ids
)

# --- Simulated user population built from the user embeddings ---
users = Users(
    actual_user_profiles=user_representation,
    repeat_interactions=False,
    drift=0.05,
    attention_exp=-0.2,
)

Calculating embeddings ...
... Calculated embeddings.
Calculating constrained clusters ...
... Calculated constrained clusters.
Calculating constrained clusters ...
... Calculated constrained clusters.


In [55]:
# Metrics constructed with diagnostics=True.
metrics_w_diagnostics_list = [
    MSEMeasurement(diagnostics=True),
    MeanCosineSim(
        pairs=intra_cluster_user_pairs,
        name='mean_intra_cluster_cosine_sim',
        diagnostics=True,
    ),
    InteractionSimilarity(
        pairs=intra_cluster_user_pairs,
        diagnostics=True,
        name="test_interaction_similarity",
    ),
]

# Metrics constructed without the diagnostics flag.
metrics_wo_diagnostics_list = [InteractionSpread()]

# Everything that gets attached to the model, diagnostics-enabled metrics first.
metrics_list = [*metrics_w_diagnostics_list, *metrics_wo_diagnostics_list]

In [56]:
# Simulation schedule.
train_timesteps, run_timesteps = 5, 10
repeated_training = 1  # forwarded to run() as train_between_steps

# Build the model from the simulated users, item embeddings and item clusters.
bubble = BubbleBurster(
    actual_user_representation=users,
    actual_item_representation=item_representation,
    item_topics=item_cluster_ids,
    num_attributes=num_attributes,
    num_items_per_iter=10,
    record_base_state=True,
)
bubble.add_metrics(*metrics_list)

# Startup/training phase, then the main run.
bubble.startup_and_train(timesteps=train_timesteps)
bubble.run(timesteps=run_timesteps, train_between_steps=repeated_training)

# end logging
bubble.close()

100%|██████████| 5/5 [00:01<00:00,  3.36it/s]
100%|██████████| 10/10 [00:15<00:00,  1.51s/it]


In [57]:
# Fetch the measurements recorded by the model; later cells index this by metric name.
measurements = bubble.get_measurements()

In [78]:
# Accumulator: metric name -> list of processed measurement results (missing keys start as []).
result_metrics = defaultdict(list)

In [80]:
# Append the processed measurement of every tracked metric under its name.
# NOTE(review): this appends, so re-running the cell without first resetting
# result_metrics adds another entry per metric on every execution.
# NOTE(review): process_measurement is not imported explicitly in this notebook;
# presumably it comes from the chaney_utils wildcard import -- confirm.
for metric in metrics_list:
    result_metrics[metric.name].append(process_measurement(bubble, metric.name))
    # print(metric.name)

In [81]:
# Display everything accumulated in result_metrics so far.
result_metrics

defaultdict(list,
            {'mse': [1,
              [0.09144564835006835,
               0.0957836629552253,
               0.10007743851719117,
               0.1045839146053082,
               0.10928760765607394,
               1.0237273979777277,
               1.0996808708861778,
               1.1542909053893755,
               1.1908593622563945,
               1.21663097462914,
               1.233791885307998,
               1.2446184647657303,
               1.2515526175522285,
               1.255069074910466,
               1.2573158439584424]],
             'mean_intra_cluster_cosine_sim': [1,
              [0.3813411811521992,
               0.3928420847534733,
               0.40403600079712304,
               0.4164313433904737,
               0.4275106078651485,
               0.44020599574112285,
               0.45180532321370703,
               0.46474422634822904,
               0.4779642747484245,
               0.4916239165939388,
               0.50625873505

In [8]:
# Summary-statistic columns to display.
diag_list = ["mean", "std", "median", "min", "max", "skew"]
# NOTE(review): `diagnostics` is not assigned in any cell visible in this
# notebook; presumably it is left over from an earlier kernel session --
# confirm, otherwise this cell raises NameError on Restart & Run All.
diagnostics['mse'][diag_list]
# diagnostics.keys()

Unnamed: 0,mean,std,median,min,max,skew
0,0.01423,0.008812,0.012875,0.000516,0.063828,1.158491
1,0.0157,0.008883,0.014421,0.001303,0.064469,1.086996
2,0.017275,0.008961,0.01627,0.002484,0.065686,1.055427
3,0.018897,0.009,0.017574,0.003644,0.066405,0.985687
4,0.02055,0.009053,0.019319,0.003664,0.06874,0.936281
5,0.022242,0.009081,0.020968,0.005221,0.068781,0.883361
6,0.675294,0.213838,0.654317,0.24871,1.905435,1.100111
7,0.741508,0.214519,0.722215,0.259617,1.871262,0.690505
8,0.787108,0.214238,0.770538,0.251582,1.643552,0.418738
9,0.818678,0.213941,0.809003,0.252717,1.548824,0.265251


In [48]:
def process_diagnostic(metric, diagnostics_list):
    """Extract selected diagnostic columns of a metric as plain Python lists.

    Parameters
    ----------
    metric
        Object exposing ``get_diagnostics()``; the returned table is indexed
        with ``diagnostics_list`` and converted via ``to_dict(orient='list')``.
    diagnostics_list
        Column labels to keep, e.g. ``["mean", "std"]``.

    Returns
    -------
    dict
        Maps each requested column label to the list of that column's values.
    """
    diagnostics_table = metric.get_diagnostics()
    selected_columns = diagnostics_table[diagnostics_list]
    return selected_columns.to_dict(orient='list')
        

In [52]:
# Summary statistics to pull out of each metric's diagnostics table.
diag_list = ["mean", "std", "median", "min", "max", "skew"]

# metric name -> {statistic name -> list of values}.
# A defaultdict without a factory behaves like a plain dict: missing keys raise KeyError.
all_diagnostics = defaultdict()

for metric in metrics_w_diagnostics_list:
    # Store the result under the metric's name so the cells below can look it
    # up. Printing alone leaves all_diagnostics empty on a fresh kernel, and
    # the later all_diagnostics[...] lookups then fail with KeyError.
    all_diagnostics[metric.name] = process_diagnostic(metric, diag_list)
    print(all_diagnostics[metric.name])

{'mean': [0.01423034384390422, 0.015700215619471602, 0.017274662639813407, 0.0188965687659223, 0.020549854672851614, 0.02224243482023592, 0.6752944962777849, 0.7415084150547003, 0.7871081987649307, 0.8186776140446618, 0.8385147446112352, 0.8503411072595964, 0.8579596211155391, 0.8623721374839589, 0.8648982253007456, 0.8656020674313918], 'std': [0.008812077902400869, 0.008882883294071022, 0.008960951493973872, 0.008999999375182564, 0.009052753555376592, 0.009080838741648375, 0.21383831390594774, 0.2145185044967923, 0.21423787597576882, 0.2139408054074752, 0.21181619047210554, 0.20943224550651618, 0.20726735724407142, 0.2059075974239598, 0.20558514751981716, 0.20581280532745544], 'median': [0.012875172148730508, 0.014420596024314612, 0.016270240789735452, 0.01757421957670112, 0.019319338742851144, 0.02096823500702274, 0.654316841883011, 0.7222150848693425, 0.770538353317996, 0.8090033480899467, 0.8257736663633666, 0.8401841507809195, 0.8501737502112439, 0.8544121137712829, 0.851673031732

In [50]:
# List the keys currently stored in all_diagnostics.
all_diagnostics.keys()

dict_keys(['mse', 'mean_intra_cluster_cosine_sim', 'test_interaction_similarity'])

In [51]:
# Display the full contents of all_diagnostics.
all_diagnostics

defaultdict(None,
            {'mse': {'mean': [0.01423034384390422,
               0.015700215619471602,
               0.017274662639813407,
               0.0188965687659223,
               0.020549854672851614,
               0.02224243482023592,
               0.6752944962777849,
               0.7415084150547003,
               0.7871081987649307,
               0.8186776140446618,
               0.8385147446112352,
               0.8503411072595964,
               0.8579596211155391,
               0.8623721374839589,
               0.8648982253007456,
               0.8656020674313918],
              'std': [0.008812077902400869,
               0.008882883294071022,
               0.008960951493973872,
               0.008999999375182564,
               0.009052753555376592,
               0.009080838741648375,
               0.21383831390594774,
               0.2145185044967923,
               0.21423787597576882,
               0.2139408054074752,
               0.2118161904

In [33]:
# Show every stored statistic for the intra-cluster cosine-similarity metric
# (the commented-out variant narrows this to the "mean" entry only).
# all_diagnostics["mean_intra_cluster_cosine_sim"]["mean"]
all_diagnostics["mean_intra_cluster_cosine_sim"]

{'mean': [0.38668417516389225,
  0.3965249054620953,
  0.40568720826165044,
  0.4150593164833908,
  0.4251472717287612,
  0.43477801899072094,
  0.44494760630456953,
  0.4563853532388864,
  0.4692559344667218,
  0.48238198872849586,
  0.4955697414200381,
  0.5095465406356313,
  0.5234856280243864,
  0.5375847796201138,
  0.5513077506639585,
  0.5656502844550566],
 'std': [0.3130538026464558,
  0.30809349862964763,
  0.3033884136380633,
  0.29899826599682855,
  0.29434667445050583,
  0.2899322475792117,
  0.284998006866479,
  0.27898396690819766,
  0.27198214106039337,
  0.26514274246228065,
  0.2577685196784447,
  0.25016749018093365,
  0.2416793598825684,
  0.23318320073633586,
  0.22520467986772968,
  0.21689623737770208],
 'median': [0.31915778425485847,
  0.3332336295114793,
  0.34499767097837974,
  0.35767947869514466,
  0.371528955335117,
  0.38508960979467843,
  0.39813721368688515,
  0.4130074848564508,
  0.4299549276466046,
  0.4467844926506554,
  0.46313841549528634,
  0.4804

In [15]:
# Values recorded under the intra-cluster cosine-similarity metric's name in the measurements.
measurements["mean_intra_cluster_cosine_sim"]

[0.38668417516389225,
 0.3965249054620953,
 0.40568720826165044,
 0.4150593164833908,
 0.4251472717287612,
 0.43477801899072094,
 0.44494760630456953,
 0.4563853532388864,
 0.4692559344667218,
 0.48238198872849586,
 0.4955697414200381,
 0.5095465406356313,
 0.5234856280243864,
 0.5375847796201138,
 0.5513077506639585,
 0.5656502844550566]

In [83]:
# List which entries the model's system state exposes.
bubble.get_system_state().keys()

dict_keys(['predicted_users', 'actual_user_scores', 'predicted_items', 'predicted_user_scores', 'timesteps'])

In [89]:
# Scratch check: builds an empty NumPy array; the result is not assigned or used in the visible cells.
np.array([])

array([], dtype=float64)