In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
import pickle
import os
from sklearn.decomposition import NMF, PCA
from sklearn.cluster import KMeans
from importlib import reload

import sys
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '/Users/madisonthantu/Desktop/DREAM/t-recs')
from trecs.metrics import MSEMeasurement, InteractionSpread, InteractionSpread, InteractionSimilarity, RecSimilarity, RMSEMeasurement, InteractionMeasurement
from trecs.components import Users
import trecs.matrix_ops as mo
from trecs.models import ContentFiltering
import seaborn as sns

sys.path.insert(1, '..')
import src.globals as globals
from wrapper.models.bubble import BubbleBurster
from src.utils import *
from src.plotting import plot_measurements 
from src.scoring_functions import cosine_sim, entropy, content_fairness
from wrapper.metrics.evaluation_metrics import *
from wrapper.metrics.evaluation_metrics import CosineSim
random_state = np.random.seed(42)
plt.style.use("seaborn")

# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

  plt.style.use("seaborn")


In [2]:
n_attrs=20
max_iter=1000
n_clusters=25
binary_ratings_matrix = load_and_process_movielens(file_path='/Users/madisonthantu/Desktop/DREAM/data/ml-100k/u.data')

# Get user and item representations using NMF
user_representation, item_representation = create_embeddings(binary_ratings_matrix, n_attrs=n_attrs, max_iter=max_iter)

num_users = user_representation.shape[0]
num_items = item_representation.shape[1]
print(f'Number of items: {num_items}')
print(f'Number of users: {num_users}')

users = Users(
    actual_user_profiles=user_representation, 
    repeat_interactions=False, 
)

# Define topic clusters using NMF
item_cluster_ids, item_cluster_centers = get_clusters(item_representation.T, name='item', n_clusters=n_clusters, n_attrs=n_attrs, max_iter=max_iter)
user_cluster_ids, user_cluster_centers = get_clusters(user_representation, name='user', n_clusters=n_clusters, n_attrs=n_attrs, max_iter=max_iter)


# measurements = [
#     InteractionMeasurement(),
#     MSEMeasurement(),  
#     CosineSim(),
#     CosineSimPenalty(),
# ]

# Create BubbleBurster instance
recommender = BubbleBurster(
    user_representation=user_representation,
    item_representation=item_representation,
    item_topics=item_cluster_ids,
)

# add an MSE measurement
recommender.add_metrics(InteractionMeasurement(), MSEMeasurement(), CosineSim(), CosineSimPenalty())
# Run for 5 time steps
recommender.run(timesteps=5)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Loaded embeddings.
Number of items: 1682
Number of users: 943
Loaded clusters.
Loaded clusters.


100%|██████████| 5/5 [00:04<00:00,  1.00it/s]


In [3]:
measurements = recommender.get_measurements()
for i in measurements.keys():
    print(i)

interaction_histogram
mse
cos_sim
cos_sim_penalty
timesteps


In [4]:
measurements['cos_sim'][-1]

array([[1.50029729, 0.55459898, 4.35158201, ..., 0.        , 0.        ,
        4.22801487],
       [5.827362  , 0.04503505, 3.1045015 , ..., 3.56720479, 0.        ,
        0.        ],
       [0.01691043, 0.15668429, 0.64933786, ..., 2.58822517, 0.        ,
        1.07003921],
       ...,
       [5.39305317, 0.        , 2.42443198, ..., 0.08502001, 0.        ,
        0.        ],
       [0.7757051 , 0.53929879, 3.6072633 , ..., 0.8400577 , 0.        ,
        4.11137302],
       [1.08305808, 9.69671499, 0.        , ..., 0.        , 3.65501614,
        0.        ]])

In [5]:
measurements['cos_sim_penalty'][-1]

array([[0.85236761, 0.15120537, 0.44796589, ..., 0.03986426, 0.03986426,
        0.00553374],
       [1.43515193, 0.13067748, 0.23301177, ..., 0.09979839, 0.12709297,
        0.12709297],
       [0.04765707, 0.07352691, 0.10814812, ..., 0.02766491, 0.03709302,
        0.02870783],
       ...,
       [2.45211916, 0.04640799, 0.23021797, ..., 0.04594617, 0.04640799,
        0.04640799],
       [0.29600421, 0.12885741, 0.23883559, ..., 0.06703023, 0.07292693,
        0.03598388],
       [0.38377198, 1.11540677, 0.05683475, ..., 0.05683475, 0.02327457,
        0.05683475]])

In [6]:
recommender.predicted_scores.value

array([[8.27506326e-01, 1.16887102e-01, 4.51617456e-01, ...,
        0.00000000e+00, 0.00000000e+00, 7.94963399e-03],
       [1.36633258e+00, 4.03486107e-03, 1.36963818e-01, ...,
        8.37746738e-03, 0.00000000e+00, 0.00000000e+00],
       [1.07331627e-02, 3.80007375e-02, 7.75484855e-02, ...,
        1.64541470e-02, 0.00000000e+00, 2.31520453e-03],
       ...,
       [2.45964170e+00, 0.00000000e+00, 2.08054302e-01, ...,
        3.88381536e-04, 0.00000000e+00, 0.00000000e+00],
       [2.30834331e-01, 6.13234700e-02, 2.01981292e-01, ...,
        2.50387788e-03, 0.00000000e+00, 4.17068378e-03],
       [3.37767810e-01, 1.15553917e+00, 0.00000000e+00, ...,
        0.00000000e+00, 2.98997445e-03, 0.00000000e+00]])

In [7]:
recommender.run(timesteps=2)
print(measurements['timesteps'])
measurements['cos_sim'][-1]

100%|██████████| 2/2 [00:02<00:00,  1.01s/it]

[0 1 2 3 4 5]





array([[0.97118103, 0.40998671, 3.04508728, ..., 0.        , 0.        ,
        3.12555555],
       [3.93955648, 0.04026906, 2.0873394 , ..., 2.58658051, 0.        ,
        0.        ],
       [0.01207888, 0.11191735, 0.46381276, ..., 1.84873227, 0.        ,
        0.76431372],
       ...,
       [3.85218084, 0.        , 1.73173713, ..., 0.06072858, 0.        ,
        0.        ],
       [0.47163929, 0.39521147, 2.50545769, ..., 0.61660929, 0.        ,
        3.01291566],
       [0.77361291, 6.92622499, 0.        , ..., 0.        , 2.61072582,
        0.        ]])