In [3]:
import sys
sys.path.insert(1, '/Users/madisonthantu/Desktop/DREAM/t-recs')
from trecs.metrics import MSEMeasurement, InteractionSpread, InteractionSpread, InteractionSimilarity, RecSimilarity, RMSEMeasurement, InteractionMeasurement
from trecs.components import Users

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.ndimage import gaussian_filter1d
from collections import defaultdict

sys.path.insert(1, '/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research')
from wrapper.models.bubble import BubbleBurster
from src.utils import compute_constrained_clusters, create_global_user_pairs, user_topic_mapping, create_cluster_user_pairs, load_and_process_movielens, compute_embeddings
from wrapper.metrics.clustering_metrics import MeanCosineSim, MeanDistanceFromCentroid, MeanCosineSimPerCluster, MeanDistanceFromCentroidPerCluster
from src.chaney_utils import *

import warnings
warnings.simplefilter("ignore")

import itertools
import os

from src.utils import user_topic_mapping as user_topic_mapping_func
from src.plotting import *

from sklearn.decomposition import PCA #Principal Component Analysis
from sklearn.manifold import TSNE #T-Distributed Stochastic Neighbor Embedding
from sklearn.cluster import KMeans #K-Means Clustering
from sklearn.preprocessing import StandardScaler

import plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.subplots import make_subplots

In [4]:
num_sims = 1
num_users = 943
num_clusters = 15

results_paths = {
    'repeated_training': ['sim_results/simulation1/repeated_training'],
    'single_training': ['sim_results/simulation1/single_training']
}
results_file = ["sim_results.pkl"]
repeated_training_results = merge_results(results_paths['repeated_training'], results_file)
single_training_results = merge_results(results_paths['single_training'], results_file)
results = {
    'single_training':single_training_results,
    'repeated_training':repeated_training_results
}
metric_keys = list(repeated_training_results.keys())
model_keys = list(repeated_training_results[metric_keys[0]].keys())
num_timesteps = len(repeated_training_results[metric_keys[0]][model_keys[0]][0])

environment_file = ["sim_environment.pkl"]
repeated_training_env = merge_results(results_paths['repeated_training'], environment_file)
single_training_env = merge_results(results_paths['single_training'], environment_file)
environments = {
    'repeated_training': repeated_training_env,
    'single_training': single_training_env
}
env_keys = list(repeated_training_env.keys())
model_keys = list(repeated_training_env[env_keys[0]].keys())

model_names_readable = {
    'baseline_myopic':'Myopic',
    'repeated_items_repeat_interactions':'Repeatable',
    'probabilistic':'Probabilistic',
    'random':'Random',
    'random_interleaving':'Random Interleaving',
    'xquad_binary_0.1': "Binary XquAD, α=0.1",
    'xquad_binary_0.25': "Binary XquAD, α=0.25",
    'xquad_smooth_0.1': "Smooth XquAD, α=0.1",
    'xquad_smooth_0.25': "Smooth XquAD, α=0.25"
}

In [5]:
def user_topic_distance(user_embeddings, topic_centroids):
    user_topic_mapping = user_topic_mapping_func(user_embeddings, topic_centroids)
    user_topic_centroids = topic_centroids[user_topic_mapping, :]
    user_topic_distance = np.linalg.norm(np.subtract(user_embeddings, user_topic_centroids), axis=1)
    return user_topic_distance

In [6]:
def generate_user_cluster_stats_df(df, num_clusts=15, name=''):
    result_df = pd.DataFrame(columns=['Cluster ID', 'No. initial users', 'Initial mean distance', 'Initial SD', 'No. final users', 'Final mean distance', 'Final SD'])
    result_df.name = name
    
    for clust_id in range(num_clusts):
        data = [
            clust_id,
            df[df.initial_cluster == clust_id].shape[0],
            df[df.initial_cluster == clust_id]['t_1'].mean(),
            df[df.initial_cluster == clust_id]['t_1'].std(),
            df[df.final_cluster == clust_id].shape[0],
            df[df.final_cluster == clust_id]['t_100'].mean(),
            df[df.final_cluster == clust_id]['t_100'].std()
        ]
        result_df.loc[len(result_df.index)] = data
    
    result_df['Cluster ID'] = result_df['Cluster ID'].astype('int')
    result_df.set_index('Cluster ID', inplace=True)    
    return result_df

In [7]:
def generate_user_topic_stats_df(df, num_clusts=15, name=''):
    result_df = pd.DataFrame(columns=['Cluster ID', 'No. initial users', 'Initial mean distance', 'Initial SD', 'No. final users', 'Final mean distance', 'Final SD'])
    result_df.name = name
    for clust_id in range(num_clusts):
        data = [
            int(clust_id),
            df[df.initial_topic_cluster == clust_id].shape[0],
            df[df.initial_topic_cluster == clust_id]['initial_topic_dist'].mean(),
            df[df.initial_topic_cluster == clust_id]['initial_topic_dist'].std(),
            df[df.final_topic_cluster == clust_id].shape[0],
            df[df.final_topic_cluster == clust_id]['final_topic_dist'].mean(),
            df[df.final_topic_cluster == clust_id]['final_topic_dist'].std()
        ]
        result_df.loc[len(result_df.index)] = data
        
    result_df['Cluster ID'] = result_df['Cluster ID'].astype('int')
    result_df.set_index('Cluster ID', inplace=True)    
        
    return result_df

In [42]:
def plot_violin(df, plot_subject, model_name='', training_type=-1, num_clusters=15):
    print(model_names_readable[model_name])
    fig = go.Figure()

    cluster_ids = [c for c in range(num_clusters)]
    
    if plot_subject == 'user_cluster':
        x_initial, x_final = 'initial_cluster', 'final_cluster'
        y_initial, y_final = 't_1', 't_100'
        initial_label = 'Initial user cluster assignment'
        final_label = 'Final user cluster assignment'
        plot_title = f"Distance from user embedding to cluster centroid - {model_name}"
    elif plot_subject == 'user_topic':
        x_initial, x_final = 'initial_topic_cluster', 'final_topic_cluster'
        y_initial, y_final = 'initial_topic_dist', 'final_topic_dist'
        plot_title = f"Distance from user embedding to topic centroid - {model_name}"
        initial_label = 'Initial user-topic mapping'
        final_label = 'Final user-topic mapping'
    
    if training_type > -1:
        df_plot = df[df['repeated_training'] == training_type]
        if training_type == 1:
            plot_title += ", Repeated training"
            # plot_title = "Repeated training"
        elif training_type == 0:
            plot_title += ", Single training"
            # plot_title = "Single training"
    else:
        df_plot = df
    
    initial_legend = True
    final_legend = True
    for clust_id in cluster_ids:
        # Plotting distribution of INITIAL user distance to INITIAL cluster by cluster
        fig.add_trace(go.Violin(x=df_plot[x_initial][df_plot[x_initial] == clust_id],
                                y=df_plot[y_initial][df_plot[x_initial] == clust_id],
                                legendgroup=f"Initial",
                                name=initial_label,
                                box_visible=True,
                                line_color='lightseagreen',
                                meanline_visible=True,
                                showlegend=(initial_legend),
                                ))
        # Plotting distribution of FINAL user distance to FINAL cluster 
        fig.add_trace(go.Violin(x=df_plot[x_final][df_plot[x_final] == clust_id],
                                y=df_plot[y_final][df_plot[x_final] == clust_id],
                                legendgroup='Final',
                                name=final_label,
                                box_visible=True,
                                line_color='mediumpurple',
                                meanline_visible=True,
                                showlegend=(final_legend)
                                ))
        if df_plot[y_initial][df_plot[x_initial] == clust_id].size > 0:
            initial_legend = False
        if df_plot[y_final][df_plot[x_final] == clust_id].size > 0:
            final_legend = False
        
    # fig.update_layout(violinmode='group')
    width_offset = 100 if len(plot_title)>80 else 0
    fig.update_layout(
        height=300,
        width=700 + width_offset,
        title={
        'text': plot_title,
        'y':0.95,
        'x':0.05,
        'xanchor': 'left',
        'yanchor': 'top'},
        xaxis_title="Cluster ID",
        yaxis_title="Distance",
        # legend_title="Cluster assignment before v. after ",
        xaxis = dict(ticktext=cluster_ids, tick0=0, dtick=1),
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
        margin=dict(l=20, r=10, t=50, b=20),
    )
    return fig

In [82]:
def plot_bar(df, plot_subject, model_name='', plot_var = 'No. users', num_clusters=15):
    index = [f"{i}" for i in range(num_clusters)]
    if plot_subject == 'user_cluster':
        plot_title = f"{plot_var} per user cluster - {model_names_readable[model_name]}"
        single_training_df = generate_user_cluster_stats_df(df[df['repeated_training'] == 0], num_clusters, name="User cluster count, single training")
        repeated_training_df = generate_user_cluster_stats_df(df[df['repeated_training'] == 1], num_clusters, name="User cluster count, repeated training")
        plot_legend = dict(yanchor="top", y=0.99, xanchor="left", x=0.01, font=dict(size= 11))
        # plot_legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99)
    elif plot_subject == 'user_topic':
        plot_title = f"{plot_var} per user-topic mapping - {model_names_readable[model_name]}"
        single_training_df = generate_user_topic_stats_df(df[df['repeated_training'] == 0], num_clusters, name="User cluster count, single training")
        repeated_training_df = generate_user_topic_stats_df(df[df['repeated_training'] == 1], num_clusters, name="User cluster count, repeated training")
        plot_legend = dict(yanchor="top", y=0.99, xanchor="right", x=0.99, font=dict(size= 11))
    # Environment variable to plot
    print(model_names_readable[model_name])
    env_var = env_vars[plot_var]
    
    plot_df = pd.concat(
        [   single_training_df[env_var],
            repeated_training_df[env_var],],
        axis=1,
        keys=["Single training", "Repeated training"])

    # Create a figure with the right layout
    fig = go.Figure(layout=go.Layout(height=600,
                                     width=1000,
                                     barmode="relative",
                                     yaxis_showticklabels=False,
                                     yaxis_showgrid=False,
                                     yaxis_range=[0, plot_df.groupby(axis=1, level=0).sum().max().max() * 1.25],
                                     # Secondary y-axis overlayed on the primary one and not visible
                                     yaxis2=go.layout.YAxis(visible=False,
                                                            matches="y",
                                                            overlaying="y",
                                                            anchor="x",),
                                     font=dict(size=24),
                                     legend_x=0,
                                     legend_y=1,
                                     legend_orientation="h",
                                     hovermode="x",
                                     # margin=dict(b=0,t=10,l=0,r=10)
                                    ))

    # Add the traces
    for i, t in enumerate(colors):
        for j, col in enumerate(plot_df[t].columns):
            if (plot_df[t][col] == 0).all():
                continue
            fig.add_bar(
                x=plot_df.index,
                y=plot_df[t][col],
                # Set the right yaxis depending on the selected product (from enumerate)
                yaxis=f"y{i + 1}",
                # Offset the bar trace, offset needs to match the width
                # The values here are in milliseconds, 1billion ms is ~1/3 month
                offsetgroup=str(i),
                offset=(i - 1) * 1/2,
                width=1/2,
                legendgroup=t,
                legendgrouptitle_text=t,
                name=col,
                marker_color=colors[t][col],
                text=plot_df[t][col],
                marker_line=dict(width=2, color="#333"),
                hovertemplate="%{y}<extra></extra>"
            )
            
    # print(plot_title)
    fig.update_layout(
        height=275,
        width=650,
        uniformtext_minsize=8, uniformtext_mode='hide',
        title=dict(text=plot_title, font=dict(size=18)),
        # title=None,
        xaxis_title=dict(text="Cluster ID", font=dict(size=15)),
        yaxis_title=dict(text=plot_var, font=dict(size=17)),
        # legend_title="Cluster assignment before v. after ",
        xaxis = dict(ticktext=index, tick0=0, dtick=1, tickfont=dict(size=15)),
        legend=plot_legend,
        legend_grouptitlefont=dict(size=14),
        # margin=dict(l=20, r=10, t=10, b=20),
        margin=dict(l=20, r=40, t=40, b=20),
    )
    # fig.show()
    return fig

    
# Define some colors for the product, revenue pairs
colors = {
    "Single training": {
        "No. initial users": "#F28F1D",
        "No. final users": "#F6C619",
        "Initial mean distance": "#F28F1D",
        "Final mean distance": "#F6C619",
        "Initial SD": "#F28F1D",
        "Final SD": "#F6C619",
    },
    "Repeated training": {
        "No. initial users": "#2B6045",
        "No. final users": "#5EB88A",
        "Initial mean distance": "#2B6045",
        "Final mean distance": "#5EB88A",
        "Initial SD": "#2B6045",
        "Final SD": "#5EB88A",
    },
}

env_vars = {
        'No. users':['No. initial users', 'No. final users'],
        'Mean distance from centroid':['Initial mean distance', 'Final mean distance'],
        'SD':['Initial SD', 'Final SD']
    }    

In [85]:
model = model_keys[8]
print(model)

path = f"figures"
fig_path = f"{path}/{model}"

if not os.path.exists(path):
    os.mkdir(path)
if not os.path.exists(fig_path):
    os.mkdir(fig_path)

xquad_smooth_0.25


# Difference between user cluster assignment initial v. user cluster assignment final

In [86]:
# Defining user cluster df
"""
Difference between user cluster assignment initial v. user cluster assignment final
"""

print(model)

training_types = ['single_training', 'repeated_training']

df_user_clusters = pd.DataFrame()

col_names = [f"t_{i+1}" for i in range(num_timesteps)]

for training in training_types:
    model_env = dict([(k, environments[training][k][model][0]) for k in env_keys])
    user_dist = results[training]['user_distance_from_cluster_centroid'][model][0]
    df_user_clust_dist = pd.DataFrame.from_dict(dict(zip(col_names, user_dist)))
    df_user_clust_dist['user_id'] = np.arange(num_users)
    if training == 'single_training':
        df_user_clust_dist['repeated_training'] = np.full(num_users, 0)
    else:
        df_user_clust_dist['repeated_training'] = np.full(num_users, 1)
    df_user_clust_dist['initial_cluster'] = model_env['user_cluster_assignments']
    df_user_clust_dist['final_cluster']  = user_topic_mapping_func(model_env['actual_user_representation_final'], model_env['user_cluster_centroids'])
    
    df_user_clusters = pd.concat([df_user_clusters, df_user_clust_dist], axis=0)

xquad_smooth_0.25


In [88]:
res = plot_bar(df_user_clusters, 'user_cluster', model, plot_var = 'No. users', num_clusters=15)
res.show()
file_path = f"/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_cluster_assignments/bar_plots/{model}_user_cluster_assignments.png"
print(file_path)
# res.write_image(file_path)

user_cluster
xquad_smooth_0.25
Smooth XquAD, α=0.25


/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_cluster_assignments/bar_plots/xquad_smooth_0.25_user_cluster_assignments.png


In [51]:
res = plot_violin(df_user_clusters, 'user_cluster', model, 0)
res.show()
file_path = f"/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_cluster_assignments/violin_plots/single_training_{model}_user_cluster_assignments.png"
print(file_path)
# res.write_image(file_path)

user_cluster
xquad_smooth_0.25
Smooth XquAD, α=0.25


/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_cluster_assignments/violin_plots/single_training_xquad_smooth_0.25_user_cluster_assignments.png


In [18]:
res = plot_violin(df_user_clusters, 'user_cluster', model, 1)
res.show()

file_path = f"/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_cluster_assignments/violin_plots/repeated_training_{model}_user_cluster_assignments.png"
print(file_path)
# res.write_image(file_path)

user_cluster
xquad_smooth_0.25
Smooth XquAD, α=0.25


/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_cluster_assignments/violin_plots/repeated_training_xquad_smooth_0.25_user_cluster_assignments.png


In [19]:
# len("distance from user embedding to cluster centroid - random interleaving, repeated training")

In [20]:
# generate_user_cluster_stats_df(df_user_clusters[df_user_clusters['repeated_training'] == 0], num_clusters, name="User cluster count, single training")

# Difference between user-topic mapping initial v. user-topic mapping final

In [21]:
# Defining user-topic mapping df
"""
# Difference between user-topic mapping initial v. user-topic mapping final
"""
print(model)

training_types = ['single_training', 'repeated_training']

curr_vars = ['actual_user_representation_initial', 'user_item_cluster_mapping', 'item_cluster_centroids', 'actual_user_representation_final']

df_topic_mapping = pd.DataFrame()

for training in training_types:
    model_env = dict([(k, environments[training][k][model][0]) for k in curr_vars])
    
    assert(np.array_equal(model_env['user_item_cluster_mapping'], user_topic_mapping_func(model_env['actual_user_representation_initial'], model_env['item_cluster_centroids'])))
    
    data = {
        'user_id': np.arange(num_users),
        'initial_topic_cluster': model_env['user_item_cluster_mapping'],
        'initial_topic_dist': user_topic_distance(model_env['actual_user_representation_initial'], model_env['item_cluster_centroids']),
        'final_topic_cluster': user_topic_mapping_func(model_env['actual_user_representation_final'], model_env['item_cluster_centroids']),
        'final_topic_dist': user_topic_distance(model_env['actual_user_representation_initial'], model_env['item_cluster_centroids'])
    }
    if training == 'single_training':
        data['repeated_training'] = np.full(num_users, 0)
    else:
        data['repeated_training'] = np.full(num_users, 1)
        
    df_data = pd.DataFrame(data)
    
    df_topic_mapping = pd.concat([df_topic_mapping, df_data], axis=0)    

xquad_smooth_0.25


In [22]:
res = plot_bar(df_topic_mapping, 'user_topic', model, plot_var = 'No. users', num_clusters=15)
res.show()

file_path = f"/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_topic_mapping/bar_plots/{model}_user_topic_mapping.png"
print(file_path)
# res.write_image(file_path)

user_topic
xquad_smooth_0.25
Smooth XquAD, α=0.25


/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_topic_mapping/bar_plots/xquad_smooth_0.25_user_topic_mapping.png


In [23]:
# res = plot_violin(df_topic_mapping, 'user_topic', model_names_readable[model], -1)
# res.show()


# file_path = f"/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_topic_mapping/violin_plots/{model}_user_topic_mapping.png"
# print(file_path)
# res.write_image(file_path)

In [24]:
res = plot_violin(df_topic_mapping, 'user_topic', model, 0)
res.show()

file_path = f"/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_topic_mapping/violin_plots/single_training_{model}_user_topic_mapping.png"
print(file_path)
# res.write_image(file_path)

user_topic
xquad_smooth_0.25
Smooth XquAD, α=0.25


/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_topic_mapping/violin_plots/single_training_xquad_smooth_0.25_user_topic_mapping.png


In [25]:
res = plot_violin(df_topic_mapping, 'user_topic', model, 1)
res.show()

file_path = f"/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_topic_mapping/violin_plots/repeated_training_{model}_user_topic_mapping.png"
print(file_path)
# res.write_image(file_path)

user_topic
xquad_smooth_0.25
Smooth XquAD, α=0.25


/Users/madisonthantu/Desktop/DREAM/T-RECS-RS-research/Figures/user_topic_mapping/violin_plots/repeated_training_xquad_smooth_0.25_user_topic_mapping.png


In [90]:
index = [f"{i}" for i in range(num_clusters)]

single_training_df = generate_user_topic_stats_df(df_topic_mapping[df_topic_mapping['repeated_training'] == 0], num_clusters, name="User cluster count, single training")
repeated_training_df = generate_user_topic_stats_df(df_topic_mapping[df_topic_mapping['repeated_training'] == 1], num_clusters, name="User cluster count, repeated training")
# single_training_df
# # df_topic_mapping

# env_vars = list(single_training_df.columns)
env_vars = {
    'No. users':['No. initial users', 'No. final users'],
    'Mean distance from centroid':['Initial mean distance', 'Final mean distance'],
    'SD':['Initial SD', 'Final SD']
}
# Environment variable to plot
plot_var = 'No. users'
env_var = env_vars[plot_var]

df_user_topic_dist = pd.concat(
    [
        single_training_df[env_var],
        repeated_training_df[env_var],
    ],
    axis=1,
    keys=["Single training", "Repeated training"]
)
df_user_topic_dist


Unnamed: 0_level_0,Single training,Single training,Repeated training,Repeated training
Unnamed: 0_level_1,No. initial users,No. final users,No. initial users,No. final users
Cluster ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,0.0,5.0,0.0,2.0
1,903.0,819.0,903.0,827.0
2,0.0,0.0,0.0,0.0
3,0.0,2.0,0.0,0.0
4,0.0,2.0,0.0,5.0
5,0.0,7.0,0.0,8.0
6,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,1.0
9,18.0,25.0,18.0,23.0


In [40]:
dfs = []
for training_type in environments.keys():
    user_cluster_rep = pd.DataFrame(columns=['No. Initial', 'No. Final'])
    for model in model_keys:
        curr_model = dict(zip(env_keys, [environments[training_type][env_k][model][0] for env_k in env_keys]))
        data = {
            'No. Initial': np.unique(curr_model['user_cluster_assignments']).size,
            'No. Final': np.unique(user_topic_mapping_func(curr_model['actual_user_representation_final'], curr_model['user_cluster_centroids'])).size,
        }
        user_cluster_rep = user_cluster_rep.append(pd.DataFrame(data, index=[model_names_readable[model]]))
    dfs.append(user_cluster_rep)
    
user_cluster_rep = pd.concat([dfs[0],
                            dfs[1],],
                           axis=1,
                           keys=["Single training", "Repeated training"])
user_cluster_rep

Unnamed: 0_level_0,Single training,Single training,Repeated training,Repeated training
Unnamed: 0_level_1,No. Initial,No. Final,No. Initial,No. Final
Myopic,15,7,15,8
Repeatable,15,9,15,9
Probabilistic,15,9,15,6
Random,15,6,15,5
Random Interleaving,15,5,15,6
"Binary XquAD, α=0.1",15,7,15,6
"Binary XquAD, α=0.25",15,7,15,6
"Smooth XquAD, α=0.1",15,6,15,7
"Smooth XquAD, α=0.25",15,6,15,7


In [41]:
colors = {
    "Single training": {
        "No. Final": "#F28F1D",
        "No. Initial": "#F6C619",
    },
    "Repeated training": {
        "No. Final": "#2B6045",
        "No. Initial": "#5EB88A",
    },
}

# Create a figure with the right layout
fig = go.Figure(layout=go.Layout(height=600,
                                    width=1000,
                                    barmode="relative",
                                    yaxis_showticklabels=False,
                                    yaxis_showgrid=False,
                                    yaxis_range=[0, user_cluster_rep.groupby(axis=1, level=0).sum().max().max() * 1.25],
                                    # Secondary y-axis overlayed on the primary one and not visible
                                    yaxis2=go.layout.YAxis(visible=False,
                                                        matches="y",
                                                        overlaying="y",
                                                        anchor="x",),
                                    font=dict(size=24),
                                    legend_x=0,
                                    legend_y=1,
                                    legend_orientation="h",
                                    hovermode="x",
                                    # margin=dict(b=0,t=10,l=0,r=10)
                                ))

# Add the traces
for i, t in enumerate(colors):
    for j, col in enumerate(['No. Final']):
        if (user_cluster_rep[t][col] == 0).all():
            continue
        fig.add_bar(
            x=user_cluster_rep.index,
            y=user_cluster_rep[t][col],
            # Set the right yaxis depending on the selected product (from enumerate)
            yaxis=f"y{i + 1}",
            # Offset the bar trace, offset needs to match the width
            # The values here are in milliseconds, 1billion ms is ~1/3 month
            offsetgroup=str(i),
            offset=(i - 1) * 1/2,
            width=1/2,
            legendgroup=t,
            legendgrouptitle_text=t,
            name=col,
            marker_color=colors[t][col],
            text=user_cluster_rep[t][col],
            marker_line=dict(width=2, color="#333"),
            hovertemplate="%{y}<extra></extra>"
        )
        
fig.update_layout(
    uniformtext_minsize=18, #uniformtext_mode='hide',
    title=dict(text=f"No. Initial User Clusters v. No. Final User Clusters",font=dict(size=20)),
    xaxis_title=dict(text="Model",font=(dict(size=15))),
    yaxis_title=dict(text="No. User Clusters",font=(dict(size=15))),
    font=dict(size=12),
    xaxis = dict(ticktext=index, tick0=0, dtick=1, tickangle=-45),
    # yaxis = dict(showticklabels = True),
    legend=dict(yanchor="top", y=0.99, xanchor="right", x=0.99),
    margin=dict(l=75, r=60, t=75, b=60),
)

fig.show()

0 No. Final
0 No. Final
