In [1]:
# Import the pandas library
import pandas as pd
import plotly.express as px
import numpy as np
from sklearn.preprocessing import LabelEncoder
import plotly.graph_objects as go

# Load the clustering results from 'result.xlsx', keeping only the columns at
# positions 1-9 (the first column of the sheet is skipped), and store the
# 'cluster' labels as strings so they can be used as categorical keys later on.
df = pd.read_excel('result.xlsx', usecols=range(1, 10)).astype({'cluster': str})


In [2]:
from sklearn.preprocessing import LabelEncoder

def transform_dataframe(df):
    """Aggregate the metric columns per unique label combination.

    Rows sharing the same ('Event name', 'Interest1', 'Host name',
    'User age bracket', 'cluster') combination are collapsed into one row
    whose metric columns ('Active users', 'Engaged sessions', 'Sessions',
    'New users') hold the sums over that group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the four label columns, 'cluster' and the four metric
        columns named above. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: 'concatenated' (the group key), the four summed
        metrics, then the four label columns and 'cluster'. One row per key.
    """
    label_columns = ['Event name', 'Interest1', 'Host name', 'User age bracket']
    metric_columns = ['Active users', 'Engaged sessions', 'Sessions', 'New users']

    # Work on a copy so the caller's frame does not silently gain helper columns.
    work = df.copy()

    # Integer-encode each label column (sorted unique values -> 0..k-1), the
    # same codes a freshly fitted LabelEncoder would assign to the strings.
    for column in label_columns:
        codes, _ = pd.factorize(work[column].astype(str), sort=True)
        work[f'{column} encoded'] = codes

    # Build the group key. The separator is essential: without it the codes
    # (1, 12) and (11, 2) would both produce "112" and distinct groups would
    # be merged together.
    key_parts = [work[f'{column} encoded'].astype(str) for column in label_columns]
    key_parts.append(work['cluster'].astype(str))
    concatenated = key_parts[0]
    for part in key_parts[1:]:
        concatenated = concatenated + '_' + part
    work['concatenated'] = concatenated

    # Exactly one label row per key, so the merge below can never fan out and
    # duplicate aggregated rows.
    df_labels = work[label_columns + ['cluster', 'concatenated']]
    df_labels = df_labels.drop_duplicates(subset='concatenated')

    df_new = work.groupby('concatenated')[metric_columns].sum().reset_index()
    df_new['concatenated'] = df_new['concatenated'].astype(str)
    df_new = df_new.merge(df_labels, on='concatenated', how='left')

    return df_new

In [3]:
# Collapse the raw rows into one row per label combination with summed metrics.
# NOTE: this rebinds `df` to the aggregated frame; the frame read from Excel is
# no longer reachable under this name afterwards.
df = transform_dataframe(df)

def create_figures(df, cluster):
    """Build percentage-share bar charts for one cluster.

    The rows of ``df`` whose 'cluster' equals ``cluster`` are grouped by each
    of 'Interest1', 'Event name', 'Host name', 'User age bracket' and
    'cluster'. Within each grouping, every float64 metric column is summed
    and converted to its percentage share of the column total (rounded to two
    decimals), and one bar chart is produced per metric.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the five grouping columns and the metric columns
        'Active users', 'Engaged sessions', 'Sessions', 'New users'.
    cluster
        Value compared against df['cluster'] to select the rows to plot.

    Returns
    -------
    dict
        Maps '<metric> by <grouping column>' to the plotly figure, in
        grouping-column-major order. Only metrics stored as float64 are
        charted; metrics of any other dtype are skipped.
    """
    metric_columns = ['Active users', 'Engaged sessions', 'Sessions', 'New users']
    columns_to_groupby = ['Interest1', 'Event name', 'Host name', 'User age bracket', 'cluster']

    cluster_df = df[df['cluster'] == cluster]

    figs = {}
    for column in columns_to_groupby:
        grouped_df = cluster_df.groupby(column)[metric_columns].sum().reset_index()

        # Look only at the metric columns so that a float-typed grouping key
        # can never be mistaken for a metric and normalised.
        float_columns = grouped_df[metric_columns].select_dtypes(include=['float64']).columns

        # Convert each metric to its percentage share of the column total.
        for float_column in float_columns:
            total = grouped_df[float_column].sum()
            grouped_df[float_column] = (grouped_df[float_column] / total * 100).round(2)

        for float_column in float_columns:
            # Hide zero bars for THIS metric only. The filtered frame is kept
            # in its own variable so a category that is zero in one metric
            # still shows up in the charts of the other metrics.
            plot_df = grouped_df[grouped_df[float_column] > 0]

            name = f'{float_column} by {column}'
            fig = px.bar(
                plot_df,
                x=column,
                y=float_column,
                title=f'<b>{float_column} by {column}</b>',
                color=float_column,
                color_continuous_scale="Bluered_r",
            )
            # Plotly placeholders are written '%{...}'; the plotted values are
            # percentage shares, hence the trailing percent sign.
            fig.update_traces(hovertemplate='%{y:.2f}%')
            figs[name] = fig

    return figs

In [4]:
# Build the full figure set for every cluster label present in the data,
# keyed by that label.
clusters = df['cluster'].unique()
figs_by_cluster = {label: create_figures(df, label) for label in clusters}

# Expose the four clusters under individual names. The labels are the string
# forms of the cluster ids; a label missing from the data raises KeyError here.
figs_cluster0, figs_cluster1, figs_cluster2, figs_cluster3 = (
    figs_by_cluster[label] for label in ('0.0', '1.0', '2.0', '3.0')
)

In [5]:
# Re-key each cluster's figures by position ('fig0', 'fig1', ...) in the order
# create_figures produced them; the descriptive titles are dropped as keys.
figs = {f'fig{i}': chart for i, chart in enumerate(figs_cluster0.values())}
figs1 = {f'fig{i}': chart for i, chart in enumerate(figs_cluster1.values())}
figs2 = {f'fig{i}': chart for i, chart in enumerate(figs_cluster2.values())}
figs3 = {f'fig{i}': chart for i, chart in enumerate(figs_cluster3.values())}

In [6]:

# Show the first chart built for cluster '2.0': position 0 is the first float
# metric grouped by 'Interest1' (the first entry of the grouping order).
fig = go.Figure(figs2['fig0'])
fig.show()

In [7]:
# Same chart position (0, first float metric by 'Interest1') for cluster '3.0'.
fig = go.Figure(figs3['fig0'])
fig.show()

In [8]:
# Show chart position 15 for cluster '3.0'.
# NOTE(review): presumably 'New users by User age bracket' -- this holds only
# if all four metrics are float64 (four charts per grouping column); confirm.
fig = go.Figure(figs3['fig15'])
fig.show()

In [9]:
# Show chart position 9 for cluster '0.0' (`figs` holds the cluster-'0.0' set).
# NOTE(review): presumably 'Engaged sessions by Host name' -- this holds only
# if all four metrics are float64 (four charts per grouping column); confirm.
fig = go.Figure(figs['fig9'])
fig.show()

In [10]:
import pickle

# Persist each cluster's figure dictionary to its own pickle file in the
# current working directory.
for pickle_path, figure_set in (
    ('figs.pkl', figs),
    ('figs1.pkl', figs1),
    ('figs2.pkl', figs2),
    ('figs3.pkl', figs3),
):
    with open(pickle_path, 'wb') as f:
        pickle.dump(figure_set, f)

In [11]:
# Positional keys dropped after loading.
# NOTE(review): presumably these are the four charts grouped by 'cluster'
# (the last grouping column) -- true only when four charts exist per grouping
# column; confirm.
keys_to_exclude = {'fig16', 'fig17', 'fig18', 'fig19'}


def _load_kept_figures(path):
    """Unpickle the figure dict stored at ``path`` and drop the excluded keys.

    Unpickling can execute arbitrary code, so only use this on pickle files
    written by this notebook itself.
    """
    with open(path, 'rb') as f:
        stored = pickle.load(f)
    return {k: v for k, v in stored.items() if k not in keys_to_exclude}


figs = _load_kept_figures('figs.pkl')
figs1 = _load_kept_figures('figs1.pkl')
figs2 = _load_kept_figures('figs2.pkl')
figs3 = _load_kept_figures('figs3.pkl')

In [12]:
# Sanity check: list the figure keys that remain in each cluster's dictionary.
for heading, figure_set in (
    ("Keys in figs: ", figs),
    ("Keys in figs1: ", figs1),
    ("Keys in figs2: ", figs2),
    ("Keys in figs3: ", figs3),
):
    print(heading, figure_set.keys())

Keys in figs:  dict_keys(['fig0', 'fig1', 'fig2', 'fig3', 'fig4', 'fig5', 'fig6', 'fig7', 'fig8', 'fig9', 'fig10', 'fig11', 'fig12', 'fig13', 'fig14', 'fig15'])
Keys in figs1:  dict_keys(['fig0', 'fig1', 'fig2', 'fig3', 'fig4', 'fig5', 'fig6', 'fig7', 'fig8', 'fig9', 'fig10', 'fig11', 'fig12', 'fig13', 'fig14', 'fig15'])
Keys in figs2:  dict_keys(['fig0', 'fig1', 'fig2', 'fig3', 'fig4', 'fig5', 'fig6', 'fig7', 'fig8', 'fig9', 'fig10', 'fig11', 'fig12', 'fig13', 'fig14', 'fig15'])
Keys in figs3:  dict_keys(['fig0', 'fig1', 'fig2', 'fig3', 'fig4', 'fig5', 'fig6', 'fig7', 'fig8', 'fig9', 'fig10', 'fig11', 'fig12', 'fig13', 'fig14', 'fig15'])
