# In [None]:
import pandas as pd
import numpy as np
import re
import plotly.graph_objects as go

# Load the raw event export.
df = pd.read_csv(r"c:\\users\\hong\\downloads\\data_interview.csv")

# Coerce both measurement columns to numbers; entries that cannot be parsed
# become NaN instead of raising.
for numeric_column in ('quantity', 'level'):
  df[numeric_column] = pd.to_numeric(df[numeric_column], errors='coerce')

# Zero-fill every NaN in the frame (all columns, not only the two above).
df = df.fillna(0)

# Level phases, written as "low-high" labels
lst = [
    '1-10',
    '11-25',
    '26-40',
    '41-70',
    '71-100',
    '101-200',
    '300-500',
]

# Function to replace our ranges
def replace_range_by_ints(m):
  """Expand a matched "low-high" range into a comma-separated integer list.

  Args:
    m: Regex match object with exactly two capture groups, each holding the
      decimal digits of one bound of the range (e.g. a match on "3-6").

  Returns:
    The integers from the first bound to the second, both inclusive, joined
    by commas (e.g. "3,4,5,6"). An empty string if the first bound is
    greater than the second.
  """
  low, high = map(int, m.groups())
  # str.join accepts only strings, so every integer is converted first.
  return ','.join(map(str, range(low, high + 1)))

# Pattern capturing the two bounds of a "low-high" phase label.
# Raw string, so the backslashes reach the regex engine unchanged.
pattern = re.compile(r'(\d+)\-(\d+)')

# Filter data for 'game_end' events
game_end_data = df[df['event_name'] == 'game_end']

# Label each game_end row with the phase whose inclusive bounds contain its
# level. NOTE: `level` is numeric at this point (it is coerced with
# pd.to_numeric when the data is loaded), so phases are assigned by range
# comparison rather than by regex substitution on the column.
# Rows whose level falls in no phase stay NaN and are dropped by groupby.
game_end_phase = pd.Series(index=game_end_data.index, dtype=object, name='level')
for phase in lst:
  low, high = map(int, pattern.fullmatch(phase).groups())
  game_end_phase.loc[game_end_data['level'].between(low, high)] = phase

# Sum 'quantity' per phase for each version. reindex(lst, fill_value=0) puts
# the result in phase order and gives phases without any rows a total of 0.
is_160 = game_end_data['version'] == '1.6.0'
is_152 = game_end_data['version'] == '1.5.2'

game_end_160_agg = (
    game_end_data.loc[is_160, 'quantity']
    .groupby(game_end_phase[is_160])
    .sum()
    .reindex(lst, fill_value=0)
)
game_end_152_agg = (
    game_end_data.loc[is_152, 'quantity']
    .groupby(game_end_phase[is_152])
    .sum()
    .reindex(lst, fill_value=0)
)

# Plain arrays in the same order as `lst`, so bar i belongs to label lst[i].
game_end_160_data_sorted = game_end_160_agg.to_numpy()
game_end_152_data_sorted = game_end_152_agg.to_numpy()

# Create traces for each version's data (y values are ordered like x=lst)
trace1 = go.Bar(
    x=lst,
    y=game_end_160_data_sorted,
    name='Version 1.6.0',
    marker_color='#4D7DAB'
)

trace2 = go.Bar(
    x=lst,
    y=game_end_152_data_sorted,
    name='Version 1.5.2',
    marker_color='#41a4ff'
)

# Create the chart layout
layout = go.Layout(
    title="Users Experiences Between 2 Versions",
    xaxis_title="Level Phase",
    yaxis_title="Total Time Playing by Users",
    xaxis=dict(
        tickvals=np.arange(len(lst)),   # Positions for labels
        ticktext=lst                    # List of labels for each position
    ),
    barmode='group'  # Bars of both versions side by side within each phase
)

# Create the figure with traces and layout
fig = go.Figure(data=[trace1, trace2], layout=layout)

# Display the chart
fig.show()
