In [None]:
import pandas as pd
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset analysed in this notebook. Later cells read the columns
# 'Variation', 'visit_id', 'process_step' and 'age_group' from this frame.
df = pd.read_csv('../data/clean/df_join_clean.csv')

In [None]:
def _funnel_tables(visits):
    """Build the visit totals and funnel counts for one slice of the data.

    Parameters
    ----------
    visits : pandas.DataFrame
        Rows to summarise; must contain the columns 'Variation',
        'process_step' and 'visit_id'.

    Returns
    -------
    totals : pandas.Series
        Number of distinct visit_id values per Variation.
    step_counts : pandas.DataFrame
        Long table with one row per (Variation, process_step) pair; the
        'visit_id' column holds the number of distinct visit ids.
    pivoted : pandas.DataFrame
        step_counts reshaped wide: one row per Variation, one column per
        process_step.
    """
    totals = visits.groupby('Variation')['visit_id'].nunique()
    step_counts = (
        visits.groupby(['Variation', 'process_step'])['visit_id']
        .nunique()
        .reset_index()
    )
    pivoted = step_counts.pivot(
        index='Variation',
        columns='process_step',
        values='visit_id',
    )
    return totals, step_counts, pivoted


# The same three tables are built for the whole dataset and for each age
# segment, so a single helper replaces the three copy-pasted pipelines.
total_visits, unique_counts, funnel = _funnel_tables(df)
total_visits_ya, unique_counts_ya, funnel_ya = _funnel_tables(
    df[df["age_group"] == "Young Adult"]
)
total_visits_e, unique_counts_e, funnel_e = _funnel_tables(
    df[df["age_group"] == "Elderly"]
)

# Total visits in each group (overall, Young Adult, Elderly)
print("Total Visits: ")
display(total_visits, total_visits_ya, total_visits_e)

# Unique visits that reached each step, overall
print("\n\nFunnel Conversion Totals:")
display(funnel)


In [None]:

# Percentage of each Variation's unique visits that reached every step.
# Dividing along axis=0 aligns total_visits with the funnel rows by Variation.
conversion_rates = funnel.div(total_visits, axis=0).mul(100)
print("\n\nFunnel Conversion Rates:")
display(conversion_rates.round(2))

# abandonment rates: the complement of the conversion rate, in percent
abandonment_rates = conversion_rates.rsub(100)
# Add ratio row (Test/Control)
abandonment_rates.loc['Ratio'] = abandonment_rates.loc['Test'].div(
    abandonment_rates.loc['Control']
)
print("\n\nFunnel Abandonment Rates:")
display(abandonment_rates.round(2))


In [None]:

# Young Adults: percentage of the segment's unique visits, per Variation,
# that reached each step. div(..., axis=0) aligns the totals with the funnel
# rows by Variation, replacing the per-column apply/lambda.
conversion_rates_ya = funnel_ya.div(total_visits_ya, axis=0) * 100
# Heading names the segment so this table is distinguishable from the
# overall and Elderly conversion tables in the notebook output.
print("\n\nFunnel Conversion Rates Young Adults:")
display(conversion_rates_ya.round(2))


# abandonment rates Young Adults: complement of the conversion rate, in percent
abandonment_rates_ya = 100 - conversion_rates_ya
# Add ratio row (Test/Control)
abandonment_rates_ya.loc['Ratio'] = abandonment_rates_ya.loc['Test'] / abandonment_rates_ya.loc['Control']
print("\n\nFunnel Abandonment Rates Young Adults:")
display(abandonment_rates_ya.round(2))


In [None]:


# Elderly: percentage of the segment's unique visits, per Variation, that
# reached each step. div(..., axis=0) aligns the totals with the funnel rows
# by Variation, replacing the per-column apply/lambda.
conversion_rates_e = funnel_e.div(total_visits_e, axis=0) * 100
# Heading names the segment so this table is distinguishable from the
# overall and Young Adult conversion tables in the notebook output.
print("\n\nFunnel Conversion Rates Elderly:")
display(conversion_rates_e.round(2))

# abandonment rates Elderly: complement of the conversion rate, in percent
abandonment_rates_e = 100 - conversion_rates_e
# Add ratio row (Test/Control)
abandonment_rates_e.loc['Ratio'] = abandonment_rates_e.loc['Test'] / abandonment_rates_e.loc['Control']
print("\n\nFunnel Abandonment Rates Elderly:")
display(abandonment_rates_e.round(2))


In [None]:

# Chart: grouped bars comparing Test and Control conversion rates per step

fig, ax = plt.subplots(figsize=(12, 6))
bar_width = 0.35
steps = range(len(conversion_rates.columns))

# Test bars sit on the integer positions; Control bars are shifted right by
# one bar width so the two groups appear side by side.
ax.bar(steps, conversion_rates.loc['Test'], bar_width, label='Test')
ax.bar([x + bar_width for x in steps], conversion_rates.loc['Control'],
       bar_width, label='Control')

ax.set_xlabel('Process Step')
ax.set_ylabel('Conversion Rate (% of total visits)')
ax.set_title('Funnel Conversion Rates by Group')
# Centre each tick between its pair of bars. Labels come from the table's own
# column values rather than from bar positions, so they cannot drift from the
# data if the steps are not exactly 0..n-1.
ax.set_xticks([x + bar_width / 2 for x in steps])
ax.set_xticklabels([f'Step {step}' for step in conversion_rates.columns])
ax.legend()
plt.show()


In [None]:
# At which step is there abandonment, or is it overall?

In [None]:
# Set the hypothesis (one-way ANOVA across funnel steps)
#
# H0: mu step0 = mu step1 = mu step2 = mu step3 = mu step4
# H1: at least one step mean differs from the others
#
# NOTE(review): each sample passed to f_oneway is one column of `funnel`,
# i.e. one unique-visit count per Variation row. With so few observations per
# step the test has very little power — TODO confirm this is the intended test.

# Let's choose a significance level of 10%
alpha = 0.10

anova_result = st.f_oneway(funnel[0], funnel[1], funnel[2], funnel[3], funnel[4])

# Compare the p-value with alpha so the cell states a decision instead of
# leaving the significance level unused.
decision = "reject H0" if anova_result.pvalue < alpha else "fail to reject H0"
print(
    f"F = {anova_result.statistic:.4f}, p-value = {anova_result.pvalue:.4f} "
    f"-> {decision} at alpha = {alpha}"
)

# Keep the raw result as the cell output, as before
anova_result

In [None]:
# Show the Test/Control abandonment ratio row for each age segment in turn
for heading, rates in (
    ("Abandonment Ratios Young Adults", abandonment_rates_ya),
    ("\nAbandonment Ratios Elderly", abandonment_rates_e),
):
    print(heading)
    display(rates.loc['Ratio'])

In [None]:
# Set the hypothesis
# Claim 1: in proportion, the Elderly have a lower conversion rate in the Test group than in the Control group.
# Claim 2: compared to Young Adults, the abandonment rate for the Elderly is higher in the Test group than in the Control group.

# NOTE(review): the H0/H1 lines below are phrased in terms of funnel steps and
# do not express the Test-vs-Control claims above ("mu conversion" is not
# defined in the visible cells) — TODO restate them for the Elderly segment.
#H0: mu conversion = mu step1 = mu step2 = mu step3 = mu step4
#H1: mu step0 != mu step1 != mu step2 != mu step3 != mu step4

# Let's choose a significance level of 10%
alpha = 0.10