In [None]:
import numpy as np
import pandas as pd
import scipy.stats as st
from statsmodels.stats.proportion import proportions_ztest

# Turn on pandas' copy_on_write mode for every cell that follows
pd.options.mode.copy_on_write = True

# Locations of the raw input files
url_client_profiles = 'data/df_final_demo.txt'
url_experiment_roster = 'data/df_final_experiment_clients.txt'
url_digital_footprints1 = 'data/df_final_web_data_pt_1.txt'
url_digital_footprints2 = 'data/df_final_web_data_pt_2.txt'

# Client profiles and the experiment roster each live in a single file
df_client_profiles = pd.read_csv(url_client_profiles)
df_exp_roster = pd.read_csv(url_experiment_roster)

# The digital footprints are delivered in two parts: read both, then stack them
df1, df2 = (
    pd.read_csv(part_path)
    for part_path in (url_digital_footprints1, url_digital_footprints2)
)
df_footprints = pd.concat([df1, df2])

In [None]:
# cleaning the datasets

# --- client profiles: spell out the abbreviated column names ---
profile_column_names = {
    'clnt_tenure_yr': 'client_tenure_years',
    'clnt_tenure_mnth': 'client_tenure_months',
    'clnt_age': 'client_age',
    'gendr': 'gender',
    'num_accts': 'num_accounts',
    'bal': 'balance',
    'calls_6_mnth': 'calls_6months',
    'logons_6_mnth': 'logins_6months',
}
df_client_profiles = df_client_profiles.rename(columns=profile_column_names)
# how="all": a row is removed only when every one of the listed attributes is missing
df_client_profiles_cleaned = df_client_profiles.dropna(
    subset=list(profile_column_names.values()), how="all"
)
# Gender code 'X' and missing gender are both stored as 'U'
df_client_profiles_cleaned['gender'] = (
    df_client_profiles_cleaned['gender']
    .apply(lambda code: 'U' if code == 'X' else code)
    .fillna('U')
)

# --- experiment roster: lower-case the column name, drop clients without a variation ---
df_exp_roster = df_exp_roster.rename(columns={'Variation': 'variation'})
df_exp_roster_cleaned = df_exp_roster.dropna(subset=["variation"], how="all")

# --- digital footprints: drop fully empty rows, then repeated events ---
# NOTE(review): the de-duplication key does not include process_step, so two different
# steps logged for the same client/visit at the same timestamp collapse into one row.
# Confirm that this is intended.
footprint_columns = ["client_id", "visitor_id", "visit_id", "process_step", "date_time"]
df_footprints_cleaned = (
    df_footprints
    .dropna(subset=footprint_columns, how="all")
    .drop_duplicates(subset=['client_id', 'visit_id', 'date_time'])
)

In [None]:
# joining footprints and experiment roster datasets
# Inner join on client_id: only footprint rows whose client appears in the cleaned
# experiment roster are kept. The join runs on indexed *views* of both frames, so
# neither cleaned frame is mutated and this cell can be re-run safely (no need to
# probe the kernel namespace to find out whether it already ran).
joined_df = (
    df_footprints_cleaned.set_index('client_id')
    .join(df_exp_roster_cleaned.set_index('client_id'), how='inner')
    .reset_index(drop=False)
)
joined_df['date_time'] = pd.to_datetime(joined_df['date_time'])

# Chronological order inside each visit, visits grouped by variation
df_sorted = joined_df.sort_values(by=['variation', 'visit_id', 'date_time'])

# Keep only the earliest "confirm" event of every visit; all other events are kept as they are
is_confirm = df_sorted["process_step"] == "confirm"
df_first_confirm = df_sorted[is_confirm].drop_duplicates(subset="visit_id", keep="first")
df_no_confirms = df_sorted[~is_confirm]

# Re-assemble both parts and restore the ordering with a fresh 0..n-1 index
df_sorted = (
    pd.concat([df_no_confirms, df_first_confirm])
    .sort_values(by=['variation', 'visit_id', 'date_time'])
    .reset_index(drop=True)
)

In [None]:
# Ensure 'date_time' is in datetime format
df_sorted['date_time'] = pd.to_datetime(df_sorted['date_time'])

# Sort by visit_id and date_time to ensure correct time difference calculation
df_sorted = df_sorted.sort_values(by=['visit_id', 'date_time'])

# Timestamp of the following event of the same visit (NaT for the last event of a visit)
next_date_time = df_sorted.groupby('visit_id')['date_time'].shift(-1)

# Time spent on a step = seconds until the next event of the same visit.
# The last event of every visit has no successor, so its duration is NaN here.
df_sorted['time_spent'] = (next_date_time - df_sorted['date_time']).dt.total_seconds()

# Compute the average time spent per process_step BEFORE the zero-fill below:
# mean() skips NaN, so only durations that were actually observed enter the average.
# Averaging after the fill would count every last event as "0 seconds" and pull
# each step's mean toward zero.
average_time_per_step = df_sorted.groupby('process_step')['time_spent'].mean()

# set 0 for NaN values (last steps) so the column has no gaps for later cells / export
df_sorted['time_spent'] = df_sorted['time_spent'].fillna(0)

In [None]:
##### Completion rate per group + one-sided hypothesis test #####

# Split the joined events by experiment arm
df_test = joined_df.loc[joined_df["variation"].eq("Test")]
df_control = joined_df.loc[joined_df["variation"].eq("Control")]

# Denominators: distinct visits in each arm
n_test = df_test['visit_id'].nunique()
n_control = df_control['visit_id'].nunique()

# Numerators: distinct visits that contain at least one "confirm" event
completed_test = df_test.loc[df_test['process_step'].eq('confirm'), 'visit_id'].nunique()
completed_control = df_control.loc[df_control['process_step'].eq('confirm'), 'visit_id'].nunique()

# Completion rate = completed visits / all visits
comp_rate_test = completed_test / n_test
comp_rate_control = completed_control / n_control

print(f"The completion rate for the control group (old version) is: {comp_rate_control:.4f}")
print(f"The completion rate for the test group (new version) is: {comp_rate_test:.4f}")

# Two-sample z-test for proportions. The test arm is listed first, so
# alternative='larger' corresponds to H1: rate(test) > rate(control).
count = np.array([completed_test, completed_control])  # successes per arm
nobs = np.array([n_test, n_control])  # visits per arm
z_stat, p_value = proportions_ztest(count, nobs, alternative='larger')

print(f"Z-statistic: {z_stat:.4f}")
print(f"P-value: {p_value:.4f}")

# Decision at the chosen significance level
alpha = 0.05
is_significant = p_value < alpha
if is_significant:
    print("Reject the null hypothesis: The new version has a significantly higher completion rate.")
else:
    print("Fail to reject the null hypothesis: No significant difference in completion rates.")

# Cost-effectiveness check: the difference between the two completion rates
# (test minus control) is compared against a fixed threshold of 0.05.
threshold = 0.05
observed_increase = comp_rate_test - comp_rate_control

if not (observed_increase >= threshold):
    print(f"The observed increase in completion rate is {observed_increase:.4f}, which is below the required 5% threshold.")
    print("The new design does not meet the cost-effectiveness criterion and may not justify the associated costs.")
else:
    print(f"The observed increase in completion rate is {observed_increase:.4f}, which meets or exceeds the required 5% threshold.")
    if is_significant:
        print("Additionally, the increase is statistically significant. The new design is both effective and justifiable from a cost perspective.")
    else:
        print("However, the increase is not statistically significant. Further analysis may be needed before making a final decision.")

In [None]:
# For every process step: the step that is expected to come directly before it
# (None for 'start', which has no predecessor).
possible_previous_steps = {
    'start': None,
    'step_1': 'start',
    'step_2': 'step_1',
    'step_3': 'step_2',
    'confirm': 'step_3'
}

def mark_errors(df, possible_previous_steps):
    """Return a sorted copy of ``df`` with a boolean ``error`` column added.

    Rows are ordered by visitor_id, visit_id and date_time. An event is flagged
    when it has a preceding event in the same visit_id and that preceding step is
    neither the step listed for it in ``possible_previous_steps`` nor the same
    step again. The first event of a visit is never flagged.

    Parameters
    ----------
    df : DataFrame with the columns visitor_id, visit_id, date_time, process_step.
    possible_previous_steps : dict mapping a step name to its expected predecessor.

    Returns
    -------
    A new DataFrame; the frame passed in is left untouched.
    """
    ordered = df.sort_values(by=['visitor_id', 'visit_id', 'date_time'])
    current_step = ordered['process_step']

    # Step of the preceding row within the same visit (missing for a visit's first row)
    prior_step = ordered.groupby(['visit_id'])['process_step'].shift(1)
    expected_prior = current_step.map(possible_previous_steps)

    has_prior = prior_step.notna()
    came_from_expected = prior_step.eq(expected_prior)
    repeated_same_step = prior_step.eq(current_step)

    return ordered.assign(error=has_prior & ~came_from_expected & ~repeated_same_step)

# Flag unexpected step transitions on the event table
df_with_errors = mark_errors(df_sorted, possible_previous_steps)

# Per-variation totals: number of flagged events and number of distinct visits
events_by_variation = df_with_errors.groupby('variation')
error_counts = events_by_variation['error'].sum()
visit_counts = events_by_variation['visit_id'].nunique()

# Defaults (0 errors, 1 visit) keep the divisions below defined if a variation is absent
error_control = error_counts.get('Control', 0)
error_test = error_counts.get('Test', 0)
visit_control = visit_counts.get('Control', 1)
visit_test = visit_counts.get('Test', 1)

# Flagged events per visit (one visit can contribute more than one flagged event)
error_rate_control, error_rate_test = error_control / visit_control, error_test / visit_test

In [None]:
# Hypothesis Testing
# H0: the share of visits with at least one error is equal in the control and test group
# H1: the two shares differ (two-sided test, the default of proportions_ztest)
alpha = 0.05

# A z-test for proportions needs, per group, a number of "successes" out of a number
# of trials. The trials here are visits, so the successes must be visits too: raw
# flagged-event totals are not valid because one visit can contain several flagged
# events. Count each visit at most once.
visits_with_error = (
    df_with_errors[df_with_errors['error']]
    .groupby('variation')['visit_id']
    .nunique()
)

# Define counts of visits with errors and total visits for both groups (control first)
errors = [visits_with_error.get('Control', 0), visits_with_error.get('Test', 0)]
visits = [visit_control, visit_test]

# Perform Z-test for proportions
z_stat, p_value = proportions_ztest(errors, visits)

print(f'Z_Stat: {z_stat}')
print(f'P_value: {p_value}')

The p-value is reported as 0.0 (i.e. it is too small to be represented as a floating-point number), which is far below the common significance threshold of 0.05. This means we reject the null hypothesis that the error rates for the test group and control group are equal.

Since the test group's error rate is higher than the control group's, this suggests that the variation introduced in the test group significantly increased errors.

Final Conclusion:
The test group has a statistically significantly higher error rate than the control group. This means the changes applied to the test group negatively impacted the user experience by leading to more errors.

In [None]:
# Per-gender summary tables for age, tenure and balance.
# A notebook renders only the LAST expression of a cell, so tables produced in the
# middle of a cell must be passed to display() (provided by the IPython kernel) or
# they are computed and thrown away.
summary_stats = ['mean', 'median', 'min', 'max', 'count']
profiles_by_gender = df_client_profiles_cleaned.groupby(['gender'])
for summary_column in ('client_age', 'client_tenure_years', 'balance'):
    display(profiles_by_gender[summary_column].agg(summary_stats))

# activity score calculation: logins + 0.5 * calls + 0.00001 * balance
# NOTE(review): this previously read the columns by position (8, 7 and 6). The names
# used here assume the raw profile file is ordered client_id followed by the eight
# columns renamed during cleaning - confirm against the file.
df_client_profiles_cleaned['activity_score'] = (
    df_client_profiles_cleaned['logins_6months']
    + (df_client_profiles_cleaned['calls_6months'] * 0.5)
    + (df_client_profiles_cleaned['balance'] * 0.00001)
)
# Show the clients with the highest score (a small slice instead of the whole frame)
display(df_client_profiles_cleaned.sort_values('activity_score', ascending=False).head(10))

#login and call scores: 80th / 20th percentile cut-offs used by the segment functions
high_login_threshold = df_client_profiles_cleaned['logins_6months'].quantile(0.8)
low_login_threshold = df_client_profiles_cleaned['logins_6months'].quantile(0.2)
high_call_threshold = df_client_profiles_cleaned['calls_6months'].quantile(0.8)
low_call_threshold = df_client_profiles_cleaned['calls_6months'].quantile(0.2)

def _segment_by_thresholds(value, low_threshold, high_threshold):
    """Map a count to an activity label using a low and a high cut-off.

    The high cut-off is checked first, so a value that satisfies both conditions
    (possible when the two cut-offs coincide) is labelled "Highly Active".
    Values for which neither comparison holds are labelled "Moderate".
    """
    if value >= high_threshold:
        return "Highly Active"
    if value <= low_threshold:
        return "Inactive"
    return "Moderate"


def segment_customer_logins(logins):
    """Label a login count using the module-level login thresholds.

    Returns "Highly Active", "Inactive" or "Moderate".
    """
    return _segment_by_thresholds(logins, low_login_threshold, high_login_threshold)


def segment_customer_calls(calls):
    """Label a call count using the module-level call thresholds.

    Returns "Highly Active", "Inactive" or "Moderate".
    """
    return _segment_by_thresholds(calls, low_call_threshold, high_call_threshold)

# Give every client one activity label based on logins and one based on calls
for activity_column, count_column, segmenter in (
    ("login_activity", "logins_6months", segment_customer_logins),
    ("calls_activity", "calls_6months", segment_customer_calls),
):
    df_client_profiles_cleaned[activity_column] = df_client_profiles_cleaned[count_column].apply(segmenter)

In [None]:
# Number of clients in each login-activity segment
login_activity_counts = df_client_profiles_cleaned["login_activity"].value_counts()

# Mean age, balance and tenure per (login segment, gender) - rendered as the cell output
login_profile_metrics = ["client_age", "balance", "client_tenure_years"]
df_client_profiles_cleaned.groupby(["login_activity", "gender"])[login_profile_metrics].mean()

In [None]:
# Number of clients in each call-activity segment
calls_activity_counts = df_client_profiles_cleaned["calls_activity"].value_counts()

# Mean age, balance and tenure per (call segment, gender) - rendered as the cell output
calls_profile_metrics = ["client_age", "balance", "client_tenure_years"]
df_client_profiles_cleaned.groupby(["calls_activity", "gender"])[calls_profile_metrics].mean()

In [None]:
# adjusting datatypes before exporting to csv
# Client-profile columns that are stored as whole numbers in the export
whole_number_columns = [
    'client_tenure_years',
    'client_tenure_months',
    'client_age',
    'num_accounts',
    'calls_6months',
    'logins_6months',
]
for column_name in whole_number_columns:
    column_values = df_client_profiles_cleaned[column_name]
    if column_name == 'client_age':
        # Missing ages are forward-filled before the cast.
        # NOTE(review): forward-fill takes the value of the preceding row, i.e. of a
        # different client - confirm that this imputation is intended.
        column_values = column_values.ffill()
    df_client_profiles_cleaned[column_name] = column_values.astype(int)

# Event table: time spent per step as whole seconds
df_with_errors['time_spent'] = df_with_errors['time_spent'].astype(int)

In [None]:
# Write the client profiles and the event-level A/B test table to CSV, without the index column
for export_frame, export_path in (
    (df_client_profiles_cleaned, 'data/client_profiles.csv'),
    (df_with_errors, 'data/abtest.csv'),
):
    export_frame.to_csv(export_path, index=False)