In [None]:
project/
│
├── app.py                # Main script to run the Streamlit app
├── data_loader.py        # Handles loading data
├── pages/                # Folder containing different pages of the app
│   ├── about.py          # About the project page
│   ├── summary.py        # Data summary page
│   ├── unique_values.py  # Unique values page
│   ├── stats.py          # Basic statistics page
│   ├── demographics.py   # Demographics analysis page
│   ├── duration.py       # Process duration analysis page
│   ├── hypothesis.py     # Hypothesis testing page
│   ├── completion.py     # Completion time analysis page
│   ├── bounce_rate.py    # Bounce rate analysis page
│   ├── error_rate.py     # Error rate analysis page
└── utils_                # Folder for utility functions
    ├── display.py        # Helper functions for displaying data

In [None]:
# data_loader.py
import pandas as pd
import streamlit as st

def load_data(file_path):
    """
    Load and return the data from the given file path.
    Also, calculate any derived columns like `completion` and `age_group`.
    """
    try:
        # Load data from the specified CSV file
        df = pd.read_csv('https://raw.githubusercontent.com/simovaeliska/second_project/refs/heads/main/data/clean/combined_cleaned_data1.csv')
        
        # Ensure 'completion' column is calculated if not present
        if 'completion' not in df.columns:
            df['completion'] = df['process_step'].apply(lambda x: 1 if x in ['confirm', 'completed'] else 0)
        
        # Age categorization (if not already done)
        if 'age_group' not in df.columns:
            bins = [0, 30, 40, 50, 100] 
            labels = ['Under 30', '30-39', '40-49', '50 and above']
            df['age_group'] = pd.cut(df['clnt_age'], bins=bins, labels=labels)

        return df
    except Exception as e:
        print(f"Error loading data: {str(e)}")
        return None

In [None]:
# app.py
import streamlit as st
from data_loader import load_data  # Assuming load_data is in the data_loader.py file
from pages import about, summary, unique_values, stats, demographics, hypothesistestcompletionrate, duration, completion, error_rate

def main():
    """
    Main function to run the Streamlit app for A/B Test Demo.
    
    Sets up the page configuration, loads data, and manages navigation
    through different pages of the app.
    """
    st.set_page_config(page_title="A/B Test Demo for Group 7")
    
    # Provide the correct path to the CSV
    data_path = "path_to_your_data.csv"  # Make sure this path is correct

    # Load the data from data_loader
    df = load_data(data_path)

    if df is None:
        st.error("Data could not be loaded. Please check your file path or data source.")
        return

    # Sort the data into control and test groups
    control_group = df[df['variation'] == 'Control']
    test_group = df[df['variation'] == 'Test']

    control_group_sorted = control_group.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])
    test_group_sorted = test_group.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])

    st.sidebar.title("Navigation")
    page = st.sidebar.radio("Select a page:", [
        "About the Project", 
        "Data Summary", 
        "Unique Values", 
        "Basic Statistics", 
        "Demographics Analysis", 
        "Hypothesis Testing Completion Rate", 
        "Process Duration Analysis",  
        "Completion Time Analysis",
        "Error Rate Hypothesis Testing"
    ])

    # Handle page navigation and pass the df and sorted groups to the page
    if page == "About the Project":
        about.show_about_project()
    elif page == "Data Summary":
        summary.show_data_summary(df)
    elif page == "Unique Values":
        unique_values.show_unique_values_in_categorical_columns(df)
    elif page == "Basic Statistics":
        stats.show_basic_statistics(df)
    elif page == "Demographics Analysis":
        demographics.show_demographics(df, control_group_sorted, test_group_sorted)
    elif page == "Hypothesis Testing Completion Rate":
        hypothesistestcompletionrate.show_page(df)
    elif page == "Process Duration Analysis":
        duration.show_process_duration(df)
    elif page == "Completion Time Analysis":
        completion.show_completion_time(df)
    elif page == "Error Rate Hypothesis Testing":
        error_rate.show_error_rate_analysis(df)

if __name__ == "__main__":
    main()

In [None]:
# pages/about.py
import streamlit as st
from data_loader import load_data

def show_about_project():
    st.title("About the Project")
    
    st.header("Project Overview by Ceci and Eliska")
    st.write(
        """
        An A/B test was conducted from 3/15/2017 to 6/20/2017 by the Vanguard team to evaluate the impact of a new digital interface.

        **Control Group**: Clients interacted with Vanguard’s traditional online process.  
        **Test Group**: Clients experienced the new, spruced-up digital interface.

        ### Project Timeline:
        - **Day 1 & 2 (Week 5)**  
          EDA & Data Cleaning  
          Client behavior analysis – identifying relations and forming hypotheses.

        - **Day 3 (Week 5)**  
          Performance Metrics  
          Success Indicators  
          Redesign Outcome

        - **Day 4 & 5 (Week 5)**  
          Hypothesis Testing  
          Completion Rate  
          Cost-Effectiveness Threshold  
          Additional Hypothesis Examples  
          Experiment Evaluation  
          Design Effectiveness  
          Duration Assessment  
          Further Data Needs

        - **Day 1 & 2 (Week 6)**  
          Tableau  
          Visualization Tasks

        - **Day 3 & 4 (Week 6)**  
          Further Data Analysis and Presentation Preparation
        """
    )

    # About Vanguard Group
    st.subheader("About Vanguard Group")
    st.write(
        """
        The Vanguard Group is one of the world’s largest investment management companies, founded in 1975 by John C. Bogle. It is renowned for pioneering low-cost index funds, which allow investors to track broad market indices with minimal fees. Vanguard’s investor-owned structure means that the company is owned by the funds it manages, aligning its interests with those of its investors. Today, Vanguard manages trillions of dollars in assets, offering a wide range of investment products including mutual funds, ETFs, and retirement plans, with a focus on long-term, passive investing strategies.
        """
    )

    # New Sub-heading: Digital Challenge and Our Role
    st.subheader("Digital Challenge and Our Role")
    st.write(
        """
        **A/B Test Objective:**  
        The goal of this experiment was to evaluate a new, modernized user interface (UI) designed to improve the online client journey through in-context prompts.

        **Experiment Details:**
        - **Control Group**: Clients used the traditional Vanguard user interface.
        - **Test Group**: Clients experienced the new, intuitive UI with contextual prompts designed to guide them through the online process.

        **Timeline:**  
        The experiment took place from **March 15, 2017 – June 20, 2017**.

        **Process Flow:**  
        Clients followed a sequence of steps starting from the initial page, progressing through three key steps, and finishing at the confirmation page.

        *As part of the CX (Customer Experience) team, our job was to evaluate whether the new UI leads to better user engagement and higher completion rates during online processes.*
        """
    )

    # **Did the new UI lead to higher completion rates?**
    st.write("**Did the new UI lead to higher completion rates?**")
    st.write(
        """
        The A/B test was designed to determine whether the new, intuitive UI with contextual prompts improved user engagement and completion rates compared to the traditional Vanguard user interface. The results from hypothesis testing and analysis will help answer this key question.
        """
    )

    # New Section: What Data We Have Been Working With
    st.subheader("What Data We Have Been Working With")
    st.write(
        """
        The analysis is based on a rich set of data that provides insights into both client demographics and their digital interactions with Vanguard's platform. Below is a summary of the key datasets:

        - **Client Profiles**: This includes demographic and account details such as:
          - **Client's Age**: The age of each client.
          - **Client's Tenure**: How long each client has been with Vanguard, in both years and months.
          - **Client's Balance**: The total balance held across all of a client's accounts with Vanguard.
          - **Number of Accounts**: The number of accounts the client holds with Vanguard.

        - **Digital Footprints**: This data captures the client's online activity at each step in the client journey:
          - **Process Steps**: The sequence of steps each client went through in their digital process (e.g., initial page, steps 1-3, and confirmation page).
          - **Date-Time Logs**: Timestamps for each action taken by the client during their online session, allowing us to track the client’s progression through each step.

        - **Experiment Roster**: This dataset indicates which group each client was assigned to (Control or Test), based on their unique **client_id**. It helps us compare the performance of the Control group (traditional UI) against the Test group (new UI with contextual prompts).
        """
    )

    # New Section: Key Performance Indicators (KPIs)
    st.markdown("## **Key Performance Indicators (KPI's)**")
    st.write(
        """
        The following KPIs were tracked to evaluate the effectiveness of the experiment and assess user engagement:
        
        - **Completion Rate**: The percentage of users who completed the process flow from start to finish. This helps to evaluate whether the new interface leads to higher engagement and successful outcomes.
        - **Engagement Metrics**:
          - **Logins**: The frequency with which clients logged into their accounts, a measure of engagement and usage.
          - **Calls**: The number of client service calls made, indicating the level of client support needed.
        - **Average Account Balance**: The average balance held by clients, which provides insights into client wealth and account activity.
        - **Tenure**: The number of years a client has been with Vanguard, reflecting client loyalty and long-term engagement.
        """
    )

    st.header("Getting Started")
    st.write(
        """
        ## Metadata
        This comprehensive set of fields will guide your analysis, helping you uncover insights into client behavior and preferences.

        - **client_id**: A unique identifier for each client.
        - **variation**: Denotes whether a client was part of the experiment (Control or Test).
        - **visitor_id**: A unique ID for each client-device combination.
        - **visit_id**: A unique ID for each web visit/session.
        - **process_step**: The step in the digital process that the client is engaged in.
        - **date_time**: Timestamp of each web activity.
        - **clnt_tenure_yr**: The number of years a client has been with Vanguard.
        - **clnt_tenure_mnth**: The number of months a client has been with Vanguard.
        - **clnt_age**: The client’s age.
        - **gendr**: The client’s gender.
        - **num_accts**: The number of accounts the client holds with Vanguard.
        - **bal**: The total balance across all client accounts.
        - **calls_6_mnth**: The number of times the client reached out over the phone in the last 6 months.
        - **logons_6_mnth**: The number of times the client logged into Vanguard’s platform over the last 6 months.

        ## Bonus: Additional Tasks (Optional)
        If you have extra time after completing the core tasks, explore the following optional questions and activities:

        - **Client Behavior Analysis**: Dive deeper into the data to analyze client behavior patterns and trends.
        - **Power and Effect Size**: Perform power analysis to determine the sample size needed for a reliable test and compute the effect size.
        - **Streamlit Integration**: Add Streamlit widgets to your project to enable real-time analysis and visualization. Customize your app for interactivity.
        """
    )

    # **Data Cleaning & Merging Process** section
    st.subheader("Data Cleaning & Merging Process")
    st.write(
        """
        The data cleaning and merging process is crucial for ensuring the integrity and consistency of the dataset. Here's how we handled the data:

        **Clean Datasets**:
        - **Load Data**: We begin by loading the raw data from various sources.
        - **Drop Missing Values**: Any missing values in critical columns (like `client_id`, `process_step`, etc.) are removed to ensure that we only work with complete data.
        - **Rename Columns**: Columns are renamed to have consistent naming conventions and be more meaningful for analysis.
        - **Remove 'X' Gender Values**: Any rows with 'X' as a gender label are removed as this represents incomplete or erroneous data.
        - **Map Gender Codes to Labels**: Gender codes are mapped to more readable labels (e.g., 'M' -> 'Male', 'F' -> 'Female').
        - **Ensure Proper DateTime Format**: We ensure that the `date_time` column is in the correct format (i.e., datetime objects), allowing for time-based analysis.

        **Merge Datasets**:
        - **Merge Demographic Datasets**: We combine the demographic data (client information such as age, tenure, balance) into a single dataset.
        - **Concatenate Web Interactions Dataset**: The web interactions (client's steps through the online process) are merged with the demographic data based on the `client_id`.
        - **Final Merge on `client_id`**: After all data is cleaned and preprocessed, we merge all datasets on the common `client_id` column to create a comprehensive dataset.

        **Segment by Experiment Group**:
        - **Control and Test Groups**: The dataset is split into two groups based on the `variation` column: the Control group (clients using the traditional UI) and the Test group (clients using the new UI with contextual prompts).

        **Sort Data**:
        - **Sort by client_id, visit_id, process_step, and date_time**: This ensures that the data is ordered by the client's journey through the process, from their first visit to the confirmation step.

        **Age Group Categorization**:
        - **Assign Age Groups**: Based on defined age bins, we categorize clients into age groups (e.g., under 30, 30-39, 40-49, 50+). This categorization is applied to the `df_merged`, as well as to the `control` and `test` datasets to facilitate age-based analysis.
        """
    )

    # **Who are our clients?** section
    st.subheader("Who Are Our Clients?")
    st.write(
        """
        For demographic analysis, we worked with a pool of **70,591 users** after applying the cleaning and merging processes to ensure data quality.

        **Group with highest average tenure and balance**:
        - **Gender**: Male
        - **Age Group**: 50 and above
        - **Client's Tenure**: 16.35 years
        - **Balance**: 294,239.72$
        **Average Persona**:
        - **Gender**: Male
        - **Age Group**: 30-39
        - **Average Tenure**: 11.65 years
        - **Average Balance**: 126,284.41$
        """
    )

    # **Further Observations** section
    st.subheader("Further Observations")
    st.write(
        """
        - Males generally have higher average balances than females, with the highest balances observed in the "50 and above" age group for both genders.
        - Both males and females show similar tenure patterns, with longer tenures seen in older age groups (50+ years), indicating long-term clients.
        - The “Unknown" gender category typically has lower average balances, likely due to potential data gaps or non-disclosure of gender.
        """
    )
    # **Did the new UI lead to higher completion rates?**
    st.write("**Did the new UI lead to higher completion rates?**")
    st.write(
        """
        The A/B test was designed to determine whether the new, intuitive UI with contextual prompts improved user engagement and completion rates compared to the traditional Vanguard user interface. The results from hypothesis testing and analysis will help answer this key question.
        """
    )

    # New Section: What Data We Have Been Working With
    st.subheader("What Data We Have Been Working With")
    st.write(
        """
        The analysis is based on a rich set of data that provides insights into both client demographics and their digital interactions with Vanguard's platform. Below is a summary of the key datasets:

        - **Client Profiles**: This includes demographic and account details such as:
          - **Client's Age**: The age of each client.
          - **Client's Tenure**: How long each client has been with Vanguard, in both years and months.
          - **Client's Balance**: The total balance held across all of a client's accounts with Vanguard.
          - **Number of Accounts**: The number of accounts the client holds with Vanguard.

        - **Digital Footprints**: This data captures the client's online activity at each step in the client journey:
          - **Process Steps**: The sequence of steps each client went through in their digital process (e.g., initial page, steps 1-3, and confirmation page).
          - **Date-Time Logs**: Timestamps for each action taken by the client during their online session, allowing us to track the client’s progression through each step.

        - **Experiment Roster**: This dataset indicates which group each client was assigned to (Control or Test), based on their unique **client_id**. It helps us compare the performance of the Control group (traditional UI) against the Test group (new UI with contextual prompts).
        """
    )

    # New Section: Key Performance Indicators (KPIs)
    st.markdown("## **Key Performance Indicators (KPI's)**")
    st.write(
        """
        The following KPIs were tracked to evaluate the effectiveness of the experiment and assess user engagement:
        
        - **Completion Rate**: The percentage of users who completed the process flow from start to finish. This helps to evaluate whether the new interface leads to higher engagement and successful outcomes.
        - **Engagement Metrics**:
          - **Logins**: The frequency with which clients logged into their accounts, a measure of engagement and usage.
          - **Calls**: The number of client service calls made, indicating the level of client support needed.
        - **Average Account Balance**: The average balance held by clients, which provides insights into client wealth and account activity.
        - **Tenure**: The number of years a client has been with Vanguard, reflecting client loyalty and long-term engagement.
        """
    )

    st.header("Getting Started")
    st.write(
        """
        ## Metadata
        This comprehensive set of fields will guide your analysis, helping you uncover insights into client behavior and preferences.

        - **client_id**: A unique identifier for each client.
        - **variation**: Denotes whether a client was part of the experiment (Control or Test).
        - **visitor_id**: A unique ID for each client-device combination.
        - **visit_id**: A unique ID for each web visit/session.
        - **process_step**: The step in the digital process that the client is engaged in.
        - **date_time**: Timestamp of each web activity.
        - **clnt_tenure_yr**: The number of years a client has been with Vanguard.
        - **clnt_tenure_mnth**: The number of months a client has been with Vanguard.
        - **clnt_age**: The client’s age.
        - **gendr**: The client’s gender.
        - **num_accts**: The number of accounts the client holds with Vanguard.
        - **bal**: The total balance across all client accounts.
        - **calls_6_mnth**: The number of times the client reached out over the phone in the last 6 months.
        - **logons_6_mnth**: The number of times the client logged into Vanguard’s platform over the last 6 months.

        ## Bonus: Additional Tasks (Optional)
        If you have extra time after completing the core tasks, explore the following optional questions and activities:

        - **Client Behavior Analysis**: Dive deeper into the data to analyze client behavior patterns and trends.
        - **Power and Effect Size**: Perform power analysis to determine the sample size needed for a reliable test and compute the effect size.
        - **Streamlit Integration**: Add Streamlit widgets to your project to enable real-time analysis and visualization. Customize your app for interactivity.
        """
    )

    # **Data Cleaning & Merging Process** section
    st.subheader("Data Cleaning & Merging Process")
    st.write(
        """
        The data cleaning and merging process is crucial for ensuring the integrity and consistency of the dataset. Here's how we handled the data:

        **Clean Datasets**:
        - **Load Data**: We begin by loading the raw data from various sources.
        - **Drop Missing Values**: Any missing values in critical columns (like `client_id`, `process_step`, etc.) are removed to ensure that we only work with complete data.
        - **Rename Columns**: Columns are renamed to have consistent naming conventions and be more meaningful for analysis.
        - **Remove 'X' Gender Values**: Any rows with 'X' as a gender label are removed as this represents incomplete or erroneous data.
        - **Map Gender Codes to Labels**: Gender codes are mapped to more readable labels (e.g., 'M' -> 'Male', 'F' -> 'Female').
        - **Ensure Proper DateTime Format**: We ensure that the `date_time` column is in the correct format (i.e., datetime objects), allowing for time-based analysis.

        **Merge Datasets**:
        - **Merge Demographic Datasets**: We combine the demographic data (client information such as age, tenure, balance) into a single dataset.
        - **Concatenate Web Interactions Dataset**: The web interactions (client's steps through the online process) are merged with the demographic data based on the `client_id`.
        - **Final Merge on `client_id`**: After all data is cleaned and preprocessed, we merge all datasets on the common `client_id` column to create a comprehensive dataset.

        **Segment by Experiment Group**:
        - **Control and Test Groups**: The dataset is split into two groups based on the `variation` column: the Control group (clients using the traditional UI) and the Test group (clients using the new UI with contextual prompts).

        **Sort Data**:
        - **Sort by client_id, visit_id, process_step, and date_time**: This ensures that the data is ordered by the client's journey through the process, from their first visit to the confirmation step.

        **Age Group Categorization**:
        - **Assign Age Groups**: Based on defined age bins, we categorize clients into age groups (e.g., under 30, 30-39, 40-49, 50+). This categorization is applied to the `df_merged`, as well as to the `control` and `test` datasets to facilitate age-based analysis.
        """
    )

    # **Who are our clients?** section
    st.subheader("Who Are Our Clients?")
    st.write(
        """
        For demographic analysis, we worked with a pool of **70,591 users** after applying the cleaning and merging processes to ensure data quality.

        **Group with highest average tenure and balance**:
        - **Gender**: Male
        - **Age Group**: 50 and above
        - **Client's Tenure**: 16.35 years
        - **Balance**: 294,239.72$
        **Average Persona**:
        - **Gender**: Male
        - **Age Group**: 30-39
        - **Average Tenure**: 11.65 years
        - **Average Balance**: 126,284.41$
        """
    )

    # **Further Observations** section
    st.subheader("Further Observations")
    st.write(
        """
        - Males generally have higher average balances than females, with the highest balances observed in the "50 and above" age group for both genders.
        - Both males and females show similar tenure patterns, with longer tenures seen in older age groups (50+ years), indicating long-term clients.
        - The “Unknown" gender category typically has lower average balances, likely due to potential data gaps or non-disclosure of gender.
        """
    )
    # New Section: Key Performance Indicators (KPIs)
    st.markdown("## **Key Performance Indicators (KPI's)**")
    st.write(
        """
        The following KPIs were tracked to evaluate the effectiveness of the experiment and assess user engagement:
        
        - **Completion Rate**: The percentage of users who completed the process flow from start to finish. This helps to evaluate whether the new interface leads to higher engagement and successful outcomes.
        - **Engagement Metrics**:
          - **Logins**: The frequency with which clients logged into their accounts, a measure of engagement and usage.
          - **Calls**: The number of client service calls made, indicating the level of client support needed.
        - **Average Account Balance**: The average balance held by clients, which provides insights into client wealth and account activity.
        - **Tenure**: The number of years a client has been with Vanguard, reflecting client loyalty and long-term engagement.
        
        ### Additional Insights:
        
        - **Completion Rate**:  
          If the **Test group** shows a higher completion rate compared to the **Control group**, it suggests that the new design has improved user engagement, making it easier for users to complete the process. This could indicate that the intuitive interface or contextual prompts helped users to feel more confident and successful in completing the steps.

        - **Time Spent on Each Step**:  
          A lower time spent at each step in the **Test group** compared to the **Control group** would suggest that the new design is more efficient. This can be interpreted as users moving through the process more quickly, likely due to the improved clarity or guidance offered by the new UI design.

        - **Error Rates**:  
          A reduction in error rates in the **Test group** would suggest that the new design is more intuitive and user-friendly. This could mean that fewer users encountered confusion or made mistakes during the process, likely because the contextual prompts and simplified steps helped guide them more effectively.

        - **Bounce Rates**:  
          If the **Test group** has a lower bounce rate compared to the **Control group**, it suggests that the new design is better at keeping users engaged throughout the process. A lower bounce rate would indicate that users are more likely to stay in the process, reducing drop-offs or exits at any given step, which is a good indicator of user retention and interest.

        """
    )
        # New Section: Tableau Visuals
    st.markdown("## **Tableau Visuals**")
    st.write(
        """
        You can explore the interactive Tableau dashboard for further insights into the data and visualizations by following the link below:
        
        [**Vanguard CX Story**](https://public.tableau.com/app/profile/eliska.simova/viz/Vanguard_CX/Vanguard_story?publish=yes)
        """
    )

    # New Section: Hypothesis Testing
    st.markdown("## **Hypothesis Testing**")
    
    # Hypothesis 1
    st.subheader("Hypothesis 1: Completion Rate Comparison")
    st.write(
        """
        **Hypothesis**: "Is there a significant difference in the completion rates between the Control group and the Test group at each step of the process?"

        - All steps show statistically significant differences in completion rates between the **Test group** (new UI) and **Control group** (old UI).
        - A **p-value of 0.0000** for each step indicates strong evidence against the null hypothesis.
        - We **reject the null hypothesis** at all steps, meaning that the new design does indeed have a significantly different impact on user completion rates at each step compared to the old design.
        """
    )

    # Hypothesis 2
    st.subheader("Hypothesis 2: Completion Rate Increase")
    st.write(
        """
        **Hypothesis**: "Does the introduction of the new UI design result in a minimum 5% increase in the completion rate compared to the existing design, making it cost-effective?"

        - The completion rate increase of **9.82%** between the **Test** and **Control** groups exceeds the **5%** threshold set by Vanguard.
        - The **new UI design** could be considered **worthwhile from a business perspective** due to this significant increase in the completion rate.
        """
    )

    # Hypothesis 3
    st.subheader("Hypothesis 3: Average Client Tenure Comparison")
    st.write(
        """
        **Hypothesis**: "Is the average client tenure of those engaging with the new process the same as those engaging with the old process?"

        - **Control Group Average Tenure**: 12.09 years
        - **Test Group Average Tenure**: 11.98 years
        - **p-value**: 0.0868, **Statistic**: 1.7124 (indicates how much the means differ relative to variability)
        - We **fail to reject the null hypothesis**, meaning the **average tenure is not significantly different** between the two groups.
        - The lack of significant difference in **average tenure** supports the validity of the A/B test results.
        """
    )

    # Hypothesis 4
    st.subheader("Hypothesis 4: Average Client Age Comparison")
    st.write(
        """
        **Hypothesis**: "Is the average client age of those engaging with the new process the same as those engaging with the old process?"

        - **Control Average Age**: 47.50 years
        - **Test Average Age**: 47.16 years
        - **p-value**: 0.0160
        - Since the **p-value is less than 0.05**, we **reject the null hypothesis**, indicating that there is a **statistical difference** in the average age between the Test and Control groups.
        """
    )

    # Hypothesis 5
    st.subheader("Hypothesis 5: Error Rate Comparison")
    st.write(
        """
        **Hypothesis**: "Does the new UI design lead to a reduction in error rates compared to the old design, and is this reduction statistically significant?"

        - **Control Error Rate**: 19.21%
        - **Test Error Rate**: 17.64%
        - **Percentage Difference**: 1.57%
        - **p-value**: 0.0000
        - We **reject the null hypothesis** that there is no difference between the groups, indicating that the **new UI** has a **significantly lower error rate** than the old UI.
        """
    )

    # Hypothesis 6
    st.subheader("Hypothesis 6: Bounce Rate Comparison")
    st.write(
        """
        **Hypothesis**: “Is the bounce rate of the Test group lower than the Control group across all steps?”

        - The **Test group** showed a **statistically significant lower bounce rate** only at **Step 1**.
        - For **Steps 0**, **2**, and **3**, there were **no statistically significant differences** between the Control and Test groups.
        """
    )

    # New Section: Experiment Evaluation
    st.markdown("## **Experiment Evaluation**")
    st.write(
        """
        - **Average Ages**: The average age is statistically different between the Test and Control groups, as indicated by the **p-value of 0.0160**, which is less than the **0.05 threshold**.
        - This means the distribution of **ages** between the two groups is **not uniform**, and age differences could potentially introduce a **bias in the observed completion rate**.
        """
    )
    # New Section: Recommendations
    st.markdown("## **_Recommendations_**")
    
    # Recommendation 1
    st.subheader("1. **Simplify the Process**")
    st.write(
        """
        All age groups may benefit from a more straightforward flow.

        💡 **Reduce unnecessary steps and minimize user decisions to streamline the experience**.
        """
    )

    # Recommendation 2
    st.subheader("2. **Provide Contextual Help**")
    st.write(
        """
        High bounce rates may suggest confusion or hesitation; users might benefit from guidance or support.

        💡 **Add tooltips, help icons, and live support to guide users**.
        """
    )

    # Recommendation 3
    st.subheader("3. **Improve Visual Design**")
    st.write(
        """
        Bounce rates at certain steps suggest that users might find some of the content unclear.

        💡 **Ensure readability and consistency in font size, contrast, and button design**.
        """
    )

    # Recommendation 4
    st.subheader("4. **Personalize Experience**")
    st.write(
        """
        Users might lose track of where they are in the process or forget to complete it.

        💡 **Offer reminders and allow users to save their progress for later completion**.
        """
    )

In [None]:
# pages/summary.py
import streamlit as st
import pandas as pd
from data_loader import load_data

def show_data_summary(df):
    st.subheader("CSV Data Overview")
    st.write(f"Number of rows: {df.shape[0]}")
    st.write(f"Number of columns: {df.shape[1]}")
    st.write("First 5 rows of the dataset:")
    st.dataframe(df.head())

# Assuming you're loading your data in the main part of the app or another script
if __name__ == "__main__":
    # Example: loading a CSV file
    # Change the path below to your actual file location
    df = pd.read_csv("path_to_your_data.csv")  # Load data from CSV

    # Now call the function and pass the DataFrame `df`
    show_data_summary(df)

In [None]:
# pages/unique_values.py
import streamlit as st
import pandas as pd
from data_loader import load_data

def show_unique_values_in_categorical_columns(df):
    st.title("Unique Values in Categorical Columns")
    
    # Get all categorical columns
    categorical_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
    
    # Check if there are any categorical columns
    if not categorical_columns:
        st.warning("No categorical columns found in the file.")
        return

    st.subheader("Unique Values in Categorical Columns:")
    for column in categorical_columns:
        # Get unique values for each categorical column
        unique_values = df[column].unique()
        st.write(f"Column: {column}")
        st.write(f"Unique values: {unique_values}")

In [None]:
# pages/stats.py
import streamlit as st
import pandas as pd
from data_loader import load_data

def show_basic_statistics(df):
    # Select only numeric columns
    numeric_df = df.select_dtypes(include=['number'])

    # Check if there are numeric columns
    if numeric_df.empty:
        st.warning("No numeric columns found in the file.")
        return
    
    # Display basic statistics for numeric columns
    st.subheader("Basic Statistics for Numeric Columns:")
    statistics = numeric_df.describe().T  # Transpose for better readability
    st.write(statistics)

In [None]:
# pages/demographics.py
import pandas as pd
import plotly.express as px
import streamlit as st
from data_loader import load_data

# Function to perform demographic analysis
def analyze_demographics(df, control_group_sorted, test_group_sorted):
    """
    Function to perform demographic analysis and generate interactive plots using Plotly.
    """
    # Ensure 'clnt_age' is present and numeric
    if 'clnt_age' not in df.columns:
        st.error("The DataFrame does not contain the 'clnt_age' column.")
        return

    if not pd.api.types.is_numeric_dtype(df['clnt_age']):
        st.error("The 'clnt_age' column is not numeric.")
        return

    # Create the 'age_group' column based on 'clnt_age' ranges
    bins = [0, 18, 30, 40, 50, 60, 100]  # Define the age group ranges
    labels = ['0-18', '19-30', '31-40', '41-50', '51-60', '60+']  # Age group labels
    df['age_group'] = pd.cut(df['clnt_age'], bins=bins, labels=labels, right=False)

    # Debugging: Show first few rows of the dataframe to confirm 'age_group' column
    st.write("First few rows of the dataframe with 'age_group':")
    st.write(df[['clnt_age', 'age_group']].head())

    # Check if 'age_group' column exists now
    if 'age_group' not in df.columns:
        st.error("The 'age_group' column was not created.")
        return

    # Aggregating based on 'gender' and 'age_group'
    logs_calls_accounts = df.groupby(['gender', 'age_group']).agg({
        'num_accts': 'mean',
        'calls_6_mnth': 'mean',
        'logons_6_mnth': 'mean'
    }).reset_index().round(2)

    # Debugging: Show the aggregated result
    st.write("Aggregated data (grouped by 'gender' and 'age_group'):")
    st.write(logs_calls_accounts)

    # Plot for Average Number of Accounts
    fig1 = px.line(
        logs_calls_accounts, 
        x='age_group', 
        y='num_accts', 
        color='gender',
        title="Average Number of Accounts by Age Group and Gender",
        labels={'num_accts': 'Average Number of Accounts'},
        markers=True
    )
    st.plotly_chart(fig1)

    # Plot for Calls in the Last 6 Months
    fig2 = px.line(
        logs_calls_accounts, 
        x='age_group', 
        y='calls_6_mnth', 
        color='gender',
        title="Average Calls in Last 6 Months by Age Group and Gender",
        labels={'calls_6_mnth': 'Average Calls in Last 6 Months'},
        line_shape='linear',
        markers=True
    )
    st.plotly_chart(fig2)

    # Plot for Logons in the Last 6 Months
    fig3 = px.line(
        logs_calls_accounts, 
        x='age_group', 
        y='logons_6_mnth', 
        color='gender',
        title="Average Logons in Last 6 Months by Age Group and Gender",
        labels={'logons_6_mnth': 'Average Logons in Last 6 Months'},
        line_shape='linear',
        markers=True
    )
    st.plotly_chart(fig3)

    # *** Age Group by Test & Control ***
    st.write("### Test & Control Grouped by Age Group")
    # Filter based on unique client_id in control and test groups
    control_unique = control_group_sorted.drop_duplicates(subset='client_id')
    test_unique = test_group_sorted.drop_duplicates(subset='client_id')

    # Calculate age group distribution for each group
    control_age_group = control_unique["age_group"].value_counts()
    test_age_group = test_unique["age_group"].value_counts()

    # Combine data into one table
    age_groups_concat = pd.concat(
        [control_age_group, test_age_group], 
        axis=1, 
        keys=["Control Group Count", "Test Group Count"]
    )

    # Rename the columns for clarity
    age_groups_concat = age_groups_concat.sort_values(by="age_group", ascending=True)  # sort values
    age_groups_concat = age_groups_concat.reset_index()

    st.write(age_groups_concat)

    # *** Age Group x Gender ***
    st.write("### Age Group x Gender")
    control_age_group_gender = control_unique.groupby("age_group")["gender"].value_counts().unstack()
    test_age_group_gender = test_unique.groupby("age_group")["gender"].value_counts().unstack()

    # Reset the index to create a proper DataFrame structure
    control_age_group_gender = control_age_group_gender.reset_index()
    test_age_group_gender = test_age_group_gender.reset_index()

    st.write("Control Group - Age Group x Gender:")
    st.write(control_age_group_gender)

    st.write("Test Group - Age Group x Gender:")
    st.write(test_age_group_gender)

    # *** Age Group x Balances ***
    st.write("### Age Group x Balances")
    # Filter control and test group based on unique client_id
    control_age_group_balance = control_unique.groupby("age_group")["balance"].mean().round(2)
    test_age_group_balance = test_unique.groupby("age_group")["balance"].mean().round(2)

    # Convert the grouped Series to DataFrames
    control_age_group_balance_df = control_age_group_balance.reset_index()
    test_age_group_balance_df = test_age_group_balance.reset_index()

    # Rename the columns for clarity
    control_age_group_balance_df.rename(columns={"age_group": "Age Group", "balance": "Control Group Balance"}, inplace=True)
    test_age_group_balance_df.rename(columns={"age_group": "Age Group", "balance": "Test Group Balance"}, inplace=True)

    # Merge both control and test balance data into a single table
    balance_concat = pd.merge(control_age_group_balance_df, test_age_group_balance_df, on="Age Group")

    st.write(balance_concat)

# Function to display the demographics analysis in Streamlit
def show_demographics(df, control_group_sorted, test_group_sorted):
    """
    Show Demographics Analysis in the Streamlit app.
    This function is used to call the analysis and display the results.
    """
    st.title("Demographics Analysis")

    # Perform the demographic analysis (aggregation and plotting)
    analyze_demographics(df, control_group_sorted, test_group_sorted)

    # Additional notes or user guidance
    st.write("""
        This page provides demographic analysis, including average number of accounts, calls, 
        and logons, based on age groups and gender. The plots above allow you to explore how 
        these variables differ across age groups and between genders.
    """)

# Sorting Control and Test Groups in the main app.py or wherever necessary:
def sort_groups(df_merged):
    """
    Function to sort control and test groups based on client_id, visit_id, process_step, and date_time.
    """
    control_group = df_merged[df_merged['variation'] == 'Control']
    test_group = df_merged[df_merged['variation'] == 'Test']

    # Sort control group
    control_group_sorted = control_group.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])

    # Sort test group
    test_group_sorted = test_group.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])

    return control_group_sorted, test_group_sorted

In [None]:
#pages/duration.py
import streamlit as st
import pandas as pd  # Ensure pandas is imported
from data_loader import load_data

def show_process_duration(df):
    st.title("Process Duration Analysis")
    
    # Check if the necessary columns exist
    if 'process_step' not in df.columns or 'date_time' not in df.columns or 'client_id' not in df.columns:
        st.error("Missing required columns: 'process_step', 'date_time', or 'client_id'.")
        return
    
    # Define the custom sorting order for the process steps
    process_step_order = ['start', 'step_1', 'step_2', 'step_3', 'confirm']
    df['process_step'] = pd.Categorical(df['process_step'], categories=process_step_order, ordered=True)

    # Filter groups based on test/control
    control_group = df[df['variation'] == 'Control']
    test_group = df[df['variation'] == 'Test']

    # Sort control group and test group
    control_group_sorted = control_group.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])
    test_group_sorted = test_group.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])

    # Function to get the latest starts
    def filter_latest_starts(group_df):
        starts_only = group_df[group_df['process_step'] == 'start']
        latest_starts = starts_only.loc[starts_only.groupby('visit_id')['date_time'].idxmax()]
        return group_df.merge(latest_starts[['visit_id', 'date_time']], on=['visit_id', 'date_time'], how='inner')

    # Apply to both groups (Control and Test)
    filtered_control = filter_latest_starts(control_group_sorted)
    filtered_test = filter_latest_starts(test_group_sorted)

    # Display the complete tables for the filtered groups
    st.title("Control Group Sorted and Filtered")
    st.dataframe(filtered_control)

    st.title("Test Group Sorted and Filtered")
    st.dataframe(filtered_test)

    # Check if it works for a specific client (e.g., client_id == 2304905)
    client_total_entries = df[df["client_id"] == 2304905]
    client_last_start_control = filtered_control[filtered_control['client_id'] == 2304905]
    client_last_start_test = filtered_test[filtered_test['client_id'] == 2304905]

    # Display the results for the specific client
    st.title("Total Entries for Client 2304905")
    st.dataframe(client_total_entries)

    st.title("Last Start for Client 2304905 in Control Group")
    st.dataframe(client_last_start_control)

    st.title("Last Start for Client 2304905 in Test Group")
    st.dataframe(client_last_start_test)

In [21]:
#pages/hypothesistestcompletionrate.py
import pandas as pd
import numpy as np
import scipy.stats as stats  # Correct import for statistical functions
import streamlit as st

# Function to perform a two-proportion z-test
def two_proportion_z_test(p1, p2, n1, n2):
    """
    Perform two-proportion z-test to compare completion rates between two groups.
    Args:
        p1, p2: Completion proportions of control and test groups.
        n1, n2: Sample sizes for control and test groups.
    Returns:
        z-statistic and p-value of the z-test.
    """
    # Calculate pooled proportion
    P = (p1 * n1 + p2 * n2) / (n1 + n2)
    
    # Calculate the standard error
    SE = np.sqrt(P * (1 - P) * (1 / n1 + 1 / n2))
    
    # Calculate the z-statistic
    z = (p1 - p2) / SE
    
    # Calculate the p-value (two-tailed test) using scipy.stats.norm
    p_value = 2 * (1 - stats.norm.cdf(abs(z)))  # Corrected here
    
    return z, p_value

# Show the hypothesis testing page
def show_page(df):
    """
    Show the hypothesis testing page with completion rate analysis.
    Args:
        df: The dataframe containing the data
    """
    # Sort the data by variation and other relevant columns
    df_sorted = df.sort_values(by=['variation', 'client_id', 'visit_id', 'process_step', 'date_time'])

    # Define 'completion' column if not already defined (this should already be defined in data_loader.py)
    if 'completion' not in df_sorted.columns:
        df_sorted['completion'] = df_sorted['process_step'].apply(lambda x: 1 if x in ['confirm', 'completed'] else 0)

    # Define age bins and categorize ages
    bins = [0, 30, 40, 50, 100]  # Adjust intervals as necessary
    labels = ['Under 30', '30-39', '40-49', '50 and above']
    df_sorted['age_group'] = pd.cut(df_sorted['clnt_age'], bins=bins, labels=labels)

    # Define process steps for hypothesis testing
    steps = ['confirm', 'step_1', 'step_2', 'step_3']

    # Create an empty list to store results
    results = []

    # Hypothesis testing for each process step
    for step in steps:
        # Filter data for the current process step in control and test groups
        control_completions = df_sorted[(df_sorted['process_step'] == step) & (df_sorted['variation'] == 'Control')]['completion'].mean() * 100  # Completion rate (%) for control group
        test_completions = df_sorted[(df_sorted['process_step'] == step) & (df_sorted['variation'] == 'Test')]['completion'].mean() * 100  # Completion rate (%) for test group

        control_total = df_sorted[(df_sorted['process_step'] == step) & (df_sorted['variation'] == 'Control')].shape[0]
        test_total = df_sorted[(df_sorted['process_step'] == step) & (df_sorted['variation'] == 'Test')].shape[0]

        # Convert completion rate to proportion
        p_control = control_completions / 100
        p_test = test_completions / 100
        
        # Perform two-proportion z-test
        z_stat, p_value = two_proportion_z_test(p_control, p_test, control_total, test_total)
        
        # Store the results for each step
        results.append({
            'Step': step,
            'Control Completion Rate': control_completions,
            'Test Completion Rate': test_completions,
            'Z-statistic': z_stat,
            'P-value': p_value,
            'Significant': p_value < 0.05
        })

    # Convert results to a DataFrame
    results_df = pd.DataFrame(results)

    # Display the results
    st.subheader("Hypothesis Testing Results for Completion Rates by Step")
    st.dataframe(results_df)

    # Interpret the results for each step
    for idx, row in results_df.iterrows():
        if row['Significant']:
            st.write(f"**Step: {row['Step']}** - The difference in completion rates between control and test groups is statistically significant (Z = {row['Z-statistic']:.4f}, P = {row['P-value']:.4f}).")
        else:
            st.write(f"**Step: {row['Step']}** - There is no significant difference in completion rates between control and test groups (Z = {row['Z-statistic']:.4f}, P = {row['P-value']:.4f}).")

    # Average completion rates comparison (as previously done)
    control_mean = df_sorted[df_sorted['variation'] == 'Control']['completion'].mean() * 100
    test_mean = df_sorted[df_sorted['variation'] == 'Test']['completion'].mean() * 100
    _, p_value = stats.ttest_ind(df_sorted[df_sorted['variation'] == 'Control']['completion'], df_sorted[df_sorted['variation'] == 'Test']['completion'], alternative='two-sided')

    st.subheader("Average Completion Rate Comparison")
    st.write(f"Average completion rate for Control group: {control_mean:.2f}%")
    st.write(f"Average completion rate for Test group: {test_mean:.2f}%")
    st.write(f"T-statistic: {_:.4f}")
    st.write(f"P-value: {p_value:.4f}")

    alpha = 0.05
    if p_value < alpha:
        st.write("**Reject the null hypothesis**: The completion rates are significantly different between the Test and Control groups.")
    else:
        st.write("**Fail to reject the null hypothesis**: The completion rates are not significantly different between the Test and Control groups.")

    completion_rate_increase = test_mean - control_mean
    st.write(f"Completion rate increase: {completion_rate_increase:.2f}%")
    if completion_rate_increase >= 5:
        st.write("The completion rate increase meets the 5% threshold, justifying the cost of the new design.")
    else:
        st.write("The completion rate increase does not meet the 5% threshold. The new design may not justify its cost.")


In [None]:
#pages/completion.py
import streamlit as st
import pandas as pd  # Ensure pandas is imported
from data_loader import load_data

def show_completion_time(df):
    st.title("Completion Time Analysis")
    
    # Ensure the 'date_time' column is in datetime format
    if 'date_time' not in df.columns:
        st.error("Missing 'date_time' column in the dataset.")
        return
    
    # Coerce errors to NaT (Not a Time)
    df['date_time'] = pd.to_datetime(df['date_time'], errors='coerce')  

    # Drop rows where 'date_time' is NaT after coercion
    df = df.dropna(subset=['date_time'])

    # Sort the dataset to ensure chronological order by client_id, visit_id, and date_time
    df = df.sort_values(by=['client_id', 'visit_id', 'date_time'])
    
    # Split the data into control and test groups
    control_group = df[df['variation'] == 'Control']
    test_group = df[df['variation'] == 'Test']
    
    # Function to calculate completion time for each step considering multiple visits
    def calculate_completion_time_with_visits(group_df):
        group_df = group_df.sort_values(by=['client_id', 'visit_id', 'date_time'])
        group_df['next_step_time'] = group_df.groupby(['client_id', 'visit_id'])['date_time'].shift(-1)
        group_df = group_df.dropna(subset=['next_step_time'])
        group_df['completion_time'] = group_df['next_step_time'] - group_df['date_time']
        return group_df[['client_id', 'visit_id', 'process_step', 'date_time', 'next_step_time', 'completion_time']]

    # Function to filter out outliers using IQR
    def filter_outliers(group_df):
        # Convert completion_time to minutes for easier interpretation
        group_df['completion_time_minutes'] = group_df['completion_time'].dt.total_seconds() / 60
        
        # Calculate the IQR (Interquartile Range) for completion time
        Q1 = group_df['completion_time_minutes'].quantile(0.25)
        Q3 = group_df['completion_time_minutes'].quantile(0.75)
        IQR = Q3 - Q1

        # Define the upper and lower bounds for outliers
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        # Filter out the outliers based on IQR
        filtered_data = group_df[
            (group_df['completion_time_minutes'] >= lower_bound) &
            (group_df['completion_time_minutes'] <= upper_bound)
        ]
        return filtered_data

    # Apply the function to both the control and test groups
    control_group_completion_times = calculate_completion_time_with_visits(control_group)
    test_group_completion_times = calculate_completion_time_with_visits(test_group)
    
    # Filter out outliers from both groups
    control_group_filtered = filter_outliers(control_group_completion_times)
    test_group_filtered = filter_outliers(test_group_completion_times)
    
    # Calculate the average completion time in minutes for each process step, after removing outliers
    avg_completion_time_control = control_group_filtered.groupby('process_step')['completion_time_minutes'].mean().reset_index()
    avg_completion_time_test = test_group_filtered.groupby('process_step')['completion_time_minutes'].mean().reset_index()
    
    # Display the results for control and test group average completion times
    st.subheader("Average Completion Time for Control Group (Minutes) - After Outlier Removal")
    st.dataframe(avg_completion_time_control)

    st.subheader("Average Completion Time for Test Group (Minutes) - After Outlier Removal")
    st.dataframe(avg_completion_time_test)
    
    # Display comparison of control vs test group average completion times
    st.subheader("Comparison of Average Completion Time by Process Step")
    comparison_df = pd.merge(avg_completion_time_control, avg_completion_time_test, on='process_step', suffixes=('_control', '_test'))
    st.dataframe(comparison_df)

    # Optional: You could plot the results for better visualization if desired
    # st.bar_chart(comparison_df.set_index('process_step')[['completion_time_minutes_control', 'completion_time_minutes_test']])

    # Additional insights and interpretation
    st.subheader("Insights & Interpretation")
    st.write("""
    In this section, we can derive insights based on the completion time across different groups and process steps:
    
    - **Completion Time Analysis**: Are there significant differences in the time it takes for each process step between the Control and Test groups?
    - **Outlier Removal**: By removing outliers using the IQR method, we can focus on more representative data for process completion times.
    - **Improvement in Test Group**: Does the Test group show faster completion times compared to the Control group after considering the removal of outliers?
    
    Please review the data and interpret the results to drive business decisions and process improvements.
    """)

    # **Process Duration Analysis**: Calculate process duration for start and confirm steps
    st.subheader("Process Duration Analysis")

    # Filter to get the latest start for each client
    starts_only = df[df['process_step'] == 'start']
    latest_starts = starts_only.loc[starts_only.groupby('client_id')['date_time'].idxmax()]

    # Filter to get the last confirmation for each client
    confirmation_only = df[df['process_step'] == 'confirm']
    latest_confirms = confirmation_only.loc[confirmation_only.groupby('client_id')['date_time'].idxmax()]

    # Merge to have both latest start and confirm per client
    latest_start_confirms = pd.merge(latest_starts, latest_confirms, on='client_id', suffixes=('_start', '_confirm'))

    # Calculate process duration for those who completed the process
    latest_start_confirms['process_duration'] = latest_start_confirms['date_time_confirm'] - latest_start_confirms['date_time_start']

    # Convert timedelta to seconds for easier manipulation
    latest_start_confirms['process_duration_seconds'] = latest_start_confirms['process_duration'].dt.total_seconds()

    # Calculate the IQR (Interquartile Range) for process duration
    Q1_duration = latest_start_confirms['process_duration_seconds'].quantile(0.25)
    Q3_duration = latest_start_confirms['process_duration_seconds'].quantile(0.75)
    IQR_duration = Q3_duration - Q1_duration

    # Define the upper and lower bounds for outliers in process duration
    lower_bound_duration = Q1_duration - 1.5 * IQR_duration
    upper_bound_duration = Q3_duration + 1.5 * IQR_duration

    # Filter out the outliers based on IQR for process duration
    filtered_duration_data = latest_start_confirms[
        (latest_start_confirms['process_duration_seconds'] >= lower_bound_duration) &
        (latest_start_confirms['process_duration_seconds'] <= upper_bound_duration)
    ]

    # Convert process_duration back to Timedelta
    filtered_duration_data['process_duration'] = pd.to_timedelta(filtered_duration_data['process_duration_seconds'], unit='s')

    # Calculate the average process duration again after removing outliers
    st.subheader("Average Process Duration (Filtered) - After Outlier Removal")
    st.write(f"Average process duration: {filtered_duration_data['process_duration'].mean()}")
    st.write(f"Median process duration: {filtered_duration_data['process_duration'].median()}")
    
    # Optional: Display filtered duration data
    st.subheader("Filtered Process Duration Data (Outliers Removed)")
    st.dataframe(filtered_duration_data[['client_id', 'process_duration']])

In [None]:
#bounce_rate.py
import streamlit as st
import pandas as pd
from scipy.stats import norm
from data_loader import load_data

# Function to calculate counts for z-test
def calculate_counts(group):
    steps = ['start', 'step_1', 'step_2', 'step_3', 'confirm']
    counts = []  # Store counts as tuples (N_started, N_dropped)
    
    for i in range(len(steps) - 1):
        current_step = steps[i]
        next_step = steps[i + 1]
        # Total users who started at this step
        started = group[group['process_step'] == current_step]['client_id'].nunique()
        # Total users who dropped off at this step
        reached_next = group[group['process_step'] == next_step]['client_id'].nunique()
        dropped = started - reached_next
        counts.append((started, dropped))
    
    return counts

# Function to perform two-proportion z-test
def two_proportion_z_test(n1, x1, n2, x2):
    # Calculate proportions
    p1 = x1 / n1 if n1 > 0 else 0
    p2 = x2 / n2 if n2 > 0 else 0
    
    # Pooled proportion
    p = (x1 + x2) / (n1 + n2)
    
    # Calculate z-statistic
    z = (p1 - p2) / ((p * (1 - p) * (1 / n1 + 1 / n2)) ** 0.5)
    
    # Calculate two-tailed p-value
    p_value = 2 * (1 - norm.cdf(abs(z)))
    
    return z, p_value

# Main function to display the bounce rate analysis page
def show_bounce_rate(df):
    st.title("Bounce Rate Analysis")

    # Ensure that the necessary columns exist in the dataframe
    if 'clnt_age' not in df.columns or 'process_step' not in df.columns or 'client_id' not in df.columns:
        st.error("Required columns ('clnt_age', 'process_step', 'client_id') are missing.")
        return
    
    # Create age groups based on 'clnt_age'
    bins = [0, 30, 40, 50, 100]  # You can adjust the age bins as needed
    labels = ['Under 30', '30-39', '40-49', '50 and above']
    df['age_group'] = pd.cut(df['clnt_age'], bins=bins, labels=labels)
    
    # Split the data into Control and Test groups
    control_group = df[df['variation'] == 'Control']
    test_group = df[df['variation'] == 'Test']
    
    # Sort Control and Test groups
    control_group_sorted = control_group.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])
    test_group_sorted = test_group.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])
    
    # Calculate the drop-off rates for both Control and Test groups
    control_dropoff_rate = calculate_dropoff_rate(control_group_sorted)
    test_dropoff_rate = calculate_dropoff_rate(test_group_sorted)

    # Display Bounce Rates for Control and Test Groups
    st.subheader("Bounce Rates for Control and Test Groups (Overall)")
    st.write("Control Group Bounce Rates (%):")
    for step, rate in control_dropoff_rate.items():
        st.write(f"{step}: {rate:.2f}%")
    
    st.write("\nTest Group Bounce Rates (%):")
    for step, rate in test_dropoff_rate.items():
        st.write(f"{step}: {rate:.2f}%")
    
    # Calculate and display drop-off rates by Age Group for Control and Test groups
    st.subheader("Bounce Rates by Age Group")

    control_dropoff_rate_by_age = calculate_dropoff_rate_by_age(control_group_sorted)
    test_dropoff_rate_by_age = calculate_dropoff_rate_by_age(test_group_sorted)

    # Display Control group drop-off rates by Age
    st.write("Control Group Bounce Rates by Age Group:")
    for age_group, rates in control_dropoff_rate_by_age.items():
        st.write(f"Age Group: {age_group}")
        for step, rate in rates.items():
            st.write(f"  {step}: {rate:.2f}%")
    
    # Display Test group drop-off rates by Age
    st.write("Test Group Bounce Rates by Age Group:")
    for age_group, rates in test_dropoff_rate_by_age.items():
        st.write(f"Age Group: {age_group}")
        for step, rate in rates.items():
            st.write(f"  {step}: {rate:.2f}%")

    # **Hypothesis Testing (Z-Test) Section**
    st.subheader("Hypothesis Test using Z-Test")
    
    # Null Hypothesis: H₀ = The bounce rates for the control and test groups are the same for a given step.
    # Alternative Hypothesis: H₁ = The bounce rates for the control and test groups are different for a given step.
    
    st.write("""
    **Null Hypothesis (H₀)**: The bounce rates for the control and test groups are the same for a given step.  
    **Alternative Hypothesis (H₁)**: The bounce rates for the control and test groups are different for a given step.
    """)

    # Calculate counts for Control and Test groups
    control_counts = calculate_counts(control_group_sorted)
    test_counts = calculate_counts(test_group_sorted)

    # Perform z-tests for each step and decide on hypothesis
    z_test_results = []

    steps = ['start', 'step_1', 'step_2', 'step_3']
    for i, step in enumerate(steps):
        n1, x1 = control_counts[i]  # Control group: (N_started, N_dropped)
        n2, x2 = test_counts[i]     # Test group: (N_started, N_dropped)
        
        # Perform z-test for proportions
        z_stat, p_value = two_proportion_z_test(n1, x1, n2, x2)
        
        # Decide whether to reject the null hypothesis
        reject_null = p_value < 0.05
        
        # Store results
        z_test_results.append({
            'Step': step,
            'Control Bounce Rate (%)': (x1 / n1) * 100 if n1 > 0 else 0,
            'Test Bounce Rate (%)': (x2 / n2) * 100 if n2 > 0 else 0,
            'Z-Statistic': z_stat,
            'P-Value': p_value,
            'Reject Null Hypothesis': reject_null
        })

    # Convert results to DataFrame for display
    z_test_results_df = pd.DataFrame(z_test_results)

    # Display Z-Test Results
    st.write(z_test_results_df)

    # Optional: Provide a brief explanation of the Z-Test
    st.write("""
    The Z-Test is used here to test if there is a significant difference between the bounce rates for the control and test groups at each process step.
    
    - A Z-Statistic closer to 0 indicates that the difference between the two groups is small.
    - A p-value below 0.05 suggests that we reject the null hypothesis and conclude that the bounce rates are significantly different.
    - If the p-value is greater than 0.05, we fail to reject the null hypothesis, meaning the bounce rates are similar.
    """)

In [None]:
#pages/error_rate.py
import streamlit as st
import pandas as pd
from scipy.stats import binomtest, ttest_ind
from data_loader import load_data

# Function to calculate errors
def calculate_errors(group):
    group['step_index'] = group['process_step'].map({'start': 0, 'step_1': 1, 'step_2': 2, 'step_3': 3, 'confirm': 4})
    group['error'] = group['step_index'].diff().apply(lambda x: x < 0)  # Negative diff indicates a backward step
    return group

# Function to calculate error rates for both groups
def calculate_error_rates(control_group, test_group):
    # Calculate errors
    control_group = calculate_errors(control_group)
    test_group = calculate_errors(test_group)

    # Calculate Error Rates
    control_error_rate = control_group['error'].mean() * 100
    test_error_rate = test_group['error'].mean() * 100

    # Error rate difference
    error_rate_difference = control_error_rate - test_error_rate  # Difference between control and test error rates
    threshold = 5  # 5% threshold for the difference

    # Perform hypothesis testing: binomial test
    control_errors = control_group['error'].sum()
    control_total = len(control_group)
    test_errors = test_group['error'].sum()
    test_total = len(test_group)

    # Perform a one-tailed binomial test
    result = binomtest(test_errors, test_total, control_errors / control_total, alternative='less')

    # Perform independent t-test for error rates
    control_error_rate_values = control_group['error'].astype(int)
    test_error_rate_values = test_group['error'].astype(int)

    # Perform the independent t-test
    t_stat, t_p_value = ttest_ind(control_error_rate_values, test_error_rate_values, equal_var=False, alternative='two-sided')

    return {
        'control_error_rate': control_error_rate,
        'test_error_rate': test_error_rate,
        'error_rate_difference': error_rate_difference,
        'binomial_p_value': result.pvalue,
        't_statistic': t_stat,
        't_p_value': t_p_value,
    }

# Function to display the results
def show_error_rate_analysis(df):
    st.title("Error Rate Hypothesis Testing")

    # Split the data into control and test groups
    control_group = df[df['variation'] == 'Control']
    test_group = df[df['variation'] == 'Test']

    # Calculate error rates and perform hypothesis testing
    results = calculate_error_rates(control_group, test_group)

    # Display results
    st.subheader("Error Rates Comparison")
    st.write(f"Control Group Error Rate: {results['control_error_rate']:.2f}%")
    st.write(f"Test Group Error Rate: {results['test_error_rate']:.2f}%")
    st.write(f"Error Rate Difference: {results['error_rate_difference']:.2f}%")

    # Display hypothesis testing results
    st.subheader("Hypothesis Testing Results")

    # Binomial Test Results
    st.write(f"Binomial Test p-value: {results['binomial_p_value']:.4f}")
    if results['binomial_p_value'] < 0.05:
        st.write("The test group has significantly lower error rate than the control group (p-value < 0.05).")
    else:
        st.write("There is no significant difference in error rates between the test and control groups.")

    # T-test Results
    st.write(f"T-test Statistic: {results['t_statistic']:.4f}")
    st.write(f"T-test p-value: {results['t_p_value']:.4f}")
    if results['t_p_value'] < 0.05:
        st.write("There is a significant difference in error rates between the control and test groups.")
    else:
        st.write("There is no significant difference in error rates between the control and test groups.")

    # Conclusion based on practical significance (error rate difference)
    st.subheader("Practical Significance")
    if results['error_rate_difference'] >= 5:
        st.write("The test group has at least a 5% lower error rate than the control group, which is practically significant.")
    else:
        st.write("The error rate difference is less than 5%, which may not be practically significant for making decisions.")

    # Conclusion
    st.subheader("Conclusion")
    if results['binomial_p_value'] < 0.05 and results['error_rate_difference'] >= 5:
        st.write("The test group shows both statistical and practical significance. The improvement in error rates may justify action.")
    else:
        st.write("Although the test group shows statistically significant differences, the practical significance (error rate difference) may not justify making significant changes.")

In [None]:
# utils/display.py
import streamlit as st
from data_loader import load_data

# Function to show an error message
def show_error(message: str):
    st.error(message)

# Function to display a dataframe
def display_dataframe(df, rows=5):
    if df is not None:
        st.dataframe(df.head(rows))
    else:
        show_error("Data is not available.")

# Function to display basic statistics of the dataframe
def show_basic_statistics(df):
    if df is not None:
        numeric_df = df.select_dtypes(include=['number'])
        if not numeric_df.empty:
            st.subheader("Basic Statistics")
            st.write(numeric_df.describe().T)
        else:
            show_error("No numeric columns found for statistics.")
    else:
        show_error("Data is not available for statistics.")

# Function to show unique values for categorical columns
def show_unique_values(df):
    if df is not None:
        categorical_columns = df.select_dtypes(include=['object', 'category']).columns.tolist()
        if categorical_columns:
            for column in categorical_columns:
                st.write(f"Column: {column}")
                st.write(f"Unique values: {df[column].unique()}")
        else:
            show_error("No categorical columns found.")
    else:
        show_error("Data is not available for unique value display.")