In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact_manual
from IPython.display import display

In [2]:
# Unit codes covered by this notebook. The other visible cells repeat this
# same list inline rather than referencing this name.
units = [
    'T03',
    'T06',
    'GWB',
    'WMS',
]

In [3]:
def plot_unit_histograms(df, units, types, ordered_questions, start_date, end_date):
    """Draw one grouped bar chart of mean question scores for each unit.

    The frame is filtered to rows whose 'Date' lies in [start_date, end_date]
    (inclusive) and, unless ``types`` is 'All', to the requested 'Type' values.
    For every requested unit a figure is shown with one bar per
    (question, type) pair, annotated with the mean score, and a legend that
    reports how many filtered rows each type contributed for that unit.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Date', 'Type', 'Unit' and the five question
        columns 'Listen', 'Involve', 'Accessibility', 'Trust', 'Overall'.
        The frame passed in is not modified.
    units : list-like of str, or str
        Units to plot. The string 'All' plots every unit present in the
        filtered data; any other single string is treated as one unit.
    types : list-like of str, or str
        'Type' values to keep. The string 'All' disables the filter; any
        other single string is treated as one type.
    ordered_questions : list of str
        Order in which questions appear on the x axis. Questions missing from
        this list are dropped from the chart.
    start_date, end_date : anything accepted by ``pandas.to_datetime``
        Inclusive bounds of the date filter.

    Returns
    -------
    None
        Figures are displayed via ``plt.show()``. Units with no matching
        rows are reported with a printed message and skipped.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Question columns that are melted into long form.
    questions = ['Listen', 'Involve', 'Accessibility', 'Trust', 'Overall']

    # assign() returns a new frame, so the caller's 'Date' column is left as-is.
    df = df.assign(Date=pd.to_datetime(df['Date']))
    df = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]

    # Accept either the sentinel 'All', a single type name, or a list of names.
    if isinstance(types, str):
        types = None if types == 'All' else [types]
    if types is not None:
        df = df[df['Type'].isin(types)]

    if df.empty:
        print("No survey responses match the selected filters.")
        return

    # Same normalisation for units; a bare string must not be iterated per character.
    if isinstance(units, str):
        units = df['Unit'].unique() if units == 'All' else [units]

    # Rows per (unit, type); used for the "n = ..." legend labels.
    unit_counts = df.groupby(['Unit', 'Type']).size().unstack(fill_value=0)

    # Long form: one row per (response, question).
    melted_df = df.melt(id_vars=['Type', 'Unit'], value_vars=questions,
                        var_name='Question', value_name='Score')

    # An ordered categorical makes sorting and the x axis follow ordered_questions.
    melted_df['Question'] = pd.Categorical(melted_df['Question'],
                                           categories=ordered_questions, ordered=True)
    melted_df = melted_df.sort_values('Question')

    # Mean score per (unit, type, question).
    grouped_df = (
        melted_df
        .groupby(['Unit', 'Type', 'Question'], observed=True)['Score']
        .mean()
        .reset_index()
    )

    custom_palette = {
        'Patient': '#00727C',
        'Visitor': '#9ED9D1'
    }

    for unit in units:
        # A unit with no rows after filtering has no counts and nothing to draw.
        if unit not in unit_counts.index:
            print(f"No responses for unit {unit} in the selected range; skipping.")
            continue

        unit_df = grouped_df[grouped_df['Unit'] == unit]
        counts_for_unit = unit_counts.loc[unit]

        fig, ax = plt.subplots(figsize=(15, 8))
        sns.barplot(data=unit_df, x='Question', y='Score', hue='Type',
                    errorbar=None, palette=custom_palette, ax=ax)

        # Append the response count to each known type; keep any other label
        # unchanged so handles and labels always have the same length.
        handles, labels = ax.get_legend_handles_labels()
        new_labels = [
            f"{label} (n = {counts_for_unit.get(label, 0)})" if label in custom_palette else label
            for label in labels
        ]
        ax.legend(handles, new_labels, title='Type', bbox_to_anchor=(1.05, 1),
                  loc='upper left', fontsize=14)

        # Walk the bar containers rather than ax.patches so that only drawn
        # bars are annotated (ax.patches may also hold non-bar rectangles,
        # depending on the seaborn version - TODO confirm for the pinned version).
        for container in ax.containers:
            for bar in container:
                height = bar.get_height()
                if pd.isna(height):
                    continue  # no data for this (question, type) pair
                ax.annotate(f'{height:.2f}',
                            xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, -12),  # offset so the text sits just below the bar top
                            textcoords="offset points",
                            ha='center', va='bottom')

        ax.set_title(f'Average Scores by Question for {unit} (From {start_date.strftime("%Y-%m-%d")} to {end_date.strftime("%Y-%m-%d")})', fontsize=20)
        ax.set_xlabel('Question', fontsize=12)
        ax.set_ylabel('Average Score', fontsize=12)
        ax.set_ylim(0.1, 5)
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        fig.tight_layout()
        plt.show()

In [4]:
def update_visualizations(selected_type, selected_unit, start_date, end_date):
    """Load the survey CSV and plot per-unit average scores for the selection.

    Parameters
    ----------
    selected_type : str
        'Patient', 'Visitor', or 'All' (both types).
    selected_unit : str
        A single unit code, or 'All' for 'T03', 'T06', 'GWB' and 'WMS'.
    start_date, end_date : date-like or None
        Inclusive date bounds. If either is missing, or the start is after
        the end, a message is printed and nothing is loaded or plotted.

    Returns
    -------
    None
    """
    # Both bounds are required before anything else is attempted.
    if not start_date or not end_date:
        print("Please select both start and end dates.")
        return

    # An inverted range can never match a row; report it before any file I/O.
    if pd.to_datetime(start_date) > pd.to_datetime(end_date):
        print("Start date must be on or before end date.")
        return

    df = pd.read_csv('cleaned_survey.csv')

    # Question columns, with 'Overall' forced to the end of the x axis.
    questions = ['Listen', 'Involve', 'Accessibility', 'Trust', 'Overall']
    ordered_questions = [q for q in questions if q != 'Overall'] + ['Overall']

    # Expand the 'All' choices into explicit lists for the plotting function.
    chosen_units = [selected_unit] if selected_unit != 'All' else ['T03', 'T06', 'GWB', 'WMS']
    chosen_types = [selected_type] if selected_type != 'All' else ['Patient', 'Visitor']

    plot_unit_histograms(df, chosen_units, chosen_types,
                         ordered_questions, start_date, end_date)

In [5]:
# Controls for the interactive view. Each widget is created once and handed
# to interact_manual below, so these names refer to the widgets that are
# actually displayed.

# Dropdown for 'Type'
type_dropdown = widgets.Dropdown(
    options=['Patient', 'Visitor', 'All'],
    value='All',
    description='Type:',
)

# Dropdown for 'Unit'
unit_dropdown = widgets.Dropdown(
    options=['T03', 'T06', 'GWB', 'WMS', 'All'],
    value='All',
    description='Unit:',
)

# Start Date Picker
start_date_picker = widgets.DatePicker(
    description='Start Date',
    disabled=False
)

# End Date Picker
end_date_picker = widgets.DatePicker(
    description='End Date',
    disabled=False
)

# Display the widgets; update_visualizations runs when the button is pressed.
# The trailing semicolon keeps the returned function's repr out of the cell output.
interact_manual(
    update_visualizations,
    selected_type=type_dropdown,
    selected_unit=unit_dropdown,
    start_date=start_date_picker,
    end_date=end_date_picker,
);

interactive(children=(Dropdown(description='Type:', index=2, options=('Patient', 'Visitor', 'All'), value='All…

<function __main__.update_visualizations(selected_type, selected_unit, start_date, end_date)>