In [1]:
# function to plot attendance bar charts for each contextual category
def plot_attendance_proportions(df, varColumn):
    """Draw a stacked bar chart of attendance bands for each value of ``varColumn``.

    Each bar is one category of ``varColumn``. The bottom segment is the share
    of learners whose ``PERCENTAGEATT`` is above 0.8, the top segment is the
    share at or below 0.8. Both segments are annotated with their percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``PERCENTAGEATT`` column and the ``varColumn`` column.
    varColumn : str
        Name of the column to group by. The group keys are coerced to bool to
        build the 'True' / 'False' x-axis labels, so the column is expected to
        hold boolean-like values.

    Returns
    -------
    None
        The chart is drawn with the pyplot state machine and shown.
    """
    group_sizes = df.groupby(varColumn).size()
    low_counts = df[df['PERCENTAGEATT'] <= 0.8].groupby(varColumn).size()

    # A category without any low-attendance learner is missing from
    # ``low_counts``; align it to the full set of categories with a count of 0
    # so the share is 0% rather than NaN.
    low_share = low_counts.reindex(group_sizes.index, fill_value=0) / group_sizes
    high_share = 1 - low_share
    labels = low_share.index.astype(bool).map({True: 'True', False: 'False'})

    high_bars = plt.bar(labels, high_share, label='Above 0.8')
    low_bars = plt.bar(labels, low_share, bottom=high_share,
                       label='Below or Equal to 0.8', color='#FF7F0E')

    plt.title('Proportion of Learners with 80% or Less Attendance by ' + varColumn)
    plt.xlabel(f'{varColumn} Category')
    plt.ylabel('Proportion of Learners')
    plt.legend(title='Attendance Category', bbox_to_anchor=(1.05, 1), loc='upper left')

    # Annotate every segment with its own share. Pairing each bar container
    # with the series it was drawn from keeps labels and bars in step.
    for segment_bars, shares in ((high_bars, high_share), (low_bars, low_share)):
        for bar, share in zip(segment_bars, shares):
            plt.text(bar.get_x() + bar.get_width() / 2,
                     bar.get_y() + bar.get_height() / 2,
                     f'{share:.1%}', ha='center', va='center')

    plt.show()

In [2]:
# function to plot attendance bar charts for each contextual category by year
def plot_attendance_proportions_with_year(df, varColumn):
    """Plot, per year, the share of learners with attendance at or below 0.8.

    For every ``Year`` two side-by-side bars are drawn: one for learners whose
    ``varColumn`` value is True and one for those where it is False. Each bar
    is annotated with its percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Year``, ``PERCENTAGEATT`` and the ``varColumn`` column.
        The frame is not modified.
    varColumn : str
        Name of the column to compare; its values are looked up as
        ``True`` / ``False``.

    Returns
    -------
    None
        The figure is created, laid out and shown.
    """
    # Rows without an attendance figure cannot be assigned to a band, so they
    # are left out of both numerator and denominator.
    known = df.dropna(subset=['PERCENTAGEATT'])
    is_low = (known['PERCENTAGEATT'] <= 0.8).astype(float)

    # Mean of the 0/1 flag per (Year, value) is the share of low attendance.
    # Unstacking puts the varColumn values in columns; reindexing guarantees a
    # True and a False column (NaN where a combination has no learners), so
    # missing combinations no longer raise a KeyError.
    low_share = (
        is_low.groupby([known['Year'], known[varColumn]])
        .mean()
        .unstack()
        .reindex(columns=[True, False])
    )
    shares_by_value = low_share.to_numpy(dtype=float)

    colors = ['#FF7F0E', '#1F77B4']

    fig, ax = plt.subplots(figsize=(10, 6))
    years = low_share.index
    x = np.arange(len(years))
    width = 0.35

    for i, var_value in enumerate([True, False]):
        shares = shares_by_value[:, i]
        # Draw a zero-height bar where there is no data so the x positions
        # stay aligned; such bars get no annotation.
        heights = np.where(np.isnan(shares), 0.0, shares)
        bars = ax.bar(x + i * width, heights, width, label=str(var_value), color=colors[i])
        for bar, share in zip(bars, shares):
            if np.isnan(share):
                continue
            ax.annotate(f'{share:.1%}',
                        xy=(bar.get_x() + bar.get_width() / 2, share),
                        xytext=(0, 3),
                        textcoords="offset points",
                        ha='center', va='bottom', fontsize=8)

    ax.set_xlabel('Year')
    ax.set_ylabel('Proportion of Learners')
    ax.set_title('Proportion of Learners with 80% or Less Attendance by Year and ' + varColumn)
    # Centre each tick between the two bars of its year.
    ax.set_xticks(x + width / 2)
    ax.set_xticklabels(years)
    # The bars already carry their 'True' / 'False' labels.
    ax.legend(title=varColumn, bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()
    plt.show()

In [3]:
# function to plot clustered attendance bar charts for multiple contextual categories
def plot_clustered_attendance_proportions(df, features):
    """Plot the share of learners with attendance at or below 0.8 for several features.

    For every column named in ``features`` a pair of bars is drawn: the share
    of low attendance (``PERCENTAGEATT`` <= 0.8) among learners where the
    feature is True, and among learners where it is False. Each bar is
    annotated with its percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``PERCENTAGEATT`` and every column listed in ``features``.
    features : sequence of str
        Column names to compare. A feature that has no True (or no False) rows
        is drawn with a value of 0 for that side.

    Returns
    -------
    None
        The figure is created and shown.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    bar_width = 0.35
    indices = np.arange(len(features))
    colors = ['#FF7F0E', '#1F77B4']

    # The low-attendance subset does not depend on the feature: build it once.
    low_attendance = df[df['PERCENTAGEATT'] <= 0.8]

    true_shares = []
    false_shares = []
    for feature in features:
        group_sizes = df.groupby(feature).size()
        # A category with no low-attendance learner is missing from the
        # filtered counts; treat it as 0 instead of letting the division
        # produce NaN (which would show up as a 'nan%' label).
        low_counts = low_attendance.groupby(feature).size().reindex(group_sizes.index, fill_value=0)
        shares = low_counts / group_sizes
        shares.index = shares.index.map({True: 'True', False: 'False'})
        true_shares.append(shares.get('True', 0))
        false_shares.append(shares.get('False', 0))

    ax.bar(indices - bar_width / 2, true_shares, bar_width, label='True', color=colors[0])
    ax.bar(indices + bar_width / 2, false_shares, bar_width, label='False', color=colors[1])

    # Write each percentage in the vertical middle of its bar.
    for offset, values in ((-bar_width / 2, true_shares), (bar_width / 2, false_shares)):
        for position, share in zip(indices, values):
            ax.text(position + offset, share / 2,
                    f'{share:.1%}', ha='center', va='center', color='black')

    ax.set_title('Proportion of Learners with 80% or Less Attendance by Feature')
    ax.set_xlabel('Features')
    ax.set_ylabel('Proportion of Learners')
    ax.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_xticks(indices)
    ax.set_xticklabels(features)

    plt.show()

In [4]:
# function to plot clustered bar charts of the average exclusion sessions missed for multiple contextual categories
def plot_weighted_avg_exclusion_sessions(df, features):
    """Plot the average number of exclusion sessions missed for several features.

    Only learners with ``Exclusion_SessionsMissed`` > 0 are considered. For
    every column named in ``features`` a pair of bars shows the mean number of
    sessions missed among those learners where the feature is True and where it
    is False. Despite the "weighted" wording in the labels, the value is the
    plain mean (sum divided by count) within each group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Exclusion_SessionsMissed`` and every column listed in
        ``features``.
    features : sequence of str
        Column names to compare. A side (True / False) with no qualifying
        learners is drawn with a value of 0.

    Returns
    -------
    None
        The figure is created and shown.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    bar_width = 0.35
    indices = np.arange(len(features))
    colors = ['#FF7F0E', '#1F77B4']

    # The subset of learners with at least one missed session is the same for
    # every feature, so filter once instead of once per loop iteration.
    excluded = df[df['Exclusion_SessionsMissed'] > 0]

    weighted_avg_true_values = []
    weighted_avg_false_values = []
    for feature in features:
        avg_missed = excluded.groupby(feature)['Exclusion_SessionsMissed'].mean()
        avg_missed.index = avg_missed.index.map({True: 'True', False: 'False'})
        weighted_avg_true_values.append(avg_missed.get('True', 0))
        weighted_avg_false_values.append(avg_missed.get('False', 0))

    ax.bar(indices - bar_width / 2, weighted_avg_true_values, bar_width, label='True', color=colors[0])
    ax.bar(indices + bar_width / 2, weighted_avg_false_values, bar_width, label='False', color=colors[1])

    # Write each value in the vertical middle of its bar.
    for offset, values in ((-bar_width / 2, weighted_avg_true_values),
                           (bar_width / 2, weighted_avg_false_values)):
        for position, value in zip(indices, values):
            ax.text(position + offset, value / 2,
                    f'{value:.1f}', ha='center', va='center', color='black')

    ax.set_title('Weighted Average Exclusion Sessions Missed by Feature')
    ax.set_xlabel('Features')
    ax.set_ylabel('Weighted Average Exclusion Sessions Missed')
    ax.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_xticks(indices)
    ax.set_xticklabels(features)

    plt.show()

In [5]:
# function to plot clustered average exclusions bar charts for multiple contextual categories
def plot_clustered_avg_exclusions(df, features):
    """Plot the mean of ``Exclusions`` for the True and False side of each feature.

    For every column named in ``features`` the frame is grouped by that column
    and the mean of ``Exclusions`` is taken per group. The True and False means
    are drawn as a pair of bars per feature; a side that does not occur is
    drawn as 0. Each bar is annotated with its value in percent format.

    NOTE(review): the percent formatting presumably assumes ``Exclusions`` is a
    0/1 flag so that the mean is a proportion; confirm against the data.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Exclusions`` and every column listed in ``features``.
    features : sequence of str
        Column names to compare.

    Returns
    -------
    None
        The figure is created, laid out and shown.
    """
    figure, axis = plt.subplots(figsize=(12, 6))
    bar_width = 0.35
    indices = np.arange(len(features))
    colors = ['#FF7F0E', '#1F77B4']
    label_for = {True: 'True', False: 'False'}

    # Collect one (mean-when-True, mean-when-False) pair per feature.
    mean_pairs = []
    for column in features:
        means = df.groupby(column)['Exclusions'].mean()
        means.index = means.index.map(label_for)
        mean_pairs.append((means.get('True', 0), means.get('False', 0)))
    avg_true_values = [pair[0] for pair in mean_pairs]
    avg_false_values = [pair[1] for pair in mean_pairs]

    # (horizontal shift, heights, legend label, colour) for each bar series.
    series = (
        (-bar_width / 2, avg_true_values, 'True', colors[0]),
        (bar_width / 2, avg_false_values, 'False', colors[1]),
    )

    for shift, heights, legend_label, colour in series:
        axis.bar(indices + shift, heights, bar_width, label=legend_label, color=colour)

    # Annotate after all bars are drawn: True series first, then False.
    for shift, heights, _, _ in series:
        for position, value in zip(indices, heights):
            axis.text(position + shift, value / 2,
                      f'{value:.1%}', ha='center', va='center', color='black')

    axis.set_title('Average Exclusions by Feature')
    axis.set_xlabel('Features')
    axis.set_ylabel('Average Exclusions')
    axis.legend(title='Category', bbox_to_anchor=(1.05, 1), loc='upper left')
    axis.set_xticks(indices)
    axis.set_xticklabels(features)

    plt.tight_layout()
    plt.show()