In [None]:
import os
from IPython.display import display, Markdown
import pandas as pd

from file_reader import read_activities

# Workbooks to analyse. The date in each file name is presumably the export
# date of the snapshot — confirm before swapping in newer files.
in_progress_file = "in-progress-2021-07-02.xlsx"
completed_file = "completed-2021-07-02.xlsx"

# Later cells keep only activities whose start date is strictly after
# cutoff_date; cutoff_text is the human-readable label used in the headings.
cutoff_date = "2020-05-31"
cutoff_text = "June 1, 2020"

# The data directory sits two levels above the notebook's working directory.
path = os.path.join(os.pardir, os.pardir, "Data")

# Load both workbooks with the project's reader, in-progress first.
in_progress, completed = (
    read_activities(os.path.join(path, workbook))
    for workbook in (in_progress_file, completed_file)
)

In [None]:
# Let pandas render up to 500 rows before it truncates a displayed frame.
pd.options.display.max_rows = 500

In [None]:
# Book 1 ("Reflections on the Life of the Spirit") study circles that are
# still in progress and whose start date falls after the reporting cutoff.
#
# The frame is built in a single chain so that every step yields a fresh
# DataFrame: adding a column with `df[col] = ...` to a boolean-filtered frame
# can trigger pandas' SettingWithCopyWarning.
book_1_in_progress = (
    in_progress[in_progress["sc_1_reflections_on_the_life_of_the_spirit"] == "In Progress"]
    # Parsed copy of start_date; the original column is left untouched.
    .assign(start_date_2=lambda df: pd.to_datetime(df["start_date"]))
    # cutoff_date is a date string; pandas parses it for the comparison.
    .loc[lambda df: df["start_date_2"] > cutoff_date]
    # Constant marker column so the rows can be counted with a plain sum.
    .assign(**{"In Progress": 1})
)

# Slim view used for the per-grouping / per-cluster counts.
b1_j1_started = book_1_in_progress[["grouping", "cluster_name", "In Progress"]]

In [None]:
# Book 2 ("Arising to Serve") study circles that are still in progress and
# whose start date falls after the reporting cutoff.
#
# The frame is built in a single chain so that every step yields a fresh
# DataFrame: adding a column with `df[col] = ...` to a boolean-filtered frame
# can trigger pandas' SettingWithCopyWarning.
book_2_in_progress = (
    in_progress[in_progress["sc_2_arising_to_serve"] == "In Progress"]
    # Parsed copy of start_date; the original column is left untouched.
    .assign(start_date_2=lambda df: pd.to_datetime(df["start_date"]))
    # cutoff_date is a date string; pandas parses it for the comparison.
    .loc[lambda df: df["start_date_2"] > cutoff_date]
    # Constant marker column so the rows can be counted with a plain sum.
    .assign(**{"In Progress": 1})
)

# Slim view used for the per-grouping / per-cluster counts.
b2_j1_started = book_2_in_progress[["grouping", "cluster_name", "In Progress"]]

In [None]:
# Completed Book 1 ("Reflections on the Life of the Spirit") study circles
# whose start date falls after the reporting cutoff.
#
# NOTE(review): `!= 0` is also True for NaN, so a row with a missing value in
# the book column would pass this filter — confirm read_activities fills blanks.
#
# Built as one chain so every step yields a fresh DataFrame; assigning a
# column to a boolean-filtered frame can trigger SettingWithCopyWarning.
book_1_completed = (
    completed[completed["sc_1_reflections_on_the_life_of_the_spirit"] != 0]
    # Parsed copy of start_date; the original column is left untouched.
    .assign(start_date_2=lambda df: pd.to_datetime(df["start_date"]))
    # cutoff_date is a date string; pandas parses it for the comparison.
    .loc[lambda df: df["start_date_2"] > cutoff_date]
    # Constant marker column so the rows can be counted with a plain sum.
    .assign(Completed=1)
)

# Slim view used for the per-grouping / per-cluster counts.
b1_j1_completed = book_1_completed[["grouping", "cluster_name", "Completed"]]

In [None]:
# Completed Book 2 ("Arising to Serve") study circles whose start date falls
# after the reporting cutoff.
#
# NOTE(review): `!= 0` is also True for NaN, so a row with a missing value in
# the book column would pass this filter — confirm read_activities fills blanks.
#
# Built as one chain so every step yields a fresh DataFrame; assigning a
# column to a boolean-filtered frame can trigger SettingWithCopyWarning.
book_2_completed = (
    completed[completed["sc_2_arising_to_serve"] != 0]
    # Parsed copy of start_date; the original column is left untouched.
    .assign(start_date_2=lambda df: pd.to_datetime(df["start_date"]))
    # cutoff_date is a date string; pandas parses it for the comparison.
    .loc[lambda df: df["start_date_2"] > cutoff_date]
    # Constant marker column so the rows can be counted with a plain sum.
    .assign(Completed=1)
)

# Slim view used for the per-grouping / per-cluster counts.
b2_j1_completed = book_2_completed[["grouping", "cluster_name", "Completed"]]

In [None]:
# Stack the "started" and "completed" rows of each book into a single table.
# Each input frame carries only one of the two marker columns, so concat leaves
# NaN in the other one; those gaps become 0 and both markers are cast to int.
# NOTE(review): fillna(0) applies to every column, so a missing grouping or
# cluster_name would also become 0 — confirm that is intended.
marker_dtypes = {"In Progress": "int", "Completed": "int"}

combined_1 = (
    pd.concat([b1_j1_started, b1_j1_completed])
    .fillna(0)
    .astype(marker_dtypes)
)

combined_2 = (
    pd.concat([b2_j1_started, b2_j1_completed])
    .fillna(0)
    .astype(marker_dtypes)
)

In [None]:
# Book 1 summary: region-wide totals, then counts per grouping and per cluster.
display(Markdown(f"# Book 1 Since {cutoff_text}"))
display(Markdown("## Summary Statistics"))
display(Markdown("### Total Count in Region"))

# Sum only the two marker columns. Summing the whole frame would also "sum"
# the grouping and cluster_name labels, which concatenates the strings or
# raises TypeError when a label column mixes strings with filled-in zeros.
count_columns = ["In Progress", "Completed"]
region = combined_1[count_columns].sum()
display(Markdown(f"In Progress: {region['In Progress']}, Completed: {region['Completed']}"))

display(Markdown("### Count by Grouping"))
grouped = combined_1.groupby(by=["grouping"])[count_columns]
display(grouped.sum())

display(Markdown("### Count by Cluster"))
grouped = combined_1.groupby(by=["grouping", "cluster_name"])[count_columns]
display(grouped.sum())

In [None]:
# Tutor experience for Book 1, combining completed and in-progress study circles.
book_1 = pd.concat([book_1_completed, book_1_in_progress])

# `facilitators` is split on "; " and exploded, giving one row per
# (study circle, tutor) pair so a co-tutored circle counts for each tutor.
#
# There is at least one row with NaN for tutor. groupby drops NaN keys by
# default, so such rows are simply left out of the statistics for now.
#
# The participant counts are cast to int once, up front, so the subtraction
# and the sums in the aggregation below all work on the same integer values
# (previously only the subtraction used the cast values).
book_1_by_tutor = (
    book_1
    .assign(facilitator=book_1["facilitators"].str.split("; "))
    .explode("facilitator")
    .astype({"total_participants": "int", "bahai_participants": "int"})
    .assign(
        wider_community=lambda df: df["total_participants"] - df["bahai_participants"]
    )
)

# Display labels for the statistics columns.
num_circles = "# of Study Circles"
total_wider = "# Participants from Wider Community"
total_part = "# Participants"

# One row per tutor: how many circles they appear in and how many
# participants (overall and from the wider community) those circles had.
b1_stats = (
    book_1_by_tutor
    .groupby("facilitator")
    .agg(
        num_study_circles=pd.NamedAgg(column="name", aggfunc="count"),
        total_wider_community=pd.NamedAgg(column="wider_community", aggfunc="sum"),
        total_participants=pd.NamedAgg(column="total_participants", aggfunc="sum"),
    )
    .rename_axis("Tutor")
    .rename(
        columns={
            "num_study_circles": num_circles,
            "total_wider_community": total_wider,
            "total_participants": total_part,
        }
    )
)

display(Markdown("## Experience Tutoring Book 1"))
display(Markdown("These data combine completed and active study circles"))
display(Markdown("### Have tutored > 1 Study Circle"))
display(b1_stats[b1_stats[num_circles] > 1].sort_values(by=num_circles, ascending=False))

display(Markdown("### Have Tutored > 1 From Wider Community"))
display(b1_stats[b1_stats[total_wider] > 1].sort_values(by=total_wider, ascending=False))

In [None]:
# Book 2 summary: region-wide totals, then counts per grouping and per cluster.
display(Markdown(f"# Book 2 Since {cutoff_text}"))
display(Markdown("## Summary Statistics"))
display(Markdown("### Total Count in Region"))

# Sum only the two marker columns. Summing the whole frame would also "sum"
# the grouping and cluster_name labels, which concatenates the strings or
# raises TypeError when a label column mixes strings with filled-in zeros.
count_columns = ["In Progress", "Completed"]
region = combined_2[count_columns].sum()
display(Markdown(f"In Progress: {region['In Progress']}, Completed: {region['Completed']}"))

display(Markdown("### Count by Grouping"))
grouped = combined_2.groupby(by=["grouping"])[count_columns]
display(grouped.sum())

display(Markdown("### Count by Cluster"))
grouped = combined_2.groupby(by=["grouping", "cluster_name"])[count_columns]
display(grouped.sum())

In [None]:
# Tutor experience for Book 2, combining completed and in-progress study circles.
book_2 = pd.concat([book_2_completed, book_2_in_progress])

# `facilitators` is split on "; " and exploded, giving one row per
# (study circle, tutor) pair so a co-tutored circle counts for each tutor.
#
# There is at least one row with NaN for tutor. groupby drops NaN keys by
# default, so such rows are simply left out of the statistics for now.
#
# The participant counts are cast to int once, up front, so the subtraction
# and the sums in the aggregation below all work on the same integer values
# (previously only the subtraction used the cast values).
book_2_by_tutor = (
    book_2
    .assign(facilitator=book_2["facilitators"].str.split("; "))
    .explode("facilitator")
    .astype({"total_participants": "int", "bahai_participants": "int"})
    .assign(
        wider_community=lambda df: df["total_participants"] - df["bahai_participants"]
    )
)

# Display labels for the statistics columns. They are set here too, so this
# cell runs even if the Book 1 tutor cell has not been executed.
num_circles = "# of Study Circles"
total_wider = "# Participants from Wider Community"
total_part = "# Participants"

# One row per tutor: how many circles they appear in and how many
# participants (overall and from the wider community) those circles had.
b2_stats = (
    book_2_by_tutor
    .groupby("facilitator")
    .agg(
        num_study_circles=pd.NamedAgg(column="name", aggfunc="count"),
        total_wider_community=pd.NamedAgg(column="wider_community", aggfunc="sum"),
        total_participants=pd.NamedAgg(column="total_participants", aggfunc="sum"),
    )
    .rename_axis("Tutor")
    .rename(
        columns={
            "num_study_circles": num_circles,
            "total_wider_community": total_wider,
            "total_participants": total_part,
        }
    )
)

display(Markdown("## Experience Tutoring Book 2"))
display(Markdown("These data combine completed and active study circles"))
display(Markdown("### Have tutored > 1 Study Circle"))
display(b2_stats[b2_stats[num_circles] > 1].sort_values(by=num_circles, ascending=False))

display(Markdown("### Have Tutored > 1 From Wider Community"))
display(b2_stats[b2_stats[total_wider] > 1].sort_values(by=total_wider, ascending=False))