In [1]:
from utils import Activities, Users, InfoParser

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.io as pio
pd.options.plotting.backend = "plotly"
import plotly.figure_factory as ff
from statsmodels.graphics.mosaicplot import mosaic

import seaborn as sns
import ptitprince as pt

png_renderer = pio.renderers["png"]
png_renderer.scale=3

pio.renderers.default = "notebook_connected"

import matplotlib.pyplot as plt

import numpy as np

from config_vars import month_map, month_order

In [2]:
def show_save(fig,filename,save=True,transparent = True):
        
    #transparent background
    if transparent:
        fig.update_layout({
        'paper_bgcolor': 'rgba(248, 249, 250, 1)',
        })
    
    
    config = dict({
        'displaylogo': False,
        #'modeBarButtonsToRemove': ["zoom2d", "pan2d", "select2d", "lasso2d", "zoomIn2d", "zoomOut2d", "autoScale2d", "resetScale2d", "hoverClosestCartesian", "hoverCompareCartesian", "zoom3d", "pan3d", "resetCameraDefault3d", "resetCameraLastSave3d", "hoverClosest3d", "orbitRotation", "tableRotation", "zoomInGeo", "zoomOutGeo", "resetGeo", "hoverClosestGeo", "sendDataToCloud", "hoverClosestGl2d", "hoverClosestPie", "toggleHover", "resetViews", "toggleSpikelines", "resetViewMapbox"]
    })
    
    if save:
        fig.write_html(f"{results_folder}{filename}.html" , include_plotlyjs = False, config=config)

    fig.update_layout(
        font=dict(
            size=9,
        ),
    )
    
    if save:
        fig.write_image(f"{results_folder}images/{filename}.png",scale=3)

    fig.show(config=config)

In [3]:
pd.options.display.max_columns = None
pd.options.display.max_rows    = None

In [79]:
data_folder = "../data/"
raw_folder = "../data/mysql_extractions/"
results_folder = "../report/results/"

#plotly style:
line_traces = dict(mode='lines+markers',line_shape='spline',line_smoothing=0.5,marker_size=10,marker_opacity=0.9)

color_discrete_map={
    "TI": "#3366CC",
    "GE": "#EF553B",
    "responses": "#990099",
    "requests": "#FF9900",
    "recipes": "#DD4477",
    "experiences": "#66AA00",
    2013: "#2E1E22",2014:"#F5853F",2015:"#FF5921",2016:"#25E6DC",
    2017:"#6174AB",2018:"#4C91D6",2019:"#61E02F",2020:"#F5E833",
    "overall mean": "magenta"
}
# https://coolors.co/2e1e22-f5853f-ff5921-25e6dc-6174ab-4c91d6-61e02f-f5e833



In [5]:
inii = InfoParser("../report/data_info.ini")

# Table of Contents
* [Data Loading and Cleaning](#Data-Loading-and-Cleaning)
* [Data Exploring](#Data-Exploring)
	* [Overview:](#Overview:)
	* [Years trends and differences:](#Years-trends-and-differences:)
		* [Number of activities](#Number-of-activities)
		* [Number of activities per student](#Number-of-activities-per-student)
		* [Number of active users](#Number-of-active-users)
		* [Number of feedbacks](#Number-of-feedbacks)
		* [Number of feedback responses per supervisor](#Number-of-feedback-responses-per-supervisor)
		* [Number of feedback requests per student](#Number-of-feedback-requests-per-student)
		* [Activities images](#Activities-images)
		* [Activities total lenght](#Activities-total-lenght)
	* [Months trends:](#Months-trends:)
		* [Activities per month](#Activities-per-month)
			* [Creation](#Creation)
			* [Edits](#Edits)
			* [Curriculum](#Curriculum)
		* [Feedbacks](#Feedbacks)
	* [Feedbacks](#Feedbacks)
		* [Ratio](#Ratio)
		* [Edits and delay](#Edits-and-delay)
	* [Logins](#Logins)


# Data Loading and Cleaning

### Loading

In [6]:
users = pd.read_csv(f"{data_folder}users.csv",sep="\t")
students = pd.read_csv(f"{data_folder}students.csv",sep="\t")
supervisors = pd.read_csv(f"{data_folder}supervisors.csv",sep="\t")
teachers = pd.read_csv(f"{data_folder}teachers.csv",sep="\t")

activities = pd.read_csv(f"{data_folder}activities.csv",sep="\t")

activities = activities.merge(users[['us_user','us_canton','gender']],
                              on='us_user',how="left")

a_fun = Activities()
a_fun.load_map_month(month_map,month_order)


feedbacks = pd.read_csv(f"{data_folder}activities_feedbacks_info.csv")
feedbacks = feedbacks.loc[feedbacks['ac_activity'].isin(activities['ac_activity']),:]
feedbacks = feedbacks.merge(activities[['ac_activity','creation_school_year','us_canton']],on="ac_activity",how="left")

df_months = pd.read_csv(f"{raw_folder}months.csv").fillna(0)
df_months.replace({'month':month_map},inplace=True)
df_months['month_order'] = df_months['month'].map(dict(zip(month_order,range(1,13))))
df_months.sort_values(by=['month_order','activity_school_year'],inplace=True)

y_users = df_months['n_users_per_year']
df_months['norm_avg_n_user_recipes']        = df_months['n_recipes'].divide(y_users)
df_months['norm_avg_n_user_experiences']    = df_months['n_experiences'].divide(y_users)
df_months['norm_avg_n_activities']          = df_months['n_activities'].divide(y_users)

months = df_months

logins = pd.read_csv(f"{raw_folder}logins.csv")

# Data Exploring

## Overview:

In [7]:
year_from = str(users.start_year.min()) + "-" + str(users.start_year.min()+1)
year_to   = str(users.start_year.max()) + "-" + str(users.start_year.max()+1)
n_users       = len(users)
n_supervisors = len(supervisors)
n_students    = len(students)
n_teachers    = len(teachers)
n_others_u    = n_users-(n_supervisors+n_students+n_teachers)
n_males       = len(users.query("gender == 1"))
n_females     = len(users.query("gender == 0"))
n_unknown     = n_users-n_males-n_females
users_from_TI = len(users.query("us_canton == 'TI'"))
users_from_GE = len(users.query("us_canton == 'GE'"))

inii.info('n_users',n_users)
inii.info('n_students',n_students)
inii.info('n_supervisors',n_supervisors)
inii.info('n_teachers',n_teachers)
inii.info('n_females',n_females)
inii.info('n_males',n_males)
inii.info('n_unknown',n_unknown)
inii.info('n_others_u',n_others_u)
inii.info('users_from_TI',users_from_TI)
inii.info('users_from_GE',users_from_GE)
inii.info('year_from',year_from)
inii.info('year_to',year_to)

print(f"There are {n_users} users: {n_students} students, {n_supervisors} supervisors, and {n_teachers} teachers")
print(f"{n_males} males, {n_females} females and {n_unknown} unknown")

There are 1002 users: 780 students, 177 supervisors, and 26 teachers
729 males, 257 females and 16 unknown


In [8]:
n_activities       = len(activities)
n_recipes          = len(activities.query("at_activityType == 'recipe'"))
n_experiences      = len(activities.query("at_activityType == 'experience'"))
activities_from_TI = len(activities.query("us_canton == 'TI'"))
activities_from_GE = len(activities.query("us_canton == 'GE'"))
activities_with_feedback_requests  = len(feedbacks)
activities_with_feedback_responses = feedbacks['response_date'].notna().sum()


inii.info('n_activities',n_activities)
inii.info('n_recipes',n_recipes)
inii.info('n_experiences',n_experiences)
inii.info('activities_from_TI',activities_from_TI)
inii.info('activities_from_GE',activities_from_GE)
inii.info('activities_with_feedback_requests',activities_with_feedback_requests)
inii.info('activities_with_feedback_responses',activities_with_feedback_responses)

In [57]:
data_hist = activities.groupby(by=["creation_school_year","us_canton"]).size().reset_index(name="count")
fig = data_hist.plot.scatter(x='creation_school_year',y='count',color="us_canton",color_discrete_map=color_discrete_map)
fig.update_xaxes(type="category")
fig.update_layout(
    title="Activities per year",
    title_x=0.5,
    yaxis_title="Number of activities",
    xaxis_title="Year",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)
fig.update_traces(line_traces)

show_save(fig,"activities_per_vintage")

In [58]:
data_hist = activities.groupby(by=["activity_school_year","creation_school_year","us_canton"]).size().reset_index(name="count")
fig = data_hist.plot.scatter(x='creation_school_year',y='count',facet_col="activity_school_year",
    color="us_canton",color_discrete_map=color_discrete_map)

fig.update_layout(
    title="Activities per year",
    title_x=0.5,
    yaxis_title="# of activities",
    xaxis_title="Year",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)

fig.update_xaxes(title=dict(text=""),tickangle=45,type="category")
fig.update_traces(line_traces)
show_save(fig,"activities_per_vintage_school_year",save=True)

In [12]:
fig =go.Figure(go.Sunburst(
   ids=["Users","Students","Supervisors","Teachers","st-Male","st-Female","sp-Male","sp-Female","tc-Male","tc-Female"],
   labels=["Users","Students","Supervisors","Teachers","Male","Female","Male","Female","Male","Female"],
   parents=["","Users","Users","Users", "Students", "Students", "Supervisors","Supervisors","Teachers","Teachers"],
   values=[len(users),len(students),len(supervisors),len(teachers),
           len(students.query("gender == 1")),len(students.query("gender == 0")),
           len(supervisors.query("gender == 1")),len(supervisors.query("gender == 0")),
           len(teachers.query("gender == 1")),len(teachers.query("gender == 0")),
          ],
   branchvalues="total",
   texttemplate = "%{label}: %{value}" ,
))
fig.update_layout(margin = dict(t=20, l=0, r=0, b=20))

fig.update_layout({
'plot_bgcolor': 'rgba(0, 0, 0, 0)',
'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
fig.show()
fig.write_html(f"{results_folder}users_type_gender.html" , include_plotlyjs = False)

In [14]:
labels = ['Recipes','Experiences']
values = [len(activities.query("at_activityType=='recipe'")), 
          len(activities.query("at_activityType=='experience'"))]

fig = go.Figure(data=[go.Pie(labels=labels, 
                             values=values,texttemplate = "<b>%{label}</b>: %{value}" ,
)])
fig.update_layout(margin = dict(t=20, l=0, r=0, b=20),showlegend=False)

fig.show()
fig.write_html(f"{results_folder}activities_type.html" , include_plotlyjs = False)

----------------

## Years trends and differences:

###  Number of activities

In [59]:
hist_data = activities.groupby(by=['creation_month','activity_school_year','us_canton'],sort=False).size().reset_index(name="count")
hist_data = a_fun.sort_month(hist_data,'creation_month',['activity_school_year'])


fig = hist_data.plot(x="creation_month", y=["count"], facet_col='activity_school_year', 
    color="us_canton",color_discrete_map=color_discrete_map,
    category_orders={"creation_month":month_order,"activity_school_year":[1,2,3],"start_year":[2013,2014,2015,2016,2017,2018,2019]}
)
fig.update_layout(
    title="Number of activities per month",
    title_x=0.5,
    yaxis_title="# of activities",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)
fig.update_xaxes(title=dict(text=""),tickangle=45)
fig.update_traces(line_traces)

show_save(fig,"activities_per_month",save=True)

In [80]:
canton = "TI"
hist_data = activities.query(f"us_canton == '{canton}'").groupby(by=['creation_month','activity_school_year','start_year'],sort=False).size().reset_index(name="count")
hist_data = a_fun.sort_month(hist_data,'creation_month',['activity_school_year','start_year'])

mean_hist = a_fun.sort_month(
    hist_data.groupby(by=["creation_month","activity_school_year"]).mean().round().reset_index(),
    'creation_month',['activity_school_year','start_year'])
mean_hist['start_year'] = "overall mean"

hist_data = hist_data.append(mean_hist, ignore_index=True)

fig = hist_data.plot(x="creation_month", y=["count"], facet_col='activity_school_year', color="start_year",
    color_discrete_map=color_discrete_map,
    category_orders={"creation_month":month_order,"activity_school_year":[1,2,3],"start_year":[2013,2014,2015,2016,2017,2018,2019]}
)
fig.update_layout(
    title="Number of activities per month per start year, Ticino",
    title_x=0.5,
    yaxis_title="# of activities",
    legend_orientation="h",legend=dict(x=0.5, y=-0.12,xanchor='center',yanchor='top',traceorder='normal'),legend_title_text=''
)
fig.update_xaxes(title=dict(text=""),tickangle=45)
fig.update_traces(line_traces)

show_save(fig,"activities_per_month_per_vintage_TI",save=True)

In [81]:
canton = "GE"
hist_data = activities.query(f"us_canton == '{canton}'").groupby(by=['creation_month','activity_school_year','start_year'],sort=False).size().reset_index(name="count")
hist_data = a_fun.sort_month(hist_data,'creation_month',['activity_school_year','start_year'])

mean_hist = a_fun.sort_month(
    hist_data.groupby(by=["creation_month","activity_school_year"]).mean().round().reset_index(),
    'creation_month',['activity_school_year','start_year'])
mean_hist['start_year'] = "overall mean"

hist_data = hist_data.append(mean_hist, ignore_index=True)

fig = hist_data.plot(x="creation_month", y=["count"], facet_col='activity_school_year', color="start_year",
    color_discrete_map=color_discrete_map,
    category_orders={"creation_month":month_order,"activity_school_year":[1,2,3],"start_year":[2013,2014,2015,2016,2017,2018,2019]}
)
fig.update_layout(
    title="Number of activities per month per start year, Genève",
    title_x=0.5,
    yaxis_title="# of activities",
    legend_orientation="h",legend=dict(x=0.5, y=-0.1,xanchor='center',yanchor='top',traceorder='normal'),legend_title_text=''
)
fig.update_xaxes(title=dict(text=""),tickangle=45)
fig.update_traces(line_traces)

show_save(fig,"activities_per_month_per_vintage_GE",save=True)

-----

### Number of activities per student

In [83]:
a = students.query("n_activities > 0").groupby('start_year')['n_activities'].apply(list)

In [86]:
# Create distplot with custom bin_size
fig = ff.create_distplot(a.values, a.index.astype("str"), bin_size=8, show_rug=False)
fig.show()

In [60]:
df = px.data.tips()
fig = px.histogram(students.sort_values(by=["start_year"]), x="n_activities", color="start_year",
                   marginal="box", # or violin, rug,
                   opacity=0.6,
                   nbins=100,
                  )
fig.update_layout(
    width=900,
    height=900,
)
fig.update_layout(barmode='overlay')
fig.show()

In [14]:
len(students.query("n_activities == 0"))

0

In [82]:
fig = students.sort_values(by=["start_year"]).plot.box(
    x="start_year",y="n_activities", color="start_year",
    hover_data =["us_user","n_activities","start_year"],
    color_discrete_map=color_discrete_map
)

fig.update_layout(
    title="Number of activities per student per start year",
    title_x=0.5,
    yaxis_title="Activities per student",
    xaxis_title="Start Year",
)

show_save(fig,"activities_per_user_per_vintage",save=True)

In [83]:
fig = activities.groupby(by=['us_user','creation_school_year']).size().reset_index(name="n_activities").sort_values(by=["creation_school_year"]).plot.box(
    x="creation_school_year",y="n_activities", color="creation_school_year",
    hover_data =["us_user","n_activities","creation_school_year"],
    color_discrete_map=color_discrete_map
)

fig.update_layout(
    title="Number of activities per student per year",
    title_x=0.5,
    yaxis_title="Activities per student",
    xaxis_title="Year",
    legend=dict(traceorder='normal'),legend_title_text='',
)

show_save(fig,"activities_per_user_per_year",save=True)

In [84]:
fig = activities.sort_values(by=["creation_school_year","activity_school_year"]).plot.box(
    x="creation_school_year",y="activity_total_length", 
    color="creation_school_year", facet_col="activity_school_year",
    hover_data =["ac_activity","ac_title","activity_total_length","start_year"],
    color_discrete_map=color_discrete_map
)

fig.update_layout(
    title="Activities total lenghts per creation year",
    title_x=0.5,
    showlegend = False,
    yaxis_title="Activity total lenght",
)

    
fig.update_xaxes(title=dict(text=""),tickangle=45,type="category")
fig.update_yaxes(range=[-10, 330])

show_save(fig,"activities_total_length_creation_year",save=True)

In [85]:
fig = activities.sort_values(by=["start_year","activity_school_year"]).plot.box(
    x="start_year",y="activity_total_length", 
    color="start_year", facet_col="activity_school_year",
    hover_data =["ac_activity","ac_title","activity_total_length","creation_year"],
    color_discrete_map=color_discrete_map
)

fig.update_layout(
    title="Activities total lenghts per start year",
    title_x=0.5,
    showlegend = False,
    yaxis_title="Activity total lenght",
)

    
fig.update_xaxes(title=dict(text=""),tickangle=45,type="category")
fig.update_yaxes(range=[-10, 330])

show_save(fig,"activities_total_length_start_year",save=True)

In [146]:
fig = activities.query("activity_total_length <= 330").plot.hist(x="activity_total_length",nbins=100)

fig.update_layout(
    title="Activities total lenght",
    title_x=0.5,
    yaxis_title="# of activities",
    xaxis_title="total length",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)

show_save(fig,"ativities_total_length_distr",save=True)

------------

### Number of active users

In [21]:
hist_data = activities.groupby(by=['activity_school_year','start_year','us_user'],sort=False).size().reset_index(name="count")
active_users_per_year = hist_data.query("count > 0").drop("us_user",axis=1).groupby(by=['activity_school_year','start_year'],sort=False).size().reset_index(name="count").sort_values(by=["start_year","activity_school_year"])
activities_per_user = hist_data.drop("us_user",axis=1).groupby(by=['activity_school_year','start_year'],sort=False).mean().reset_index().sort_values(by=["start_year","activity_school_year"]).rename(columns={'count':'activities_per_user'})
df = active_users_per_year.merge(activities_per_user, on=["activity_school_year","start_year"],how="left")

In [23]:
fig = df.plot.scatter(x="start_year", y="count", facet_col='activity_school_year', color="activities_per_user")
fig.update_layout(
    title="Number of active users per year",
    title_x=0.5,
    yaxis_title="# of actitve users",
    legend_orientation="h",legend=dict(x=0.5, y=-0.1,xanchor='center',yanchor='top'),legend_title_text=''
)
fig.update_xaxes(title=dict(text=""),tickangle=45, type="category")
fig.update_traces(line_traces)
fig.update_traces(line_traces,line=dict(color="gray"))

fig.show()

-----------
### Number of feedbacks

In [138]:
hist_requests = feedbacks.groupby(by=["request_year_school","us_canton"]).size().reset_index(name="count").assign(type = 'requests').rename(columns={'request_year_school':'year'})
hist_responses = feedbacks.groupby(by=["response_year_school","us_canton"]).size().reset_index(name="count").assign(type = 'responses').rename(columns={'response_year_school':'year'})
hist_data = hist_requests.append(hist_responses)

In [141]:
fig = hist_data.plot(
    x="year", y=["count"],color="type", facet_row="us_canton",
    color_discrete_map=color_discrete_map, height=650
)

fig.update_layout(
    title="Total number of feedback requests and responses per year per canton",
    title_x=0.5,
    legend_orientation="h",legend=dict(x=0.5, y=-0.12,xanchor='center',yanchor='top'),legend_title_text=''
)
fig.update_yaxes(title=dict(text="count"))
fig.update_xaxes(title=dict(text=""),tickangle=45)
fig.update_traces(line_traces)

fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))

show_save(fig,"feedbacks_requests_and_responses_per_year_per_canton",save=True)

In [142]:
hist_requests = feedbacks.groupby(by=["request_year_school","us_canton"]).size().reset_index(name="count").assign(type = 'requests').rename(columns={'request_year_school':'year'})
hist_responses = feedbacks.groupby(by=["response_year_school","us_canton"]).size().reset_index(name="count").assign(type = 'responses').rename(columns={'response_year_school':'year'})
hist_data = hist_requests.append(hist_responses)

fig = hist_data.plot.bar(x="year",y="count",color="type", 
    barmode="group", facet_row="us_canton",
    color_discrete_map=color_discrete_map
)
fig.update_layout(
    title="Total number of feedback requests and responses per year",
    title_x=0.5,
    legend_orientation="h",legend=dict(x=0.5, y=-0.1,xanchor='center',yanchor='top'),legend_title_text=''
)

fig.update_yaxes(title=dict(text="count"))
fig.update_xaxes(title=dict(text=""),tickangle=45)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))

show_save(fig,"feedbacks_requests_and_responses_per_year_per_canton_bar",save=True)

In [143]:
feedbacks['response_month'] = pd.DatetimeIndex(feedbacks['response_date']).month
feedbacks.replace({'response_month':month_map},inplace=True)
feedbacks['request_month'] = pd.DatetimeIndex(feedbacks['request_date']).month
feedbacks.replace({'request_month':month_map},inplace=True)

hist_requests = a_fun.sort_month(
    feedbacks.groupby(['activity_school_year','request_month']).size().reset_index(name="count"),
    'request_month',['activity_school_year']
).rename(columns={'request_month':'month'}).assign(type = 'requests')

hist_responses = a_fun.sort_month(
    feedbacks.groupby(['activity_school_year','response_month']).size().reset_index(name="count"),
    'response_month',['activity_school_year']
).rename(columns={'response_month':'month'}).assign(type = 'responses')

hist_data = hist_requests.append(hist_responses)

In [136]:
fig = hist_data.plot(
    x="month", y=["count"],facet_col='activity_school_year', color="type",
    color_discrete_map=color_discrete_map
)

fig.update_layout(
    title="Total number of feedback requests and responses per month",
    title_x=0.5,
    yaxis_title="count",
    legend_orientation="h",legend=dict(x=0.5, y=-0.1,xanchor='center',yanchor='top'),legend_title_text=''
)
fig.update_xaxes(title=dict(text=""),tickangle=45)
fig.update_traces(line_traces)

show_save(fig,"feedbacks_requests_and_responses_per_month_per_school_year",save=True)

In [140]:
fig = students.query("n_feedback_requests <= 30").plot.hist(x="n_feedback_requests",nbins=31,)

fig.update_layout(yaxis_type="log")
fig.update_layout(
    title="Log10 distrubution of number of feedbacks requests per user",
    title_x=0.5,
    yaxis_title="Log10 users count",
    xaxis_title="feedback requests",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)

show_save(fig,"requests_per_users_distr",save=True)

In [101]:
hist_data = feedbacks.groupby(by=['activity_school_year','request_year_school','us_canton'],sort=False).size().reset_index(name="count")

fig = hist_data.plot(x="request_year_school", y=["count"], facet_col='activity_school_year', 
                     color="us_canton", color_discrete_map=color_discrete_map)
fig.update_layout(
    title="Number of feedback requests per year",
    title_x=0.5,
    yaxis_title="# of feedbacks",
    xaxis_title="request year",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)
fig.update_xaxes(title=dict(text=""),tickangle=45,type="category")
fig.update_traces(line_traces)

show_save(fig,"feedbacks_requests_per_year_per_school_year_per_canton",save=True)

In [110]:
hist_data = feedbacks.groupby(by=['activity_school_year','response_year_school','us_canton'],sort=False).size().reset_index(name="count")

fig = hist_data.sort_values(by="response_year_school").plot(x="response_year_school", y=["count"], facet_col='activity_school_year', 
                     color="us_canton",
                     color_discrete_map=color_discrete_map)
fig.update_layout(
    title="Number of feedback requests per year",
    title_x=0.5,
    yaxis_title="# of feedbacks",
    xaxis_title="request year",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)
fig.update_xaxes(title=dict(text=""),tickangle=45,type="category")
fig.update_traces(line_traces)

show_save(fig,"feedbacks_responses_per_year_per_school_year_per_canton",save=True)

In [152]:
data_hist = feedbacks.groupby(by=['recipient','response_year_school','us_canton']).size().reset_index(name="count")

fig = data_hist.sort_values(by=["response_year_school"]).plot.box(
    x="response_year_school",y="count", color="us_canton", 
    hover_data =["recipient","response_year_school","count"],
    color_discrete_map=color_discrete_map
)

fig.update_layout(
    title="Number of feedbacks responses per supervisor",
    title_x=0.5,
    yaxis_title="Feedbacks per supervisor",
    xaxis_title="Response year",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)
fig.update_yaxes(range=[-3, 60])

show_save(fig,"feedbacks_per_response_year_per_supervisor",save=True)

In [151]:
data_hist = feedbacks.groupby(by=['sender','request_year_school','us_canton']).size().reset_index(name="n_feedback_requests")
fig = data_hist.sort_values(by=["request_year_school"]).plot.box(
    x="request_year_school",y="n_feedback_requests", color="us_canton",
    color_discrete_map=color_discrete_map,
)

fig.update_layout(
    title="Number of feedback requests per student",
    title_x=0.5,
    yaxis_title="Requests per student",
    xaxis_title="Request year",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)
fig.update_yaxes(range=[-3, 60])

show_save(fig,"feedbacks_per_request_year_per_student",save=True)

In [153]:
fig = students.query("n_feedback_requests > 0").sort_values(by=["start_year"]).plot.box(
    x="start_year",y="n_feedback_requests", color="us_canton",
    hover_data =["us_user","user_name","n_feedback_requests","start_year"],
    color_discrete_map=color_discrete_map
)

fig.update_layout(
    title="Number of feedback requests per student per start year",
    title_x=0.5,
    yaxis_title="Requests per student",
    xaxis_title="Start year",
    legend=dict(traceorder='normal'),legend_title_text='Canton',
)
fig.update_yaxes(range=[-3, 100])

show_save(fig,"feedbacks_per_vintage_per_student",save=True)

In [159]:
feedbacks['has_response'] = feedbacks['response_date'].notnull()

fig = a_fun.sort_month(
    feedbacks.groupby(["request_month","activity_school_year"]).sum()['has_response'].div(
        feedbacks.groupby(["request_month","activity_school_year"]).size()
    ).reset_index(name="ratio").assign(
        total = feedbacks.groupby(["request_month","activity_school_year"]).size().values.astype('int64')
    ),'request_month',['activity_school_year']
).plot.scatter(
    x="request_month", y=["ratio"],facet_col='activity_school_year', 
    size="total", 
    color="total",
    size_max=60
)

fig.update_layout(
    title="Ratio responses/requests",
    title_x=0.5,
    yaxis_title="ratio",
    showlegend = False,
    coloraxis_colorbar=dict(
        title="Requests",
    ),
)
fig.update_xaxes(title=dict(text=""),tickangle=45)
fig.update_traces(line_traces,line=dict(color="gray"))

show_save(fig,"feedbacks_ratio_per_activity_school_year_per_month",save=True)

In [163]:
tot_req = feedbacks.groupby(['recipient']).size().reset_index(name="received").rename(columns={'recipient':'us_user'})
ratio = tot_req.merge(feedbacks.query("has_response == True").groupby(['recipient']).size().reset_index(name="responsed").rename(columns={'recipient':'us_user'}),on="us_user",how="left").fillna(0)
ratio['ratio'] = ratio['responsed'].div(ratio['received'])
ratio = ratio.merge(supervisors[['us_user','start_year','user_name']],on="us_user",how="right").dropna()
ratio.sort_values(by="ratio",inplace=True)

fig = ratio.plot.scatter(x="us_user",y="ratio",color="received")
fig.update_layout(
    title="Ratio responses/requests per each supervisor", 
    title_x=0.5,
    xaxis_title='Supervisor',
    yaxis_title='Ratio',
    xaxis_type='category',
    coloraxis_colorbar=dict(
        title="Requests",
    ),
)
fig.update_traces(mode='markers',opacity=0.8,)

fig.add_trace(ratio.plot.bar(x="us_user",y="ratio",color="received").data[0])

fig.update_xaxes(showticklabels=False)
show_save(fig,"feedbacks_ratio_per_each_supervisor",save=True)

#### Edits and delay

In [198]:
df_feedbacks_info = feedbacks.copy()

In [203]:
n_requests  = df_feedbacks_info['request_date'].notnull().sum()
n_responses = df_feedbacks_info['response_date'].notnull().sum()
print(f"There are {n_responses} responses over {n_requests} requests ({round(n_responses/n_requests*100,3)}%)")

no_grade = df_feedbacks_info['student_grade'].isnull().sum()
print(f'There are {no_grade}/{len(df_feedbacks_info)} without student grade ({round(no_grade/len(df_feedbacks_info),4)*100}%)')

no_answer = df_feedbacks_info['response_date'].isnull().sum()
print(f'There are {no_answer}/{len(df_feedbacks_info)} without answer ({round(no_answer/len(df_feedbacks_info),3)*100}%)')

no_sup_grade = df_feedbacks_info['supervisor_grade'].isnull().sum()
print(f'There are {no_sup_grade}/{len(df_feedbacks_info)} without supervisor grade ({round(no_sup_grade/len(df_feedbacks_info),4)*100}%)')

response_no_grade = (df_feedbacks_info['supervisor_grade'].isnull()).sum()
print(f'There are {response_no_grade} responses without grades')

There are 1981 responses over 4819 requests (41.108%)
There are 406/4819 without student grade (8.42%)
There are 2838/4819 without answer (58.9%)
There are 2900/4819 without supervisor grade (60.18%)
There are 2900 responses without grades


In [170]:
df_feedbacks_info.dropna(inplace=True)
df_feedbacks_info['delay_days'] = df_feedbacks_info['delay_hours']/24

In [190]:
df_hist = df_feedbacks_info.groupby(["activity_school_year","times_before_answer"]).count()[['ac_activity']].reset_index().rename(columns={'ac_activity': 'count'})


fig = df_hist.query("times_before_answer < 5").plot.hist(x="times_before_answer", y='count', 
                         color = "activity_school_year", histnorm='percent' )

fig.update_layout(
    barmode='group',
    xaxis_type='category',
    title="Requests before a feedback",
    title_x=0.5,
    yaxis_title="Percent",
    xaxis_title="Number of requests before the response",
    coloraxis_showscale=False,
    legend=dict(traceorder='normal'),legend_title_text='School Year',
)


show_save(fig,"feedbacks_request_before_answer",save=True)

In [191]:
fig = df_feedbacks_info[df_feedbacks_info['delay_days']<100].plot.hist(x="delay_days",  nbins=30,
                         color = "activity_school_year" )

fig.update_layout(
    barmode='group',
    title="Histogram: days before an answer",
    title_x=0.5,
    yaxis_title="Count",
    xaxis_title="days before an answer",
    coloraxis_showscale=False,
    legend=dict(traceorder='normal'),legend_title_text='School Year',
)

show_save(fig,"feedbacks_days_before_answer",save=True)

In [192]:
fig = df_feedbacks_info[df_feedbacks_info['delay_days']<100].plot.hist(x="delay_days",  nbins=30,
                         color = "activity_school_year", histnorm='percent' )

fig.update_layout(
    barmode='group',
    title="Histogram: number of days before an answer",
    title_x=0.5,
    yaxis_title="Percent",
    xaxis_title="days before an answer",
    coloraxis_showscale=False,
    legend=dict(traceorder='normal'),legend_title_text='School Year',
)

show_save(fig,"feedbacks_days_before_answer_normalized",save=True)

In [193]:
fig = df_feedbacks_info[df_feedbacks_info['edits_after']<5].plot.hist(x="edits_after",  nbins=5,
                         color = "activity_school_year", histnorm='percent' )

fig.update_layout(
    barmode='group',
    title="Histogram: number of edits after a feedback",
    title_x=0.5,
    yaxis_title="Percent",
    xaxis_title="# of edits after an answer per feedback request",
    legend=dict(traceorder='normal'),legend_title_text='School Year',
)

show_save(fig,"feedbacks_edits_after_answer",save=True)

In [183]:
df_feedbacks_info['has_edit_after'] = (df_feedbacks_info['edits_after']>0).astype(int)
df_count = df_feedbacks_info.groupby(["activity_school_year","supervisor_grade","has_edit_after"]).size().reset_index(name='count')

c = (pd.core.reshape.util.cartesian_product([df_count['activity_school_year'].unique(),df_count['supervisor_grade'].unique(),df_count['has_edit_after'].unique()]))
c = pd.DataFrame(dict(activity_school_year=c[0],supervisor_grade=c[1],has_edit_after=c[2]))
c = c.set_index(["activity_school_year","supervisor_grade","has_edit_after"]).join(df_count.set_index(["activity_school_year","supervisor_grade","has_edit_after"])).reset_index().sort_values(by=['activity_school_year','supervisor_grade'])
c.fillna(0,inplace=True)

df_feedback_edits =  c[c['has_edit_after']==True].drop("has_edit_after",axis=1).rename(columns={'count':'edit'})
df_feedback_edits['no_edit'] = c[c['has_edit_after']==False]['count'].tolist()

df_feedback_edits['ratio'] = df_feedback_edits['edit'].div(df_feedback_edits['no_edit']+df_feedback_edits['edit']).fillna(0)

In [195]:
fig = df_feedback_edits.plot(x="supervisor_grade", y=["ratio"],
                             line_group='activity_school_year', color='activity_school_year')
fig.update_layout(
    title="% activities that have been edit after a feedback per grade",
    title_x=0.5,
    yaxis_title="% activities",
    xaxis_title="supervisor grade",
    legend=dict(traceorder='normal'),legend_title_text='School Year',
)
fig.update_traces(line_traces)

show_save(fig,"feedbacks_edits_after_grades",save=True)