In [1]:
import pandas as pd
pd.options.plotting.backend = "plotly"

import numpy as np

from am4chart import *

In [2]:
def get_loc(df,ix,default=0,cols=None):
    """Return ``df.loc[ix]`` (optionally narrowed to ``cols``) or ``default`` on a failed lookup.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Object to index.
    ix : label or tuple of labels
        Row key handed to ``df.loc``.
    default : object, default 0
        Value returned when the lookup fails.
    cols : label or list of labels, optional
        Selection applied to the row found by ``ix``. ``None`` returns the
        whole row.

    Returns
    -------
    object
        The selected value(s), or ``default`` if the lookup raised
        ``KeyError`` or ``TypeError``.
    """
    try:
        row = df.loc[ix]
        # `is None` rather than truthiness, so a column labelled 0 is usable.
        return row if cols is None else row[cols]
    except (KeyError, TypeError):
        # A missing label raises KeyError; TypeError is kept for backward
        # compatibility with the original handler.
        return default

def normalizePer(df,col,sum_col,group_col,copy_of = None,new_name=None):
    """Write a per-group percentage column into ``df`` (in place).

    Each row receives ``df[copy_of] / (group total of sum_col / 100)``, where
    the total is the sum of ``sum_col`` over all rows sharing the row's
    ``group_col`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    col : str
        Default name for both the source and the destination column.
    sum_col : str
        Column summed per group to obtain the denominator.
    group_col : str
        Column defining the groups.
    copy_of : str, optional
        Source column holding the numerators; defaults to ``col``.
    new_name : str, optional
        Destination column; defaults to ``col``.

    Returns
    -------
    None

    Notes
    -----
    The group mask used to be built from the notebook-level ``df_months``
    instead of ``df``, so any other frame was normalised with the wrong rows.
    Everything is now derived from ``df`` itself. Rows whose ``group_col`` is
    NaN belong to no group and get NaN.
    """
    target = new_name or col
    source = copy_of or col

    # Broadcast each group's total back onto its rows, then divide once.
    group_totals = df.groupby(group_col)[sum_col].transform('sum')
    df[target] = df[source].divide(group_totals/100)

In [3]:
# Disable pandas' column/row truncation so wide frames are displayed in full.
pd.options.display.max_columns = None
pd.options.display.max_rows    = None

In [4]:
# Relative folder prefix prepended to every CSV filename read in this notebook.
data_folder = "../data/"

In [12]:
# Chart factory used by every chart cell below (am4.LineChart / am4.ColumnChart).
# NOTE(review): `Amchart` presumably comes from the `from am4chart import *` star-import — confirm.
am4 = Amchart()

# INFO:
## Raw information:
 - Total activities: 33,728
 - Total users: 1,052
 - Start year: 2013 (104 users)
     - Following years: 2014: 113, 2015: 105, 2016: 103, 2017: 81, 2018: 144, 2019: 155

All the data in this notebook come from merging two databases in Ticino. The merge is not perfect yet: some records are redundant (to be fixed in the future).

[months](http://chili.ineeda.space/data/months.htm),
[months_supervisors](http://chili.ineeda.space/data/months_supervisors.htm),
[users](http://chili.ineeda.space/data/users.htm),
[activities](http://chili.ineeda.space/data/activities.htm)

[other notebook](http://chili.ineeda.space/notebooks/preprocess_pipeline.html)

# Data Loading and Cleaning

In [5]:
# Calendar month number -> three-letter English abbreviation.
month_map = dict(enumerate(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    start=1,
))
# Months in the order used for sorting and plotting: August first, July last.
month_order = [month_map[(offset % 12) + 1] for offset in range(7, 19)]

In [6]:
# Position (1..12) of every month name inside `month_order`.
month_rank = {name: rank for rank, name in enumerate(month_order, start=1)}

# Load the monthly aggregates, zero-fill missing values, turn month numbers
# into names, and sort by `month_order` position, then by school year.
df_months = (
    pd.read_csv(f"{data_folder}months.csv")
      .fillna(0)
      .replace({'month': month_map})
      .assign(month_order=lambda frame: frame['month'].map(month_rank))
      .sort_values(by=['month_order', 'activity_school_year'])
)

df_months.head()

Unnamed: 0,month,activity_school_year,n_users_per_year,n_logins,n_activities,n_recipes,n_experiences,avg_n_user_activities,avg_n_user_recipes,avg_n_user_experiences,n_files,n_files_recipes,n_files_experiences,avg_n_files,avg_n_files_recipes,avg_n_files_experiences,std_n_files,std_n_files_recipes,std_n_files_experiences,n_feedback_requests,n_feedback_responses,n_feedback_requests_recipes,n_feedback_responses_recipes,n_feedback_requests_experiences,n_feedback_responses_experiences,n_in_curriculum,n_in_curriculum_recipes,n_in_curriculum_experiences,n_in_curriculum_insert_date,n_in_curriculum_insert_date_recipes,n_in_curriculum_insert_date_experiences,avg_activity_total_length,std_activity_total_length,avg_len_descriptions,std_len_descriptions,avg_len_steps,std_len_steps,avg_len_observations,std_len_observations,avg_sum_len_reflections,std_avg_sum_len_reflections,avg_avg_len_reflections,std_avg_len_reflections,avg_len_bilancio,std_len_bilancio,avg_len_competenze,std_len_competenze,avg_len_miglioramenti,std_len_miglioramenti,avg_len_critici,std_len_critici,total_reflections,total_null_reflections,n_edits,perc_total_feedback_requests,perc_total_feedback_requests_recipes,perc_total_feedback_requests_experiences,perc_feedback_responses,perc_feedback_responses_recipes,perc_feedback_responses_experiences,perc_in_curriculum,perc_recipes_in_curriculum,perc_experiences_in_curriculum,perc_in_curriculum_insert_date,perc_recipes_in_curriculum_insert_date,perc_experiences_in_curriculum_insert_date,month_order
21,Aug,1,547,175,71.0,29.0,42.0,3.55,1.45,2.1,60.0,43.0,17.0,6.0,5.0,1.0,3.3534,4.161,1.42,0.0,0.0,0.0,0.0,0.0,0.0,65.0,28.0,37.0,37.0,24.0,13.0,103.0,76.14,5.0,5.5,92.0,73.33,10.0,23.66,1.31,1.79,0.63,0.6,0.75,0.43,0.75,0.43,1.0,0.94,2.75,7.56,9.0,7.0,208.0,0.0,0.0,0.0,0.0,0.0,0.0,91.55,39.44,52.11,52.11,33.8,18.31,1
22,Aug,2,371,1420,532.0,273.0,259.0,6.05,3.1,2.94,1555.0,1300.0,255.0,10.0,9.0,2.0,4.8276,5.8386,4.0261,89.0,72.0,54.0,48.0,35.0,24.0,478.0,232.0,246.0,26.0,12.0,14.0,128.0,86.92,7.0,12.96,113.0,77.48,5.0,12.03,7.26,9.32,4.48,7.21,7.15,11.64,9.72,13.62,7.02,15.62,5.14,11.72,173.0,51.0,3206.0,16.73,19.78,13.51,13.53,17.58,9.27,89.85,43.61,46.24,4.89,2.26,2.63,1
23,Aug,3,224,910,585.0,152.0,433.0,9.75,2.53,7.22,654.0,490.0,164.0,11.0,10.0,1.0,6.0734,7.4977,2.8684,51.0,29.0,17.0,14.0,34.0,15.0,516.0,90.0,426.0,0.0,0.0,0.0,169.0,125.11,8.0,15.89,153.0,113.59,6.0,12.37,6.23,5.72,4.7,5.29,5.12,11.23,10.37,15.02,4.63,6.36,4.79,8.43,158.0,12.0,5107.0,8.72,11.18,7.85,4.96,9.21,3.46,88.21,15.38,72.82,0.0,0.0,0.0,1
24,Sep,1,547,521,173.0,96.0,77.0,2.62,1.45,1.17,355.0,248.0,107.0,8.0,6.0,2.0,5.2481,6.2521,2.8655,28.0,18.0,24.0,14.0,4.0,4.0,123.0,52.0,71.0,19.0,15.0,4.0,120.0,104.8,7.0,11.09,106.0,94.23,11.0,21.22,8.72,10.38,4.62,8.28,7.57,11.91,13.61,24.1,8.36,12.45,5.33,9.47,43.0,11.0,674.0,16.18,25.0,5.19,10.4,14.58,5.19,71.1,30.06,41.04,10.98,8.67,2.31,2
25,Sep,2,371,1666,614.0,303.0,311.0,3.81,1.88,1.93,1807.0,1299.0,508.0,10.0,8.0,2.0,5.7121,6.9026,4.6594,106.0,73.0,63.0,45.0,43.0,28.0,567.0,266.0,301.0,56.0,19.0,37.0,146.0,128.25,13.0,30.95,123.0,102.28,9.0,14.74,7.08,7.56,4.88,6.78,6.28,12.3,10.13,12.5,6.05,9.75,5.88,11.39,221.0,91.0,4782.0,17.26,20.79,13.83,11.89,14.85,9.0,92.35,43.32,49.02,9.12,3.09,6.03,2


In [7]:
# Divide the monthly counts by the `n_users_per_year` value of the same row.
y_users = df_months['n_users_per_year']
for count_col, norm_col in (
    ('n_recipes',     'norm_avg_n_user_recipes'),
    ('n_experiences', 'norm_avg_n_user_experiences'),
    ('n_activities',  'norm_avg_n_activities'),
):
    df_months[norm_col] = df_months[count_col].divide(y_users)

# Data Exploring

## Apprentices

### # of activities

In [10]:
# One facet per school year; log-scaled y axis; lines with markers.
activity_cols = ["n_recipes", "n_experiences", "n_feedback_responses", "n_edits"]
fig = df_months.plot(x="month", y=activity_cols, facet_col='activity_school_year')

# Horizontal legend centred below the plot, with no legend title.
legend_style = dict(orientation="h", x=0.5, y=-0.1, xanchor='center', yanchor='top')
fig.update_layout(yaxis_title="", legend=legend_style, legend_title_text='')

fig.update_xaxes(title=dict(text=""), tickangle=45)
fig.update_yaxes(type="log")
fig.update_traces(mode='lines+markers')

fig.show()

In [17]:
# Line chart of four monthly counters from df_months (activities, feedback
# requests, feedback responses, edits), requested on a log scale.
# Rows are sorted by school year, then by `month_order`.
# NOTE(review): the title does not mention `n_edits`, which is also plotted.
chart = am4.LineChart()
chart.setTitle("Activities, feedback requests, feedback responses")
chart.set_y_label("Count")
chart.set_x_label("month")
chart.fromDataFrame(df_months, ["month","activity_school_year"], ["n_activities","n_feedback_requests","n_feedback_responses","n_edits"], 
                     hue="activity_school_year", hue_prefix="year ",
                     sorter=['activity_school_year','month_order'],
                     separate_series=True, range_divider='activity_school_year',
                     show_legend = False, x_axis_names="month", log_scale=True
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [10]:
# Line chart of the total number of activities per month, one hue per school year.
chart = am4.LineChart()
chart.setTitle("Total number of activities per month")
chart.set_y_label("# activities")
chart.set_x_label("month")
# Sort on `month_order` (Aug..Jul position), not on `month`: after the load
# cell `month` holds names such as 'Apr'/'Aug', which would sort alphabetically.
chart.fromDataFrame(df_months, ["month","activity_school_year"], "n_activities", 
                     hue="activity_school_year", hue_prefix="year ",
                     sorter=['activity_school_year','month_order'],
                     range_divider = "activity_school_year",
                     show_legend = True, x_axis_names="month"
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [11]:
# Line chart of `norm_avg_n_activities` (n_activities / n_users_per_year)
# per month, one hue per school year.
chart = am4.LineChart()
chart.setTitle("Normalized [bold]average[/] number of activities per users per month")
chart.set_y_label("average activities")
chart.set_x_label("month")
# Sort on `month_order` (Aug..Jul position), not on `month`: after the load
# cell `month` holds names such as 'Apr'/'Aug', which would sort alphabetically.
chart.fromDataFrame(df_months,["month","activity_school_year"],
                     "norm_avg_n_activities", 
                     hue="activity_school_year", hue_prefix="year ",
                     sorter=['activity_school_year','month_order'],
                     range_divider = "activity_school_year",
                     show_legend = True, x_axis_names="month"
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [12]:
# Column chart of the per-user normalised recipe and experience counts,
# with school year on the x axis and `month` passed as hue.
column = am4.ColumnChart()
column.setTitle("Average number of [bold]activities[/] per user per month")
column.set_y_label("average activities")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ["norm_avg_n_user_recipes","norm_avg_n_user_experiences"], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

----------

### Files

In [13]:
# normalize wrt number of activities per month
df_months['norm_n_files_recipes'] = df_months['n_files_recipes'].divide(df_months['n_activities'])
df_months['norm_n_files_experiences'] = df_months['n_files_experiences'].divide(df_months['n_activities'])

column = am4.ColumnChart()
column.setTitle("[bold]Normalized[/] number of activities's files per month")
column.set_y_label("# of files per activity")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['norm_n_files_recipes','norm_n_files_experiences'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

And the average grouped by years:

In [14]:
# Per-school-year mean of the monthly file statistics.
# Note that `std_n_files` is averaged here, i.e. the result is a mean of
# monthly standard deviations, not a standard deviation over the year.
file_stat_cols = ['n_files', 'avg_n_files', 'std_n_files']
df_months.groupby('activity_school_year')[file_stat_cols].mean()

Unnamed: 0_level_0,n_files,avg_n_files,std_n_files
activity_school_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2738.5,9.666667,6.282992
2,1906.083333,10.583333,7.11145
3,853.666667,9.833333,5.787942


### Feedbacks and Curriculum

In [15]:
# Column chart of the `perc_total_feedback_requests_*` columns
# (recipes / experiences), school year on x, `month` as hue.
# NOTE(review): a later cell overwrites these two columns through
# normalizePer, so this chart depends on running before that cell.
column = am4.ColumnChart()
column.setTitle("% of activities with requests for feedback per month")
column.set_y_label("% activities with request")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['perc_total_feedback_requests_recipes','perc_total_feedback_requests_experiences'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
# Overwrite the two `perc_total_feedback_requests_*` columns with the share
# (in %) of each month's requests over the school-year total of requests.
for activity_kind in ('recipes', 'experiences'):
    normalizePer(
        df_months,
        f'perc_total_feedback_requests_{activity_kind}',
        'n_feedback_requests',
        'activity_school_year',
        f'n_feedback_requests_{activity_kind}',
    )

In [17]:
# Same two columns as the previous feedback chart, plotted after they have
# been overwritten by normalizePer with the per-school-year share of requests.
# NOTE(review): the y label still reads "% activities with request" — confirm
# it matches the normalised quantity.
column = am4.ColumnChart()
column.setTitle("% of feedback requests over the school year")
column.set_y_label("% activities with request")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['perc_total_feedback_requests_recipes','perc_total_feedback_requests_experiences'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

TODO: try with request date

In [18]:
# Column chart of `perc_recipes_in_curriculum`, school year on x, `month` as hue.
# NOTE(review): the title speaks of "activities" but only the recipes column
# is plotted — confirm whether experiences should be included.
column = am4.ColumnChart()
column.setTitle("% of activities in curriculum per month")
column.set_y_label("% activities in curriculum")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['perc_recipes_in_curriculum'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

TODO: double-check the n_activities query because of the "final" flag.
TODO: check whether the two should be multiplied.

In [19]:
# Create `norm_perc_*_in_curriculum`: in-curriculum counts expressed as a
# percentage of the school year's total `n_activities`.
for activity_kind in ('recipes', 'experiences'):
    normalizePer(
        df_months,
        f'norm_perc_{activity_kind}_in_curriculum',
        'n_activities',
        'activity_school_year',
        f'n_in_curriculum_{activity_kind}',
    )

In [20]:
# Column chart of the two `norm_perc_*_in_curriculum` columns produced by
# normalizePer, school year on x, `month` as hue.
column = am4.ColumnChart()
column.setTitle("[bold]Normalized[/] % of activities in curriculum per month")

column.set_y_label("% activities in curriculum")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['norm_perc_recipes_in_curriculum','norm_perc_experiences_in_curriculum'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Lengths
(total length: description + steps + observations)

In [21]:
# Column chart of `avg_activity_total_length`, school year on x, `month` as hue.
column = am4.ColumnChart()
column.setTitle("Activity total length per month")
# Axis label typo fixed ("lenght" -> "length").
column.set_y_label("Total length average")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['avg_activity_total_length'],
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [22]:
# Column chart of the three length components (descriptions, steps,
# observations), school year on x, `month` as hue.
column = am4.ColumnChart()
column.setTitle("Activity total length per month")
# Axis label typo fixed ("lenght" -> "length").
column.set_y_label("Total length average")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['avg_len_descriptions','avg_len_steps','avg_len_observations'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [23]:
# Per-school-year mean of the monthly total-length statistics.
# `std_activity_total_length` is averaged too, so that column is a mean of
# monthly standard deviations.
length_stat_cols = ['avg_activity_total_length', 'std_activity_total_length']
df_months.groupby('activity_school_year')[length_stat_cols].mean()

Unnamed: 0_level_0,avg_activity_total_length,std_activity_total_length
activity_school_year,Unnamed: 1_level_1,Unnamed: 2_level_1
1,134.666667,120.981667
2,152.5,151.664167
3,153.166667,128.285


Note: std very high because of NULL descriptions. TODO: try without NULL descriptions

In [24]:
# Column chart of `avg_sum_len_reflections`, school year on x, `month` as hue.
column = am4.ColumnChart()
column.setTitle("[bold]Average[/] reflections total length per month")
# Axis label fixed: "lenght" typo and duplicated "average" removed.
column.set_y_label("Average total length")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['avg_sum_len_reflections'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [25]:
# Column chart of the four average reflection lengths (bilancio, competenze,
# miglioramenti, critici), school year on x, `month` as hue.
column = am4.ColumnChart()
column.setTitle("Total length of average reflections per month")
# Axis label typo fixed ("lenght" -> "length").
column.set_y_label("Total length of average")
column.set_x_label("year")
column.fromDataFrame(df_months, "activity_school_year", 
                     ['avg_len_bilancio','avg_len_competenze','avg_len_miglioramenti','avg_len_critici'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Edits

In [11]:
# Line chart of `n_edits` per month, one hue per school year.
chart = am4.LineChart()
chart.setTitle("Edits per months")
chart.set_y_label("# of edits")
# The x axis shows months (x_axis_names="month"), as in the other line charts.
chart.set_x_label("month")
# Sort on `month_order` (Aug..Jul position), not on `month`: after the load
# cell `month` holds names such as 'Apr'/'Aug', which would sort alphabetically.
chart.fromDataFrame(df_months, ["month","activity_school_year"], "n_edits", 
                     hue="activity_school_year", hue_prefix="year ",
                     sorter=['activity_school_year','month_order'],
                     range_divider = "activity_school_year",
                     show_legend = True, x_axis_names="month"
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Logins

In [34]:
# Load apprentice logins and drop the user-type / start-date columns that
# are not used in this notebook.
df_students_logins = pd.read_csv(f"{data_folder}students_logins.csv")\
                        .drop(columns=['ut_user_type','start_semester','start_year'])
# Parse timestamps with pd.to_datetime: the unit-less astype('datetime64')
# is rejected by pandas 2.x.
df_students_logins['date'] = pd.to_datetime(df_students_logins['date'])
df_students_logins.head()

Unnamed: 0,us_user,date,user_school_year,month,dayofweek,hour,minute
0,20,2013-11-25 15:30:13,1,11,2,15,30
1,20,2013-12-19 14:11:06,1,12,5,14,11
2,20,2014-01-08 18:50:53,1,1,4,18,50
3,20,2014-01-08 20:14:51,1,1,4,20,14
4,20,2014-01-14 10:56:35,1,1,3,10,56


In [35]:
# Count logins per (weekday, hour). pandas numbers weekdays Monday=0 .. Sunday=6.
login_times = df_students_logins['date']
login_counts = login_times.groupby(
    [login_times.dt.dayofweek.rename('dayofweek'), login_times.dt.hour.rename('hour')]
).count()

# Project the counts onto the full 7 x 24 grid. Slots without any login are
# absent from `login_counts`; reindex fills them with 0 rather than relying on
# a per-cell lookup that raises KeyError on missing labels.
week_grid = pd.MultiIndex.from_product([range(7), range(24)], names=['dayofweek', 'hour'])
date_hist = login_counts.reindex(week_grid, fill_value=0).reset_index(name='count')

In [None]:
# NOTE(review): this cell has no execution count and its code is identical to
# the next cell — it looks like a leftover duplicate; consider removing it.
# Line chart of login counts per hour, with `dayofweek` passed as hue.
chart = am4.LineChart()
chart.setTitle("Apprentices logins")
chart.set_y_label("# logins")
chart.set_x_label("hour")
chart.fromDataFrame(date_hist, ['hour'], "count", 
                     hue="dayofweek", x_axis_names = "hour",
                     show_legend = True
                    )
chart.plot()

In [40]:
# Line chart of apprentice login counts per hour, with `dayofweek` passed as hue.
chart = am4.LineChart()
chart.setTitle("Apprentices logins")
chart.set_y_label("# logins")
chart.set_x_label("hour")
chart.fromDataFrame(date_hist, ['hour'], "count", 
                     hue="dayofweek", x_axis_names = "hour",
                     show_legend = True
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [49]:
# Line chart of login counts per hour summed over all weekdays.
# The groupby sums every column, so the frame passed to the chart also carries
# a summed `dayofweek` column; only `hour` and `count` are named in the call.
chart = am4.LineChart()
chart.setTitle("Cumulate apprentices logins")
chart.set_y_label("# logins")
chart.set_x_label("hour")
chart.fromDataFrame(date_hist.groupby("hour").sum().reset_index(), ['hour'], "count", 
                     x_axis_names = "hour",
                     show_legend = True
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [39]:
# Line chart of apprentice logins over (dayofweek, hour), with `dayofweek` as
# hue and range divider; `visible_labels` lists the hour labels '6','12','18','0'.
chart = am4.LineChart()
chart.setTitle("Apprentices logins")
chart.set_y_label("# logins")
chart.set_x_label("hour")
chart.fromDataFrame(date_hist, ['dayofweek','hour'], "count", 
                     hue="dayofweek", x_axis_names = "hour",
                     range_divider = "dayofweek",
                     show_legend = True, visible_labels=['6','12','18','0']
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

--------------
## Supervisors

In [53]:
# Load the supervisors' monthly aggregates, zero-fill missing values and sort
# by month, then by school year. `month` is left as read from the CSV (no
# name mapping is applied to this frame).
supervisors_raw = pd.read_csv(f"{data_folder}months_supervisors.csv")
df_months_supervisors = (
    supervisors_raw
      .fillna(0)
      .sort_values(by=['month', 'activity_school_year'])
)
df_months_supervisors.head()

Unnamed: 0,month,activity_school_year,n_users_per_year,n_logins,n_activities,n_recipes,n_experiences,avg_n_user_activities,avg_n_user_recipes,avg_n_user_experiences,n_files,n_files_recipes,n_files_experiences,avg_n_files,avg_n_files_recipes,avg_n_files_experiences,std_n_files,std_n_files_recipes,std_n_files_experiences,n_feedback_requests,n_feedback_responses,n_feedback_requests_recipes,n_feedback_responses_recipes,n_feedback_requests_experiences,n_feedback_responses_experiences,n_in_curriculum,n_in_curriculum_recipes,n_in_curriculum_experiences,n_in_curriculum_insert_date,n_in_curriculum_insert_date_recipes,n_in_curriculum_insert_date_experiences,avg_activity_total_length,std_activity_total_length,avg_len_descriptions,std_len_descriptions,avg_len_steps,std_len_steps,avg_len_observations,std_len_observations,avg_sum_len_reflections,std_avg_sum_len_reflections,avg_avg_len_reflections,std_avg_len_reflections,avg_len_bilancio,std_len_bilancio,avg_len_competenze,std_len_competenze,avg_len_miglioramenti,std_len_miglioramenti,avg_len_critici,std_len_critici,total_reflections,total_null_reflections,n_edits,perc_total_feedback_requests,perc_total_feedback_requests_recipes,perc_total_feedback_requests_experiences,perc_feedback_responses,perc_feedback_responses_recipes,perc_feedback_responses_experiences,perc_in_curriculum,perc_recipes_in_curriculum,perc_experiences_in_curriculum,perc_in_curriculum_insert_date,perc_recipes_in_curriculum_insert_date,perc_experiences_in_curriculum_insert_date
0,1,1,227,267,1336.0,959.0,377.0,4.61,3.31,1.3,3739.0,3112.0,627.0,9.0,8.0,1.0,5.5525,6.2082,3.544,292.0,217.0,225.0,165.0,67.0,52.0,1249.0,919.0,330.0,221.0,169.0,52.0,16.0,0.0,1.0,0.0,14.0,0.0,1.0,0.0,4.84,7.3,2.66,5.19,4.38,9.77,7.55,14.28,4.01,7.87,3.44,8.69,89.0,94.0,9135.0,21.86,23.46,17.77,16.24,17.21,13.79,93.49,68.79,24.7,16.54,12.65,3.89
1,1,2,101,105,1016.0,355.0,661.0,5.43,1.9,3.53,1674.0,1328.0,346.0,9.0,7.0,1.0,4.278,5.2542,2.6519,96.0,58.0,62.0,44.0,34.0,14.0,948.0,313.0,635.0,75.0,51.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.36,4.59,1.75,3.33,3.02,5.85,4.77,7.25,3.31,9.37,2.32,6.82,17.0,39.0,6215.0,9.45,17.46,5.14,5.71,12.39,2.12,93.31,30.81,62.5,7.38,5.02,2.36
2,1,3,40,26,805.0,328.0,477.0,7.06,2.88,4.18,596.0,500.0,96.0,11.0,10.0,1.0,7.6447,8.4491,2.829,43.0,20.0,23.0,11.0,20.0,9.0,682.0,240.0,442.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.47,6.76,2.38,2.19,5.03,8.99,5.69,9.13,7.17,7.63,4.0,11.33,11.0,4.0,4169.0,5.34,7.01,4.19,2.48,3.35,1.89,84.72,29.81,54.91,0.12,0.0,0.12
3,2,1,227,217,1282.0,745.0,537.0,3.96,2.3,1.66,4014.0,3514.0,500.0,11.0,10.0,1.0,6.7077,7.337,2.785,249.0,134.0,181.0,116.0,68.0,18.0,1144.0,677.0,467.0,192.0,143.0,49.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.34,7.46,3.21,6.42,4.48,8.98,7.74,15.31,3.86,7.74,5.26,13.99,58.0,67.0,7795.0,19.42,24.3,12.66,10.45,15.57,3.35,89.24,52.81,36.43,14.98,11.15,3.82
4,2,2,101,80,1091.0,317.0,774.0,6.23,1.81,4.42,1963.0,1254.0,709.0,10.0,8.0,2.0,8.0259,8.4525,5.6257,87.0,53.0,37.0,26.0,50.0,27.0,1020.0,285.0,735.0,150.0,41.0,109.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.34,4.63,2.69,3.23,4.3,6.86,6.25,8.99,2.92,5.45,3.9,10.21,24.0,27.0,7181.0,7.97,11.67,6.46,4.86,8.2,3.49,93.49,26.12,67.37,13.75,3.76,9.99


### Feedbacks

In [54]:
# Create `perc_total_feedback_responses_*`: each month's responses as a
# percentage of the school year's total `n_feedback_responses`.
for activity_kind in ('recipes', 'experiences'):
    normalizePer(
        df_months_supervisors,
        f'perc_total_feedback_responses_{activity_kind}',
        'n_feedback_responses',
        'activity_school_year',
        f'n_feedback_responses_{activity_kind}',
    )

In [55]:
# Column chart of the two `perc_total_feedback_responses_*` columns produced
# by normalizePer on df_months_supervisors, school year on x, `month` as hue.
column = am4.ColumnChart()
column.setTitle("% of feedback responses over the school year")
column.set_y_label("% of responses")
column.set_x_label("year")
column.fromDataFrame(df_months_supervisors, "activity_school_year", 
                     ['perc_total_feedback_responses_recipes','perc_total_feedback_responses_experiences'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [56]:
# Monthly ratio of supervisors' responses to apprentices' requests.
# The division aligns the two frames on their row index labels.
# NOTE(review): this assumes months.csv and months_supervisors.csv list the
# same (month, school year) on the same row — confirm.
response_ratio = df_months_supervisors['n_feedback_responses']/df_months['n_feedback_requests']
# x/0 yields +/-inf, which fillna alone leaves in place; treat those months
# like the 0/0 (NaN) case and report 0.
df_months_supervisors['ration_response'] = response_ratio.replace([np.inf, -np.inf], np.nan).fillna(0)

In [57]:
# Column chart of `ration_response` (responses / requests), school year on x,
# `month` as hue.
column = am4.ColumnChart()
column.setTitle("Ratio responses/requests")
column.set_y_label("responses/request")
column.set_x_label("year")
column.fromDataFrame(df_months_supervisors, "activity_school_year", 
                     ['ration_response'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [58]:
# Column chart of the four `avg_len_*` columns of df_months_supervisors
# (bilancio, competenze, miglioramenti, critici), school year on x, `month` as hue.
column = am4.ColumnChart()
column.setTitle("Total length of average feedbacks per month")
column.set_y_label("Total length")
column.set_x_label("year")
column.fromDataFrame(df_months_supervisors, "activity_school_year", 
                     ['avg_len_bilancio','avg_len_competenze','avg_len_miglioramenti','avg_len_critici'], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [13]:
# Per-supervisor feedback counts, with missing values replaced by 0.
df_supervisors_feedback = pd.read_csv(f"{data_folder}supervisors_feedbacks.csv").fillna(0)

# sent/received ratio, capped at 1.0. A 0/0 ratio stays NaN.
sent_over_received = df_supervisors_feedback['sent'] / df_supervisors_feedback['received']
df_supervisors_feedback['ratio'] = sent_over_received.clip(upper=1.0)

In [14]:
# Bucket the per-supervisor ratio into bins of width range/(bins-1) and plot
# the number of supervisors per bucket on a log scale.
bins = 21
data_range = df_supervisors_feedback['ratio'].max()-df_supervisors_feedback['ratio'].min()
hist_step  = data_range/(bins-1)

# Truncate ratio/step to an integer bucket index, then scale back to the
# bucket's lower edge (rounded to 2 decimals).
# NOTE(review): a zero `data_range` or a NaN ratio would break this line — confirm
# the input never contains them.
df_supervisors_feedback['ratio_hist'] = round((df_supervisors_feedback['ratio']/hist_step).astype(int)*hist_step,2)

# Supervisors per bucket (non-null `us_user` values), as a `count` column.
df_hist = df_supervisors_feedback.groupby(["ratio_hist"]).count()[['us_user']].reset_index().rename(columns={'us_user': 'count'})

chart = am4.ColumnChart()
chart.setTitle("Histogram: ratio responses/requests per supervisor")
chart.set_y_label("log10 count")
chart.set_x_label("ratio responses/requests")
chart.fromDataFrame(df_hist, "ratio_hist", "count", log_scale=True,
                    show_legend=False)
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [15]:
# One column per supervisor (`us_user`) showing `ratio`, with "ratio" passed
# as sorter and x labels hidden.
# NOTE(review): `ratio` is capped at 1.0 (a fraction), while the y label says "%".
chart = am4.ColumnChart()
chart.setTitle("Ratio responses/requests per each supervisor")
chart.set_y_label("% responses over requests")
chart.set_x_label("supervisors")
chart.fromDataFrame(df_supervisors_feedback, "us_user", "ratio",
                    sorter="ratio", 
                    hide_x_labels = True,
                    show_legend=False)
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [16]:
# Load the per-activity feedback records and preview the first rows.
df_feedbacks_info = pd.read_csv(f"{data_folder}activities_feedbacks_info.csv")
df_feedbacks_info.head()

Unnamed: 0,ac_activity,request_date,response_date,delay_hours,edits_between,edits_after,times_before_answer,activity_school_year
0,438,2014-02-04 13:57:46,,,0,0,1,1
1,579,2014-02-04 15:20:18,,,0,0,1,1
2,723,2014-02-20 14:42:12,2014-11-17 20:59:09,6486.0,1,0,1,1
3,844,2014-02-25 13:32:38,,,0,0,2,1
4,845,2014-02-25 14:15:32,,,0,0,1,1


In [17]:
# Requests whose `response_date` is missing, i.e. never answered.
no_answer = df_feedbacks_info['response_date'].isnull().sum()
total_requests = len(df_feedbacks_info)
# Guard the empty frame, and let the `%` format spec do the scaling/rounding so
# no float artefacts (e.g. 59.620000000000005) can leak into the message.
no_answer_share = no_answer/total_requests if total_requests else 0.0
print(f'There are {no_answer}/{total_requests} without answer ({no_answer_share:.2%})')

There are 2960/4965 without answer (59.62%)


In [18]:
# Keep only rows with no missing value in any column, and express the
# response delay in days (hours / 24).
df_feedbacks_info = (
    df_feedbacks_info
      .dropna()
      .assign(delay_days=lambda frame: frame['delay_hours'] / 24)
)

In [30]:
# Number of answered activities per (school year, times_before_answer).
df_hist = df_feedbacks_info.groupby(["activity_school_year","times_before_answer"]).count()[['ac_activity']].reset_index().rename(columns={'ac_activity': 'count'})

chart = am4.ColumnChart()
chart.setTitle("# of requests before a feedback")
# The chart is drawn with log_scale = False, so the axis shows raw counts;
# the label no longer claims a log10 scale.
chart.set_y_label("count")
chart.set_x_label("requests before an answer")
chart.fromDataFrame(df_hist, "times_before_answer", "count", 
                     hue="activity_school_year", hue_prefix="year ",
                     log_scale = False
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [26]:
# Bucket `delay_days` into roughly `bins` buckets of whole-day width.
bins = 50
data_range = df_feedbacks_info['delay_days'].max()-df_feedbacks_info['delay_days'].min()
# int() truncates to 0 whenever the range is shorter than `bins` days, which
# would make the division below blow up; never go below a 1-day bucket.
hist_step  = max(1, int(data_range/bins))

# Lower edge of the bucket each delay falls into.
df_feedbacks_info['delay_days_hist'] = (df_feedbacks_info['delay_days']/hist_step).astype(int)*hist_step

In [29]:
# Number of answered requests per (school year, delay bucket).
df_hist = df_feedbacks_info.groupby(["activity_school_year","delay_days_hist"]).size().reset_index(name='count')

chart = am4.ColumnChart()
chart.setTitle("Histogram: days before an answer")
# The chart is drawn with log_scale = False, so the axis shows raw counts;
# the label no longer claims a log scale.
chart.set_y_label("count")
chart.set_x_label("days before an answer")
# Only buckets with fewer than 150 requests are passed to the chart, so the
# largest buckets are left out of the plot.
chart.fromDataFrame(df_hist[df_hist['count']<150], "delay_days_hist", "count", 
                     hue="activity_school_year", hue_prefix="year ",
                     log_scale = False,
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [32]:
# Rows per school year in df_feedbacks_info, used as the normaliser below.
requests_count = df_feedbacks_info['activity_school_year'].value_counts()
# Rows per (school year, edits_after), plus the same count divided by the
# year's row total (`count_norm`).
df_hist = df_feedbacks_info.groupby(["activity_school_year","edits_after"]).size().reset_index(name='count')
df_hist['count_norm'] = df_hist['count'].div(df_hist['activity_school_year'].map(requests_count))

# Column chart of the raw `count` per edits_after value, one hue per school year.
chart = am4.ColumnChart()
chart.setTitle("Histogram: edits after a feedback")
chart.set_y_label("count")
chart.set_x_label("# of edits after an answer")
chart.fromDataFrame(df_hist, "edits_after", "count", 
                     hue="activity_school_year", hue_prefix="year ",
                     log_scale = False, min_y = 0.1
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [46]:
# Column chart of `count_norm` (count divided by the school year's row total)
# per edits_after value, one hue per school year.
# NOTE(review): the y label says "Log count" but no log_scale argument is
# passed and the plotted value is a fraction — confirm the intended label.
chart = am4.ColumnChart()
chart.setTitle("Histogram: [bold]normalized[/] edits after a feedback")
chart.set_y_label("Log count")
chart.set_x_label("# of edits after an answer per feedback request")
chart.fromDataFrame(df_hist, "edits_after", "count_norm", 
                     hue="activity_school_year", hue_prefix="year ",
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Logins

In [47]:
# Load supervisor logins and drop the user-type / start-date columns that
# are not used in this notebook.
df_supervisors_logins = pd.read_csv(f"{data_folder}supervisors_logins.csv")\
                        .drop(columns=['ut_user_type','start_semester','start_year'])
# Parse timestamps with pd.to_datetime: the unit-less astype('datetime64')
# is rejected by pandas 2.x.
df_supervisors_logins['date'] = pd.to_datetime(df_supervisors_logins['date'])

# Count logins per (weekday, hour). pandas numbers weekdays Monday=0 .. Sunday=6.
login_times = df_supervisors_logins['date']
login_counts = login_times.groupby(
    [login_times.dt.dayofweek.rename('dayofweek'), login_times.dt.hour.rename('hour')]
).count()

# Project the counts onto the full 7 x 24 grid. Slots without any login are
# absent from `login_counts`; reindex fills them with 0 rather than relying on
# a per-cell lookup that raises KeyError on missing labels.
week_grid = pd.MultiIndex.from_product([range(7), range(24)], names=['dayofweek', 'hour'])
date_hist = login_counts.reindex(week_grid, fill_value=0).reset_index(name='count')

In [48]:
# Line chart of supervisor logins over (dayofweek, hour), with `dayofweek` as
# hue and range divider; `visible_labels` lists the hour labels '6','12','18','0'.
chart = am4.LineChart()
chart.setTitle("Supervisors logins")
chart.set_y_label("# logins")
chart.set_x_label("hour")
chart.fromDataFrame(date_hist, ['dayofweek','hour'], "count", 
                     hue="dayofweek", x_axis_names = "hour",
                     range_divider = "dayofweek",
                     show_legend = True, visible_labels=['6','12','18','0']
                    )
chart.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Years considerations:

In [49]:
# Load the per-month, per-school-year, per-start-year aggregates and preview them.
df_years = pd.read_csv(f"{data_folder}years_test.csv")
df_years.head()

Unnamed: 0,month,activity_school_year,start_year,n_activities,n_recipes,n_experiences,avg_n_user_activities,avg_n_user_recipes,avg_n_user_experiences
0,1,1,2013,176,150.0,26.0,5.03,4.29,0.74
1,1,1,2014,245,191.0,54.0,5.98,4.66,1.32
2,1,1,2015,170,69.0,101.0,5.15,2.09,3.06
3,1,1,2016,131,98.0,33.0,3.45,2.58,0.87
4,1,1,2017,160,109.0,51.0,4.32,2.95,1.38


In [50]:
# Restrict to rows of the first school year.
first_year = df_years[df_years['activity_school_year']==1]

# Column chart of `n_activities` with start_year on x and `month` as hue.
column = am4.ColumnChart()
column.setTitle("Total number of [bold]activities[/] per month")
column.set_y_label("number of activities")
column.set_x_label("year")
column.fromDataFrame(first_year, ["start_year"], 
                     ["n_activities"], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [51]:
# Column chart of `avg_n_user_activities` for the first school year, with
# start_year on x and `month` as hue.
column = am4.ColumnChart()
column.setTitle("Average number of [bold]activities[/] per user per month")
column.set_y_label("average number of activities")
column.set_x_label("year")
column.fromDataFrame(first_year, ["start_year"], 
                     ["avg_n_user_activities"], 
                     hue="month", hue_prefix="month ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [52]:
# Collapse the monthly rows into one row per (school year, start year) by
# summing every other column. This also sums `month` and the `avg_*` columns;
# only the summed counts are meaningful totals.
year_keys = ["activity_school_year", "start_year"]
df_only_years = df_years.groupby(year_keys, as_index=False).sum()

In [53]:
# Column chart of the yearly activity totals, start_year on x, one hue per
# school year.
column = am4.ColumnChart()
column.setTitle("Total number of [bold]activities[/] per school year")
column.set_y_label("number of activities")
column.set_x_label("year")
# Plot the summed `n_activities`, as the title and y label announce. The
# previous column, `avg_n_user_activities`, is a sum of monthly averages in
# df_only_years and is not a number of activities.
column.fromDataFrame(df_only_years, ["start_year"], 
                     ["n_activities"], 
                     hue="activity_school_year", hue_prefix="school year ",
                     show_legend = False
                    )
column.plot()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>