In [1]:
#Libraries used in the functions below
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import math

from jupyter_dash import JupyterDash
import os
# Remove proxy settings from this process's environment (presumably so the
# locally served Dash app is not routed through a proxy -- TODO confirm).
# Each variable is removed independently, so a missing http_proxy does not
# prevent https_proxy from being cleared.
os.environ.pop('http_proxy', None)
os.environ.pop('https_proxy', None)

# Import dash
import dash
from dash import Dash, html, dcc, Input, Output, dash_table, State

# Import plotly
import plotly.graph_objs as go
import plotly.express as px
import seaborn as sns
pd.options.mode.chained_assignment = None 

In [2]:
# Load every CSV under dataset/ into its own pandas DataFrame.
# The suffix of each variable abbreviates the file it was read from
# (C = courses, SA = studentAssessment, SI = studentInfo, ...).
dfC    = pd.read_csv("dataset/courses.csv")
dfSA   = pd.read_csv("dataset/studentAssessment.csv")
dfSI   = pd.read_csv("dataset/studentInfo.csv")
dfSR   = pd.read_csv("dataset/studentRegistration.csv")
dfVLE  = pd.read_csv("dataset/vle.csv")
dfSVLE = pd.read_csv("dataset/studentVle.csv")
dfA    = pd.read_csv("dataset/assessments.csv")
dfP    = pd.read_csv("dataset/generalized_predicts.csv")

In [3]:
# Build the "<code_module>-<code_presentation>" key used to identify a course
# section. Vectorized string concatenation avoids a Python-level call per row.
dfC['Course_Section'] = dfC['code_module'] + '-' + dfC['code_presentation']
course_section = list(dfC['Course_Section'])
dfP['Course_Section'] = dfP['code_module'] + '-' + dfP['code_presentation']

In [4]:
# Same "<code_module>-<code_presentation>" key on the student info table,
# built with vectorized concatenation rather than a per-row apply.
dfSI['Course_Section'] = dfSI['code_module'] + '-' + dfSI['code_presentation']
def roster(course_section,sort_type):
    """Return the student ids enrolled in one course section.

    Parameters
    ----------
    course_section : str
        Key in the "<code_module>-<code_presentation>" form.
    sort_type : str
        'Numerical' sorts ascending by id_student; any other value sorts by
        the 'probability' column of dfP, highest first.

    Returns
    -------
    list
        id_student values for students present in both dfSI and dfP for the
        requested section.
    """
    # Filter both tables down to the section before joining, so each call
    # merges only that section's rows instead of the full tables.
    students = dfSI[dfSI['Course_Section']==course_section]
    predictions = dfP[dfP['Course_Section']==course_section]
    dfClassRoster = pd.merge(students, predictions, on=['id_student','Course_Section'])
    if sort_type=='Numerical':
        dfClassRoster = dfClassRoster.sort_values('id_student')
    else:
        dfClassRoster = dfClassRoster.sort_values('probability',ascending=False)
    return list(dfClassRoster['id_student'])

In [5]:
# Tag every student/VLE interaction row with the activity type of its site.
site_activity_types = dfVLE[['id_site', 'activity_type']]
df_svle_with_type = dfSVLE.merge(site_activity_types, on='id_site')
df_svle_with_type.head()

Unnamed: 0,code_module,code_presentation,id_student,id_site,date,sum_click,activity_type
0,AAA,2013J,28400,546652,-10,4,forumng
1,AAA,2013J,28400,546652,-10,1,forumng
2,AAA,2013J,28400,546652,-10,1,forumng
3,AAA,2013J,28400,546652,-10,8,forumng
4,AAA,2013J,30268,546652,-10,3,forumng


In [6]:
# Build svle_with_type_dict: course-section key -> that section's rows of
# df_svle_with_type.
df_svle_with_type['Course_Section'] = df_svle_with_type['code_module']+'-'+df_svle_with_type['code_presentation']
# all distinct course-section keys present in the VLE interaction data
combos = (df_svle_with_type['Course_Section']).unique()
svle_with_type_dict = {}
# One groupby pass splits the table, instead of one full-table boolean scan
# per section. sort=False keeps keys in first-appearance order, matching the
# order of `combos`.
for combo, combo_rows in df_svle_with_type.groupby('Course_Section', sort=False):
    svle_with_type_dict[combo] = combo_rows

In [7]:
# Spot-check the slice stored for one course section.
svle_with_type_dict['BBB-2013J'].head()

Unnamed: 0,code_module,code_presentation,id_student,id_site,date,sum_click,activity_type,Course_Section
753564,BBB,2013J,560773,704206,-23,1,resource,BBB-2013J
753565,BBB,2013J,560920,704206,-10,1,resource,BBB-2013J
753566,BBB,2013J,554243,704206,-10,1,resource,BBB-2013J
753567,BBB,2013J,370307,704206,-10,1,resource,BBB-2013J
753568,BBB,2013J,609047,704206,-10,2,resource,BBB-2013J


In [8]:
# Course-section key on the assessments table, built with vectorized
# concatenation rather than a per-row apply.
dfA['Course_Section'] = dfA['code_module'] + '-' + dfA['code_presentation']
# Lookup table: VLE site id -> activity type
resource_dict = dict(zip(dfVLE['id_site'], dfVLE['activity_type']))
def classCode(Module,Presentation):
    """Return the rows of dfSVLE for one module/presentation pair.

    Module is compared against 'code_module' and Presentation against
    'code_presentation'; rows must match both.
    """
    in_module = dfSVLE['code_module'] == Module
    in_presentation = dfSVLE['code_presentation'] == Presentation
    return dfSVLE[in_module & in_presentation]

In [11]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

# Create server variable with Flask server object for use with gunicorn
server = app.server

# Course-section keys offered in the dropdown (the Course_Section column of dfC)
available_indicators1 = dfC['Course_Section']

# Student submissions joined with each assessment's date, weight and course
# codes, plus the combined course-section key. The key is built with
# vectorized concatenation rather than a per-row apply.
df_sa_with_due_dates = dfSA.merge(dfA[['id_assessment','date','code_module','code_presentation','weight']], on='id_assessment')
df_sa_with_due_dates['Course_Section'] = df_sa_with_due_dates['code_module'] + '-' + df_sa_with_due_dates['code_presentation']

# Student info joined with the prediction table, per student and course section
df_si_with_preds = pd.merge(dfSI, dfP, on=['id_student','Course_Section'])

# Display names of the Early Warning Table columns, in display order
student_info_table_cols = ['id_student','Predicted Fail/Withdraw %', '# of Assignments Completed','Current Grade (weighted)', "Avg of this student's previous scores",'Most recent score',"Most recent score - this student's previous avg"]

# Page layout: title and course-section picker on top, then two panes that
# each take 49% of the width. The first holds the Early Warning Table, the
# graph-type selector, the comparison graph and the day slider; the second
# (floated right) holds the selected student's details.
app.layout = html.Div([
    html.H1("Instructor Interface/Dashboard",
            style={'textAlign':'center', 'paddingTop':'1%','marginBottom':'-16px'}),

    # Course-section picker
    html.Div([
        html.Div(["Module-presentation:"],
                 style={'float':'left','margin':'0px','display':'inline-block','fontSize':'20px','fontWeight':'light'}),
        html.Div(
            [dcc.Dropdown(
                id='CourseSection',
                options=[{'label': i, 'value': i} for i in available_indicators1],
                value=available_indicators1[0],
            )],
            style={'minWidth':'150px','display':'inline-block'}
        ),
    ], style={'display':'inline-block','marginTop':'-40px'}),
    html.Hr(style={'marginTop':'0px'}),

    # Class-wide pane
    html.Div([
        html.H5(id='all_student_table_title'),
        dash_table.DataTable(
            id="student_info_table",
            columns=[{"name": i, "id": i} for i in student_info_table_cols],
            style_cell={
                'minWidth': '90px',
                'width': '90px',
                'maxWidth': '90px',
                'overflow': 'hidden',
                'textOverflow': 'ellipsis'},
            fixed_rows={'headers': True},
            style_table={'height':'300px'},
            style_data_conditional=[
                # zebra striping
                {
                    'if':{'row_index':'odd'},
                    'backgroundColor':'rgb(235,235,235)',
                },
                # highlight whole rows whose predicted value exceeds 0.8
                {
                    'if':{
                        'filter_query':'{Predicted Fail/Withdraw %} > 0.8'
                    },
                    'backgroundColor':'rgb(255,204,203)',
                    'fontWeight':'bold'
                },
                # most recent score more than 10 above the previous average
                {
                    'if':{
                        'filter_query':"{Most recent score - this student's previous avg} > 10",
                        'column_id':"Most recent score - this student's previous avg"
                    },
                    'backgroundColor':'rgb(187,255,153)'
                },
                # most recent score more than 10 below the previous average
                {
                    'if':{
                        'filter_query':"{Most recent score - this student's previous avg} < -10",
                        'column_id':"Most recent score - this student's previous avg"
                    },
                    'backgroundColor':'rgb(255,128,128)'
                }
            ],
            style_header={'whiteSpace':'normal'},
            filter_action="native",
            sort_action="native",
            sort_mode="multi",
            data=[]
        ),
        dcc.RadioItems(
            ['Assessment', 'Resources'],
            'Assessment',
            id='Graph-Type',
            inline=True),
        dcc.Graph(id='GraphMean'),
        dcc.Slider(
            0,
            # Series.max() skips missing values; the builtin max() over a
            # Series returns NaN when the first value is NaN.
            dfA['date'].max(),
            step=None,
            id='Day',
            value=60),
    ], style={'width': '49%','display': 'inline-block'}),

    # Selected-student pane
    html.Div([
        html.Div(id='student_id_msg'),
        html.H6(id='single_student_table_title'),
        html.Div(id="Student_table"),
        html.Div(id="Student_Comments"),
    ], style={'width': '49%','display': 'inline-block','float': 'right'})
], style={'width':'95%','margin':'auto'})

@app.callback(
    Output('student_info_table','active_cell'),
    Output('student_info_table','selected_cells'),
    Input('CourseSection','value')
)
def reset_selection(course_section):
    """Clear the table's active cell and selected cells when the course changes.

    The new course value itself is not used; the callback only needs to fire.
    """
    cleared_active_cell = None
    cleared_selected_cells = []
    return cleared_active_cell, cleared_selected_cells

@app.callback(
    Output('all_student_table_title','children'),
    Input('Day','value')
)
def set_all_student_table_title(day):
    """Return the Early Warning Table heading for the day chosen on the slider."""
    title_template = "Early Warning Table as of day {0}"
    return title_template.format(day)

# @app.callback(
#     Output('StudentID', 'options'),
#     Input('CourseSection', 'value'),Input('Sort_by','value'))
# def set_course_options(selected_course,Sort_by):
#     return roster(selected_course,Sort_by)

# @app.callback(
#     Output('StudentID', 'value'),
#     Input('StudentID', 'options'))
# def set_id_value(available_options):
#     return available_options[0]


@app.callback(
    Output('single_student_table_title','children'),
    Input('student_info_table','active_cell'),
    Input('student_info_table','derived_viewport_data')
)
def set_single_student_table_title(active_cell, data):
    """Return the heading for the selected student's table.

    Returns None when get_id_student finds no student to show.
    """
    selected_id = get_id_student(active_cell, data)
    if selected_id is None:
        return None
    title_template = "Student {0}'s Information"
    return title_template.format(selected_id)

def get_id_student(active_cell, data):
    """Return the id_student of the selected table row.

    Parameters
    ----------
    active_cell : dict or None
        The table's active cell; only its 'row' entry is read.
    data : list of dict or None
        The rows currently visible in the table; each has an 'id_student' key.

    Returns
    -------
    The id_student of the active row, or of the first row when no cell is
    active or the active row index is beyond the visible rows (for example
    after a filter shrinks the table). None when there are no rows.
    """
    if not data:
        # Nothing to select from, even if a stale active cell is still set.
        return None
    row = active_cell['row'] if active_cell else 0
    if row >= len(data):
        # Stale selection pointing past the visible rows: use the first row.
        row = 0
    return data[row]['id_student']

@app.callback(
    Output('student_id_msg', 'children'),
    Input('student_info_table','active_cell'),
    Input('student_info_table','derived_viewport_data'),
    )
def set_id_value(active_cell, data):
    """Return the status line shown above the selected student's details.

    Names the selected student when get_id_student finds one; otherwise tells
    the user to remove the table filters.
    """
    id_student = get_id_student(active_cell, data)
    if id_student is not None:
        return "Selected student: {0}. Select another student by clicking on the Early Warning Table.".format(id_student)
    # Spelling of "available" corrected in this user-facing message.
    return "No data available - remove filters from the Early Warning Table!"

# @app.callback(
#     Output('student_id_combined_msg','children'),
#     Input('StudentID','children')
# )
# def set_student_id_msg(student_id):
#     if student_id:
#         return ["Selected student: ", student_id]
#     else:
#         return ["Select a student from the table to see their information!"]

@app.callback(
    Output('student_info_table', 'data'),
    Input('CourseSection', 'value'),Input('Day','value')
)
def update_student_info_table(course_section, day):
    """Build the rows of the Early Warning Table for one course as of `day`.

    Parameters
    ----------
    course_section : str or None
        Selected course-section key; a falsy value yields a single
        placeholder row asking the user to pick a course.
    day : number
        Day chosen on the slider. Only assessments with date <= day and
        date_submitted <= day contribute to the weighted grade.

    Returns
    -------
    list of dict
        One record per student in the course, keyed by
        student_info_table_cols.
    """
    if not course_section:
        to_ret = pd.DataFrame([["No","Course","Selected","-----","Select","Course","Above"]], columns=student_info_table_cols)
        return to_ret.to_dict('records')

    df_roster_this_course = pd.DataFrame(df_si_with_preds[df_si_with_preds['Course_Section']==course_section])

    df_sa_this_course = df_sa_with_due_dates[df_sa_with_due_dates['Course_Section']==course_section]
    df_student_assessments = df_roster_this_course.merge(df_sa_this_course, left_on='id_student',right_on='id_student')

    # Keep only assessments that were both due and submitted by `day`.
    # .copy() makes the later column assignment act on an independent frame.
    due_and_submitted = (df_student_assessments.date <= day) & (df_student_assessments.date_submitted <= day)
    df_student_assessments = df_student_assessments[due_and_submitted].copy()

    # Total weight of this course's TMA assessments due by `day`.
    # NOTE(review): the numerator below sums every assessment type while this
    # denominator counts TMAs only, and it can be 0 early in a course --
    # confirm both are intended.
    tma_due = dfA[(dfA.date <= day) & (dfA['Course_Section'] == course_section) & (dfA['assessment_type']=='TMA')]
    total_possible_score = np.sum(tma_due['weight'])

    # Per-student weighted score. Only the weighted_score column is summed;
    # summing every column would also aggregate the string columns carried in
    # from the student-info merge.
    df_student_assessments['weighted_score'] = df_student_assessments['score'] * df_student_assessments['weight']
    df_student_scores = df_student_assessments.groupby(['id_student'])['weighted_score'].sum().reset_index()
    df_student_scores['pct_total'] = np.round(df_student_scores['weighted_score']/total_possible_score,1)

    most_rec_scores = get_most_recent_assignment_scores(dfSA, dfA, dfSI, course_section, day)
    no_assignments_submitted_msg = "No prior assignments submitted"

    # Students with more than one submission: average of all submissions
    # before the latest one, recovered from the overall mean and the count.
    has_prior = most_rec_scores['num_assignments_submitted'] > 1
    n_submitted = most_rec_scores.loc[has_prior, 'num_assignments_submitted']
    latest_score = most_rec_scores.loc[has_prior, 'most_rec_score']
    most_rec_scores.loc[has_prior, 'avg_before'] = np.round(
        (most_rec_scores.loc[has_prior, 'stu_avg_score'] * n_submitted - latest_score) / (n_submitted - 1),
        1)
    most_rec_scores.loc[has_prior, 'recent_minus_avg'] = np.round(
        latest_score - most_rec_scores.loc[has_prior, 'avg_before'], 1)

    # Students with at most one submission have no previous average.
    no_prior = most_rec_scores['num_assignments_submitted'] <= 1
    most_rec_scores.loc[no_prior, 'avg_before'] = no_assignments_submitted_msg
    most_rec_scores.loc[no_prior, 'recent_minus_avg'] = 'No prev. avg'

    # "<submitted>/<due>" text shown in the table
    most_rec_scores['assignments_completed_frac'] = most_rec_scores['num_assignments_submitted'].astype(str) + '/' + most_rec_scores['num_assignments_due'].astype(str)

    num_assignments_due = np.max(most_rec_scores['num_assignments_due'])
    if pd.isna(num_assignments_due):
        num_assignments_due = 0

    # Left-join so students with no qualifying submissions still get a row.
    df_final = df_roster_this_course.merge(df_student_scores, how='left', left_on='id_student',right_on='id_student')[['id_student','probability','pct_total']]
    df_final = df_final.merge(most_rec_scores, how='left',left_on='id_student',right_on='id_student')[list(df_final.columns) + ['avg_before','most_rec_score', 'assignments_completed_frac', 'num_assignments_submitted','recent_minus_avg']]

    # Reorder to match student_info_table_cols
    df_final = df_final[['id_student','probability','assignments_completed_frac','pct_total','avg_before','most_rec_score','recent_minus_avg']]

    # Fill in rows for students without a weighted score
    df_final.loc[df_final['pct_total'].isna(),['assignments_completed_frac','pct_total','avg_before','most_rec_score']] = [
        '0/{0}'.format(num_assignments_due),0,no_assignments_submitted_msg,"Nothing submitted"
    ]

    # With fewer than 2 assignments due so far, hide the prediction
    if num_assignments_due < 2:
        df_final['probability'] = 'Too early to predict'

    df_final.columns = student_info_table_cols
    return df_final.to_dict("records")

    
def get_most_recent_assignment_scores(df_sa, df_a, df_si, course_section, day):
    """Summarise each student's submissions for one course as of `day`.

    Parameters
    ----------
    df_sa : DataFrame
        Student submissions; reads id_student, id_assessment, date_submitted
        and score. Not modified.
    df_a : DataFrame
        Assessments; reads id_assessment, date and Course_Section.
    df_si : DataFrame
        Student info; reads id_student and Course_Section.
    course_section : str
        Course-section key to report on.
    day : number
        Submissions with date_submitted <= day are considered, restricted to
        assessments whose date is strictly before `day`.
        NOTE(review): callers in this file filter assessments with
        date <= day; confirm the strict comparison here is intended.

    Returns
    -------
    DataFrame
        One row per student with at least one qualifying submission, with
        columns id_student, id_assessment (of the latest submission),
        most_rec_score, num_assignments_submitted, stu_avg_score and
        num_assignments_due. Students without qualifying submissions are
        absent.
    """
    # Submissions made on or before `day`; a missing score counts as 0.
    # Work on an explicit copy instead of assigning into a filtered slice.
    submissions = df_sa[df_sa['date_submitted'] <= day].copy()
    submissions['score'] = submissions['score'].fillna(0)

    # Restrict to students enrolled in this course section
    enrolled_ids = df_si.loc[df_si['Course_Section']==course_section, ['id_student']]
    submissions = enrolled_ids.merge(submissions, how='left', on='id_student')

    # Assessments of this course with a date strictly before `day`
    df_a_filtered = df_a[(df_a['date']<day) & (df_a['Course_Section']==course_section)]

    # total number of such assessments
    num_assignments_submittable = df_a_filtered['id_assessment'].count()

    # The inner join drops submissions for assessments outside that set (and
    # the empty rows of students who have submitted nothing).
    df_merged = submissions.merge(df_a_filtered, on='id_assessment').sort_values(
        by=['date_submitted','date'], ascending=[False,False])

    per_student = df_merged.groupby(['id_student'])

    # number of submissions per student
    num_assignments = per_student['id_assessment'].count().reset_index().rename(
        columns={'id_assessment':'num_assignments_submitted'})

    # mean score over those submissions
    assignment_avg = per_student['score'].mean().reset_index().rename(
        columns={'score':'stu_avg_score'})

    # row of each student's latest submission (largest date_submitted)
    to_ret = df_merged.loc[per_student['date_submitted'].idxmax(), ['id_student','id_assessment','score']].rename(
        columns={'score':'most_rec_score'})

    # combine into a single frame
    to_ret = to_ret.merge(num_assignments, on='id_student')[['id_student','id_assessment','most_rec_score','num_assignments_submitted']]
    to_ret = to_ret.merge(assignment_avg, on='id_student')[list(to_ret.columns) + ['stu_avg_score']]
    to_ret['num_assignments_due'] = num_assignments_submittable
    return to_ret

@app.callback(
    Output('GraphMean', 'figure'),Input('CourseSection', 'value'),
    Input('student_info_table','active_cell'),
    Input('student_info_table','derived_viewport_data'),
    Input('Graph-Type','value'),Input('Day','value'))
def update_figure(CourseSection1, active_cell, data,type1,day1):
    """Build the grouped bar chart comparing the selected student with the class.

    Parameters
    ----------
    CourseSection1 : str or None
        Selected course-section key; a falsy value yields an empty bar chart.
    active_cell, data
        Table selection state, passed to get_id_student to find the student.
    type1 : str
        'Assessment' compares scores per assessment; any other value compares
        clicks per activity type.
    day1 : number
        Only assessments with date <= day1 (and the student's submissions
        with date_submitted <= day1), or VLE rows with date <= day1, are used.

    Returns
    -------
    A plotly figure with one 'Student' and one 'Mean' bar per category.

    Note: a later callback in this file is also named update_figure. Dash
    registers callbacks when the decorator runs, so both stay active, but the
    module-level name ends up bound to the later function.
    """
    if not CourseSection1:
        return px.bar()
    StudentID1 = get_id_student(active_cell, data)
    if type1=='Assessment':
        # all submissions for this course's assessments dated up to day1
        due_by_day = (df_sa_with_due_dates['date']<=day1) & (df_sa_with_due_dates['Course_Section']==CourseSection1)
        assessments = df_sa_with_due_dates.loc[due_by_day,['id_assessment','id_student','score','date_submitted']]

        # class mean score per assessment
        assessment_avg = assessments[['id_assessment','score']].groupby('id_assessment').mean().reset_index()
        assessment_avg['type'] = 'Mean'

        # the selected student's scores, limited to work submitted by day1
        dfStudentScore = assessments.loc[(assessments['id_student']==StudentID1) & (assessments['date_submitted']<=day1),['id_assessment','score']]
        dfStudentScore['type'] = 'Student'

        dfTogether=pd.concat([dfStudentScore, assessment_avg], ignore_index=True,axis=0)
        # plot assessment ids as categories rather than as a numeric axis
        dfTogether['id_assessment'] = dfTogether.id_assessment.astype(str)
        dfTogether=dfTogether.dropna()
        figure = px.bar(dfTogether, x="id_assessment", y="score", color="type", barmode="group",
                title="Comparison of Students Scores and Class Averages")
        figure.update_layout(legend_title="")
        return figure
    else:
        dfSVLE1 = svle_with_type_dict[CourseSection1]
        dfCode = dfSVLE1[dfSVLE1['date']<=day1]
        student_count=len(dfCode['id_student'].unique())

        # Sum only sum_click, the one column the chart uses. Summing every
        # column would also aggregate the string columns of this large frame.
        dfResource=dfCode.groupby(['activity_type'])['sum_click'].sum().reset_index()
        # class total divided by the number of students with any activity
        dfResource['sum_click']=dfResource['sum_click']/student_count

        dfStudent=dfCode[dfCode['id_student']==StudentID1]
        dfStudent=dfStudent.groupby(['activity_type'])['sum_click'].sum().reset_index()

        dfStudent['type']='Student'
        dfResource['type']='Mean'
        dfTogether1=pd.concat([dfStudent, dfResource], ignore_index=True,axis=0)
        figure2 = px.bar(dfTogether1, x="activity_type", y="sum_click", color="type", barmode="group",
            title="Comparison of Students Resource Usage and Class Averages")
        figure2.update_layout(legend_title="")
        return figure2

# @app.callback(
#     Output('GraphMedian', 'figure'),Input('CourseSection', 'value'),#Input('StudentID', 'value'),
#     Input('student_info_table','active_cell'),
#     Input('student_info_table','derived_viewport_data'),
#     Input('Graph-Type','value'),Input('Day','value'))

# def update_figure(CourseSection1, active_cell, data,type1,day1): 
#     StudentID1 = get_id_student(active_cell, data)
#     if type1=='Assessment':
#         dfA1=dfA[dfA['date']<=day1]
#         dfCourse_Assessments=dfA1[dfA1['Course_Section']==CourseSection1]
#         assessment=list(dfCourse_Assessments['id_assessment'])
#         dictMedian={}
#         for assess in assessment:
#             dfSAMedian=dfSA[dfSA['id_assessment']==assess]
#             dfSAMedian=dfSAMedian.dropna()
#             dictMedian[assess]=np.median(dfSAMedian['score'])
#         dfStudentScore=dfSA[dfSA['id_student']==StudentID1]
#         dfStudentScore=dfStudentScore[['id_assessment','score']]
#         dfStudentScore=dfStudentScore.loc[dfStudentScore['id_assessment'].isin(assessment)]
#         dfMedian = pd.DataFrame(list(dictMedian.items()), columns = ['id_assessment','score'])
#         dfMedian['type']='Median'
#         dfStudentScore['type']='Student'
#         dfTogether=pd.concat([dfStudentScore, dfMedian], ignore_index=True,axis=0)
#         dfTogether['id_assessment'] = dfTogether.id_assessment.astype(str)
#         dfTogether=dfTogether.dropna()
#         figure = px.bar(dfTogether, x="id_assessment", y="score", color="type", barmode="group",
#                 title="Comparison of Students Scores and Class Medians")
#         return figure
#     else:
#         dfSVLE1=dfSVLE[dfSVLE['date']<=day1]
#         dfCode=dfSVLE1[dfSVLE1['code_module']==CourseSection1[:3]]
#         dfCode=dfCode[dfCode['code_presentation']==CourseSection1[4:]]
#         dfCode=dfCode.replace({"id_site": resource_dict})
#         dfClass=dfCode.groupby(['id_site','id_student']).sum()
#         dfClass=dfClass.reset_index()
#         resourceType=dfClass['id_site'].unique()
#         resourceMedian={}
#         for resource in resourceType:
#             dfResourceMedian=dfClass[dfClass['id_site']==resource]
#             resourceMedian[resource]=np.median(dfResourceMedian['sum_click'])
#         dfMedian = pd.DataFrame(list(resourceMedian.items()), columns = ['id_site','sum_click'])
#         dfStudent=dfCode[dfCode['id_student']==StudentID1]
#         dfStudent=dfStudent.groupby(['id_site']).sum()
#         dfStudent=dfStudent.reset_index()
#         dfStudent['type']='Student'
#         dfMedian['type']='Median'
#         dfTogether1=pd.concat([dfStudent, dfMedian], ignore_index=True,axis=0)
#         figure2 = px.bar(dfTogether1, x="id_site", y="sum_click", color="type", barmode="group",
#             title="Comparison of Students Resource Usage and Class Medians")
#         return figure2

# @app.callback(
#     Output('final_table', 'children'),Input('CourseSection', 'value'),Input('Graph-Type','value'),Input('Day','value'))

# def update_figure(CourseSection1, type1, day1):
#     if type1=='Assessment':
#         dfA1=dfA[dfA['date']<=day1]
#         dfCourse_Assessments=dfA1[dfA1['Course_Section']==CourseSection1]
#         assessment=list(dfCourse_Assessments['id_assessment'])
#         trial1=[]
#         for assess in assessment:
#             dfSAScore=dfSA[dfSA['id_assessment']==assess]
#             trial1.append([assess]+list(dfSAScore['score'].describe().round(2)))
#         data1=pd.DataFrame(trial1,columns=['Assessment','Count','Mean','STD','Min','25%','50%','75%','Max'])
#         return [dash_table.DataTable(
#                 id='Summary',
#                 columns=[{"name": i, "id": i} for i in data1.columns],
#                 data=data1.to_dict('records'),
#             )]
#     else:
#         dfSVLE1=dfSVLE[dfSVLE['date']<=day1]
#         dfCode=dfSVLE1[dfSVLE1['code_module']==CourseSection1[:3]]
#         dfCode=dfCode[dfCode['code_presentation']==CourseSection1[4:]]
#         dfCode=dfCode.replace({"id_site": resource_dict})
#         dfClass=dfCode.groupby(['id_site','id_student']).sum()
#         dfClass=dfClass.reset_index()
#         resourceType=dfClass['id_site'].unique()
#         trial2=[]
#         for idSite in resourceType:
#             dfClick=dfClass[dfClass['id_site']==idSite]
#             trial2.append([idSite]+list(dfClick['sum_click'].describe().round(2)))
#         data2=pd.DataFrame(trial2,columns=['Resource','Count','Mean','STD','Min','25%','50%','75%','Max'])
#         return [dash_table.DataTable(
#                 id='Summary',
#                 columns=[{"name": i, "id": i} for i in data2.columns],
#                 data=data2.to_dict('records'),
#             )]

def _performance_label(diff_from_mean, stdev):
    """Bucket a signed difference from the class mean by its size relative to ``stdev``.

    Returns 'Average' when the absolute difference is at most half a standard
    deviation, 'Well Above/Below Average' when it is at least two standard
    deviations, and 'Above/Below Average' otherwise. A difference of exactly
    zero is always 'Average'.
    """
    magnitude = abs(diff_from_mean)
    if magnitude <= 0.5 * stdev:
        return 'Average'
    direction = 'Above' if diff_from_mean > 0 else 'Below'
    if magnitude >= 2 * stdev:
        return 'Well ' + direction + ' Average'
    return direction + ' Average'


def _recommendation_text(item_id, label):
    """Build the two-sentence recommendation for one below-average row.

    Integer ids (Python or NumPy integers) are treated as assessment ids;
    anything else is treated as a VLE resource/activity name.
    """
    if isinstance(item_id, (int, np.integer)):
        return ("This student performed " + label + " on the " + str(item_id) + " assessment.\n"
                + "It is recommended that the student review the content related to the " + str(item_id)
                + " assessment, perform extra practice, and make appropriate corrections to their assessment.\n\n")
    return ("This student's level of access to the " + str(item_id) + " resource was " + label + ".\n"
            + "If the student is struggling with the course content, it is recommended that they spend more time accessing "
            + str(item_id) + ".\n\n")


@app.callback(
    [Output('Student_table', 'children'), Output('Student_Comments', 'children')],
    [Input('CourseSection', 'value'),
     Input('student_info_table', 'active_cell'),
     Input('student_info_table', 'derived_viewport_data'),
     Input('Day', 'value')])
def update_figure(CourseSection1, active_cell, data, day1):
    """Render one student's scores and VLE clicks against class statistics.

    Parameters
    ----------
    CourseSection1 : value of the 'CourseSection' component; when falsy a
        placeholder table is returned and no comments are shown.
    active_cell, data : 'active_cell' and 'derived_viewport_data' of
        'student_info_table'; passed to ``get_id_student`` to obtain the
        student id (that helper is defined elsewhere in the notebook).
    day1 : value of the 'Day' component; rows with ``date`` greater than this
        are excluded from both the assessment and the click data.

    Returns
    -------
    Children for 'Student_table' (a DataTable) and for 'Student_Comments'
    (a Textarea holding recommendations for every below-average row).
    """
    if not CourseSection1:
        to_ret = pd.DataFrame(
            [["Select", "Course", "From", "Dropdown!"]],
            columns=['Assessment/Resource', 'Student Score/Clicks',
                     'Difference (Sum_Click - Median)', 'Difference (Sum_Click - Mean)'])
        return [dash_table.DataTable(columns=[{"name": i, "id": i} for i in to_ret.columns],
                                     data=to_ret.to_dict('records')), None]
    StudentID1 = get_id_student(active_cell, data)

    # ---- Assessments ----------------------------------------------------
    # Every submission (all students) for this section's assessments with date <= day1.
    assessments = df_sa_with_due_dates[(df_sa_with_due_dates['date'] <= day1)
                                       & (df_sa_with_due_dates['Course_Section'] == CourseSection1)]
    # All assessment ids in that window, whether or not this student submitted them.
    assessment_ids = dfA.loc[(dfA['date'] <= day1) & (dfA['Course_Section'] == CourseSection1),
                             ['id_assessment']]
    students_assessments = assessments[(assessments['id_student'] == StudentID1)
                                       & (assessments['date_submitted'] <= day1)]
    dfStudentScore = assessment_ids.merge(students_assessments, how='left', on='id_assessment')

    # Class-wide mean / std / median score per assessment.
    assessment_aggs = assessments.groupby('id_assessment').agg({'score': ['mean', 'std', 'median']})
    assessment_aggs.columns = ['Mean', 'STDEV', 'Median']
    assessment_aggs = assessment_aggs.reset_index()

    # how='right' keeps only assessments that have at least one submission in `assessments`.
    dfStudentScore = dfStudentScore.merge(assessment_aggs, how='right', on='id_assessment')
    # Missing student scores (not submitted) and undefined statistics are treated as 0.
    dfStudentScore = dfStudentScore.fillna(0)
    dfStudentScore = pd.DataFrame({
        'ID': dfStudentScore['id_assessment'],
        'Student': dfStudentScore['score'],
        'Difference (Sum_Click - Median)': (dfStudentScore['score'] - dfStudentScore['Median']).round(1),
        'Difference (Sum_Click - Mean)': (dfStudentScore['score'] - dfStudentScore['Mean']).round(1),
        'STDEV': dfStudentScore['STDEV'],
    })

    # ---- VLE clicks -----------------------------------------------------
    dfSVLE1 = svle_with_type_dict[CourseSection1]
    dfSVLE1 = dfSVLE1[dfSVLE1['date'] <= day1]
    # Total clicks per student and activity type, restricted to the selected day window
    # (previously the unfiltered frame was aggregated, so the Day input was ignored here).
    svle_student_sums = (dfSVLE1[['activity_type', 'id_student', 'sum_click']]
                         .groupby(['id_student', 'activity_type'])
                         .sum()
                         .reset_index())
    # Class-wide mean / std / median of those per-student totals, per activity type.
    svle_aggs = svle_student_sums.groupby('activity_type').agg({'sum_click': ['mean', 'std', 'median']})
    svle_aggs.columns = ['Mean', 'STDEV', 'Median']
    svle_aggs = svle_aggs.reset_index()

    dfStudent = svle_student_sums[svle_student_sums['id_student'] == StudentID1]
    # Left merge from the class statistics so activity types the student never used still appear (as 0).
    dfStudent = svle_aggs.merge(dfStudent, how='left', on='activity_type').fillna(0)
    dfStudentResource = pd.DataFrame({
        'ID': dfStudent['activity_type'],
        'Student': dfStudent['sum_click'],
        'Difference (Sum_Click - Median)': (dfStudent['sum_click'] - dfStudent['Median']).round(1),
        'Difference (Sum_Click - Mean)': (dfStudent['sum_click'] - dfStudent['Mean']).round(1),
        'STDEV': dfStudent['STDEV'],
    })

    # ---- Combine and write recommendations -------------------------------
    dfTogetherStudent = pd.concat([dfStudentScore, dfStudentResource], ignore_index=True, axis=0)
    recommendations = []
    for item_id, diff_from_mean, stdev in zip(dfTogetherStudent['ID'],
                                              dfTogetherStudent['Difference (Sum_Click - Mean)'],
                                              dfTogetherStudent['STDEV']):
        label = _performance_label(diff_from_mean, stdev)
        # Only below-average rows produce a recommendation.
        if 'Below' in label:
            recommendations.append(_recommendation_text(item_id, label))
    text = "".join(recommendations)

    # The standard deviation is only needed for the labels above; it is not displayed.
    dfTogetherStudent = dfTogetherStudent.drop(columns='STDEV').rename(
        columns={'ID': 'Assessment/Resource', 'Student': 'Student Score/Clicks'})

    return [dash_table.DataTable(
        id='Summary1',
        columns=[{"name": i, "id": i} for i in dfTogetherStudent.columns],
        data=dfTogetherStudent.to_dict('records'),
        )],[dcc.Textarea(
        id='textarea-example',
        value=text,
        style={'width': '100%','maxWidth':'100%', 'height': 300},
        )]


    
    
# Start the Dash server on port 8301; with mode="external" the app is served
# at a local URL (printed in the cell output) rather than rendered in the cell.
app.run_server(
    mode="external",
    port=8301,
    debug=False,
)

Dash app running on http://127.0.0.1:8301/


In [93]:
# Spot check: scores of student 106247 in section AAA-2013J, limited to rows with date <= 60.
# (Unfiltered variant: df_sa_with_due_dates[df_sa_with_due_dates.id_student==106247])
df_sa_with_due_dates.loc[
    (df_sa_with_due_dates['id_student'] == 106247)
    & (df_sa_with_due_dates['Course_Section'] == 'AAA-2013J')
    & (df_sa_with_due_dates['date'] <= 60),
    ['id_assessment', 'id_student', 'score'],
]

Unnamed: 0,id_assessment,id_student,score
28,1752,106247,67.0
528,1753,106247,66.0


In [45]:
# Rows of the typed VLE click log for module AAA, presentation 2013J.
df_test = df_svle_with_type.loc[
    (df_svle_with_type['code_presentation'] == '2013J')
    & (df_svle_with_type['code_module'] == 'AAA')
]

In [51]:
# Total clicks per (student, activity type) for section AAA-2013J.
svle_student_sums_ = (
    svle_with_type_dict['AAA-2013J'][['activity_type', 'id_student', 'sum_click']]
    .groupby(['id_student', 'activity_type'])
    .sum()
    .reset_index()
)

# Mean / std / median of those per-student totals, per activity type.
svle_aggs_ = (
    svle_student_sums_
    .groupby('activity_type')
    .agg({'sum_click': ['mean', 'std', 'median']})
)
svle_aggs_.columns = ['Mean', 'STDEV', 'Median']

# Display with activity_type as a regular column (svle_aggs_ itself keeps it as the index).
svle_aggs_.reset_index()

Unnamed: 0,activity_type,Mean,STDEV,Median
0,dataplus,8.903382,7.804675,7.0
1,forumng,469.286096,817.459208,162.5
2,glossary,3.758621,8.46328,1.0
3,homepage,371.283069,440.280507,253.5
4,oucollaborate,2.095652,1.90555,1.0
5,oucontent,707.624339,581.241944,572.0
6,resource,19.798387,25.617857,13.0
7,subpage,105.103448,86.619628,84.0
8,url,44.005587,56.281804,25.0
