In [1]:
import numpy as np
import pandas as pd

def int_handle_cnt(internel_handle_list, df_int_handle, name, prefix=None):
    """Count, per student, the rows whose internal handle is in a given list.

    Parameters
    ----------
    internel_handle_list : list of str
        Values of the 'internal_handle' column that belong to this feature.
    df_int_handle : pandas.DataFrame
        Frame with at least the columns 'De-id' and 'internal_handle'.
        Any additional columns are ignored.
    name : str
        Feature name; the count column is called ``prefix + name``.
    prefix : str, optional
        Column-name prefix. When omitted, the notebook-level global
        ``PRE_FIX`` is used, which is what this function always did before
        the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Two columns, 'De-id' and ``prefix + name``, one row per 'De-id' that
        has at least one matching row.
    """
    if prefix is None:
        # Legacy path: fall back to the global set by the calling cell.
        prefix = PRE_FIX

    matching = df_int_handle[df_int_handle['internal_handle'].isin(internel_handle_list)]
    # Count one explicit column so extra columns in the input cannot change
    # the shape of the result (a frame-wide count() yields one column each).
    counts = matching.groupby('De-id')['internal_handle'].count().reset_index()
    counts.columns = ['De-id', prefix + name]
    return counts

def extract_one_month(df, PRE_FIX):
    """Build per-student activity counts for one month of LMS event rows.

    Only rows whose 'event_type' is PAGE_ACCESS, COURSE_ACCESS, LOGIN_ATTEMPT,
    SESSION_TIMEOUT or LOGOUT are considered. For every student ('De-id')
    with at least one such row, the result holds:

    * the number of LOGIN_ATTEMPT, SESSION_TIMEOUT and LOGOUT rows, and
    * the number of rows whose 'internal_handle' falls in each handle group
      (group, db, myinfo, course, journal, email, staff, annoucements,
      content, grade). Some handles belong to more than one group and are
      counted in each.

    Parameters
    ----------
    df : pandas.DataFrame
        Event rows with at least the columns 'De-id', 'event_type' and
        'internal_handle'.
    PRE_FIX : str
        Prefix for every feature column name (e.g. '09_').

    Returns
    -------
    pandas.DataFrame
        One row per student, sorted by id, with the id column renamed to
        'MASKED_STUDENT_ID'. A feature with no matching rows for a student
        is NaN (callers currently fill these with 0 after merging).

    Notes
    -----
    Earlier versions used the LOGIN_ATTEMPT counts as the left-merge base,
    which silently dropped every student who had activity but no login
    attempt in the month. The base is now every student with any tracked
    event, so those students keep their other counts.
    """
    tracked_events = ['PAGE_ACCESS', 'COURSE_ACCESS', 'LOGIN_ATTEMPT',
                      'SESSION_TIMEOUT', 'LOGOUT']
    counted_events = ['LOGIN_ATTEMPT', 'SESSION_TIMEOUT', 'LOGOUT']

    # (feature name, internal handles) in output column order. The feature
    # names are part of the output schema, so the 'annoucements' spelling is
    # kept as-is.
    handle_groups = [
        ('group', ['groups', 'cp_group_create_self_groupmem', 'group_file',
                   'group_forum', 'groups_sign_up', 'agroup', 'group_blogs',
                   'group_task_create', 'group_task_view',
                   'cp_group_edit_self_groupmem', 'group_file_add',
                   'group_email', 'cp_groups', 'cp_groups_settings',
                   'edit_group_blog_entry', 'db_forum_collection_group',
                   'group_tasks', 'group_journal', 'group_virtual_classroom',
                   'add_group_journal_entry', 'email_all_groups',
                   'edit_group_journal_entry', 'email_select_groups',
                   'add_group_blog_entry']),
        ('db', ['discussion_board_entry', 'db_thread_list_entry',
                'discussion_board', 'db_thread_list', 'db_collection',
                'db_collection_group', 'db_collection_entry',
                'db_thread_list_group']),
        ('myinfo', ['my_inst_personal_info', 'my_inst_personal_settings',
                    'my_inst_personal_edit', 'my_inst_myplaces_settings',
                    'my_tasks', 'my_task_create', 'my_email_courses',
                    'my_task_view', 'my_announcements']),
        ('course', ['course_tools_area', 'course_task_view', 'enroll_course',
                    'classic_course_catalog']),
        ('journal', ['journal', 'journal_view', 'view_draft_journal_entry',
                     'add_journal_entry', 'edit_journal_entry']),
        ('email', ['send_email', 'email_all_instructors', 'email_all_students',
                   'email_select_students', 'email_all_users',
                   'email_select_groups', 'email_all_groups']),
        ('staff', ['staff_information', 'cp_staff_information']),
        ('annoucements', ['my_announcements', 'announcements_entry',
                          'announcements', 'cp_announcements']),
        ('content', ['content', 'cp_content']),
        ('grade', ['check_grade']),
    ]

    def _count_per_student(rows, column, feature):
        """Count non-null `column` values per 'De-id' as PRE_FIX + feature."""
        counts = rows.groupby('De-id')[column].count().reset_index()
        counts.columns = ['De-id', PRE_FIX + feature]
        return counts

    events = df.loc[df['event_type'].isin(tracked_events),
                    ['De-id', 'event_type', 'internal_handle']]

    feature_frames = [
        _count_per_student(events[events['event_type'] == event], 'event_type', event)
        for event in counted_events
    ]
    feature_frames += [
        _count_per_student(events[events['internal_handle'].isin(handles)],
                           'internal_handle', feature)
        for feature, handles in handle_groups
    ]

    # Base table: every student with at least one tracked event, so that a
    # month without a LOGIN_ATTEMPT does not discard the student's counts.
    df_all = (events[['De-id']]
              .dropna()
              .drop_duplicates()
              .sort_values('De-id')
              .reset_index(drop=True))
    for frame in feature_frames:
        df_all = pd.merge(df_all, frame, on='De-id', how='left')

    return df_all.rename(columns={'De-id': 'MASKED_STUDENT_ID'})



In [7]:
# Read 'Std_Lib_features_2016_se1.csv' once and bind it to both names;
# `df_se1` is the one that later cells rebind as monthly features are merged.
df_se1 = lib_se1 = pd.read_csv('Std_Lib_features_2016_se1.csv')

In [8]:
# Merge the per-student LMS activity counts into df_se1, one month at a time.
# Each entry is (column prefix, tab-separated activity file).
# NOTE(review): the September file name uses '2016_09' while the others use
# '2016-10' etc.; kept exactly as written — confirm against the files on disk.
MONTHLY_ACTIVITY_FILES = [
    ('09_', 'DR0008_activity_accumulator_2016_09.csv'),
    ('10_', 'DR0008_activity_accumulator_2016-10.csv'),
    ('11_', 'DR0008_activity_accumulator_2016-11.csv'),
    ('12_', 'DR0008_activity_accumulator_2016-12.csv'),
]

# NOTE: this cell rebinds df_se1, so running it twice merges the monthly
# columns a second time; re-run the cell that loads df_se1 first.
# The loop variable is deliberately the notebook-level name PRE_FIX, so that
# helper code reading that global sees the prefix of the current month.
for PRE_FIX, activity_path in MONTHLY_ACTIVITY_FILES:
    df = pd.read_csv(activity_path, sep='\t')
    df_one_month = extract_one_month(df, PRE_FIX)
    # Left merge keeps every row of df_se1; fillna(0) is applied to the whole
    # merged frame, so students absent from this month get 0 for its columns.
    df_se1 = pd.merge(df_se1, df_one_month, on=['MASKED_STUDENT_ID'], how='left').fillna(0)
    # Release the raw month before loading the next file.
    del df, df_one_month

In [12]:
# Write the merged table (the columns loaded into df_se1 plus the monthly LMS
# activity counts) to CSV without the row index.
# NOTE(review): the earlier note here read "label_atRist" (presumably
# "label_atRisk"); no label column is added in the visible cells — confirm.
df_se1.to_csv('2016_se1_lib_lms.csv', index=False)