In [61]:
import numpy as np
import random
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score
from imblearn.ensemble import BalancedRandomForestClassifier

In [62]:
def multiscale_reg_ext(seq, num_scale, stepsize, min_seq_length, overlap):
    if not isinstance(seq, np.ndarray):
        seq = np.array(seq)    
    seq[seq != 0] = 1;
    seq = seq.astype(int)  
    nz_index = np.argwhere(seq != 0)
    reg_sub_seq = []
    for i in range(0,num_scale):
        ## None zero sub_sequence extraction
        sub_seq = []        
        sub_seq_length = min_seq_length + i * stepsize
        buf_reg_feature = np.zeros(2 ** sub_seq_length - 1) ## will not count the number of zero subsequences.
        for j in nz_index:
            if overlap == 1:
                for k in range(int(j - sub_seq_length + 1), int(j + 1)):
                    buf_sub_seq = list(seq[k : k + sub_seq_length])
                    if len(buf_sub_seq) >= sub_seq_length:
                        sub_seq.append(list(seq[k : k + sub_seq_length]))
            else:
                k = int(j / sub_seq_length)
                sub_seq.append(list(seq[k * sub_seq_length : (k + 1) * sub_seq_length]))
        
        ## Removing unique sub_sequence
        for buf_sub_seq in sub_seq:
            num_seb_seq = sub_seq.count(buf_sub_seq)
            if num_seb_seq == 1:
                sub_seq.remove(buf_sub_seq)
            else:
                index = int(''.join(str(k) for k in buf_sub_seq),2) ## Will not be 0 because all zero subsequences have been removed.
                buf_reg_feature[index - 1] += 1
        
        reg_sub_seq.append(sub_seq)
                
        if i == 0:
            reg_feature = buf_reg_feature
        else:
            reg_feature = np.append(reg_feature,buf_reg_feature)
        
    reg_feature = list(reg_feature)    
                 
    return reg_feature

In [64]:
data_path_list = [
    'DR0008_activity_accumulator_2016_09.csv',
    'DR0008_activity_accumulator_2016-10.csv',
    'DR0008_activity_accumulator_2016-11.csv',
    'DR0008_activity_accumulator_2016-12.csv'
]

def create_day_seq(days, length):

    tmp_dict = {}
    for day in days:
        try:
            tmp_dict[day] += 1
        except:
            tmp_dict[day] = 1
    res = [0]*(length+1)
    for k,v in tmp_dict.items():
        res[k] = v
    return res

def extract_function_seq(data_path, function, month='9', within_day=False):
    df                   = pd.read_csv(data_path, sep='\t')
    df_temp              = df[df['event_type'] == function][['De-id', 'timestamp']]
    df_temp['timestamp'] = df_temp['timestamp'].apply(pd.to_datetime)
    df_temp['day']       = df_temp['timestamp'].apply(lambda x: x.day)
    df_day_list          = df_temp[['De-id', 'day']].groupby('De-id').agg(create_day_seq, length=df_temp['day'].nunique()).reset_index()
    df_day_list.columns  = ['De-id', month + '_day_list']
    return df_day_list

def get_all_seq(data_path_list, function):
    first_flag = 1
    for data_path in data_path_list:
        df_day_list = extract_function_seq(data_path, function, data_path.split('.')[0][-2:])
        if first_flag:
            df_all = df_day_list.copy()
            first_flag = 0
        else:
            df_all = pd.merge(df_all, df_day_list, on='De-id', how='left')

    df_all['09_day_list'] = df_all['09_day_list'].fillna(0)
    df_all['10_day_list'] = df_all['10_day_list'].fillna(0)
    df_all['11_day_list'] = df_all['11_day_list'].fillna(0)
    df_all['12_day_list'] = df_all['12_day_list'].fillna(0)

    df_all['09_day_list'] = df_all['09_day_list'].apply(lambda x: [0]*31 if x == 0 else x)
    df_all['10_day_list'] = df_all['10_day_list'].apply(lambda x: [0]*32 if x == 0 else x)
    df_all['11_day_list'] = df_all['11_day_list'].apply(lambda x: [0]*31 if x == 0 else x)
    df_all['12_day_list'] = df_all['12_day_list'].apply(lambda x: [0]*32 if x == 0 else x)

    df_all['total_list_' + function]  = df_all.apply(lambda row: row['09_day_list'][1:] +  row['10_day_list'][1:]
                                           + row['11_day_list'][1:] +  row['12_day_list'][1:], axis=1)
    return df_all

def add_at_risk_label(df_all):
    at_rsk_label            = pd.read_csv('Std_list_atRist_2016_se1.csv')
    at_rsk_label['at_risk'] = at_rsk_label['CUM_GPA'].apply(lambda x: '1' if x <= 2.0 else '0')
    at_rsk_label.columns    = ['De-id', 'CUM_GPA', 'at_risk']
    df_all                  = pd.merge(df_all, at_rsk_label, on='De-id', how='left')
    df_all['at_risk']       = df_all['at_risk'].fillna('0')
    return df_all

def get_funtions_seq(df_temp=None, function=None):
    if df_temp is None:
        df_temp = get_all_seq(data_path_list, function)
    
    df_temp = df_temp.rename(columns={'De-id':'MASKED_STUDENT_ID'})
    return df_temp

In [13]:
df_se1 = pd.read_csv('2016_se1_lib_lms.csv')
df_se1.head()
df_se1_features = df_se1['MASKED_STUDENT_ID']
print(df_se1_features.shape)

lms_functions = ['COURSE_ACCESS', 'PAGE_ACCESS', 'LOGIN_ATTEMPT', 'SESSION_TIMEOUT', 'LOGOUT']
index = 0

for fun in lms_functions:
    print('getting seq of', fun, '...')
    df = get_funtions_seq(function=fun)
    features = [i for i in list(df.columns) if i.endswith(fun)]
    features.append('MASKED_STUDENT_ID')
    df_se1_features = pd.merge(df_se1_features, df[features], on='MASKED_STUDENT_ID', how='left').fillna(0)
    print(df_se1_features.shape)

(15503,)
getting seq of COURSE_ACCESS ...
(15503, 2)
getting seq of PAGE_ACCESS ...
(15503, 3)
getting seq of LOGIN_ATTEMPT ...
(15503, 4)
getting seq of SESSION_TIMEOUT ...
(15503, 5)
getting seq of LOGOUT ...
(15503, 6)


In [29]:
df_se1_features.to_csv('se1_LMS_function_seq.csv')

In [82]:
df_se1_features_list = df_se1_features[[i for i in df_se1_features if not i.startswith('reg_')]]
df_se1_features_list.head()

Unnamed: 0,MASKED_STUDENT_ID,total_list_COURSE_ACCESS,total_list_PAGE_ACCESS,total_list_LOGIN_ATTEMPT,total_list_SESSION_TIMEOUT,total_list_LOGOUT
0,8TMIKVZ5,"[0, 0, 0, 3, 4, 0, 0, 6, 0, 0, 0, 0, 15, 0, 85...","[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, ...","[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, ...","[0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,N2YYKTMZ,"[0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 3, 8, 8, 27...","[0, 0, 0, 0, 0, 0, 2, 7, 0, 0, 0, 5, 1, 1, 4, ...","[0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, ...","[0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, ..."
2,BZRW4GD3,"[0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 21, 11, 0, ...","[0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 22, 10, 3, ...","[0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 2, 1, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 1, 0, ...","[0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
3,HJTBF62Q,"[10, 18, 0, 19, 6, 7, 15, 0, 0, 0, 0, 10, 79, ...","[0, 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 11, 1, 1,...","[3, 6, 0, 2, 9, 3, 3, 0, 0, 0, 0, 11, 5, 5, 1,...","[2, 3, 0, 1, 4, 1, 1, 0, 0, 0, 0, 3, 1, 2, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ..."
4,33XUIDIG,"[0, 0, 21, 0, 23, 0, 1, 9, 48, 0, 0, 0, 0, 0, ...","[0, 0, 22, 0, 7, 4, 5, 2, 4, 0, 0, 0, 0, 7, 0,...","[0, 0, 1, 0, 2, 1, 2, 2, 3, 0, 0, 0, 0, 2, 0, ...","[0, 0, 1, 0, 1, 1, 1, 2, 3, 1, 0, 0, 0, 1, 0, ...","[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."


In [99]:
df_se1_features_list = df_se1_features[[i for i in df_se1_features if not i.startswith('reg_')]]
df_se1_features_list.head()


year = 2016
semester = 1

num_scale = 4
stepsize = 1
min_seq_length = 1
overlap = 0;

if semester == 2:
    year_libData = year + 1
    semester_startMonth = '02'
    semester_endMonth = '05'
else:
    year_libData = year
    semester_startMonth = '09'
    semester_endMonth = '12'
    
df_se1_features_list['reg_'+ str(num_scale) +'_COURSE_ACCESS'] = df_se1_features_list['total_list_COURSE_ACCESS'].apply(
    lambda x: multiscale_reg_ext(x, num_scale, stepsize, min_seq_length, overlap))

df_se1_features_list['reg_'+ str(num_scale) +'_PAGE_ACCESS'] = df_se1_features_list['total_list_PAGE_ACCESS'].apply(
    lambda x: multiscale_reg_ext(x, num_scale, stepsize, min_seq_length, overlap))

df_se1_features_list['reg_'+ str(num_scale) +'_COURSE_ACCESS'] = df_se1_features_list['total_list_COURSE_ACCESS'].apply(
    lambda x: multiscale_reg_ext(x, num_scale, stepsize, min_seq_length, overlap))

df_se1_features_list['reg_'+ str(num_scale) +'_LOGIN_ATTEMPT'] = df_se1_features_list['total_list_LOGIN_ATTEMPT'].apply(
    lambda x: multiscale_reg_ext(x, num_scale, stepsize, min_seq_length, overlap))

df_se1_features_list['reg_'+ str(num_scale) +'_SESSION_TIMEOUT'] = df_se1_features_list['total_list_SESSION_TIMEOUT'].apply(
    lambda x: multiscale_reg_ext(x, num_scale, stepsize, min_seq_length, overlap))


df_lib_seq = pd.read_csv('Std_Lib_sequence_day_2016_se1.csv')
df_lib_seq = df_lib_seq[['0','1']]
df_lib_seq = df_lib_seq.rename(columns= {'0': 'MASKED_STUDENT_ID', '1':'lib_total_list'})
def split_str(s1):
    try:
        s = s1[2:-2].split()
        return [float(i) for i in s]
    except:
        return 0
df_lib_seq['lib_total_list'] = df_lib_seq['lib_total_list'].apply(split_str)
df_lib_seq['reg_'+ str(num_scale) +'_LIB'] = df_lib_seq['lib_total_list'].apply(
    lambda x: multiscale_reg_ext(x, num_scale, stepsize, min_seq_length, overlap))

df_se1_features_all = pd.merge(df_se1_features_list, df_lib_seq, on='MASKED_STUDENT_ID', how='right')
df_se1_features_all.to_csv('Se1_seq_feature_scale_'+str(num_scale)+'.csv')
df_se1_features_all.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

Unnamed: 0,MASKED_STUDENT_ID,total_list_COURSE_ACCESS,total_list_PAGE_ACCESS,total_list_LOGIN_ATTEMPT,total_list_SESSION_TIMEOUT,total_list_LOGOUT,reg_4_COURSE_ACCESS,reg_4_PAGE_ACCESS,reg_4_LOGIN_ATTEMPT,reg_4_SESSION_TIMEOUT,lib_total_list,reg_4_LIB
0,8TMIKVZ5,"[0, 0, 0, 3, 4, 0, 0, 6, 0, 0, 0, 0, 15, 0, 85...","[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, ...","[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, ...","[0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[87.0, 6.0, 9.0, 72.0, 2.0, 0.0, 8.0, 0.0, 5.0...","[88.0, 6.0, 8.0, 74.0, 2.0, 0.0, 8.0, 0.0, 3.0...","[88.0, 6.0, 8.0, 74.0, 2.0, 0.0, 8.0, 0.0, 3.0...","[84.0, 6.0, 10.0, 68.0, 3.0, 2.0, 4.0, 0.0, 10...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[18.0, 6.0, 8.0, 4.0, 5.0, 5.0, 0.0, 4.0, 2.0,..."
1,OZ6FIGHH,"[0, 0, 0, 0, 0, 0, 5, 8, 0, 0, 0, 0, 0, 0, 5, ...","[0, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0, 0, 3, ...","[0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, ...","[0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 2, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[35.0, 10.0, 9.0, 16.0, 5.0, 4.0, 6.0, 4.0, 0....","[34.0, 10.0, 10.0, 14.0, 5.0, 4.0, 8.0, 4.0, 0...","[35.0, 10.0, 9.0, 16.0, 5.0, 4.0, 6.0, 4.0, 0....","[34.0, 9.0, 9.0, 16.0, 5.0, 5.0, 6.0, 4.0, 0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[36.0, 7.0, 7.0, 22.0, 4.0, 4.0, 10.0, 3.0, 0...."
2,QSGBC7CZ,"[6, 0, 21, 0, 0, 0, 0, 0, 0, 4, 8, 35, 27, 0, ...","[1, 0, 11, 0, 0, 0, 0, 2, 0, 1, 4, 9, 12, 3, 2...","[1, 0, 2, 0, 0, 0, 0, 1, 0, 1, 1, 4, 3, 1, 1, ...","[1, 0, 2, 0, 0, 0, 0, 1, 0, 1, 1, 3, 3, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...","[87.0, 9.0, 10.0, 68.0, 0.0, 0.0, 4.0, 2.0, 15...","[91.0, 10.0, 11.0, 70.0, 0.0, 2.0, 2.0, 0.0, 1...","[91.0, 10.0, 11.0, 70.0, 0.0, 2.0, 2.0, 0.0, 1...","[92.0, 12.0, 8.0, 72.0, 0.0, 2.0, 6.0, 0.0, 10...","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[22.0, 8.0, 8.0, 6.0, 4.0, 3.0, 6.0, 3.0, 0.0,..."
3,EIC4AO9Q,"[0, 0, 0, 10, 4, 0, 0, 26, 3, 7, 9, 32, 0, 24,...","[0, 0, 0, 1, 10, 0, 0, 6, 3, 5, 2, 12, 0, 8, 0...","[0, 0, 0, 1, 1, 0, 0, 3, 2, 2, 1, 3, 0, 3, 0, ...","[0, 0, 0, 0, 1, 0, 0, 1, 4, 1, 1, 3, 1, 2, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[73.0, 17.0, 14.0, 42.0, 3.0, 0.0, 16.0, 0.0, ...","[73.0, 17.0, 14.0, 42.0, 4.0, 0.0, 14.0, 3.0, ...","[75.0, 16.0, 15.0, 44.0, 4.0, 0.0, 14.0, 2.0, ...","[71.0, 15.0, 16.0, 40.0, 4.0, 4.0, 16.0, 0.0, ...","[0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, ...","[42.0, 11.0, 13.0, 18.0, 7.0, 4.0, 8.0, 5.0, 6..."
4,S9BIH11O,"[0, 0, 18, 0, 29, 21, 17, 11, 5, 2, 11, 25, 30...","[0, 0, 4, 0, 13, 7, 9, 7, 5, 3, 5, 14, 14, 2, ...","[0, 0, 2, 0, 2, 1, 4, 2, 2, 1, 1, 2, 3, 2, 0, ...","[0, 0, 1, 1, 2, 1, 3, 2, 2, 2, 0, 2, 3, 3, 0, ...",0,"[102.0, 3.0, 5.0, 94.0, 2.0, 0.0, 2.0, 0.0, 4....","[107.0, 7.0, 4.0, 96.0, 0.0, 0.0, 2.0, 0.0, 8....","[107.0, 7.0, 4.0, 96.0, 0.0, 0.0, 2.0, 0.0, 8....","[103.0, 9.0, 4.0, 90.0, 2.0, 0.0, 6.0, 0.0, 9....","[0.0, 3.0, 1.0, 0.0, 6.0, 2.0, 1.0, 1.0, 3.0, ...","[106.0, 8.0, 6.0, 92.0, 2.0, 0.0, 8.0, 0.0, 2...."


Unnamed: 0,MASKED_STUDENT_ID,total_list_COURSE_ACCESS,total_list_PAGE_ACCESS,total_list_LOGIN_ATTEMPT,total_list_SESSION_TIMEOUT,total_list_LOGOUT,reg_3_COURSE_ACCESS,reg_3_PAGE_ACCESS,reg_3_LOGIN_ATTEMPT,reg_3_SESSION_TIMEOUT,lib_total_list,reg_3_LIB
0,8TMIKVZ5,"[0, 0, 0, 3, 4, 0, 0, 6, 0, 0, 0, 0, 15, 0, 85...","[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, ...","[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, ...","[0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[87.0, 6.0, 9.0, 72.0, 2.0, 0.0, 8.0, 0.0, 5.0...","[88.0, 6.0, 8.0, 74.0, 2.0, 0.0, 8.0, 0.0, 3.0...","[88.0, 6.0, 8.0, 74.0, 2.0, 0.0, 8.0, 0.0, 3.0...","[84.0, 6.0, 10.0, 68.0, 3.0, 2.0, 4.0, 0.0, 10...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[18.0, 6.0, 8.0, 4.0, 5.0, 5.0, 0.0, 4.0, 2.0,..."
1,OZ6FIGHH,"[0, 0, 0, 0, 0, 0, 5, 8, 0, 0, 0, 0, 0, 0, 5, ...","[0, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0, 0, 3, ...","[0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, ...","[0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 2, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[35.0, 10.0, 9.0, 16.0, 5.0, 4.0, 6.0, 4.0, 0....","[34.0, 10.0, 10.0, 14.0, 5.0, 4.0, 8.0, 4.0, 0...","[35.0, 10.0, 9.0, 16.0, 5.0, 4.0, 6.0, 4.0, 0....","[34.0, 9.0, 9.0, 16.0, 5.0, 5.0, 6.0, 4.0, 0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[36.0, 7.0, 7.0, 22.0, 4.0, 4.0, 10.0, 3.0, 0...."
2,QSGBC7CZ,"[6, 0, 21, 0, 0, 0, 0, 0, 0, 4, 8, 35, 27, 0, ...","[1, 0, 11, 0, 0, 0, 0, 2, 0, 1, 4, 9, 12, 3, 2...","[1, 0, 2, 0, 0, 0, 0, 1, 0, 1, 1, 4, 3, 1, 1, ...","[1, 0, 2, 0, 0, 0, 0, 1, 0, 1, 1, 3, 3, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...","[87.0, 9.0, 10.0, 68.0, 0.0, 0.0, 4.0, 2.0, 15...","[91.0, 10.0, 11.0, 70.0, 0.0, 2.0, 2.0, 0.0, 1...","[91.0, 10.0, 11.0, 70.0, 0.0, 2.0, 2.0, 0.0, 1...","[92.0, 12.0, 8.0, 72.0, 0.0, 2.0, 6.0, 0.0, 10...","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[22.0, 8.0, 8.0, 6.0, 4.0, 3.0, 6.0, 3.0, 0.0,..."
3,EIC4AO9Q,"[0, 0, 0, 10, 4, 0, 0, 26, 3, 7, 9, 32, 0, 24,...","[0, 0, 0, 1, 10, 0, 0, 6, 3, 5, 2, 12, 0, 8, 0...","[0, 0, 0, 1, 1, 0, 0, 3, 2, 2, 1, 3, 0, 3, 0, ...","[0, 0, 0, 0, 1, 0, 0, 1, 4, 1, 1, 3, 1, 2, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[73.0, 17.0, 14.0, 42.0, 3.0, 0.0, 16.0, 0.0, ...","[73.0, 17.0, 14.0, 42.0, 4.0, 0.0, 14.0, 3.0, ...","[75.0, 16.0, 15.0, 44.0, 4.0, 0.0, 14.0, 2.0, ...","[71.0, 15.0, 16.0, 40.0, 4.0, 4.0, 16.0, 0.0, ...","[0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, ...","[42.0, 11.0, 13.0, 18.0, 7.0, 4.0, 8.0, 5.0, 6..."
4,S9BIH11O,"[0, 0, 18, 0, 29, 21, 17, 11, 5, 2, 11, 25, 30...","[0, 0, 4, 0, 13, 7, 9, 7, 5, 3, 5, 14, 14, 2, ...","[0, 0, 2, 0, 2, 1, 4, 2, 2, 1, 1, 2, 3, 2, 0, ...","[0, 0, 1, 1, 2, 1, 3, 2, 2, 2, 0, 2, 3, 3, 0, ...",0,"[102.0, 3.0, 5.0, 94.0, 2.0, 0.0, 2.0, 0.0, 4....","[107.0, 7.0, 4.0, 96.0, 0.0, 0.0, 2.0, 0.0, 8....","[107.0, 7.0, 4.0, 96.0, 0.0, 0.0, 2.0, 0.0, 8....","[103.0, 9.0, 4.0, 90.0, 2.0, 0.0, 6.0, 0.0, 9....","[0.0, 3.0, 1.0, 0.0, 6.0, 2.0, 1.0, 1.0, 3.0, ...","[106.0, 8.0, 6.0, 92.0, 2.0, 0.0, 8.0, 0.0, 2...."


In [97]:
df_se1_features_all.head()

Unnamed: 0,MASKED_STUDENT_ID,total_list_COURSE_ACCESS,total_list_PAGE_ACCESS,total_list_LOGIN_ATTEMPT,total_list_SESSION_TIMEOUT,total_list_LOGOUT,reg_3_COURSE_ACCESS,reg_3_PAGE_ACCESS,reg_3_LOGIN_ATTEMPT,reg_3_SESSION_TIMEOUT,lib_total_list,reg_3_LIB
0,8TMIKVZ5,"[0, 0, 0, 3, 4, 0, 0, 6, 0, 0, 0, 0, 15, 0, 85...","[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 1, ...","[0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, ...","[0, 0, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 2, 0, 2, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[87.0, 6.0, 9.0, 72.0, 2.0, 0.0, 8.0, 0.0, 5.0...","[88.0, 6.0, 8.0, 74.0, 2.0, 0.0, 8.0, 0.0, 3.0...","[88.0, 6.0, 8.0, 74.0, 2.0, 0.0, 8.0, 0.0, 3.0...","[84.0, 6.0, 10.0, 68.0, 3.0, 2.0, 4.0, 0.0, 10...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, ...","[18.0, 6.0, 8.0, 4.0, 5.0, 5.0, 0.0, 4.0, 2.0,..."
1,OZ6FIGHH,"[0, 0, 0, 0, 0, 0, 5, 8, 0, 0, 0, 0, 0, 0, 5, ...","[0, 0, 0, 0, 0, 0, 3, 5, 0, 0, 0, 0, 0, 0, 3, ...","[0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, ...","[0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 2, ...","[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...","[35.0, 10.0, 9.0, 16.0, 5.0, 4.0, 6.0, 4.0, 0....","[34.0, 10.0, 10.0, 14.0, 5.0, 4.0, 8.0, 4.0, 0...","[35.0, 10.0, 9.0, 16.0, 5.0, 4.0, 6.0, 4.0, 0....","[34.0, 9.0, 9.0, 16.0, 5.0, 5.0, 6.0, 4.0, 0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[36.0, 7.0, 7.0, 22.0, 4.0, 4.0, 10.0, 3.0, 0...."
2,QSGBC7CZ,"[6, 0, 21, 0, 0, 0, 0, 0, 0, 4, 8, 35, 27, 0, ...","[1, 0, 11, 0, 0, 0, 0, 2, 0, 1, 4, 9, 12, 3, 2...","[1, 0, 2, 0, 0, 0, 0, 1, 0, 1, 1, 4, 3, 1, 1, ...","[1, 0, 2, 0, 0, 0, 0, 1, 0, 1, 1, 3, 3, 1, 1, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...","[87.0, 9.0, 10.0, 68.0, 0.0, 0.0, 4.0, 2.0, 15...","[91.0, 10.0, 11.0, 70.0, 0.0, 2.0, 2.0, 0.0, 1...","[91.0, 10.0, 11.0, 70.0, 0.0, 2.0, 2.0, 0.0, 1...","[92.0, 12.0, 8.0, 72.0, 0.0, 2.0, 6.0, 0.0, 10...","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[22.0, 8.0, 8.0, 6.0, 4.0, 3.0, 6.0, 3.0, 0.0,..."
3,EIC4AO9Q,"[0, 0, 0, 10, 4, 0, 0, 26, 3, 7, 9, 32, 0, 24,...","[0, 0, 0, 1, 10, 0, 0, 6, 3, 5, 2, 12, 0, 8, 0...","[0, 0, 0, 1, 1, 0, 0, 3, 2, 2, 1, 3, 0, 3, 0, ...","[0, 0, 0, 0, 1, 0, 0, 1, 4, 1, 1, 3, 1, 2, 0, ...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[73.0, 17.0, 14.0, 42.0, 3.0, 0.0, 16.0, 0.0, ...","[73.0, 17.0, 14.0, 42.0, 4.0, 0.0, 14.0, 3.0, ...","[75.0, 16.0, 15.0, 44.0, 4.0, 0.0, 14.0, 2.0, ...","[71.0, 15.0, 16.0, 40.0, 4.0, 4.0, 16.0, 0.0, ...","[0.0, 2.0, 1.0, 0.0, 2.0, 2.0, 1.0, 1.0, 0.0, ...","[42.0, 11.0, 13.0, 18.0, 7.0, 4.0, 8.0, 5.0, 6..."
4,S9BIH11O,"[0, 0, 18, 0, 29, 21, 17, 11, 5, 2, 11, 25, 30...","[0, 0, 4, 0, 13, 7, 9, 7, 5, 3, 5, 14, 14, 2, ...","[0, 0, 2, 0, 2, 1, 4, 2, 2, 1, 1, 2, 3, 2, 0, ...","[0, 0, 1, 1, 2, 1, 3, 2, 2, 2, 0, 2, 3, 3, 0, ...",0,"[102.0, 3.0, 5.0, 94.0, 2.0, 0.0, 2.0, 0.0, 4....","[107.0, 7.0, 4.0, 96.0, 0.0, 0.0, 2.0, 0.0, 8....","[107.0, 7.0, 4.0, 96.0, 0.0, 0.0, 2.0, 0.0, 8....","[103.0, 9.0, 4.0, 90.0, 2.0, 0.0, 6.0, 0.0, 9....","[0.0, 3.0, 1.0, 0.0, 6.0, 2.0, 1.0, 1.0, 3.0, ...","[106.0, 8.0, 6.0, 92.0, 2.0, 0.0, 8.0, 0.0, 2...."
