In [20]:
import numpy as np
import pandas as pd
import datetime

START_TIME = datetime.datetime(2021, 4, 1, 0, 0)
TIME_OHV_LENGTH = 788
EVENT_OHV_LENGTH = 324

keatsLogs = pd.read_csv('../raw_data_sets/KEATS Dataset/KEATS_logs.csv')
finalMarks = pd.read_csv('../raw_data_sets/KEATS Dataset/finalMarksv8.csv')

event_contexts = keatsLogs['Event context']
component = keatsLogs['Component']
event_name = keatsLogs['Event name']
event_time = keatsLogs['Time']
keatsLogs['Time'] = pd.to_datetime(keatsLogs['Time'], dayfirst=True)

UNIQUE_EVENTS = sorted(list((event_contexts + '-' + component + '-'+ event_name).unique()))

def convert_time_to_index(event_time):
    delta = event_time - START_TIME
    hours = (delta.total_seconds())/3600.0
    index = int(hours//6)
    return index

def convert_event_to_index(event):
    if event not in UNIQUE_EVENTS:
        raise Exception("Event", event, "passed doesn't seem to exist in the UNIQUE EVENT array")
    
    return UNIQUE_EVENTS.index(event)    

def get_final_mark(sid):
    
    target = torch.zeros((1,))
    target[0] = int((finalMarks[finalMarks['Id'] == sid ].get('Final') >= 40).item())
    
    return target

def create_record(sid):
    student_activities = keatsLogs[keatsLogs['Id'] == sid]
    if len(student_activities) == 0:
        return None
    student_activities = student_activities.sort_values(by=["Time"]).reset_index(drop=True)
#     print(len(student_activities))
    # For now only taking final mark in consideration
    target = get_final_mark(sid)
    
    ohv_input = np.zeros((len(student_activities), EVENT_OHV_LENGTH+TIME_OHV_LENGTH))
    
    for i, each in student_activities.iterrows():
        event = each['Event context'] + '-' + each['Component'] + '-' + each['Event name']
        event_time = each['Time']
        event_index = convert_event_to_index(event)
        event_time_index = EVENT_OHV_LENGTH + convert_time_to_index(event_time)
        ohv_input[i, event_index] = 1
        ohv_input[i, event_time_index] = 1
    
    return (ohv_input, target)


def create_record_torch(sid):
    student_activities = keatsLogs[keatsLogs['Id'] == sid]
    if len(student_activities) == 0:
        return None
    student_activities = student_activities.sort_values(by=["Time"]).reset_index(drop=True)
#     print(len(student_activities))
    # For now only taking final mark in consideration
    target = get_final_mark(sid)
    
    ohv_input = torch.zeros((len(student_activities), EVENT_OHV_LENGTH+TIME_OHV_LENGTH))
    
    for i, each in student_activities.iterrows():
        event = each['Event context'] + '-' + each['Component'] + '-' + each['Event name']
        event_time = each['Time']
        event_index = convert_event_to_index(event)
        event_time_index = EVENT_OHV_LENGTH + convert_time_to_index(event_time)
        ohv_input[i, event_index] = 1
        ohv_input[i, event_time_index] = 1
    
    return (ohv_input, target)

def create_bow_record(sid):
    student_activities = keatsLogs[keatsLogs['Id'] == sid]
    
#     if len(student_activities) == 0:
#         return None
    
    target = get_final_mark(sid)
    
    bow = np.zeros((EVENT_OHV_LENGTH,))
    
    for i, each in student_activities.iterrows():    
        event = each['Event context'] + '-' + each['Component'] + '-' + each['Event name']
        event_index = convert_event_to_index(event)
        bow[event_index] +=  1
    
    return (bow, target)
    

def create_dataset():
    
    ids = finalMarks['Id'].unique()
    
    ohvs = []
    targets = []
    for i in ids:
        step = create_record(i)
       
        if step is not None:
            ohv, y = step
            ohvs.append(ohv)
            targets.append(y)
            
    ohvs = np.stack(ohvs, axis=0)
    targets = np.concatenate(targets, axis=0)
    
    return ohvs, targets

def create_dataset_stack():
    
    ids = finalMarks['Id'].unique()
    
    ohvs = []
    targets = []
    max_t_step = 0
    for i in ids:
        step = create_record_torch(i)
           
        if step is not None:
            ohv, y = step
            ohvs.append(ohv)
            if ohv.size(0) > max_t_step:
                max_t_step = ohv.size(0)
            targets.append(y)
            
    inputs = torch.zeros((max_t_step, len(ohvs), ohvs[0].size(1)))
    for i, ohv in enumerate(ohvs):
        inputs[:ohv.size(0), i, :] = ohv[:, :]
    targets = torch.cat(targets, axis=0)
    
    return inputs, targets
    
def create_bow_dataset():
    
    ids = finalMarks['Id'].unique()
    
    bows = []
    targets = []
    for sid in ids:
        step = create_bow_record(sid)
        if step is not None:
            bows.append(step[0])
            targets.append(step[1])
    
    bows = np.stack(bows, axis=0)
    targets = np.concatenate(targets, axis=0)
    
    return bows, targets

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F



class GritNet(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, input_size, target_size):
        
        super(GritNet, self).__init__()
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(1112, 2048)

        self.blstm = nn.LSTM(2048, 128, bidirectional=True)

        self.dense = nn.Linear(hidden_dim*2, target_size)
    
    def forward(self, x):
        embed = self.embedding(x)
        h0 = torch.zeros(2, embed.size(1), self.hidden_dim)
        c0 = torch.zeros(2, embed.size(1), self.hidden_dim)

        blstm_output, _ = self.blstm(embed, (h0, c0))

        gmp_output = torch.max(blstm_output, 0)

        return nn.Sigmoid(self.dense(gmp_output))



ModuleNotFoundError: No module named 'torch'

In [28]:
inputs, targets = create_dataset_stack()

In [29]:
inputs.shape

torch.Size([823, 415, 1112])

In [30]:
targets.shape

torch.Size([415])

In [31]:
grit_net_model = GritNet(1024, 128, (None, 32, 1024), 1)

TypeError: empty(): argument 'size' must be tuple of ints, but found element of type tuple at pos 2