In [5]:
import models
from permutation_metrics import rank_similarities
import utils
import torch
import torch.nn as nn
import torch.nn.functional as f
import numpy as np
import matplotlib.pyplot as plt
from torch import optim
from torch.autograd import Variable


%load_ext autoreload
%autoreload 2

torch.manual_seed(42)

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

Y = torch.tensor(np.load('../datasets/floored_exp_uncorrel.npy'), dtype=dtype)
Yt = Y.transpose(0,1)
real_data = torch.tensor(np.genfromtxt('../datasets/real_data.csv', delimiter = ','))
A_true = np.load('../datasets/students_uncorrel.npy')
D_true = np.load('../datasets/questions_uncorrel.npy')

# We assume we know the relevant concept of each question beforehand
concepts = np.nonzero(D_true)
num_students, num_concepts = A_true.shape
num_questions = D_true.shape[0]
guess_prob = 1/5

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
def accuracy(R_pred, R_true):
    R_pred = R_pred.data.numpy()
    R_true = R_true.data.numpy()
    R_pred_cpy = np.copy(R_pred)
    R_pred_cpy[R_pred_cpy > 0.5] = 1
    R_pred_cpy[R_pred_cpy <= 0.5] = 0
    print("Accuracy: {}".format(np.sum(R_pred_cpy == R_true) / (R_true.shape[0]*R_true.shape[1])))
    return np.sum(R_pred_cpy == R_true) / (R_true.shape[0]*R_true.shape[1])

In [7]:
def rmse_min_max(A,B):
    a_norm = (A - A.min())/(A.max()-A.min())
    b_norm = (B - B.min())/(B.max()-B.min())
    return np.sqrt(np.mean(np.square(a_norm-b_norm))) 

# Run on Simulated Data

In [52]:
#Preparing to train simulated data set
observations = Yt
idx_train = int(0.7*observations.size()[1])
idx_val = int(0.8*observations.size()[1])
train = observations[:,:idx_train]
val = observations[:,idx_train:idx_val]
test = observations[:,idx_val:]
idx_test = observations.size()[1] - idx_val
idx_val = idx_train-idx_val

## Simple RNN to predict students performance

In [55]:
n_epochs = 100
hidden_size = 128
layers = 8
batch_size = 100 #needs to have idx_train, idx_val, and idx_train as a multiple
rate = 0.002
dropout = 0.2

model = models.RNN_Model(hidden_size, batch_size, layers, dropout)
criterion = nn.functional.binary_cross_entropy
optimizer = optim.Adam(model.parameters(), lr=rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

In [56]:
#Training
losses = np.zeros(n_epochs) # For plotting
num_batches = int(idx_train/batch_size)
preds = torch.zeros(train.size()[0]-1,train.size()[1])

for epoch in range(n_epochs):
    for batch in range(num_batches):
        inputs = Variable(train[:-1,batch*batch_size:(batch+1)*batch_size])
        targets = Variable(train[1:,batch*batch_size:(batch+1)*batch_size])

        outputs, hidden = model(inputs, None)
        preds[:,batch*batch_size:(batch+1)*batch_size] = outputs
        
        optimizer.zero_grad()
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        #if batch % 2 == 0:
            #print("Batch Done")

    losses[epoch] += loss.data[0]
    print(epoch, loss.data[0])
    accuracy(preds, train[1:,:])



0 tensor(0.6950)
Accuracy: 0.49763347763347765
1 tensor(0.6933)
Accuracy: 0.5271284271284271
2 tensor(0.6943)
Accuracy: 0.5383694083694084
3 tensor(0.6902)
Accuracy: 0.5398845598845599
4 tensor(0.6891)
Accuracy: 0.541962481962482
5 tensor(0.6886)
Accuracy: 0.5421067821067821
6 tensor(0.6884)
Accuracy: 0.5394516594516594
7 tensor(0.6884)
Accuracy: 0.5416594516594516
8 tensor(0.6885)
Accuracy: 0.5406349206349207
9 tensor(0.6885)
Accuracy: 0.53997113997114
10 tensor(0.6885)
Accuracy: 0.5414285714285715
11 tensor(0.6886)
Accuracy: 0.5405194805194805
12 tensor(0.6886)
Accuracy: 0.5402453102453102
13 tensor(0.6886)
Accuracy: 0.5402453102453102
14 tensor(0.6885)
Accuracy: 0.5403030303030303
15 tensor(0.6885)
Accuracy: 0.5412987012987013
16 tensor(0.6882)
Accuracy: 0.5413852813852814
17 tensor(0.6863)
Accuracy: 0.5418759018759018
18 tensor(0.6824)
Accuracy: 0.5485281385281385
19 tensor(0.6870)
Accuracy: 0.5416738816738816
20 tensor(0.6895)
Accuracy: 0.5333910533910534
21 tensor(0.6870)
Accurac

In [58]:
#Validation
num_batches = int(idx_val/batch_size)
preds = torch.zeros(val.size()[0]-1,val.size()[1])

for batch in range(num_batches):
    inputs = Variable(val[:-1,batch*batch_size:(batch+1)*batch_size])
    targets = Variable(val[1:,batch*batch_size:(batch+1)*batch_size])

    outputs, hidden = model(inputs, None)
    preds[:,batch*batch_size:(batch+1)*batch_size] = outputs
    
accuracy(preds, val[1:,:])

Accuracy: 0.526969696969697


0.526969696969697

In [None]:
#Test
num_batches = int(idx_test/batch_size)
preds = torch.zeros(test.size()[0]-1,test.size()[1])

for batch in range(num_batches):
    inputs = Variable(test[:-1,batch*batch_size:(batch+1)*batch_size])
    targets = Variable(test[1:,batch*batch_size:(batch+1)*batch_size])

    outputs, hidden = model(inputs, None)
    preds[:,batch*batch_size:(batch+1)*batch_size] = outputs

accuracy(preds, test[1:,:])

## Skill RNN to predict students performance

In [24]:
n_epochs = 3000
average = True
sigmoid = False
#concepts = ([0,1,2,3,4,5,6,7,8],[0,0,0,0,0,0,0,0,0])
hidden_size = 128
dropout = 0.2
num_layers = 8
rate = 0.002

model = models.RNN_Skills_Model(average, concepts, num_concepts, num_questions, hidden_size, num_students, num_layers, dropout, sigmoid)
criterion = nn.functional.binary_cross_entropy
optimizer = optim.Adam(model.parameters(), lr=rate betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

In [29]:
losses = np.zeros(n_epochs) # For plotting

for epoch in range(n_epochs):

    inputs = Variable(Yt[:-1]).float()
    targets = Variable(Yt[1:]).float()

    outputs, hidden, skills, D = model(inputs, None)

    optimizer.zero_grad()
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    losses[epoch] += loss.data[0]

    if epoch % 20 == 0:
        print(epoch, loss.data[0])
        accuracy(outputs,targets)
        #print('RMSE A: {}'.format(rmse_min_max(skills.data.numpy(), A_true)))
        #print('RMSE D: {}'.format(rmse_min_max(D.data.numpy(), D_true)))

  from ipykernel import kernelapp as app


0 tensor(2.7996)
Accuracy: 0.526953125
20 tensor(0.8965)
Accuracy: 0.579296875
40 tensor(0.7353)
Accuracy: 0.670703125
60 tensor(0.6410)
Accuracy: 0.68046875
80 tensor(0.5628)
Accuracy: 0.6845703125
100 tensor(0.5195)
Accuracy: 0.7572265625
120 tensor(0.5047)
Accuracy: 0.764453125
140 tensor(0.5001)
Accuracy: 0.7658203125
160 tensor(0.4958)
Accuracy: 0.7666015625
180 tensor(0.4898)
Accuracy: 0.7720703125
200 tensor(0.4870)
Accuracy: 0.771875
220 tensor(0.4830)
Accuracy: 0.775
240 tensor(0.4792)
Accuracy: 0.776953125
260 tensor(0.4781)
Accuracy: 0.7771484375
280 tensor(0.4774)
Accuracy: 0.77734375
300 tensor(0.4748)
Accuracy: 0.779296875
320 tensor(0.4737)
Accuracy: 0.78125
340 tensor(0.4718)
Accuracy: 0.7830078125
360 tensor(0.4705)
Accuracy: 0.7833984375
380 tensor(0.4695)
Accuracy: 0.7837890625
400 tensor(0.4683)
Accuracy: 0.7845703125
420 tensor(0.4675)
Accuracy: 0.7849609375
440 tensor(0.4668)
Accuracy: 0.78515625
460 tensor(0.4665)
Accuracy: 0.78515625
480 tensor(0.4658)
Accuracy:

KeyboardInterrupt: 

In [30]:
Y_eval = (real_data[640:].transpose(0,1))

inputs = Variable(Y_eval[:-1]).float()
targets_eval = Variable(Y_eval[1:]).float()

outputs, hidden, skills, D = model(inputs, None)

accuracy(outputs,targets_eval)

RuntimeError: size mismatch, m1: [1 x 160], m2: [640 x 128] at /opt/conda/conda-bld/pytorch_1533739672741/work/aten/src/TH/generic/THTensorMath.cpp:2070

In [14]:
print(rank_similarities(A_true, Y.data.numpy(), (skills.data.numpy() - skills.data.numpy().min())/(skills.data.numpy().max()-skills.data.numpy().min()))['summary'])


        Summary of Ranking Evaluation: 
        The correlations are with the true rankings derived from A_true.
        For the baseline, we get a Kendall Rank correlation of 0.033, with p-value of 0.124, and a Spearman correlation of 0.049, with p-value of 0.125. 
        For the prediction, we get a Kendall Rank correlation of -0.012, with p-value of 0.558, and a Spearman correlation of -0.019, with p-value of 0.546.  
        Which gives us an average difference of -0.0565 versus the baseline. 
        


In [32]:
#Hyperparameter Search
n_epochs = 50
hidden_size = {512,256,128,64}
dropout = np.random.uniform(size=5)
print(dropout)
num_layers = {32,16,8,4}
#l_rate = np.random.uniform(0.01,0.5,5)
l_rate = {0.002,0.01,0.05,0.2,0.5}
print(l_rate)
A_rmse = 1
D_rmse = 1
max_acc = 0
params = {}
concepts = np.nonzero(D_true)
acc = []
h_loss = {}
i=0
for size in hidden_size:
    for drop in dropout:
        for layers in num_layers:
            for rate in l_rate:
                model = models.RNN_Skills_Model(False, concepts, num_concepts, num_questions, size, num_students, layers, drop)
                criterion = nn.functional.binary_cross_entropy
                optimizer = optim.Adam(model.parameters(), lr=rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

                losses = np.zeros(n_epochs)
                
                for epoch in range(n_epochs):

                    inputs = Variable(Yt[:-1])
                    targets = Variable(Yt[1:])

                    outputs, hidden, skills, D = model(inputs, None)

                    optimizer.zero_grad()
                    loss = criterion(outputs, targets)
                    loss.backward()
                    optimizer.step()
                    
                    losses[epoch] += loss.data[0]
                    
                    if epoch>0 and np.abs(losses[epoch]-losses[epoch-1])<0.00005:
                        break

                    if epoch % 10 == 0:
                        print(epoch, loss.data[0])
                        print(accuracy(outputs,targets))
                        print('RMSE A: {}'.format(rmse_min_max(skills.data.numpy(), A_true)))
                        print('RMSE D: {}'.format(rmse_min_max(D.data.numpy(), D_true)))

                
                print(accuracy(outputs,targets))
                if accuracy(outputs,targets)>max_acc:
                    params['accuracy'] = (size,drop,layers,rate)
                    max_acc = accuracy(outputs,targets)
                if rmse_min_max(skills.data.numpy(), A_true)<A_rmse:
                    params['A'] = (size,drop,layers,rate)
                    A_rmse = rmse_min_max(skills.data.numpy(), A_true)
                if rmse_min_max(D.data.numpy(), D_true)<D_rmse:
                    params['D'] = (size,drop,layers,rate)
                    D_rmse = rmse_min_max(D.data.numpy(), D_true)
                acc.append(accuracy(outputs,targets))
                h_loss[i] = losses
                print('done')
                i+=1
                    
print(params)
print(max_acc)
print(A_rmse)
print(D_rmse)
print(acc)

[0.67363352 0.34875009 0.45567222 0.76393018 0.62340218]
{0.5, 0.1, 0.3, 0.2, 0.02}




0 tensor(1.3280)
0.48944444444444446
RMSE A: 0.41321895522657626
RMSE D: 0.46254148619455304
10 tensor(0.8601)
0.4803131313131313
RMSE A: 0.36741722820092293
RMSE D: 0.3854353724191575
0.4787070707070707
done
0 tensor(1.9985)
0.5032929292929293
RMSE A: 0.50919118529673
RMSE D: 0.4492963114053607
10 tensor(0.8606)
0.4785656565656566
RMSE A: 0.36756588297951304
RMSE D: 0.3713725979540209
20 tensor(0.8554)
0.48397979797979795
RMSE A: 0.3641292753668714
RMSE D: 0.42039191691995553
0.48597979797979796
done
0 tensor(1.2593)
0.5110707070707071
RMSE A: 0.48182128081911146
RMSE D: 0.330943770978714
0.4784242424242424
done
0 tensor(2.4409)
0.5184444444444445
RMSE A: 0.47526985063165056
RMSE D: 0.34508595146288723
10 tensor(0.8406)
0.5003636363636363
RMSE A: 0.35819794728253196
RMSE D: 0.43959553735739854
20 tensor(0.8348)
0.5008181818181818
RMSE A: 0.3194916266610254
RMSE D: 0.4446673889964636
0.5054848484848485
done
0 tensor(1.4675)
0.4865959595959596
RMSE A: 0.4976868465537525
RMSE D: 0.387302

KeyboardInterrupt: 