In [None]:
import numpy as np
from mord import LogisticAT
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt

In [None]:
# Read the preprocessed arrays stored in processed_data.npz.
saved_data = np.load('processed_data.npz')
X, X_sym, Y, Z, pids = (
    saved_data[key] for key in ('X', 'X_sym', 'Y', 'Z', 'pids')
)

# M and p are the sizes of X's first two axes; q is the number of columns in Y.
M, p = X.shape[0], X.shape[1]
q = Y.shape[1]

# One 5-way splitter shared by the evaluation cells (no shuffle argument is passed).
kf = KFold(n_splits=5)

In [None]:
# Cross-validated ordinal regression (mord LogisticAT) for every column of Y, using
# each sample's X entries flattened into a single feature vector.
#
# Two errors are stored per (fold, question):
#   UW - plain mean absolute error (MAE) between predicted and true labels.
#   W  - class-balanced MAE: the MAE is computed separately over the samples of each
#        label value in 0..n_classes-1 and those per-class MAEs are averaged, so rare
#        labels weigh as much as common ones. Label values with no sample in a fold
#        are left out of that average; if none of them occurs the entry is NaN.
runs = 5       # maximum number of folds to evaluate
n_classes = 4  # label values 0..3 are the ones scored by the class-balanced error
uw_train_err = np.zeros((runs,q))
w_train_err = np.zeros((runs,q))
uw_test_err = np.zeros((runs,q))
w_test_err = np.zeros((runs,q))


s = 0  # index of the current fold
for train_index, test_index in kf.split(X):
    if s >= runs:
        break

    len_train = len(train_index)
    len_test = len(test_index)

    # Flatten everything after the sample axis. -1 lets NumPy infer the feature
    # count instead of hard-coding the size of X's last axis.
    x_train = X[train_index].reshape((len_train, -1))
    x_test = X[test_index].reshape((len_test, -1))

    for qi in range(q):
        y_train = Y[train_index,qi].astype(int)
        y_test = Y[test_index,qi].astype(int)
        clf = LogisticAT(alpha=5000).fit(x_train, y_train)
        yh_train = clf.predict(x_train)
        yh_test = clf.predict(x_test)

        abs_err_train = np.abs(yh_train - y_train)
        abs_err_test = np.abs(yh_test - y_test)

        uw_train_err[s,qi] = np.mean(abs_err_train)
        uw_test_err[s,qi] = np.mean(abs_err_test)

        # Per-class MAE. Each class is normalised by the number of samples that
        # actually carry that label; dividing by the length of the boolean mask
        # (the fold size) would only reproduce the unweighted error scaled down.
        class_mae_train = []
        class_mae_test = []
        for i in range(n_classes):
            idx_i_train = (y_train == i)
            idx_i_test = (y_test == i)
            if idx_i_train.any():
                class_mae_train.append(abs_err_train[idx_i_train].mean())
            if idx_i_test.any():
                class_mae_test.append(abs_err_test[idx_i_test].mean())

        w_train_err[s,qi] = np.mean(class_mae_train) if class_mae_train else np.nan
        w_test_err[s,qi] = np.mean(class_mae_test) if class_mae_test else np.nan

        print('Run %d, Q%d: UW Train Error = %.2f, UW Test Error = %.2f' % (s+1,qi+1,uw_train_err[s,qi],uw_test_err[s,qi]))
        print('Run %d, Q%d: W Train Error = %.2f, W Test Error = %.2f' % (s+1,qi+1,w_train_err[s,qi],w_test_err[s,qi]))

    s += 1

print("Mean UW Training Error Across All Questions: %.3f" % np.mean(uw_train_err))
print("Mean UW Test Error Across All Questions: %.3f" % np.mean(uw_test_err))
print("Mean W Training Error Across All Questions: %.3f" % np.mean(w_train_err))
print("Mean W Test Error Across All Questions: %.3f" % np.mean(w_test_err))

In [None]:
# Cross-validated ordinal regression (mord LogisticAT) for every column of Z, using
# each sample's X entries flattened into a single feature vector.
#
# Two errors are stored per (fold, question):
#   UW - plain mean absolute error (MAE) between predicted and true labels.
#   W  - class-balanced MAE: the MAE is computed separately over the samples of each
#        label value in 0..n_classes-1 and those per-class MAEs are averaged, so rare
#        labels weigh as much as common ones. Label values with no sample in a fold
#        are left out of that average; if none of them occurs the entry is NaN.
runs = 5       # maximum number of folds to evaluate
n_classes = 4  # label values 0..3 are the ones scored by the class-balanced error
q2 = Z.shape[1]
uw_train_err = np.zeros((runs,q2))
w_train_err = np.zeros((runs,q2))
uw_test_err = np.zeros((runs,q2))
w_test_err = np.zeros((runs,q2))

s = 0  # index of the current fold
for train_index, test_index in kf.split(X):
    if s >= runs:
        break

    len_train = len(train_index)
    len_test = len(test_index)

    # Flatten everything after the sample axis. -1 lets NumPy infer the feature
    # count instead of hard-coding the size of X's last axis.
    x_train = X[train_index].reshape((len_train, -1))
    x_test = X[test_index].reshape((len_test, -1))

    for qi in range(q2):
        y_train = Z[train_index,qi].astype(int)
        y_test = Z[test_index,qi].astype(int)
        clf = LogisticAT(alpha=5000).fit(x_train, y_train)
        yh_train = clf.predict(x_train)
        yh_test = clf.predict(x_test)

        abs_err_train = np.abs(yh_train - y_train)
        abs_err_test = np.abs(yh_test - y_test)

        uw_train_err[s,qi] = np.mean(abs_err_train)
        uw_test_err[s,qi] = np.mean(abs_err_test)

        # Per-class MAE. Each class is normalised by the number of samples that
        # actually carry that label; dividing by the length of the boolean mask
        # (the fold size) would only reproduce the unweighted error scaled down.
        class_mae_train = []
        class_mae_test = []
        for i in range(n_classes):
            idx_i_train = (y_train == i)
            idx_i_test = (y_test == i)
            if idx_i_train.any():
                class_mae_train.append(abs_err_train[idx_i_train].mean())
            if idx_i_test.any():
                class_mae_test.append(abs_err_test[idx_i_test].mean())

        w_train_err[s,qi] = np.mean(class_mae_train) if class_mae_train else np.nan
        w_test_err[s,qi] = np.mean(class_mae_test) if class_mae_test else np.nan

        print('Run %d, Q%d: UW Train Error = %.2f, UW Test Error = %.2f' % (s+1,qi+1,uw_train_err[s,qi],uw_test_err[s,qi]))
        print('Run %d, Q%d: W Train Error = %.2f, W Test Error = %.2f' % (s+1,qi+1,w_train_err[s,qi],w_test_err[s,qi]))

    s += 1

print("Mean UW Training Error Across All Questions: %.4f" % np.mean(uw_train_err))
print("Mean UW Test Error Across All Questions: %.4f" % np.mean(uw_test_err))
print("Mean W Training Error Across All Questions: %.4f" % np.mean(w_train_err))
print("Mean W Test Error Across All Questions: %.4f" % np.mean(w_test_err))

In [None]:
# Same per-question ordinal regression on Y, but the features are only the
# index-0 slice of X's last axis. Stores the mean absolute error per (fold, question).
runs = 5
train_err2, test_err2 = np.zeros((runs, q)), np.zeros((runs, q))

s = 0  # fold counter; the loop stops once `runs` folds have been evaluated
for train_index, test_index in kf.split(X):
    if s >= runs:
        break

    x_train, x_test = X[train_index, :, 0], X[test_index, :, 0]

    for qi in range(q):
        y_train = Y[train_index, qi].astype(int)
        y_test = Y[test_index, qi].astype(int)

        clf = LogisticAT(alpha=5000).fit(x_train, y_train)
        yh_train, yh_test = clf.predict(x_train), clf.predict(x_test)

        # Mean absolute distance between predicted and true labels.
        train_err2[s, qi] = np.abs(yh_train - y_train).mean()
        test_err2[s, qi] = np.abs(yh_test - y_test).mean()

        print('Run %d, Q%d: Train Error = %.2f, Test Error = %.2f'
              % (s + 1, qi + 1, train_err2[s, qi], test_err2[s, qi]))

    s += 1

print("Mean Training Error Across All Questions: %.3f" % train_err2.mean())
print("Mean Test Error Across All Questions: %.3f" % test_err2.mean())