In [1]:
from models import *
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, accuracy_score
from scoop import futures
import argparse
import os
import idx2numpy
from itertools import chain
import pickle

In [2]:
# Global seed, applied below to Python's `random`, PyTorch and NumPy.
# NOTE(review): `random`, `torch` and `np` are not imported explicitly in the
# import cell; presumably they are re-exported by `from models import *` — confirm.
seed = 1
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)
# Layer sizes handed to PreferenceRegularizedModel(sizes=...): 36 inputs
# (6x6 flattened permutation matrix over DATA_IND) and 16 outputs
# (4x4 flattened permutation matrix over LABEL_IND).
NETWORK = [36, 25, 25, 10, 16]
# Hyper-parameters forwarded to `net.learn(...)` in single_baseline_run.
LEARNING_RATE = 0.1
BASELINE_BATCH_SIZE = 10
BASELINE_EPOCHS = 40
BASELINE_MOMENTUM = 0

In [3]:
def to_one_hot(dense, n):
    """Return a length-`n` float vector that is 1 at index `dense` and 0 elsewhere.

    Args:
        dense: index to switch on (used directly as a NumPy index).
        n: length of the returned vector.

    Returns:
        A new 1-D `np.ndarray` of zeros with the entry at `dense` set to 1.
    """
    encoded = np.zeros(n)
    encoded[dense] = 1
    return encoded

def to_perm_matrix(ranking, items):
    """Encode a ranking as a flattened (row-major) permutation matrix.

    With n = len(items), the result has n*n entries. Row k (entries
    k*n .. k*n + n - 1) is the one-hot encoding of the position at which
    items[k] appears in `ranking`.

    Args:
        ranking: list of items in ranked order; every element of `items`
            must occur in it (`list.index` raises ValueError otherwise).
        items: the items to encode, in the fixed order that defines the rows.

    Returns:
        A flat Python list of n*n values (each 0.0 or 1.0).
    """
    size = len(items)
    return [
        cell
        for item in items
        for cell in to_one_hot(ranking.index(item), size)
    ]

In [4]:
# Item ids whose relative order forms the model input (6 items -> 6x6 matrix)
DATA_IND = [1, 2, 3, 5, 7, 8]
# Item ids whose relative order is the prediction target (4 items -> 4x4 matrix)
LABEL_IND = [4, 6, 9, 10]

data_path = 'sushi.soc'
instances = []
labels = []
with open(data_path) as file:
    for line in file:
        tokens = line.strip().split(',')
        # Lines with fewer than 10 comma-separated fields are not ranking rows
        if len(tokens) < 10:
            continue
        # The first token is not part of the ranking; the rest are item ids
        ranking = [int(x) for x in tokens[1:]]
        # Split the ranking into input items and label items, preserving
        # their relative order; anything not in DATA_IND is treated as a label item
        cur_data = [item for item in ranking if item in DATA_IND]
        cur_label = [item for item in ranking if item not in DATA_IND]
        instances.append(to_perm_matrix(cur_data, DATA_IND))
        labels.append(to_perm_matrix(cur_label, LABEL_IND))

X = np.array(instances, dtype=np.float32)
y = np.array(labels, dtype=np.float32)

# 5-fold cross-validation; the shuffle seed is drawn from the already-seeded
# `random` module, so the folds are reproducible for a given global seed
kf = KFold(n_splits=5, shuffle=True, random_state=random.randint(0, 1000))
split_data = list(kf.split(X))


In [5]:
def constraint_satsified(p):
    """Check whether a flattened 4x4 prediction is a valid permutation matrix.

    `p` is read row-major: rows are p[0:4], p[4:8], p[8:12] and p[12:];
    column c consists of p[c], p[c + 4], p[c + 8], p[c + 12].

    Returns:
        True if every row and every column sums to exactly 1, else False.
    """
    row_bounds = ((None, 4), (4, 8), (8, 12), (12, None))
    # Rows are checked first and columns only if all rows pass (generators
    # keep the same short-circuit order as a chained `and`).
    rows_ok = all(sum(p[lo:hi]) == 1 for lo, hi in row_bounds)
    return rows_ok and all(
        sum([p[col + j*4] for j in range(4)]) == 1 for col in range(4)
    )

def coherent_accuracy(pred, y):
    """Fraction of samples whose prediction matches the label in every position.

    Args:
        pred: indexable collection of per-sample predictions.
        y: indexable collection of per-sample labels; `pred[i] == y[i]` must
            yield an iterable of booleans (e.g. NumPy rows).

    Returns:
        Number of exactly matching samples divided by `len(pred)`.
    """
    exact_matches = sum(1 for i in range(len(pred)) if all(pred[i] == y[i]))
    return exact_matches / len(pred)

def corrected_accuracy(pred, y, accuracy_type='standard'):
    """Dispatch to one of three accuracy measures.

    Args:
        pred: predictions, one row per sample.
        y: labels as a NumPy array (its `.flatten()` is used).
        accuracy_type: 'coherent' -> exact-match rate via `coherent_accuracy`;
            'incoherent' -> element-wise accuracy over the flattened arrays;
            anything else -> `NeuralPreferenceClassifier.accuracy_score_ml`.

    Returns:
        The value returned by the selected measure.
    """
    if accuracy_type == 'coherent':
        return coherent_accuracy(pred, y)

    if accuracy_type == 'incoherent':
        flat_pred = np.array(pred).flatten()
        flat_true = y.flatten()
        return accuracy_score(flat_pred, flat_true)

    return NeuralPreferenceClassifier.accuracy_score_ml(pred, y)

def get_violations(pred):
    """Count predictions that are not valid 4x4 permutation matrices.

    The per-sample check is delegated to `constraint_satsified` so that the
    row/column-sum rule is written in exactly one place.

    Args:
        pred: sized collection of flattened 4x4 predictions.

    Returns:
        `len(pred)` minus the number of predictions that satisfy the constraint.
    """
    satisfied = sum(1 for p in pred if constraint_satsified(p))
    return len(pred) - satisfied

In [7]:
def single_baseline_run(X, y, train_idx=None, test_idx=None, K=None, alpha=0):
    """Train one regularised baseline on a single train/test split and score it.

    Args:
        X: full input array; indexed with `train_idx` / `test_idx`.
        y: full label array; indexed the same way.
        train_idx: indices of the training rows.
        test_idx: indices of the test rows.
        K: accepted for call compatibility; not read by this function.
        alpha: regularisation weight passed to `net.learn` and to the loss.

    Returns:
        A 5-tuple `(metrics, pred_test, pred_train, train_loss, test_loss)`
        where `metrics` is a one-element list holding the row
        `[alpha, coherent acc (train, test), incoherent acc (train, test),
        standard acc (train, test), violations (train, test),
        constraint-satisfaction rate (train, test), net.runtime]`.
        Returns None (after emitting a RuntimeWarning) if either prediction
        array contains NaN.
    """
    import warnings  # local import keeps this cell self-contained

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    net = PreferenceRegularizedModel(sizes=NETWORK)
    net.learn(X_train, y_train, learning_rate=LEARNING_RATE, batch_size=BASELINE_BATCH_SIZE,
              epochs=BASELINE_EPOCHS, momentum=BASELINE_MOMENTUM, alpha=alpha)

    pred_train = net.predict(X_train)
    pred_test = net.predict(X_test)

    train_loss = net.get_BCEWithLogitsLoss(X_train, y_train, alpha)
    test_loss = net.get_BCEWithLogitsLoss(X_test, y_test, alpha)

    # Previously this case fell off the end of the function and returned None
    # silently; keep the None result for compatibility but make it visible.
    if np.isnan(pred_train).any() or np.isnan(pred_test).any():
        warnings.warn(
            'single_baseline_run: NaN predictions for alpha={}; returning None'.format(alpha),
            RuntimeWarning,
        )
        return None

    accuracy_train_coherent = corrected_accuracy(pred_train, y_train, accuracy_type='coherent')
    accuracy_test_coherent = corrected_accuracy(pred_test, y_test, accuracy_type='coherent')

    accuracy_train_incoherent = corrected_accuracy(pred_train, y_train, accuracy_type='incoherent')
    accuracy_test_incoherent = corrected_accuracy(pred_test, y_test, accuracy_type='incoherent')

    accuracy_train = corrected_accuracy(pred_train, y_train)
    accuracy_test = corrected_accuracy(pred_test, y_test)

    total_violations_train = get_violations(pred_train)
    total_violations_test = get_violations(pred_test)

    # Column order matters: later cells index this row from the end.
    metrics_row = [
        alpha,
        accuracy_train_coherent, accuracy_test_coherent,
        accuracy_train_incoherent, accuracy_test_incoherent,
        accuracy_train, accuracy_test,
        total_violations_train, total_violations_test,
        1 - total_violations_train/len(X_train), 1 - total_violations_test/len(X_test),
        net.runtime,
    ]
    return [metrics_row], pred_test, pred_train, train_loss, test_loss

In [8]:
BASELINE_EPOCHS = 40
# output_final[fold][alpha] -> result tuple of single_baseline_run; folds are numbered from 1
output_final = {}
for i, (train_index, test_index) in enumerate(split_data, start=1):
    output_final[i] = {
        alpha: single_baseline_run(X, y, train_idx=train_index, test_idx=test_index, K=True, alpha=alpha)
        for alpha in [0, 0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9]
    }


epoch 1, validation loss: 0.5254076719284058; training loss: 0.5281479954719543; best loss: 0.5254076719284058
epoch 2, validation loss: 0.5164588689804077; training loss: 0.519027590751648; best loss: 0.5164588689804077
epoch 3, validation loss: 0.5167211294174194; training loss: 0.5186731219291687; best loss: 0.5164588689804077
epoch 4, validation loss: 0.5164679288864136; training loss: 0.518557071685791; best loss: 0.5164588689804077
epoch 5, validation loss: 0.5166116952896118; training loss: 0.5184214115142822; best loss: 0.5164588689804077
epoch 6, validation loss: 0.5160672068595886; training loss: 0.518367350101471; best loss: 0.5160672068595886
epoch 7, validation loss: 0.5161877274513245; training loss: 0.5182380676269531; best loss: 0.5160672068595886
epoch 8, validation loss: 0.5157537460327148; training loss: 0.5181806087493896; best loss: 0.5157537460327148
epoch 9, validation loss: 0.5155569314956665; training loss: 0.5180009007453918; best loss: 0.5155569314956665
epoc

dict_keys([0])

In [9]:
# For each fold, print the fold number followed by the test loss of every alpha run
# (the last element of each single_baseline_run result tuple).
for fold, runs_by_alpha in output_final.items():
    print(fold)
    for run in runs_by_alpha.values():
        print(run[-1])

1
tensor(0.5040)
tensor(0.5320)
tensor(0.6381)
tensor(0.7649)
tensor(0.9884)
tensor(1.1997)
tensor(1.4851)
tensor(2.0754)
2
tensor(0.5119)
tensor(0.5524)
tensor(0.5940)
tensor(0.7996)
tensor(0.9772)
tensor(1.5125)
tensor(1.5387)
tensor(2.1588)
3
tensor(0.5040)
tensor(0.5360)
tensor(0.5790)
tensor(0.7850)
tensor(0.9922)
tensor(1.3517)
tensor(1.6686)
tensor(2.1887)
4
tensor(0.5036)
tensor(0.5406)
tensor(0.5911)
tensor(0.7247)
tensor(1.0245)
tensor(1.2945)
tensor(1.5855)
tensor(2.4276)
5
tensor(0.5080)
tensor(0.5455)
tensor(0.5774)
tensor(0.6826)
tensor(1.0207)
tensor(1.3645)
tensor(1.5502)
tensor(2.3221)


In [12]:
# deepsade_loss[fold] -> [train BCE-with-logits loss, test loss] of the saved DeepSaDe model
deepsade_loss = {}
criterion = nn.BCEWithLogitsLoss()
for i, (train_index, test_index) in enumerate(split_data, start=1):
    # NOTE(review): torch.load unpickles the whole model object; only load checkpoints you trust.
    model = torch.load('results-wtih-validation-accuracy-crossval/{}/deepsade_model_fold_{}.pt'.format(seed, i))
    # These names stay bound to the last fold after the loop; later cells read X_train / y_train.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    train_loss = criterion(model(torch.tensor(X_train)).detach(), torch.tensor(y_train)).item()
    test_loss = criterion(model(torch.tensor(X_test)).detach(), torch.tensor(y_test)).item()
    deepsade_loss[i] = [train_loss, test_loss]

In [13]:
# Per-fold DeepSaDe losses, in fold order: element 0 of each entry is train, element 1 is test.
train_losses_deepsade = [fold_losses[0] for fold_losses in deepsade_loss.values()]
test_losses_deepsade = [fold_losses[1] for fold_losses in deepsade_loss.values()]
train_losses_deepsade, test_losses_deepsade

([0.53621906042099,
  0.533242404460907,
  0.5380823612213135,
  0.5503613948822021,
  0.5342352986335754],
 [0.5314278602600098,
  0.542514443397522,
  0.5381230711936951,
  0.5500898361206055,
  0.5326858162879944])

In [14]:
import statistics

# One 'SL' row and one 'DeepSaDe' row per regularisation weight, averaged over folds.
summary_rows = []
for a in [0, 0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9]:
    fold_runs = [output_final[fold][a] for fold in output_final]
    # Each run is (metrics, pred_test, pred_train, train_loss, test_loss);
    # metrics[0][-5] / metrics[0][-4] are the train / test violation counts.
    # The train-side values are computed for inspection but are not part of the table.
    train_losses = [run[-2].item() for run in fold_runs]
    test_losses = [run[-1].item() for run in fold_runs]
    train_violations = [run[0][0][-5] for run in fold_runs]
    test_violations = [run[0][0][-4] for run in fold_runs]
    summary_rows.append([a, 'SL', statistics.mean(test_losses), statistics.mean(test_violations)])
    # The DeepSaDe row does not depend on `a`; its violation count is hard-coded to 0.
    summary_rows.append([a, 'DeepSaDe', statistics.mean(test_losses_deepsade), 0])

processed_output = pd.DataFrame(
    summary_rows,
    columns=['lambda', 'Approach', 'test loss', 'test violation'],
)

In [18]:
# Persist the summary table next to the notebook; with to_csv's defaults the
# row index is written as an unnamed first column.
processed_output.to_csv('reg-exp.csv')
# Show the table as the cell output.
processed_output

Unnamed: 0,lambda,Approach,test loss,test violation
0,0.0,SL,0.506312,982.6
1,0.0,DeepSaDe,0.538968,0.0
2,0.05,SL,0.541292,676.4
3,0.05,DeepSaDe,0.538968,0.0
4,0.1,SL,0.595935,536.2
5,0.1,DeepSaDe,0.538968,0.0
6,0.2,SL,0.751368,363.4
7,0.2,DeepSaDe,0.538968,0.0
8,0.3,SL,1.000595,0.0
9,0.3,DeepSaDe,0.538968,0.0


In [86]:
# Re-display the summary table (no computation happens in this cell).
processed_output

Unnamed: 0,lambda,Approach,test loss,test violation
0,0.0,SL,0.506312,982.6
1,0.0,DeepSaDe,0.538968,0.0
2,0.05,SL,0.541292,676.4
3,0.05,DeepSaDe,0.538968,0.0
4,0.1,SL,0.595935,536.2
5,0.1,DeepSaDe,0.538968,0.0
6,0.2,SL,0.751368,363.4
7,0.2,DeepSaDe,0.538968,0.0
8,0.3,SL,1.000595,0.0
9,0.3,DeepSaDe,0.538968,0.0


In [19]:
def big_chart(chart, fontsize = 20):
    """Apply the notebook's presentation theme to a chart.

    Enables the axis grid, sets axis label/title, chart title and legend
    label/title font sizes to `fontsize`, and sets the view stroke width to 0.

    Args:
        chart: object exposing `configure_axis`, `configure_title`,
            `configure_legend` and `configure_view`.
        fontsize: font size used for every text element touched here.

    Returns:
        Whatever the final `configure_view` call returns.
    """
    axis_fonts = {'labelFontSize': fontsize, 'titleFontSize': fontsize}
    styled = chart.configure_axis(grid=True, **axis_fonts)
    styled = styled.configure_title(fontSize=fontsize)
    styled = styled.configure_legend(titleFontSize=fontsize, labelFontSize=fontsize)
    return styled.configure_view(strokeWidth=0)

def small_chart(chart, fontsize=None):
    """Resize a chart to 150x150 and apply the `big_chart` theme.

    Args:
        chart: object exposing `properties(width=..., height=...)` whose
            result is accepted by `big_chart`.
        fontsize: font size forwarded to `big_chart`. When left as None,
            `big_chart`'s own default is used instead of forwarding None
            as a literal font size.

    Returns:
        The themed, resized chart returned by `big_chart`.
    """
    resized = chart.properties(width=150, height=150)
    if fontsize is None:
        return big_chart(resized)
    return big_chart(resized, fontsize)

Unnamed: 0,Unnamed: 1
0,0.3


In [94]:
import altair as alt

# Test loss as a function of the regularisation weight, one line per approach.
# The dash pattern is encoded through the StrokeDash channel class, matching
# the `strokeDash=` keyword it is passed to.
loss_plot = alt.Chart(processed_output).mark_line().encode(
    x='lambda:Q',
    y='test loss',
    strokeDash=alt.StrokeDash('Approach:N', legend=alt.Legend(
        orient='none',
        legendX=90, legendY=10,
        direction='vertical',
        titleAnchor='middle',
        strokeColor='black',
        padding=10,
        labelFontSize=25,
        symbolSize=500,
        symbolStrokeWidth=2,
        title=None))
)

# Horizontal reference rule at the mean DeepSaDe *test* loss. The rule is drawn on
# the 'test loss' axis, so it uses the test-fold losses (the same quantity as the
# DeepSaDe rows of processed_output) rather than the training losses.
loss_line = alt.Chart(pd.DataFrame({'test loss': [statistics.mean(test_losses_deepsade)]})).mark_rule().encode(y='test loss', strokeWidth=alt.value(2), color=alt.value('steelblue'))
# Vertical marker at lambda = 0.3; lambda is the x axis of these plots.
# It is defined here but not layered into the chart below.
selected_lambda = alt.Chart(pd.DataFrame({'lambda': [0.3]})).mark_rule().encode(x='lambda:Q', strokeWidth=alt.value(2), color=alt.value('steelblue'))

first = loss_plot + loss_line
big_chart(first, fontsize = 30)

In [101]:
# Mean number of constraint-violating test predictions vs. regularisation weight.
violations_plot = (
    alt.Chart(processed_output)
    .mark_line()
    .encode(x='lambda:Q', y='test violation', color='Approach:N')
)

# Horizontal reference rule at zero violations (rebinds the name `loss_line`).
loss_line = (
    alt.Chart(pd.DataFrame({'test violation': [0]}))
    .mark_rule()
    .encode(y='test violation', strokeWidth=alt.value(2), color=alt.value('steelblue'))
)

second = violations_plot + loss_line
# Overlay the loss chart and the violations chart, each with its own y scale.
final = alt.layer(first, second).resolve_scale(y='independent')
big_chart(final, fontsize = 30)

# T-SNE

In [197]:
# Rebinds the global `seed` to 5; the RNGs are not re-seeded here, and
# `split_data` was already built while the seed was 1.
seed = 5
# Load the saved DeepSaDe model for seed directory 5, fold 4. The path is relative
# to the working directory and differs from the directory used in the loss cell.
# NOTE(review): torch.load unpickles the whole model object; only load trusted files.
model = torch.load('{}/deepsade_model_fold_{}.pt'.format(seed, 4))

# Display the module structure as the cell output.
model

FFNeuralNetTorch(
  (Layers): ModuleList(
    (0): Linear(in_features=36, out_features=25, bias=True)
    (1): Linear(in_features=25, out_features=25, bias=True)
    (2): Linear(in_features=25, out_features=10, bias=True)
    (3): Linear(in_features=10, out_features=16, bias=True)
  )
)

In [198]:
import torch.nn.functional as F

def input_last_layer(model, X):
    """Compute the activations that feed the model's final layer.

    Applies every entry of `model.Layers` except the last one, each followed
    by a ReLU, and returns the result as a NumPy array.

    Args:
        model: object with an indexable, sized `Layers` attribute of callables.
        X: input tensor accepted by the first layer.

    Returns:
        `np.ndarray` holding the (detached) input to the last layer.
    """
    hidden_layers = [model.Layers[k] for k in range(len(model.Layers) - 1)]
    activations = X
    for layer in hidden_layers:
        activations = F.relu(layer(activations))
    return activations.detach().numpy()

In [199]:
# Penultimate-layer activations for the *full* dataset X (not only a training
# fold, despite the variable name).
X_train_last_layer_input = input_last_layer(model, torch.tensor(X))

In [200]:
# Display the activation matrix (one row per sample of X).
X_train_last_layer_input

array([[2.555375  , 0.        , 2.3371773 , ..., 0.        , 2.149161  ,
        0.78307897],
       [2.2751064 , 0.        , 2.1646342 , ..., 0.        , 1.9636047 ,
        0.6473768 ],
       [2.4500546 , 0.        , 2.2335978 , ..., 0.        , 2.0526373 ,
        0.7381433 ],
       ...,
       [1.8316104 , 0.        , 1.615855  , ..., 0.        , 1.5613763 ,
        0.44326037],
       [2.0733545 , 0.        , 1.8752984 , ..., 0.        , 1.7428086 ,
        0.5616966 ],
       [1.4059017 , 0.        , 1.2283801 , ..., 0.        , 1.1842288 ,
        0.25799125]], dtype=float32)

In [205]:
# `y_train` is whatever the last iteration of the DeepSaDe loss loop left bound
# (the labels of the final fold's training split).
y_train

array([[1., 0., 0., ..., 1., 0., 0.],
       [1., 0., 0., ..., 1., 0., 0.],
       [1., 0., 0., ..., 1., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [206]:
# Raw logits of the loaded model on X_train.
# NOTE(review): `X_train` is left over from the last fold of the DeepSaDe loss loop,
# while `model` was loaded separately as seed 5 / fold 4 — confirm these belong together.
score = model(torch.tensor(X_train)).detach().numpy()
# Threshold logits at 0: strictly positive -> 1, everything else -> 0.
pred = (score > 0).astype(int)

# Show the binary predictions and how often each output unit fires.
pred, np.sum(pred, axis=0)

(array([[1, 0, 0, ..., 1, 0, 0],
        [1, 0, 0, ..., 1, 0, 0],
        [1, 0, 0, ..., 1, 0, 0],
        ...,
        [1, 0, 0, ..., 1, 0, 0],
        [1, 0, 0, ..., 1, 0, 0],
        [1, 0, 0, ..., 1, 0, 0]]),
 array([3941,    0,    0,    0,    0,    0,    0, 3941,    0,    0, 3941,
           0,    0, 3941,    0,    0]))

In [207]:
def accuracy_score_ml(pred, y):
    """Mean per-sample overlap score between binary predictions and labels.

    For each sample the score is dot(pred, y) divided by the number of
    positions that are 1 in the label or in the prediction (size of the
    union of the two index sets).

    Args:
        pred: per-sample binary predictions, indexable by sample.
        y: per-sample binary labels; `len(y)` fixes the number of samples.

    Returns:
        The average of the per-sample scores over `len(y)` samples.
    """
    running_total = 0
    for i, truth in enumerate(y):
        guess = pred[i]
        true_positions = set(np.where(truth == 1)[0])
        predicted_positions = set(np.where(guess == np.float32(1))[0])
        running_total += np.dot(guess, truth) / len(true_positions | predicted_positions)
    return running_total / len(y)

# Score the thresholded predictions against the training labels currently bound to `y_train`.
accuracy_score_ml(pred, y_train)

0.2682302050482726

In [185]:
from sklearn.manifold import TSNE

In [186]:
# 2-D t-SNE embedding of the penultimate-layer activations. t-SNE is stochastic
# (random initialisation here), so the notebook's `seed` is passed as
# `random_state` to make the embedding and the scatter plot reproducible.
X_embedded = TSNE(n_components=2, init='random', perplexity=3, random_state=seed).fit_transform(X_train_last_layer_input)

In [187]:
# Wrap the embedding in a DataFrame with named coordinate columns for plotting.
X_embedded = pd.DataFrame(X_embedded)
X_embedded = X_embedded.set_axis(['x1', 'x2'], axis=1)
X_embedded

Unnamed: 0,x1,x2
0,8.807647,7.383164
1,-47.591892,-17.263577
2,37.548386,39.846497
3,81.863945,-13.997884
4,62.181362,-28.769341
...,...,...
4921,33.020168,-130.695984
4922,-77.918190,-6.867886
4923,68.638092,-41.637119
4924,3.398934,-6.985308


In [188]:
# Scatter plot of the 2-D embedding: one point per row of X_embedded.
alt.Chart(X_embedded).mark_point().encode(
    x='x1',
    y='x2'
)

In [209]:
# Inspect the penultimate-layer activation vector of the first sample.
X_train_last_layer_input[0]

array([2.555375  , 0.        , 2.3371773 , 0.8118825 , 0.        ,
       0.        , 2.1726537 , 0.        , 2.149161  , 0.78307897],
      dtype=float32)