In [5]:
from dataset_slide import *
import torch
import torch.nn as nn

import numpy as np

from scipy.stats import uniform, randint

from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error
import xgboost as xgb

Loading...
Finished Loading...


In [6]:
# Session name -> {participant ID -> integer code in {1, 2, 3}}.
# Each inner dict is handed to SpeedDatingDS as its `social_rel` argument;
# the meaning of the codes is defined by that class -- TODO confirm against
# dataset_slide. Outer and inner entry order is kept exactly as first written.
person_order = {
    'F1_Interaction_1': {'P2': 1, 'P1': 1, 'P3': 2},
    'F1_Interaction_2': {'P2': 1, 'P1': 1, 'P3': 2},
    'F2_Interaction_1': {'P4': 1, 'P5': 3},
    'F2_Interaction_2': {'P4': 1},
    'F3_Interaction_1': {'P8': 3, 'P6': 1, 'P7': 1},
    'F3_Interaction_2': {'P6': 1, 'P7': 1},
    'F4_Interaction_1': {'P14': 2, 'P12': 1, 'P11': 1, 'P10': 1, 'P9': 1, 'P13': 3},
    'F4_Interaction_2': {'P12': 1, 'P11': 1, 'P10': 1, 'P9': 1, 'P13': 3},
    'F5_Interaction_1': {'P16': 2, 'P15': 1},
    'F5_Interaction_2': {'P16': 2, 'P15': 1},
    'F6_Interaction_1': {'P19': 3, 'P18': 1, 'P17': 1},
    'F6_Interaction_2': {'P19': 3, 'P18': 1, 'P17': 1},
    'F7_Interaction_1': {'P22': 3, 'P20': 1, 'P21': 1, 'P23': 2},
    'F8_Interaction_1': {'P24': 1, 'P25': 3},
    'F8_Interaction_2': {'P24': 1, 'P25': 3},
    'F8_Interaction_3': {'P24': 1, 'P25': 3},
    'F10_Interaction_1': {'P27': 1, 'P28': 1},
    'F11_Interaction_1': {'P29': 1, 'P30': 2},
    'F11_Interaction_2': {'P29': 1, 'P30': 2},
    'F13_Interaction_1': {'P32': 1, 'P33': 2},
    'F17_Interaction_1': {'P37': 1, 'P38': 2},
    'F17_Interaction_2': {'P37': 1, 'P38': 2},
}


# Sessions bucketed under an integer key. For every session listed here the
# key equals the number of participants that session has in `person_order`.
group_nums = {
    1: ['F2_Interaction_2'],
    2: [
        'F2_Interaction_1',
        'F3_Interaction_2',
        'F5_Interaction_1',
        'F5_Interaction_2',
        'F8_Interaction_1',
        'F8_Interaction_2',
        'F8_Interaction_3',
        'F10_Interaction_1',
        'F11_Interaction_1',
        'F11_Interaction_2',
        'F13_Interaction_1',
        'F17_Interaction_1',
        'F17_Interaction_2',
    ],
    3: [
        'F1_Interaction_1',
        'F1_Interaction_2',
        'F3_Interaction_1',
        'F6_Interaction_1',
        'F6_Interaction_2',
    ],
    4: ['F7_Interaction_1'],
    5: ['F4_Interaction_2'],
    6: ['F4_Interaction_1'],
}

# Sessions filed under key 3 of `group_nums` are the ones used in this run.
group_ids = group_nums[3]

# One SpeedDatingDS per selected session, each given that session's
# participant-code mapping as `social_rel`.
group_all_dataset = [
    SpeedDatingDS(group_id=session_id, social_rel=person_order[session_id])
    for session_id in group_ids
]

# Chain the per-session datasets into a single indexable dataset.
SD = torch.utils.data.ConcatDataset(group_all_dataset)

########################################################################
#Dataloader
########################################################################
# Roughly 80/10/10 split. The validation length is derived as the remainder
# so the three lengths always add up to len(SD); random_split raises when
# they do not. (The previous `len(SD)//10 + 1` only summed correctly when
# len(SD) % 10 >= 5, and yields the same sizes as this formula in that case.)
total_len = len(SD)
train_len = total_len - total_len // 5
test_len = total_len // 10
val_len = total_len - train_len - test_len

# Fixed generator seed keeps the split membership reproducible across runs.
train, val, test = torch.utils.data.random_split(
    SD,
    (train_len, val_len, test_len),
    generator=torch.Generator().manual_seed(0),
)

# batch_size equals the split length, so each loader yields the whole split
# as a single batch.
trainloader = DataLoader(train, batch_size = train_len, shuffle = True, num_workers = 8)
valloader = DataLoader(val, batch_size = val_len, shuffle = True, num_workers = 8)
testloader = DataLoader(test, batch_size = test_len, shuffle = True, num_workers = 8)

In [7]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _collect_context_and_labels(loader, device):
    """Drain ``loader`` and return ``(context, index_labels)`` tensors.

    For every batch, ``batch['vb_output']`` is summed over dim 2 and flattened
    to two dimensions. A sample's label is ``column + 1`` of a non-zero entry
    in its row, or ``0`` when the row is all zeros.

    NOTE(review): if a row has several non-zero columns, the indexed
    assignment receives duplicate row indices and which column wins is not
    guaranteed -- confirm rows are at most one-hot.

    Args:
        loader: iterable of dict batches with 'context' and 'vb_output' tensors.
        device: torch device on which the labels are built.

    Returns:
        Tuple ``(context, index_labels)``; batches are concatenated along
        dim 0 when the loader yields more than one.

    Raises:
        ValueError: if the loader yields no batches.
    """
    contexts, targets = [], []
    for batch in loader:
        context, vb_output = batch['context'], batch['vb_output']

        labels = vb_output.sum(2).to(device).flatten(start_dim=1)
        index_labels = torch.zeros(context.shape[0]).long().to(device)
        # Compute the non-zero coordinates once: column 0 = row, column 1 = position.
        hits = labels.nonzero()
        index_labels[hits[:, 0]] = hits[:, 1] + 1

        contexts.append(context)
        targets.append(index_labels)

    if not contexts:
        raise ValueError("loader produced no batches")
    if len(contexts) == 1:
        # Single full-size batch: return it untouched, no extra copy.
        return contexts[0], targets[0]
    return torch.cat(contexts), torch.cat(targets)


x_train, y_train = _collect_context_and_labels(trainloader, device)
x_test, y_test = _collect_context_and_labels(testloader, device)
x_val, y_val = _collect_context_and_labels(valloader, device)




In [8]:
def _to_numpy(values, flatten_features=False):
    """Return ``values`` as a NumPy array, optionally reshaped to 2-D.

    Accepts either a torch tensor or something already array-like, so this
    cell can be re-run after ``x_test`` / ``y_test`` / ``x_val`` / ``y_val``
    have been overwritten with arrays (the earlier tensor-only calls failed
    on a second run).

    Args:
        values: torch tensor or array-like.
        flatten_features: when True, collapse every dimension after the first
            into one, giving shape ``(n_samples, n_features)``.
    """
    if torch.is_tensor(values):
        values = values.detach().cpu().numpy()
    else:
        values = np.asarray(values)
    if flatten_features:
        values = values.reshape(values.shape[0], -1)
    return values


x = _to_numpy(x_train, flatten_features=True)
y = _to_numpy(y_train)

x_test = _to_numpy(x_test, flatten_features=True)
y_test = _to_numpy(y_test)

x_val = _to_numpy(x_val, flatten_features=True)
y_val = _to_numpy(y_val)

In [29]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC
import sklearn


# Candidate SVC configurations explored by the cross-validated grid search.
tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
                    {'kernel':['sigmoid'], 'gamma':["auto"]},
                     {'kernel':['poly'], 'gamma':["auto"]}
                   ]

# Metric name(s); each is expanded to its macro-averaged scorer below.
scores = ['f1']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(
        SVC(), tuned_parameters, scoring='%s_macro' % score
    )
    clf.fit(x, y)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        # Spread is reported as two standard deviations across the CV folds.
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(x_test)
    # The header above announces a report; it was previously never printed.
    print(classification_report(y_true, y_pred))
    print()

    

Automatically created module for IPython interactive environment
# Tuning hyper-parameters for f1

Best parameters set found on development set:

{'C': 10, 'kernel': 'linear'}

Grid scores on development set:

0.233 (+/-0.000) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.233 (+/-0.000) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.233 (+/-0.000) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.233 (+/-0.000) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.233 (+/-0.000) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.233 (+/-0.000) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.316 (+/-0.054) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.233 (+/-0.000) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.287 (+/-0.052) for {'C': 1, 'kernel': 'linear'}
0.345 (+/-0.063) for {'C': 10, 'kernel': 'linear'}
0.343 (+/-0.068) for {'C': 100, 'kernel': 'linear'}
0.342 (+/-0.067) for {'C': 1000, 'kernel': 'linear'}
0.233 (+/-0.000) for {'gamma': 'auto', 'kernel': 'sigmoid'}
0.23

In [35]:
# Hyper-parameters are hard-coded to the combination the grid search reported
# as best: {'C': 10, 'kernel': 'linear'}.
svclassifier = SVC(kernel = 'linear', C = 10)

# Fit on the training features/labels.
svclassifier.fit(x, y)
# Predict on the held-out test split.
y_pred = svclassifier.predict(x_test)

# Evaluate: confusion matrix, macro-averaged F1, and plain accuracy.
print(confusion_matrix(y_test, y_pred))
# f1_score expects (y_true, y_pred); the arguments were previously swapped.
print("f1: {}".format(sklearn.metrics.f1_score(y_test, y_pred, average='macro')))
print("acc: {}".format((y_pred == y_test).mean()))

[[281   0   1   0]
 [  8   2   0   0]
 [ 22   1   3   2]
 [  9   0   0   3]]
f1: 0.4454220753596609
acc: 0.8704819277108434
