In [1]:
%load_ext autoreload
%autoreload 2
import torch
import torch.nn as nn
import numpy as np
from linear_cca import linear_cca
from torch.utils.data import BatchSampler, SequentialSampler
from DeepCCAModels import DeepCCA
from main import Solver
from utils import load_data, svm_classify
try:
    import cPickle as thepickle
except ImportError:
    import _pickle as thepickle

import gzip
import numpy as np
torch.set_default_tensor_type(torch.DoubleTensor)

In [2]:
############
# Parameters Section

device = torch.device('cuda')
print("Using", torch.cuda.device_count(), "GPUs")

# the path to save the final learned features
save_to = './new_features.gz'

# the size of the new space learned by the model (number of the new features)
outdim_size = 10

# size of the input for view 1 and view 2
input_shape1 = 784
input_shape2 = 784

# number of layers with nodes in each one
layer_sizes1 = [1024, 1024, 1024, outdim_size]
layer_sizes2 = [1024, 1024, 1024, outdim_size]

# layer_sizes1 = [1024, outdim_size]
# layer_sizes2 = [1024, outdim_size]

# the parameters for training the network
learning_rate = 1e-3
epoch_num = 10
batch_size = 800

# the regularization parameter of the network
# seems necessary to avoid the gradient exploding especially when non-saturating activations are used
reg_par = 1e-5

# specifies if all the singular values should get used to calculate the correlation or just the top outdim_size ones
# if one option does not work for a network or dataset, try the other one
use_all_singular_values = False

# if a linear CCA should get applied on the learned features extracted from the networks
# it does not affect the performance on noisy MNIST significantly
apply_linear_cca = True
# end of parameters section
############

# Each view is stored in a gzip file separately. They will get downloaded the first time the code gets executed.
# Datasets get stored under the datasets folder of user's Keras folder
# normally under [Home Folder]/.keras/datasets/
data1 = load_data('./noisymnist_view1.gz')
data2 = load_data('./noisymnist_view2.gz')

Using 4 GPUs
loading data ...
loading data ...


In [3]:
# Building, training, and producing the new features by DCCA
model = DeepCCA(layer_sizes1, layer_sizes2, input_shape1,
                input_shape2, outdim_size, use_all_singular_values, device=device).double()
l_cca = None
if apply_linear_cca:
    l_cca = linear_cca()
solver = Solver(model, l_cca, outdim_size, epoch_num, batch_size,
                learning_rate, reg_par,loss_type = 'MSE', device=device)
train1, train2 = data1[0][0], data2[0][0]
val1, val2 = data1[1][0], data2[1][0]
test1, test2 = data1[2][0], data2[2][0]
# val1=None
# test1=None
solver.fit(train1, train2, val1, val2, test1, test2)
# TODO: Save linear_cca model if needed

set_size = [0, train1.size(0), train1.size(
    0) + val1.size(0), train1.size(0) + val1.size(0) + test1.size(0)]
loss, outputs = solver.test(torch.cat([train1, val1, test1], dim=0), torch.cat(
    [train2, val2, test2], dim=0), apply_linear_cca)
# print(outputs.size)

[ INFO : 2024-02-02 23:30:02,500 ] - DataParallel(
  (module): DeepCCA(
    (attention1): SelfAttention(
      (query_projection): Linear(in_features=784, out_features=1024, bias=True)
      (key_projection): Linear(in_features=784, out_features=1024, bias=True)
      (value_projection): Linear(in_features=784, out_features=1024, bias=True)
    )
    (attention2): SelfAttention(
      (query_projection): Linear(in_features=784, out_features=1024, bias=True)
      (key_projection): Linear(in_features=784, out_features=1024, bias=True)
      (value_projection): Linear(in_features=784, out_features=1024, bias=True)
    )
    (model1): MlpNet(
      (layers): ModuleList(
        (0-2): 3 x Sequential(
          (0): Linear(in_features=1024, out_features=1024, bias=True)
          (1): Sigmoid()
          (2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
        )
        (3): Sequential(
          (0): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=

loss is MSE!!!!!!


[ INFO : 2024-02-02 23:30:10,780 ] - Epoch 1: val_loss improved from 10.0000 to 2.7974, saving model to checkpoint.model


saving model 1


[ INFO : 2024-02-02 23:30:11,577 ] - Epoch 1/10 - time: 8.98 - training_loss: 2.4008 - val_loss: 2.7974
[ INFO : 2024-02-02 23:30:18,995 ] - Epoch 2: val_loss improved from 2.7974 to 0.7472, saving model to checkpoint.model


saving model 1


[ INFO : 2024-02-02 23:30:19,780 ] - Epoch 2/10 - time: 8.20 - training_loss: 1.3480 - val_loss: 0.7472
[ INFO : 2024-02-02 23:30:27,053 ] - Epoch 3: val_loss did not improve from 0.7472
[ INFO : 2024-02-02 23:30:27,054 ] - Epoch 3/10 - time: 7.27 - training_loss: 1.0167 - val_loss: 16.3302
[ INFO : 2024-02-02 23:30:34,291 ] - Epoch 4: val_loss did not improve from 0.7472
[ INFO : 2024-02-02 23:30:34,291 ] - Epoch 4/10 - time: 7.24 - training_loss: 0.8365 - val_loss: 61.7502
[ INFO : 2024-02-02 23:30:41,617 ] - Epoch 5: val_loss did not improve from 0.7472
[ INFO : 2024-02-02 23:30:41,618 ] - Epoch 5/10 - time: 7.33 - training_loss: 0.7019 - val_loss: 49482.5396
[ INFO : 2024-02-02 23:30:48,893 ] - Epoch 6: val_loss did not improve from 0.7472
[ INFO : 2024-02-02 23:30:48,894 ] - Epoch 6/10 - time: 7.27 - training_loss: 0.6695 - val_loss: 258685397.4063
[ INFO : 2024-02-02 23:30:56,159 ] - Epoch 7: val_loss did not improve from 0.7472
[ INFO : 2024-02-02 23:30:56,160 ] - Epoch 7/10 - t

Linear CCA started!


In [4]:
new_data = []
# print(outputs)
for idx in range(3):
    new_data.append([outputs[0][set_size[idx]:set_size[idx + 1], :],
                     outputs[1][set_size[idx]:set_size[idx + 1], :], data1[idx][1]])
# Training and testing of SVM with linear kernel on the view 1 with new features
[test_acc, valid_acc] = svm_classify(new_data, C=0.01)
print("Accuracy on view 1 (validation data) is:", valid_acc * 100.0)
print("Accuracy on view 1 (test data) is:", test_acc*100.0)


training SVM...
Accuracy on view 1 (validation data) is: 22.939999999999998
Accuracy on view 1 (test data) is: 21.16


In [5]:
# Attention loss
new_data = []
# print(outputs)
for idx in range(3):
    new_data.append([outputs[0][set_size[idx]:set_size[idx + 1], :],
                     outputs[1][set_size[idx]:set_size[idx + 1], :], data1[idx][1]])
# Training and testing of SVM with linear kernel on the view 1 with new features
[test_acc, valid_acc] = svm_classify(new_data, C=0.01)
print("Accuracy on view 1 (validation data) is:", valid_acc * 100.0)
print("Accuracy on view 1 (test data) is:", test_acc*100.0)

"""
Attention size = 256; 10 ep; 
training SVM...
Accuracy on view 1 (validation data) is: 64.02
Accuracy on view 1 (test data) is: 61.19


Attention size = 1024; 10 ep
training SVM...
Accuracy on view 1 (validation data) is: 80.2
Accuracy on view 1 (test data) is: 76.86

Attention size = 1024; 25 ep
training SVM...
Accuracy on view 1 (validation data) is: 79.3
Accuracy on view 1 (test data) is: 77.24

Attention size = 1024; 10 ep; mse loss
training SVM...
Accuracy on view 1 (validation data) is: 79.29
Accuracy on view 1 (test data) is: 77.25
"""

training SVM...
Accuracy on view 1 (validation data) is: 79.75
Accuracy on view 1 (test data) is: 77.83


'\nAttention size = 256; 10 ep; \ntraining SVM...\nAccuracy on view 1 (validation data) is: 64.02\nAccuracy on view 1 (test data) is: 61.19\n\n\nAttention size = 1024; 10 ep\ntraining SVM...\nAccuracy on view 1 (validation data) is: 80.2\nAccuracy on view 1 (test data) is: 76.86\n\nAttention size = 1024; 25 ep\ntraining SVM...\nAccuracy on view 1 (validation data) is: 79.3\nAccuracy on view 1 (test data) is: 77.24\n\nAttention size = 1024; 10 ep; mse loss\ntraining SVM...\nAccuracy on view 1 (validation data) is: 79.29\nAccuracy on view 1 (test data) is: 77.25\n'

In [12]:
# CCA loss

new_data = []
# print(outputs)
for idx in range(3):
    new_data.append([outputs[0][set_size[idx]:set_size[idx + 1], :],
                     outputs[1][set_size[idx]:set_size[idx + 1], :], data1[idx][1]])
# Training and testing of SVM with linear kernel on the view 1 with new features
[test_acc, valid_acc] = svm_classify(new_data, C=0.01)
print("Accuracy on view 1 (validation data) is:", valid_acc * 100.0)
print("Accuracy on view 1 (test data) is:", test_acc*100.0)

training SVM...
Accuracy on view 1 (validation data) is: 95.54
Accuracy on view 1 (test data) is: 95.38


In [4]:
# REFERENCE OUTPUT ################
#####################################
####### DON"T RUN ##################
new_data = []
# print(outputs)
for idx in range(3):
    new_data.append([outputs[0][set_size[idx]:set_size[idx + 1], :],
                     outputs[1][set_size[idx]:set_size[idx + 1], :], data1[idx][1]])
# Training and testing of SVM with linear kernel on the view 1 with new features
[test_acc, valid_acc] = svm_classify(new_data, C=0.01)
print("Accuracy on view 1 (validation data) is:", valid_acc * 100.0)
print("Accuracy on view 1 (test data) is:", test_acc*100.0)

training SVM...
Accuracy on view 1 (validation data) is: 95.61
Accuracy on view 1 (test data) is: 95.1


In [13]:
# Saving new features in a gzip pickled file specified by save_to
print('saving new features ...')
f1 = gzip.open(save_to, 'wb')
thepickle.dump(new_data, f1)
f1.close()

saving new features ...


In [14]:
d = torch.load('checkpoint.model')
solver.model.load_state_dict(d)
solver.model.parameters()

<generator object Module.parameters at 0x7f1aa81e9bd0>