In [1]:
import trainHelper
import utils
from mds.lmds import landmarkMDS
from mds.cmds import classicalMDS
from mds.fastmap import fastmap
import numpy
from torch.utils.data import DataLoader
import torch
from torch import nn, Tensor
import lossFunction as lossF
from model.AutoEncoder import AutoEncoder
from model.DynParam import DynParam
from model.Linear import Linear
from model.VAE import VAE
import os.path
import glob

import sys
from importlib import reload
# Refresh the already-imported project modules so that changes to their
# source take effect without restarting the kernel.
for _module_name in ('utils', 'trainHelper', 'lossFunction'):
    reload(sys.modules[_module_name])

# Newly created floating-point tensors default to double precision.
torch.set_default_tensor_type('torch.DoubleTensor')


In [2]:
sample_space = (1000000, 100)
ss, N, d = 1600, 10, 2


def _matches_requirement(dm):
    """Return True when `dm` holds exactly `ss` distance matrices of size N x N.

    The exact layout returned by the `utils.generate_*_DM` helpers is not
    visible from this notebook, so only the trailing (N, N) dimensions and the
    total element count are checked. That is what the later
    `.view(ss * 2, 1, N, N)` on the stacked data relies on.
    """
    return tuple(dm.size()[-2:]) == (N, N) and dm.numel() == ss * N * N


try:
    # NOTE(review): the .pkl files are presumably unpickled by
    # utils.load_variable; only load cache files you created yourself.
    euclidean_data1 = utils.load_variable('data/euclidean_data1.pkl')
    euclidean_data2 = utils.load_variable('data/euclidean_data2.pkl')

    # Load from the same paths the generation branch below writes to.
    rand_data1 = utils.load_variable('data/rand_data1.pkl')
    rand_data2 = utils.load_variable('data/rand_data2.pkl')

    cached_sets = (euclidean_data1, euclidean_data2, rand_data1, rand_data2)
    if not all(_matches_requirement(dm) for dm in cached_sets):
        print("Updated data for requirement !")
        raise ValueError("Previous data not match requirement !")

except Exception as err:
    # Reached when a cache file is missing or unreadable, or when the cached
    # tensors no longer match (ss, N). `Exception` (rather than a bare
    # `except:`) lets KeyboardInterrupt / SystemExit propagate.
    print("Regenerating data:", err)

    euclidean_data1 = utils.generate_euclidean_DM(
        N=N, d=d,
        sample_size=ss,
        sample_space=sample_space, isInt=True)

    euclidean_data2 = utils.generate_euclidean_DM(
        N=N, d=d,
        sample_size=ss,
        sample_space=sample_space, isInt=True)

    rand_data1 = utils.generate_rand_DM(
        N=N,
        sample_size=ss,
        sample_space=sample_space, isInt=True)

    rand_data2 = utils.generate_rand_DM(
        N=N,
        sample_size=ss,
        sample_space=sample_space, isInt=True)

    # Persist the fresh data so the next run can take the cached branch.
    utils.dump_variable(euclidean_data1, 'data/euclidean_data1.pkl')
    utils.dump_variable(euclidean_data2, 'data/euclidean_data2.pkl')

    utils.dump_variable(rand_data1, 'data/rand_data1.pkl')
    utils.dump_variable(rand_data2, 'data/rand_data2.pkl')

In [87]:
test_batch = 1000

# Draw `test_batch` random distance matrices; each one is produced by summing
# the two components (v_size=2) handed to `transform_func`.
test_data = utils.generate_rand_DM(
    N=N,
    sample_size=test_batch,
    sample_space=sample_space,
    isInt=True,
    v_size=2,
    transform_func=lambda x: x[0] + x[1],
)

# Reshape to (test_batch, 1, N, N) and keep the first item returned by
# utils.minmax_norm (the normalised data, as it is used everywhere below).
test_data = utils.minmax_norm(
    test_data.view(test_batch, 1, N, N), dmin=0
)[0]


# %%
# Baseline: embed every test distance matrix in 2-D with classical MDS and
# with FastMap, rebuild a normalised distance matrix from each embedding and
# record the summed squared error against the original matrix.
cmds_loss, fastmap_loss = [], []

# The loop variable is deliberately NOT called `d`: that name holds the
# embedding dimension defined in the data cell and must not be overwritten
# with a tensor.
for sample_dm in test_data:

    # One shared numpy copy of the (N, N) matrix is handed to both methods,
    # in the same order as before (cMDS first, then FastMap).
    sample_np = numpy.array(sample_dm[0].data)

    cmds_rs = classicalMDS(sample_np, 2)
    cmds_rs = torch.tensor(cmds_rs)

    cmds_dm = utils.get_distance_matrix(cmds_rs)
    cmds_dm = utils.minmax_norm(cmds_dm, dmin=0)[0]

    cmds_loss.append(torch.sum((cmds_dm - sample_dm) ** 2))

    fastmap_rs = fastmap(sample_np, 2)
    fastmap_rs = torch.tensor(fastmap_rs)

    fastmap_dm = utils.get_distance_matrix(fastmap_rs)
    fastmap_dm = utils.minmax_norm(fastmap_dm, dmin=0)[0]

    fastmap_loss.append(torch.sum((fastmap_dm - sample_dm) ** 2))


print("cmds_loss: \t",
        torch.tensor(cmds_loss).sum(), "|" ,
        len(cmds_loss), "success")

print("fastmap_loss: \t",
        torch.tensor(fastmap_loss).sum(), "|" ,
        len(fastmap_loss), "success")


# cmds_loss: 	 tensor(12066.6691) | 1000 success
# fastmap_loss: 	 tensor(73897.4167) | 1000 success

cmds_loss: 	 tensor(3384.4374) | 1000 success
fastmap_loss: 	 tensor(8198.4327) | 1000 success


In [90]:


def test_model(model, preprocess, test_data, target_dm):
    """Score a model by how well classical MDS can embed its predictions.

    The test data is preprocessed, min-max normalised and passed through
    `model`. Each output row is expanded with `utils.unvectorize_distance`,
    embedded with classical MDS, and the normalised distance matrix of that
    embedding is compared with the model's own matrix.

    Args:
        model: callable mapping the preprocessed batch to one output row per
            sample.
        preprocess: callable applied to `test_data` before normalisation.
        test_data: batch of distance matrices to evaluate on.
        target_dm: `(n_points, embed_dim)` pair. `n_points` is passed to
            `utils.unvectorize_distance` and `embed_dim` to `classicalMDS`.
            The callers in this notebook pass `(N, 2)`.

    Returns:
        0-dim tensor: the summed squared error over all samples (lower is
        better). It carries no autograd history.
    """
    n_points, embed_dim = target_dm

    # Pure evaluation: skip autograd bookkeeping so no graph is built for the
    # whole test batch and the returned score has no grad_fn.
    with torch.no_grad():
        t_data = preprocess(test_data)
        t_data = utils.minmax_norm(t_data, dmin=0)[0]
        model_rs = model(t_data)

    model_dm = torch.stack([
        utils.unvectorize_distance(m, n_points) for m in model_rs
    ])

    score = []

    for dm in model_dm:

        rs = torch.tensor(classicalMDS(dm[0].data, embed_dim))

        rs_dm = utils.get_distance_matrix(rs)
        rs_dm = utils.minmax_norm(rs_dm, dmin=0)[0]

        loss = (rs_dm - dm.view_as(rs_dm)) ** 2

        # Store plain floats; the per-sample values are only summed below.
        score.append(torch.sum(loss).item())

    return torch.tensor(score).sum()


# Evaluate every saved auto-encoder checkpoint on the test set and list the
# ten with the lowest score.
result_score = []

for filepath in glob.iglob('backup/AE*.model'):
    h: trainHelper.TrainHelper = utils.load_variable(filepath)
    checkpoint_score = test_model(h.model, h.preprocess, test_data, (N, 2))
    result_score.append([filepath, checkpoint_score])

# Ascending by score: best checkpoints first.
result_score.sort(key=lambda entry: entry[1])

for path, value in result_score[:10]:
    print(path, "   \t\t\t|", value.data)

backup\AE_SGD_3_64_distance_900.model    			| tensor(2310.0738)
backup\AE_SGD_2_64_distance_900.model    			| tensor(2374.3670)
backup\AE_SGD_3_64_distance_600.model    			| tensor(2427.4779)
backup\AE_SGD_2_64_distance_600.model    			| tensor(2486.6837)
backup\AE_SGD_2_32_distance_900.model    			| tensor(2656.1486)
backup\AE_SGD_2_64_distance_300.model    			| tensor(2729.6968)
backup\AE_SGD_2_32_distance_600.model    			| tensor(2810.5057)
backup\AE_SGD_2_32_distance_300.model    			| tensor(3226.2111)
backup\AE_SGD_3_64_distance_300.model    			| tensor(3249.5551)
backup\AE_SGD_3_32_distance_900.model    			| tensor(3954.2415)


In [3]:
# Training set: the two Euclidean distance-matrix sets stacked together and
# flattened to (ss * 2, 1, N, N). rand_data1 / rand_data2 are not included.
data = torch.stack((euclidean_data1, euclidean_data2)).view(ss * 2, 1, N, N)

# Keep the first item returned by utils.minmax_norm as the training data.
data = utils.minmax_norm(data, dmin=0)[0]

batch = 32
dlr = DataLoader(data, shuffle=True, batch_size=batch)

In [36]:
def MSES(model_dm, target_dm):
    """Sum over the batch of each sample's squared total squared error.

    `target_dm` is viewed with the shape of `model_dm`. The element-wise
    squared differences are summed per sample (first dimension), each
    per-sample sum is squared, and the results are added into one scalar.
    """
    squared_err = (model_dm - target_dm.view_as(model_dm)).pow(2)

    # (batch, 1, everything else) so that dim=2 covers one whole sample.
    per_sample = squared_err.view(squared_err.size(0), 1, -1).sum(dim=2)

    return per_sample.pow(2).sum()

def preprocess(x):
    """Vectorise a batch of distance matrices into a fresh leaf tensor.

    The output of `utils.vectorize_distance_from_DM(x)` is detached and
    copied, then flagged with `requires_grad=True`. This is the copy idiom
    recommended by PyTorch in place of `torch.tensor(existing_tensor, ...)`,
    which emits a UserWarning on every call.
    """
    vectorized = utils.vectorize_distance_from_DM(x)
    return vectorized.detach().clone().requires_grad_(True)

# Sweep over hidden width (`neuron`) and number of hidden layers (`depth`),
# training each auto-encoder in three consecutive stages of EPOCH epochs and
# backing it up after every stage.

# Loop-invariant settings, computed once.
in_dim = N * (N - 1) // 2   # N*(N-1)/2 entries: presumably the vectorised off-diagonal half of an N x N matrix
out_dim = N * 2
EPOCH = 300

for neuron in [16, 32, 64]:

    for depth in range(2, 6):

        model_id = f"AE_SGD_{depth}_{neuron}_distance"

        model = AutoEncoder([in_dim, *([neuron] * depth), out_dim],
                 final_activation=None)

        optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

        lossFun = MSES

        helper = trainHelper.TrainHelper(id=model_id,
            model=model,
            optimizer=optimizer,
            preprocess=preprocess,
            lossFun=lossFun, lr_factor=0.1)

        # Separate name for the stage counter so the layer count `depth`
        # is not overwritten inside the loop.
        for stage in range(3):

            print("Training ", helper.id)

            helper.train(dlr, EPOCH, print_on_each=10)
            helper.backup()

            print("Time used for the training: ",
                            helper.records['train_time'].sum(), "s")

            # NOTE(review): saved checkpoints are scored with `h.model` in the
            # ranking cell, while this call evaluates `helper.model.encode`;
            # confirm which of the two is intended.
            print("Test Result: ",
                            test_model(helper.model.encode, helper.preprocess, test_data, (N, 2)))


 Mean loss: 315.31459514641705
510 	| Mean loss: 315.3145841745517
520 	| Mean loss: 315.3145847307575
530 	| Mean loss: 315.31458143870543
540 	| Mean loss: 315.314577852435
550 	| Mean loss: 315.3145640460267
560 	| Mean loss: 315.3145686338578
570 	| Mean loss: 315.31456886220866
580 	| Mean loss: 315.3145646147291
590 	| Mean loss: 315.31456089831426
Time used for the training:  212.30386300000004 s
Test Result:  tensor(13558.5307, grad_fn=<SumBackward0>)
Training  AE_SGD_4_32_distance
600 	| Mean loss: 315.3145578244642
610 	| Mean loss: 315.3145590783664
620 	| Mean loss: 315.3145566692312
630 	| Mean loss: 315.3145489875231
640 	| Mean loss: 315.3145505006942
650 	| Mean loss: 315.31454035557704
660 	| Mean loss: 315.314532603628
670 	| Mean loss: 315.3145296792866
680 	| Mean loss: 315.3145298528115
690 	| Mean loss: 315.31452619208284
700 	| Mean loss: 315.31452256535744
710 	| Mean loss: 315.3145171905174
720 	| Mean loss: 315.3145183338085
730 	| Mean loss: 315.3145165027522

In [None]:

# Loss statistics recorded by the most recently trained helper.
loss_history = helper.records[
    ['loss_mean', 'loss_max', 'loss_min']
].to_dict(orient='list')
utils.plot_records(loss_history, helper.epoch, value_label='loss')

# Recorded training time of the same helper.
time_history = helper.records[['train_time']].to_dict(orient='list')
utils.plot_records(time_history, helper.epoch, value_label='train_time')

