In [1]:
initial_path = 'peptide-QML'
# initial_path = '..'

In [2]:
import sys, time, pickle
sys.path.append(initial_path)

%load_ext autoreload
%autoreload 2
from my_code import helper_classes as c
from my_code import quantum_nodes as q
# from my_code import pytorch_model as m

In [3]:
def time_left(time_start, n_epochs_total, n_batches_total, n_epochs_done, current_batch):
    time_left = (time.time() - time_start) * (n_epochs_total*n_batches_total / (n_epochs_done*n_batches_total + current_batch) - 1)
    total_hours = int(time_left // 3600)
    total_minutes = int((time_left - total_hours * 3600) // 60)
    total_seconds = int(time_left - total_hours * 3600 - total_minutes * 60)

    # remaining time for the current epoch
    time_left_epoch = (time.time() - time_start) / (n_epochs_done*n_batches_total + current_batch) * (n_batches_total - current_batch)
    epoch_hours = int(time_left_epoch // 3600)
    epoch_minutes = int((time_left_epoch - epoch_hours * 3600) // 60)
    epoch_seconds = int(time_left_epoch - epoch_hours * 3600 - epoch_minutes * 60)

    return epoch_hours, epoch_minutes, epoch_seconds, total_hours, total_minutes, total_seconds

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F



# Define the VAE model
class VAE(nn.Module):

    N_EMB = 18
    RNN_types_dict = {'LSTM': nn.LSTM, 'GRU': nn.GRU, 'RNN': nn.RNN}

    def __init__(self, emb_dim:int, latent_dim:int, output_dim:int, RNN_type:str, RNN_units:list, bidirectional:bool=True, dropout:float=0, num_layers:int=2, one_hot:bool=False):
        super(VAE, self).__init__()

        # Define the hyperparameters
        self.emb_dim = emb_dim
        self.latent_dim = latent_dim
        self.output_dim = output_dim
        self.RNN_type = RNN_type
        self.RNN_units = RNN_units if isinstance(RNN_units, list) else [RNN_units]
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.num_layers = num_layers
        self.one_hot = one_hot
        # TODO: add temperature for softmax

        # Encoder and Decoder
        self.encoder = VAE.Encoder(emb_dim, latent_dim, RNN_type, RNN_units, bidirectional, dropout, num_layers)
        self.decoder = VAE.Decoder(latent_dim, output_dim, RNN_type, RNN_units[::-1], bidirectional, dropout, num_layers, one_hot)

        # score predictor
        self.score_predictor = q.circuit(
            n_qubits = 4, 
            device = "default.qubit.torch",
            device_options = {'shots': None},
            embedding = q.parts.AmplitudeEmbedding,
            # embedding_ansatz = sweep_point['ansatz'],
            block_ansatz = q.parts.Ansatz_11,
            final_ansatz = q.parts.Ansatz_11, 
            measurement = q.parts.Measurement('Z', 1),
            # embedding_n_layers = sweep_point['embedding_n_layers'],
            # different_inputs_per_layer = False,
            block_n_layers = 10,
            # wrapper_qlayer = pw.QLayerEmpty,
        )()

    # Define the Encoder
    class Encoder(nn.Module):
        def __init__(self, emb_dim:int, latent_dim:int, RNN_type:str, RNN_units:int, bidirectional:bool, dropout:float, num_layers:int):
            super(VAE.Encoder, self).__init__()
            rnn_layer = VAE.RNN_types_dict[RNN_type]
            lstm_out_dim = RNN_units[-1] * 2 if bidirectional else RNN_units[-1]
            self.RNN_units = RNN_units

            self.embedding = nn.Embedding(VAE.N_EMB, emb_dim)

            in_units = emb_dim
            for i, out_units in enumerate(RNN_units):
                setattr(self, f'lstm_{i}', rnn_layer(in_units, out_units, batch_first=True, bidirectional=bidirectional, dropout=dropout, num_layers=num_layers)) 
                in_units = out_units * 2 if bidirectional else out_units

            self.fc_mean = nn.Linear(lstm_out_dim, latent_dim)
            self.fc_log_var = nn.Linear(lstm_out_dim, latent_dim)
        
        def forward(self, x):
            x = self.embedding(x)
            for i in range(len(self.RNN_units)):
                x, _ = getattr(self, f'lstm_{i}')(x)
            x = x[:, -1, :]  # Take the last time step output
            z_mean = self.fc_mean(x)
            z_log_var = self.fc_log_var(x)
            return z_mean, z_log_var

    # Define the Decoder
    class Decoder(nn.Module):
        def __init__(self, latent_dim:int, output_dim:int, RNN_type:str, RNN_units:int, bidirectional:bool, dropout:float, num_layers:int, one_hot:bool):
            super(VAE.Decoder, self).__init__()
            self.output_dim = output_dim
            self.one_hot = one_hot
            rnn_layer = VAE.RNN_types_dict[RNN_type]
            lstm_out_dim = RNN_units[-1] * 2 if bidirectional else RNN_units[-1]
            self.RNN_units = RNN_units

            self.fc_pre = nn.Linear(latent_dim, RNN_units[0])

            in_units = RNN_units[0]
            for i, out_units in enumerate(RNN_units):
                setattr(self, f'lstm_{i}', rnn_layer(in_units, out_units, batch_first=True, bidirectional=bidirectional, dropout=dropout, num_layers=num_layers)) 
                in_units = out_units * 2 if bidirectional else out_units

            self.fc_post = nn.Linear(lstm_out_dim, output_dim if not one_hot else output_dim*VAE.N_EMB)
        
        def forward(self, x):
            x = self.fc_pre(x)
            x = x.unsqueeze(1).repeat(1, self.output_dim, 1)
            
            for i in range(len(self.RNN_units)):
                x, _ = getattr(self, f'lstm_{i}')(x)
                
            x = self.fc_post(x[:, -1, :])
            if not self.one_hot:
                x = F.relu(x)
            else:
                x = x.view(-1, self.output_dim, VAE.N_EMB)
                x = F.softmax(x, dim=-1)       
            return x

    @staticmethod
    def reparameterize(z_mean, z_log_var):
        epsilon = torch.randn_like(z_mean)
        return z_mean + torch.exp(0.5 * z_log_var) * epsilon
    
    @staticmethod
    def loss_function(reconstructed, x, mu=None, logvar=None, one_hot:bool=False, reduction:str='sum', kl_weight:float=1.0):
        if not one_hot:
            reconstruction_loss = F.mse_loss(reconstructed, x.float(), reduction=reduction)
        else:
            reconstruction_loss = F.cross_entropy(reconstructed.view(-1, VAE.N_EMB), x.view(-1).long(), reduction=reduction)
            
        if mu is None or logvar is None or kl_weight == 0:
            return reconstruction_loss
        
        kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())  
        return reconstruction_loss + kl_divergence*kl_weight
    
    @staticmethod
    def process_decoded_outputs(reconstructed, one_hot:bool=False):
        if not one_hot:
            return torch.round(reconstructed).int()
        else:
            return torch.argmax(reconstructed, dim=-1)
    
    def forward(self, x):
        z_mean, z_log_var = self.encoder(x)
        z = self.reparameterize(z_mean, z_log_var)
        decoded_sequence = self.decoder(z)
        score = self.score_predictor(z)
        return decoded_sequence, z_mean, z_log_var, score


In [5]:
128*4

512

In [6]:
sweep_points = [
    {
        'emb_dim': 512,
        'latent_dim': 6,
        'RNN_type': 'LSTM',
        'RNN_units': [512, 512],
        'bidirectional': True,
        'dropout': 0,
        'num_layers': 1,
        'one_hot': False,
        'lr': 3e-4,
        'n_epochs': 10,
        'save_name': 'vae_6.pickle'
    },
    {
        'emb_dim': 512,
        'latent_dim': 16,
        'RNN_type': 'LSTM',
        'RNN_units': [512],
        'bidirectional': True,
        'dropout': 0.05,
        'num_layers': 2,
        'one_hot': False,
        'lr': 3e-4,
        'n_epochs': 1000,
        'save_name': 'vae_L16.pickle'
    },
]

In [7]:
for sweep_point in sweep_points:

    print('\n\n\n ## ----- NEW SWEEP POINT ----- ## \n') 
    print(sweep_point, '\n')   

    # Create the VAE model and get data
    device = "cuda:0"

    data = c.Data.load(initial_path=initial_path, file_name='PET_SCORES_12')
    data.set_test_ptc(0.1), data.to(device)
    data_loader = data.get_loader(batch_size=64, shuffle=True)

    # Define model
    vae_model = VAE(
        emb_dim = sweep_point['emb_dim'],
        latent_dim = sweep_point['latent_dim'],
        output_dim = len(data.x_train[0]), #12
        RNN_type = sweep_point['RNN_type'],
        RNN_units = sweep_point['RNN_units'],
        bidirectional = sweep_point['bidirectional'],
        dropout = sweep_point['dropout'],
        num_layers = sweep_point['num_layers'],
        one_hot = sweep_point['one_hot'],
    ).to(device)

    # Define your optimizer and loss function
    optimizer = optim.Adam(vae_model.parameters(), lr=3e-4)


    #training
    vae_model.loss_list_batch, vae_model.loss_list_epoch, vae_model.loss_list_epoch_test, vae_model.accuracy_list_epoch_test = [], [], [], []
    time_start, n_epochs, n_batches = time.time(), sweep_point['n_epochs'], len(data_loader)
    for epoch in range(n_epochs):

        vae_model.train()
        for i, (x, y) in enumerate(data_loader):

            #train
            optimizer.zero_grad()
            x_reconstructed, mu, logvar = vae_model(x)
            loss = VAE.loss_function(x_reconstructed, x, mu, logvar, one_hot=vae_model.one_hot, reduction='sum')
            loss.backward()
            optimizer.step()

            #time and print
            h, m, s, th, tm, ts = time_left(time_start, n_epochs, n_batches, epoch, i+1)

            #loss
            loss = loss.item()
            vae_model.loss_list_batch.append(loss)

            #print
            print(f'Epoch {epoch+1}/{n_epochs}, batch {i+1}/{n_batches}, loss={loss/data_loader.batch_size:.4f}, \t total time left = {th}h {tm}m {ts}s, \t epoch time left = {h}h {m}m {s}s                         ', end='\r')

        #validation
        vae_model.eval()
        with torch.no_grad():
            x_reconstructed, mu, logvar = vae_model(data.x_test_ptc)
            loss_test = VAE.loss_function(x_reconstructed, data.x_test_ptc, mu, logvar, one_hot=vae_model.one_hot, reduction='sum')
            vae_model.loss_list_epoch_test.append(loss_test.item() / len(data.x_test_ptc))

            prediction = VAE.process_decoded_outputs(x_reconstructed, one_hot=vae_model.one_hot)
            accuracy = (prediction == data.x_test_ptc).float().mean().item()
            vae_model.accuracy_list_epoch_test.append(accuracy)

        #save
        pickle.dump(vae_model, open(initial_path+'/saved/Pickle/'+sweep_point['save_name'], 'wb'))

        # print loss on test set
        vae_model.loss_list_epoch.append(sum(vae_model.loss_list_batch[-n_batches:]) / (n_batches*data_loader.batch_size))
        print(f'Epoch {epoch+1}/{n_epochs}, \t loss={vae_model.loss_list_epoch[-1]:.6f}, \t loss test={vae_model.loss_list_epoch_test[-1]:.6f}, \t accuracy test={vae_model.accuracy_list_epoch_test[-1]:.6f},                                          ', end='\n')
        
        torch.cuda.synchronize()
        torch.cuda.empty_cache()




 ## ----- NEW SWEEP POINT ----- ## 

{'emb_dim': 512, 'latent_dim': 6, 'RNN_type': 'LSTM', 'RNN_units': [512, 512], 'bidirectional': True, 'dropout': 0, 'num_layers': 1, 'one_hot': False, 'lr': 0.0003, 'n_epochs': 10, 'save_name': 'vae_6.pickle'} 



Epoch 1/10, 	 loss=101.309856, 	 loss test=52.574729, 	 accuracy test=0.268745,                                                   
Epoch 2/10, 	 loss=49.296483, 	 loss test=46.894972, 	 accuracy test=0.288736,                                                   
Epoch 3/10, 	 loss=43.948999, 	 loss test=42.476243, 	 accuracy test=0.317935,                                                   
Epoch 4/10, batch 2762/5359, loss=40.4994, 	 total time left = 0h 11m 51s, 	 epoch time left = 0h 0m 53s                         

KeyboardInterrupt: 

In [None]:
vae_model

VAE(
  (encoder): Encoder(
    (embedding): Embedding(18, 512)
    (lstm_0): LSTM(512, 32, batch_first=True, bidirectional=True)
    (lstm_1): LSTM(64, 512, batch_first=True, bidirectional=True)
    (fc_mean): Linear(in_features=1024, out_features=6, bias=True)
    (fc_log_var): Linear(in_features=1024, out_features=6, bias=True)
  )
  (decoder): Decoder(
    (fc_pre): Linear(in_features=6, out_features=512, bias=True)
    (lstm_0): LSTM(512, 512, batch_first=True, bidirectional=True)
    (lstm_1): LSTM(1024, 32, batch_first=True, bidirectional=True)
    (fc_post): Linear(in_features=64, out_features=12, bias=True)
  )
)

In [None]:
# def check_if_tensor_in_data(data, tensor):
#     in_test = torch.any(torch.all(data.x_test == tensor, dim=-1))
#     in_train = torch.any(torch.all(data.x_train == tensor, dim=-1))
#     return in_test or in_train

# def get_random_tensor(data, length=12, max_int=17):
#     tensor = torch.randint(0, max_int+1, (length,))
#     while check_if_tensor_in_data(data, tensor):
#         tensor = torch.randint(0, max_int+1, (length,))
#     return tensor

# def get_random_tensors(data, n_tensors, length=12, max_int=17):
#     tensors = []
#     for i in range(n_tensors):
#         print(i+1, '', end='\r')
#         tensors.append(get_random_tensor(data, length, max_int))
#     return torch.stack(tensors)

# data.to('cpu')
# random_x = get_random_tensors(data, n_tensors=100000, length=12, max_int=17)
# zeros_y = torch.zeros(len(random_x))

# #change type of tensors of random_x and zeros_y to old_x.dtype and old_y.dtype
# random_x = random_x.to(data.x_train.dtype)
# zeros_y = zeros_y.to(data.y_train.dtype)

# new_x = torch.cat((data.x_train.to('cpu'), random_x.to('cpu')), dim=0).to(device)
# new_y = torch.cat((data.y_train.to('cpu'), zeros_y.to('cpu')), dim=0).to(device)

# from torch.utils.data import DataLoader, TensorDataset
# new_data_loader = DataLoader(TensorDataset(new_x, new_y), batch_size=data_loader.batch_size, shuffle=True)

# data.to(device)

In [None]:
data.x_test[:100]

tensor([[12, 16, 11,  ..., 16, 10,  9],
        [ 5,  5,  2,  ..., 10, 13, 11],
        [ 5, 13, 16,  ..., 13,  4, 11],
        ...,
        [ 3,  5,  8,  ...,  7,  0, 12],
        [16, 16,  0,  ...,  5,  4,  4],
        [16, 12, 12,  ..., 11, 17,  5]], device='cuda:0', dtype=torch.int32)

In [None]:
x_reconstructed_test, mu_test, logvar_test = vae_model(data.x_test[:100])
x_reconstructed_test

tensor([[14.1442, 15.6111, 12.0678,  ..., 15.9444, 10.2537, 12.3579],
        [ 3.8819,  4.3333,  3.6080,  ..., 13.7256, 11.5863, 13.3234],
        [ 4.3966, 14.2658, 16.4740,  ..., 11.7938,  2.7009, 12.2304],
        ...,
        [ 3.0459,  5.2259,  7.3096,  ...,  5.0524,  1.6652, 13.8624],
        [15.0855, 16.4794,  0.0000,  ...,  4.8057,  4.4658,  3.9144],
        [15.3774, 11.2329, 11.7390,  ..., 11.8292, 13.1665,  4.0878]],
       device='cuda:0', grad_fn=<ReluBackward0>)

In [None]:
loss = VAE.loss_function(x_reconstructed_test, data.x_test[:100], mu_test, logvar_test, one_hot=vae_model.one_hot, reduction='mean')
print(f'Loss on test set: {loss.item():.6f}')

Loss on test set: 1780.327393


In [None]:
pred = VAE.process_decoded_outputs(x_reconstructed_test, one_hot=vae_model.one_hot)
pred

tensor([[14, 16, 12,  ..., 16, 10, 12],
        [ 4,  4,  4,  ..., 14, 12, 13],
        [ 4, 14, 16,  ..., 12,  3, 12],
        ...,
        [ 3,  5,  7,  ...,  5,  2, 14],
        [15, 16,  0,  ...,  5,  4,  4],
        [15, 11, 12,  ..., 12, 13,  4]], device='cuda:0', dtype=torch.int32)

In [None]:
accuracy = (pred == data.x_test[:100]).float().mean().item()
accuracy

0.30000001192092896

In [None]:
for x, p in zip(data.x_test, pred):
    print(x.tolist(), p.tolist())

[12, 16, 11, 3, 12, 3, 2, 1, 13, 16, 10, 9] [14, 16, 12, 3, 12, 2, 4, 3, 14, 16, 10, 12]
[5, 5, 2, 5, 15, 3, 16, 15, 16, 10, 13, 11] [4, 4, 4, 4, 14, 4, 16, 14, 15, 14, 12, 13]
[5, 13, 16, 16, 3, 7, 2, 7, 10, 13, 4, 11] [4, 14, 16, 16, 2, 7, 3, 7, 13, 12, 3, 12]
[10, 3, 10, 0, 8, 3, 5, 2, 4, 16, 5, 11] [13, 4, 9, 3, 10, 2, 5, 4, 6, 16, 7, 12]
[17, 16, 11, 7, 11, 2, 3, 16, 12, 0, 10, 2] [16, 16, 11, 8, 13, 1, 3, 15, 13, 2, 13, 3]
[16, 16, 1, 10, 5, 15, 6, 10, 12, 16, 5, 11] [15, 15, 0, 11, 3, 15, 5, 10, 13, 15, 4, 12]
[12, 16, 11, 11, 0, 9, 15, 5, 7, 16, 3, 2] [15, 16, 11, 15, 0, 11, 15, 6, 8, 14, 2, 2]
[3, 5, 11, 5, 13, 13, 0, 13, 17, 17, 0, 1] [5, 4, 10, 5, 13, 14, 2, 12, 15, 15, 4, 4]
[2, 14, 3, 5, 8, 13, 6, 5, 2, 7, 0, 6] [4, 16, 3, 5, 9, 14, 3, 4, 4, 7, 1, 4]
[16, 5, 14, 9, 3, 1, 0, 16, 3, 17, 5, 14] [15, 3, 15, 10, 3, 3, 3, 15, 5, 14, 5, 13]
[12, 5, 0, 16, 14, 2, 11, 6, 11, 16, 4, 9] [12, 5, 2, 16, 12, 3, 10, 3, 12, 16, 4, 10]
[13, 7, 12, 2, 0, 6, 6, 2, 9, 5, 13, 3] [13, 5, 12, 1,

In [None]:
for x, p in zip(data.x_test, x_reconstructed_test):
    print(x.tolist(), p.tolist())

[12, 16, 11, 3, 12, 3, 2, 1, 13, 16, 10, 9] [14.144230842590332, 15.611113548278809, 12.067825317382812, 3.142987012863159, 11.509044647216797, 1.5868299007415771, 4.047634601593018, 3.0503456592559814, 14.309393882751465, 15.94442367553711, 10.253666877746582, 12.35794734954834]
[5, 5, 2, 5, 15, 3, 16, 15, 16, 10, 13, 11] [3.8819241523742676, 4.333313465118408, 3.607973337173462, 3.751281261444092, 13.501014709472656, 3.7417407035827637, 15.623453140258789, 13.998591423034668, 14.970654487609863, 13.72558307647705, 11.586260795593262, 13.32337760925293]
[5, 13, 16, 16, 3, 7, 2, 7, 10, 13, 4, 11] [4.396600723266602, 14.265751838684082, 16.47396469116211, 16.02397346496582, 1.8799333572387695, 7.235604763031006, 3.362311601638794, 6.559706211090088, 13.185684204101562, 11.79377269744873, 2.7009334564208984, 12.230436325073242]
[10, 3, 10, 0, 8, 3, 5, 2, 4, 16, 5, 11] [13.027217864990234, 3.769914150238037, 8.802730560302734, 3.3196163177490234, 9.94933795928955, 2.28277850151062, 5.0766