In [1]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import lightning as L

## Dataloader

In [2]:
training_path="/pscratch/sd/l/lemonboy/PDB70_training_ver_A/eigenvalue_training/saxs_r/"

In [3]:
test=os.path.join(training_path,'6LN0_A.pdb.pr.csv')

In [4]:
pd.read_csv(test)

Unnamed: 0,r,P(r)
0,0.0,0.000000e+00
1,0.5,0.000000e+00
2,1.0,0.000000e+00
3,1.5,3.420565e-04
4,2.0,1.758611e-04
...,...,...
240,120.0,1.840159e-07
241,120.5,7.102368e-08
242,121.0,3.551184e-08
243,121.5,6.456698e-09


In [5]:
class SAXSDataset(Dataset):
    def __init__(self, csv_list):
        self.csv_list = csv_list
        
    def __len__(self):
        return len(self.csv_list)
    def __getitem__(self, idx):
        data = pd.read_csv(self.csv_list[idx])
        # The first point is always zero so I didn't include it into the dataset
        features = torch.tensor(np.pad(data['P(r)'].values[1:], (0, 512-len(data['P(r)'].values[1:])),constant_values=(0,0)), dtype=torch.float32)
        return features, features


In [6]:
import glob
csv_list = glob.glob(training_path+'*.csv')

In [7]:
#max_length=[]
#for i in csv_list:
    #pd_data=pd.read_csv(i)
    #max_length.append(len(pd_data))
    
#print(max(max_length))
#max_length.index(3179)
#indices_of_largest_10 = sorted(range(len(max_length)), key=lambda i: max_length[i], reverse=True)[:10]
#for i in indices_of_largest_10:
    #print(max_length[i])
#csv_list[42033]

In [8]:
batch_size = 32
shuffle = True
validation_split= 0.2

In [9]:
dataset = SAXSDataset(csv_list)

In [10]:
num_samples = len(dataset)
num_validation_samples = int(validation_split * num_samples)
num_train_samples = num_samples - num_validation_samples

In [11]:
train_dataset, val_dataset = random_split(dataset, [num_train_samples, num_validation_samples])

In [12]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [13]:
train_dataset

<torch.utils.data.dataset.Subset at 0x7ff2362bd940>

In [14]:
#for batch_id, (data, target) in enumerate(train_dataloader):
#    print(batch_id)
#    print("datasize is %d" % len(data))
#    print("y size is %d" % len(target))
    

## VAE model

In [15]:
'''
class VAE(nn.Module):
    # For P(r) the latent_size should be between 6-12. Longer sequence should have a larger
    # latent. For testing purpose we will set latent_size as 10
    
    def __init__(self,input_size=245, hidden_size=20, latent_size=10):
        super(VAE, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.latent_size = latent_size
        self.encoder_conv1 = nn.Conv1d(in_channels=1, out_channels=hidden_size,
                                      kernel_size=3, stride=1, padding=1)
        # The output should be 244,20
        self.encoder_avgpool = nn.AvgPool1d(kernal_size=4, stride=4)
        # The output shoule be 61,20
        self.encoder_fc_mu = nn.Linear(hidden_size, latent_size)
        self.encoder_fc_var = nn.Linear(hidden_size, latent_size)
        
        self.decoder_fc1 = nn.Linear(latent_size, hidden_size)
        self.decoder_fc2 = nn.Linear(hidden_size, input_size)
        
    def encode(self, x):
        x = F.relu(self.encoder_conv1(x.unsqueeze(1)))
        x = self.encoder_maxpool(x)
        mu = self.encoder_fc_mu(x)
        log_var self.encoder_fc_var(x)
    
    def decode(self, z):
        z = F.relu(self.decoder_fc1(z))
        return torch.sigmoid(self.decoder_fc2(z))
    
    def forward(self, x):

        x_encoded = self.encoder(x)
        mu, log_var = self.fc_mu(x_encoded), self.fc_var(x_encoded)

        std = torch.exp(log_var / 2)
        q = torch.distributions.Normal(mu, std)
        z = q.rsample()
        
        x_hat = self.decoder(z)
'''     

'\nclass VAE(nn.Module):\n    # For P(r) the latent_size should be between 6-12. Longer sequence should have a larger\n    # latent. For testing purpose we will set latent_size as 10\n    \n    def __init__(self,input_size=245, hidden_size=20, latent_size=10):\n        super(VAE, self).__init__()\n        self.input_size = input_size\n        self.hidden_size = hidden_size\n        self.latent_size = latent_size\n        self.encoder_conv1 = nn.Conv1d(in_channels=1, out_channels=hidden_size,\n                                      kernel_size=3, stride=1, padding=1)\n        # The output should be 244,20\n        self.encoder_avgpool = nn.AvgPool1d(kernal_size=4, stride=4)\n        # The output shoule be 61,20\n        self.encoder_fc_mu = nn.Linear(hidden_size, latent_size)\n        self.encoder_fc_var = nn.Linear(hidden_size, latent_size)\n        \n        self.decoder_fc1 = nn.Linear(latent_size, hidden_size)\n        self.decoder_fc2 = nn.Linear(hidden_size, input_size)\n        \n

## Self-Attention

In [16]:
device=torch.device("cuda")

In [17]:
x=np.random.rand(100)

In [18]:
x.shape

(100,)

In [19]:
#h_=torch.bmm(x,x)

In [20]:
class SAXS_to_Eigen(nn.Module):
    def __init__(self, hidden_features, output_dim):
        super().__init__()
        self.channels=hidden_features
        #self.upscale = nn.Linear(1,hidden_features)
        self.q = nn.Linear(1, hidden_features)
        self.k = nn.Linear(1, hidden_features)
        self.v = nn.Linear(1, hidden_features)
        self.out_layer = nn.Linear(hidden_features, output_dim)
        #self.orthognal_vector = nn.utils.parametrizations.orthogonal(nn.Linear(output_dim,output_dim))
    def forward(self, x):
        h_ = x[:, :, np.newaxis]
        #print(x.shape, h_.shape)
        #h_ = self.upscale(h_)
        q = self.q(h_)
        k = self.k(h_)
        v = self.v(h_)
        w_ = torch.bmm(q,k.permute(0,2,1))
        w_ = w_ * (self.channels**(-0.5))
        w_ = torch.nn.functional.softmax(w_,dim=2)
        h_ = torch.bmm(w_,v)
        h_ = self.out_layer(h_)
        h_ = nn.ReLU()(h_)
        #Q = self.orthognal_vector.weight
        print(h_.shape)
        return h_
        #return torch.matmul(h_,Q)
        #return torch.matmul(h_,Q)

In [21]:
class SAXS_to_Eigen_Cov(nn.Module):
    def __init__(self, hidden_features, output_dim):
        super().__init__()
        self.channels=hidden_features
        self.q = nn.Conv1d(in_channels=1, out_channels=hidden_features, kernel_size=1)
        self.k = nn.Conv1d(in_channels=1, out_channels=hidden_features, kernel_size=1)
        self.v = nn.Conv1d(in_channels=1, out_channels=hidden_features, kernel_size=1)
        self.out_layer = nn.Linear(hidden_features, output_dim)
    def forward(self, x):
        h_ = x[:, np.newaxis, :]
        #print(x.shape, h_.shape)
        #h_ = self.upscale(h_)
        q = self.q(h_)
        k = self.k(h_)
        v = self.v(h_)
        print(k.shape)
        w_ = torch.bmm(q.permute(0,2,1),k)
        w_ = w_ * (self.channels**(-0.5))
        w_ = torch.nn.functional.softmax(w_,dim=2)
        print(w_.shape)
        print(v.shape)
        h_ = torch.bmm(w_,v.permute(0,2,1))   
        h_ = self.out_layer(h_)
        h_ = nn.ReLU()(h_)
        print(h_.shape)
        return h_

In [37]:
class SAXS_to_Eigen_Cov(nn.Module):
    def __init__(self, input_shape, hidden_features, output_dim):
        super().__init__()
        self.channels=hidden_features
        self.q = nn.Conv1d(in_channels=1, out_channels=hidden_features, kernel_size=1)
        self.k = nn.Conv1d(in_channels=1, out_channels=hidden_features, kernel_size=1)
        self.v = nn.Conv1d(in_channels=1, out_channels=hidden_features, kernel_size=1)
        self.out_layer = nn.Linear(hidden_features, output_dim)
        self.out_layer_2 = nn.Linear(input_shape, output_dim)
        
    def gram_schmidt(self, vv):
        
        def projection(u, v):
            return (v * u).sum() / (u * u).sum() * u
        
        batch_size = vv.size(0)
        nk = vv.size(1)

        uu = torch.zeros_like(vv, device=vv.device)
        for i in range(batch_size):
            ui = vv[i].clone()
            uu[i, :, 0] = ui[:, 0].clone()
            for k in range(1, nk):
                vk = vv[i, k].clone()
                uk = 0
                for j in range(0, k):
                    uj = uu[i, :, j].clone()
                    uk = uk + projection(uj, vk)
                uu[i, :, k] = vk - uk
            for k in range(nk):
                uk = uu[i, :, k].clone()
                uu[i, :, k] = uk / uk.norm()
        return uu

    def forward(self, x):
        h_ = x[:, np.newaxis, :]
        #print(x.shape, h_.shape)
        #h_ = self.upscale(h_)
        q = self.q(h_)
        k = self.k(h_)
        v = self.v(h_)
        print(k.shape)
        w_ = torch.bmm(q.permute(0,2,1),k)
        w_ = w_ * (self.channels**(-0.5))
        w_ = torch.nn.functional.softmax(w_,dim=2)
        print(w_.shape)
        print(v.shape)
        h_ = torch.bmm(w_,v.permute(0,2,1))   
        h_ = self.out_layer(h_)
        h_ = self.out_layer_2(h_.permute(0,2,1))
        h_ = self.gram_schmidt(h_)
        return h_

In [36]:
import torch

def gram_schmidt_batch(vv):
    def projection(u, v):
        return (v * u).sum() / (u * u).sum() * u

    batch_size = vv.size(0)
    nk = vv.size(1)
    dim = vv.size(2)

    uu = torch.zeros_like(vv)
    for i in range(batch_size):
        # Apply Gram-Schmidt process to each sample in the batch
        u = vv[i].clone()
        uu[i, :, 0] = u[:, 0].clone()
        for k in range(1, nk):
            v = vv[i, k].clone()
            uk = 0
            for j in range(0, k):
                uj = uu[i, :, j].clone()
                uk = uk + projection(uj, v)
            uu[i, :, k] = v - uk
        for k in range(nk):
            uk = uu[i, :, k].clone()
            uu[i, :, k] = uk / uk.norm()
    return uu

# Example usage
batch_data = torch.randn(2, 256, 256)  # Example batch data with dimensions (2, 256, 256)
processed_batch = gram_schmidt_batch(batch_data)
print("Processed batch shape:", processed_batch.shape) 

Processed batch shape: torch.Size([2, 256, 256])


In [None]:
#class Decoder()

In [38]:
model = SAXS_to_Eigen_Cov(512,64,256)

## Training

In [24]:
learning_rate = 1e-3
loss_fn = nn.CrossEntropyLoss()
model = SAXS_to_Eigen(64,128)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [24]:
for batch_id,(data,target) in enumerate(train_dataloader):
    print(batch_id,data,target)
    break

0 tensor([[0.0000e+00, 0.0000e+00, 8.3879e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 4.4323e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 1.3920e-07, 3.3187e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        ...,
        [0.0000e+00, 2.1273e-08, 1.3552e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 9.7588e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 1.6695e-03,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00]]) tensor([[0.0000e+00, 0.0000e+00, 8.3879e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 0.0000e+00, 4.4323e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [0.0000e+00, 1.3920e-07, 3.3187e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        ...,
        [0.0000e+00, 2.1273e-08, 1.3552e-04,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
 

In [39]:
model(data[0:2])

torch.Size([2, 64, 512])
torch.Size([2, 512, 512])
torch.Size([2, 64, 512])


tensor([[[-0.0601, -0.0875,  0.0272,  ...,  0.0880, -0.0880,  0.0880],
         [-0.0719, -0.0140, -0.0960,  ..., -0.0495,  0.0495, -0.0495],
         [-0.0061,  0.0111, -0.0923,  ..., -0.1017,  0.1017, -0.1017],
         ...,
         [-0.0386, -0.0120,  0.0021,  ...,  0.0055, -0.0055,  0.0055],
         [ 0.0580, -0.0331,  0.0169,  ..., -0.0880,  0.0880, -0.0880],
         [-0.0761, -0.0308, -0.0189,  ...,  0.0055, -0.0055,  0.0055]],

        [[-0.0601, -0.0875,  0.0272,  ..., -0.0964,  0.0964, -0.0964],
         [-0.0719, -0.0140, -0.0960,  ...,  0.0999, -0.0999,  0.0999],
         [-0.0061,  0.0111, -0.0923,  ...,  0.0482, -0.0482,  0.0482],
         ...,
         [-0.0386, -0.0120,  0.0021,  ..., -0.0103,  0.0103, -0.0103],
         [ 0.0580, -0.0331,  0.0169,  ...,  0.0551, -0.0551,  0.0551],
         [-0.0761, -0.0308, -0.0189,  ...,  0.0620, -0.0620,  0.0620]]],
       grad_fn=<CopySlices>)

In [None]:
'''
size = len(train_dataloader.dataset)
model.train()
for batch_id, (data, target) in enumerate(train_dataloader):
    pred = model(data)
    loss = loss_fn(pred, target)#Shift one digit
    
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    
    print(f'Training Batch {batch_idx}: Data shape: {data.shape}, Target shape: {target.shape}')
    if batch % 100 == 0:
        loss, current = loss.item(), batch * batch_size + len(data)
        print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]" )
    if batch_id==1:
        break
'''

## Pytorch lightning

In [32]:
class SAXSEncoderLightning(L.LightningModule):
    def __init__(self, eigenvalue):
        super().__init__()
        self.eigenvalue=eigenvalue
    def training_step(self, batch, batch_idx):
        x, y = batch
        x = x.view(x.size(0), -1)
        loss = loss_fn(pred, target)
        self.log("train_loss",loss)
        return loss
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

In [33]:
SAXS= SAXSEncoderLightning(SAXS_to_Eigen(64,128))

In [34]:
trainer=L.Trainer()

/pscratch/sd/l/lemonboy/alphaflow/lib/python3.9/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /pscratch/sd/l/lemonboy/alphaflow/lib/python3.9/site ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/pscratch/sd/l/lemonboy/alphaflow/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one o

In [36]:
trainer.fit(model=SAXS, train_dataloaders=train_dataloader)

/pscratch/sd/l/lemonboy/alphaflow/lib/python3.9/site-packages/lightning/pytorch/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
You are using a CUDA device ('NVIDIA A100-PCIE-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: /pscratch/sd/l/lemonboy/alphaflow_develop/alphaflow/alphaflow/notebook/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type          | Params
---------------------------------------------
0 | eigenvalue | SAXS_to_Eigen | 8.7 K 
---------------------------------------------
8.7 K     Trainable params
0         Non-trainable params
8.7 K     Total params
0.035     Total estimat

Training: |          | 0/? [00:00<?, ?it/s]

NameError: name 'pred' is not defined

In [38]:
data[0].shape

torch.Size([512])

In [50]:
test=torch.rand(64,64)

In [57]:
test_orthogonal=nn.utils.parametrizations.orthogonal(nn.Linear(64,64))

In [58]:
Q = test_orthogonal.weight

In [62]:
Q

tensor([[ 0.0398, -0.1448, -0.1121,  ...,  0.0513, -0.0057, -0.0592],
        [-0.1119, -0.1731,  0.0243,  ..., -0.0794, -0.0856, -0.1963],
        [ 0.0984,  0.0217,  0.1215,  ...,  0.2400,  0.1108, -0.0297],
        ...,
        [-0.0209,  0.0513, -0.0187,  ..., -0.0663,  0.1545,  0.0543],
        [ 0.1879, -0.0800, -0.0381,  ...,  0.0574, -0.0467, -0.1177],
        [-0.0782, -0.2289,  0.0723,  ..., -0.1609,  0.3219, -0.0163]],
       grad_fn=<MmBackward0>)

In [60]:
Q.shape

torch.Size([64, 64])

In [53]:
orth=test_orthogonal(test)

In [61]:
torch.matmul(Q, torch.transpose(Q,0,1))

tensor([[ 1.0000e+00,  1.9035e-08,  1.4000e-07,  ..., -2.1942e-08,
          1.2855e-08, -3.4921e-08],
        [ 1.9035e-08,  1.0000e+00,  5.3175e-09,  ..., -6.2299e-08,
         -2.6486e-08,  2.7964e-08],
        [ 1.4000e-07,  5.3175e-09,  1.0000e+00,  ..., -1.3193e-08,
         -3.2332e-08, -3.1538e-08],
        ...,
        [-2.1942e-08, -6.2299e-08, -1.3193e-08,  ...,  1.0000e+00,
         -2.0197e-08,  1.8693e-08],
        [ 1.2855e-08, -2.6486e-08, -3.2332e-08,  ..., -2.0197e-08,
          1.0000e+00, -3.7568e-09],
        [-3.4921e-08,  2.7964e-08, -3.1538e-08,  ...,  1.8693e-08,
         -3.7568e-09,  1.0000e+00]], grad_fn=<MmBackward0>)

## cross attention between sequence and p(r)

In [None]:
one_hot_