
## AlphaFold Simplified


In [1]:

import torch
import torch.nn as nn


In [2]:

# Evoformer Block (simplified)
class EvoformerBlock(nn.Module):
    """One simplified Evoformer layer: self-attention, then a position-wise MLP.

    Each sub-layer is wrapped in a residual connection followed by layer
    normalisation, i.e. ``LayerNorm(x + sublayer(x))``.

    Args:
        embed_dim: Feature width of the input and output.
        num_heads: Number of attention heads passed to ``nn.MultiheadAttention``.
    """

    def __init__(self, embed_dim, num_heads):
        super().__init__()
        hidden_dim = 4 * embed_dim  # the MLP expands 4x, then projects back
        self.attention = nn.MultiheadAttention(embed_dim, num_heads)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, embed_dim),
        )
        self.layer_norm1 = nn.LayerNorm(embed_dim)
        self.layer_norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        """Apply the block to ``x`` and return a tensor of the same shape.

        ``nn.MultiheadAttention`` is built with its default layout, so ``x`` is
        expected sequence-first: [Sequence Length, Batch, Embedding Dim].
        """
        # Self-attention sub-layer; the returned attention weights are not used.
        attended = self.attention(x, x, x)[0]
        normed = self.layer_norm1(x + attended)
        # Feed-forward sub-layer with its own residual + norm.
        return self.layer_norm2(normed + self.feed_forward(normed))


In [3]:

# AlphaFold-like Model
class AlphaFoldSimplified(nn.Module):
    """Toy AlphaFold-style network: residue tokens in, one 3D point per residue out.

    Pipeline: token embedding -> stack of ``EvoformerBlock`` layers -> linear
    head that emits three values for every sequence position.

    Args:
        embed_dim: Width of the residue embedding and of every block.
        num_heads: Attention heads used inside each block.
        num_layers: Number of stacked ``EvoformerBlock`` layers.
        num_amino_acids: Size of the token vocabulary. Defaults to 20 (the
            value that was previously hard-coded); increase it to reserve ids
            for padding or unknown residues.
        verbose: When True (the default, matching the original behaviour)
            ``forward`` prints intermediate tensor shapes on every call.
            Pass False to keep training logs readable.
    """

    def __init__(self, embed_dim, num_heads, num_layers, num_amino_acids=20, verbose=True):
        super().__init__()
        self.verbose = verbose
        self.embedding = nn.Embedding(num_amino_acids, embed_dim)
        self.layers = nn.ModuleList(
            [EvoformerBlock(embed_dim, num_heads) for _ in range(num_layers)]
        )
        self.structure_head = nn.Linear(embed_dim, 3)  # Predict 3D coordinates

    def forward(self, sequence):
        """Predict coordinates for a batch of token sequences.

        Args:
            sequence: Integer tensor of token ids, shape [Batch, Sequence Length].
                Ids must be valid indices into the embedding table.

        Returns:
            Float tensor of shape [Batch, Sequence Length, 3].
        """
        # Embed amino acid sequence
        x = self.embedding(sequence)  # [Batch, Sequence Length, Embedding Dim]
        if self.verbose:
            print(f"Shape after embedding: {x.shape}")
        # The blocks use sequence-first attention, so swap the first two axes.
        x = x.permute(1, 0, 2)  # [Sequence Length, Batch, Embedding Dim]
        for layer in self.layers:
            x = layer(x)
        x = x.permute(1, 0, 2)  # Back to [Batch, Sequence Length, Embedding Dim]
        coordinates = self.structure_head(x)  # [Batch, Sequence Length, 3]
        if self.verbose:
            print(f"Shape after structure head: {coordinates.shape}")
        return coordinates


In [4]:

# Generate Random Data
def generate_random_data(batch_size, seq_length, embed_dim):
    """Create one synthetic batch of token sequences and target coordinates.

    Args:
        batch_size: Number of sequences in the batch.
        seq_length: Number of positions per sequence.
        embed_dim: Accepted for call-site compatibility; not used here.

    Returns:
        ``(sequences, coordinates)`` where ``sequences`` holds random integer
        ids in [0, 20) with shape (batch_size, seq_length) and ``coordinates``
        holds standard-normal samples with shape (batch_size, seq_length, 3).
    """
    token_shape = (batch_size, seq_length)
    # Draw the tokens first, then the targets, so seeded runs stay reproducible.
    sequences = torch.randint(low=0, high=20, size=token_shape)
    coordinates = torch.randn((*token_shape, 3))
    print(f"Generated sequences shape: {sequences.shape}")
    print(f"Generated coordinates shape: {coordinates.shape}")
    return sequences, coordinates


In [5]:

# Training Loop
def train_model(model, optimizer, criterion, epochs, batch_size, seq_length, embed_dim,
                batches_per_epoch=10):
    """Train ``model`` on freshly generated random batches.

    Every step draws a new batch from ``generate_random_data``, runs a forward
    and backward pass, and applies one optimizer update. After each epoch the
    mean batch loss is printed.

    Args:
        model: Module mapping token sequences to predicted coordinates.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable taking ``(prediction, target)``.
        epochs: Number of passes to run.
        batch_size: Sequences per generated batch.
        seq_length: Positions per generated sequence.
        embed_dim: Forwarded to ``generate_random_data``.
        batches_per_epoch: Simulated batches per epoch. Defaults to 10, the
            value that used to be hard-coded in both the loop bound and the
            averaging divisor.

    Raises:
        ValueError: If ``batches_per_epoch`` is smaller than 1 (the mean loss
            would otherwise divide by zero).
    """
    if batches_per_epoch < 1:
        raise ValueError("batches_per_epoch must be at least 1")

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for _ in range(batches_per_epoch):
            # Generate random data
            sequences, coordinates = generate_random_data(batch_size, seq_length, embed_dim)
            optimizer.zero_grad()
            # Forward pass
            pred_coordinates = model(sequences)
            # Compute loss
            loss = criterion(pred_coordinates, coordinates)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Divide by the same count that drove the loop so the mean stays correct.
        print(f"Epoch {epoch + 1}, Loss: {total_loss / batches_per_epoch:.4f}")


In [6]:

# Usage Example
def use_model(model, sequence):
    """Run ``model`` on ``sequence`` for inference and return its output.

    The model is switched to evaluation mode (and left in it), and the forward
    pass runs with gradient tracking disabled.
    """
    model.eval()
    with torch.no_grad():
        return model(sequence)


In [7]:


# Model Hyperparameters
# Seed PyTorch's global RNG first: both the weight initialisation and the
# synthetic training data are random, so without a seed no two runs match.
# (Kept above the assignments so the cell ends on an assignment and does not
# echo the Generator object returned by torch.manual_seed.)
seed = 42
torch.manual_seed(seed)

embed_dim = 128   # embedding / model width; must be divisible by num_heads
num_heads = 8     # attention heads per Evoformer block
num_layers = 6    # number of stacked Evoformer blocks
batch_size = 16   # sequences per synthetic batch
seq_length = 64   # residues per sequence
epochs = 5        # training epochs


In [10]:

# Build the network, then the loss and the optimizer used to train it
model = AlphaFoldSimplified(
    embed_dim=embed_dim,
    num_heads=num_heads,
    num_layers=num_layers,
)
criterion = nn.MSELoss()  # mean squared error between predicted and target coordinates
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [11]:

# Run the training loop with the hyperparameters configured above
train_model(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    epochs=epochs,
    batch_size=batch_size,
    seq_length=seq_length,
    embed_dim=embed_dim,
)


Generated sequences shape: torch.Size([16, 64])
Generated coordinates shape: torch.Size([16, 64, 3])
Shape after embedding: torch.Size([16, 64, 128])
Shape after structure head: torch.Size([16, 64, 3])
Generated sequences shape: torch.Size([16, 64])
Generated coordinates shape: torch.Size([16, 64, 3])
Shape after embedding: torch.Size([16, 64, 128])
Shape after structure head: torch.Size([16, 64, 3])
Generated sequences shape: torch.Size([16, 64])
Generated coordinates shape: torch.Size([16, 64, 3])
Shape after embedding: torch.Size([16, 64, 128])
Shape after structure head: torch.Size([16, 64, 3])
Generated sequences shape: torch.Size([16, 64])
Generated coordinates shape: torch.Size([16, 64, 3])
Shape after embedding: torch.Size([16, 64, 128])
Shape after structure head: torch.Size([16, 64, 3])
Generated sequences shape: torch.Size([16, 64])
Generated coordinates shape: torch.Size([16, 64, 3])
Shape after embedding: torch.Size([16, 64, 128])
Shape after structure head: torch.Size([16

In [12]:


# Smoke-test inference on one randomly generated sequence (a batch of one)
test_sequence = torch.randint(low=0, high=20, size=(1, seq_length))
print(f"\nTest sequence shape: {test_sequence.shape}")
predicted_coordinates = use_model(model, test_sequence)

# Only the output shape is reported, not the coordinate values themselves
print("\nPredicted 3D Coordinates:")
print(predicted_coordinates.shape)



Test sequence shape: torch.Size([1, 64])
Shape after embedding: torch.Size([1, 64, 128])
Shape after structure head: torch.Size([1, 64, 3])

Predicted 3D Coordinates:
torch.Size([1, 64, 3])


In [13]:

# Scratch cell: a bare triple-quoted string holding a pasted sample of the
# shape messages printed by the training and test cells. It is not a comment:
# as the only expression in the cell it evaluates to a str, which the notebook
# echoes back as the cell's output.
'''

Generated sequences shape: torch.Size([16, 64])
Generated coordinates shape: torch.Size([16, 64, 3])
Shape after embedding: torch.Size([16, 64, 128])
Shape after structure head: torch.Size([16, 64, 3])
Test sequence shape: torch.Size([1, 64])

'''


'\n\nGenerated sequences shape: torch.Size([16, 64])\nGenerated coordinates shape: torch.Size([16, 64, 3])\nShape after embedding: torch.Size([16, 64, 128])\nShape after structure head: torch.Size([16, 64, 3])\nTest sequence shape: torch.Size([1, 64])\n\n'