In [2]:
import torch
import torch.nn as nn
import numpy as np


In [3]:
# Fix the RNG state of both libraries so every run starts identically
np.random.seed(1)
torch.manual_seed(1)

# Truth table inputs: one row per sample, laid out as (a, b, constant 1).
# Built as a float32 NumPy array and wrapped as a tensor in one step.
X = torch.from_numpy(
    np.array(
        [[0, 0, 1],
         [0, 1, 1],
         [1, 0, 1],
         [1, 1, 1]],
        dtype=np.float32,
    )
)

# Targets as a (4, 1) column, one label per row of X
# (the labels equal the logical OR of the first two input columns).
y = torch.from_numpy(
    np.array([0, 1, 1, 1], dtype=np.float32).reshape(-1, 1)
)

In [5]:
# Define the Transformer-based logic-gate classifier
class TransformerLogicGate(nn.Module):
    """Binary classifier built from a single Transformer encoder layer.

    Each sample is treated as a sequence of length 1: the input vector is
    linearly embedded, passed through one ``nn.TransformerEncoderLayer``,
    and mapped to a single probability with a linear layer and a sigmoid.

    Args:
        input_dim: Number of features per sample.
        hidden_dim: Embedding size, used as the encoder's ``d_model``; the
            feed-forward width is ``2 * hidden_dim``. Must be divisible by
            ``num_heads`` (a requirement of multi-head attention).
        num_heads: Number of attention heads in the encoder layer.
    """

    def __init__(self, input_dim=3, hidden_dim=8, num_heads=1):
        super().__init__()
        # Embedding layer to project input to hidden dimension
        self.embedding = nn.Linear(input_dim, hidden_dim)
        # Transformer encoder layer (batch-first: inputs are (batch, seq, dim))
        self.transformer = nn.TransformerEncoderLayer(
            d_model=hidden_dim,
            nhead=num_heads,
            dim_feedforward=hidden_dim*2,
            batch_first=True
        )
        # Output layer
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one probability per sample.

        Args:
            x: Tensor of shape ``(batch, 1, input_dim)``. A 2-D tensor of
                shape ``(batch, input_dim)`` is also accepted and is given
                the length-1 sequence axis automatically.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in ``(0, 1)`` (the
            output already has the sigmoid applied, so pair it with
            ``nn.BCELoss`` rather than ``nn.BCEWithLogitsLoss``).
        """
        # Without this, a 2-D input would be interpreted by the encoder layer
        # as ONE unbatched sequence, letting the samples attend to each other.
        if x.dim() == 2:
            x = x.unsqueeze(1)  # (batch, input_dim) -> (batch, 1, input_dim)

        x = self.embedding(x)  # (batch, seq_len=1, hidden_dim)
        x = self.transformer(x)  # (batch, seq_len=1, hidden_dim)
        # Drops the sequence axis; squeeze(1) only has an effect when seq_len == 1.
        x = x.squeeze(1)  # (batch, hidden_dim)
        x = self.fc(x)  # (batch, 1)
        return self.sigmoid(x)


In [6]:
# Initialize model, loss, and optimizer
model = TransformerLogicGate()
# The model's forward pass already ends in a sigmoid, so the loss takes
# probabilities (BCELoss), not raw logits.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Reshape X to (batch, seq_len=1, input_dim).
# reshape (instead of unsqueeze) keeps this cell idempotent: re-running it on
# an already-reshaped X leaves it at (4, 1, 3) rather than growing it to
# (4, 1, 1, 3).
X = X.reshape(-1, 1, X.shape[-1])  # Shape: (4, 1, 3)

In [7]:
# Training loop
# Named constants so the loop bound and the progress message cannot drift apart.
NUM_EPOCHS = 5000  # number of full-batch optimisation steps
LOG_EVERY = 1000   # print the loss once every this many epochs

model.train()  # switch the model to training mode
for epoch in range(NUM_EPOCHS):
    # Clear gradients left over from the previous step before accumulating new ones
    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % LOG_EVERY == 0:
        print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}')


Epoch [1000/5000], Loss: 0.0000
Epoch [2000/5000], Loss: 0.0000
Epoch [3000/5000], Loss: 0.0000
Epoch [4000/5000], Loss: 0.0000
Epoch [5000/5000], Loss: 0.0000


In [10]:
# Evaluate: switch to inference mode and run the forward pass without
# building an autograd graph.
model.eval()
with torch.no_grad():
    predictions = model(X)

# Round the predicted probabilities to hard 0/1 labels for display
print("\nOutput After Training:")
print(torch.round(predictions).numpy())



Output After Training:
[[0.]
 [1.]
 [1.]
 [1.]]


In [11]:
# Dump every trainable parameter of the model as a NumPy array
print("\nModel Weights:")
for name, param in model.named_parameters():
    # Skip parameters that are excluded from gradient updates
    if not param.requires_grad:
        continue
    print(f"{name}:\n{param.data.numpy()}")


Model Weights:
embedding.weight:
[[ 0.11108943 -0.33844364 -0.03082856]
 [-0.18726821 -0.88523227  0.26892415]
 [ 0.11290883  0.5221304   0.0326332 ]
 [-0.41176346 -0.25387388 -0.03718068]
 [ 0.40241894  0.15432599  0.20127067]
 [ 0.40189672  0.33079574 -0.2228381 ]
 [ 0.5907744   0.35460702 -0.26542073]
 [-0.3258588  -0.13497585 -0.23730388]]
embedding.bias:
[-0.10391381 -0.04967952  0.29654732  0.24814929 -0.32102492  0.13737954
  0.11087943  0.55937827]
transformer.self_attn.in_proj_weight:
[[-0.2799025  -0.3682468   0.415644    0.02261052  0.29681206  0.08973382
   0.13926673  0.32344255]
 [ 0.41058898 -0.287339    0.05413913  0.3231389   0.3136896   0.26900414
  -0.31337947 -0.31181428]
 [-0.26187378  0.05439867  0.43156853 -0.27352354  0.2307442  -0.23966739
  -0.40712532 -0.09203237]
 [ 0.24948932  0.4019981  -0.26891768  0.09396861  0.37362942  0.28693742
   0.2698635   0.3077129 ]
 [ 0.27388763  0.11184561 -0.2961028  -0.3636261  -0.19841534 -0.05042255
  -0.2654161   0.15841