In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Define the linear model
class LinearModel(nn.Module):
    """Affine model ``a * x + b`` with learnable ``a`` and ``b``.

    Both coefficients are ``(1, 1)`` parameters initialised from a standard
    normal distribution; they broadcast against the input in ``forward``.
    """

    def __init__(self):
        super().__init__()
        # nn.Parameter tracks gradients by default, so no explicit flag is needed.
        self.a = nn.Parameter(torch.randn(1, 1))  # Learnable slope a
        self.b = nn.Parameter(torch.randn(1, 1))  # Learnable intercept b

    def forward(self, x):
        """Return ``a * x + b`` for the input tensor ``x``."""
        scaled = self.a * x
        return scaled + self.b

# Goal: learn a and b such that a * x + b fits the targets.

In [3]:
# Reproducibility: LinearModel draws its initial a and b from the global RNG, so
# seed it before building the model; otherwise every run prints different numbers.
torch.manual_seed(0)

# Example data sampled exactly from y = 2x + 1, so training should approach a = 2, b = 1.
X = torch.tensor([[1.0], [2.0], [3.0], [4.0], [5.0]])  # Feature column, shape (5, 1)
y = torch.tensor([[3.0], [5.0], [7.0], [9.0], [11.0]])  # Target column, shape (5, 1)

# Instantiate the model
model = LinearModel()

# Mean-squared-error loss minimised with full-batch gradient descent
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    # Clear gradients from the previous step before accumulating new ones
    optimizer.zero_grad()

    # Forward pass
    y_pred = model(X)
    loss = criterion(y_pred, y)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Print progress every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Print the learned parameters
print(f'Learned parameter a: {model.a.item():.4f}')
print(f'Learned parameter b: {model.b.item():.4f}')

Epoch [100/1000], Loss: 0.0040
Epoch [200/1000], Loss: 0.0020
Epoch [300/1000], Loss: 0.0010
Epoch [400/1000], Loss: 0.0005
Epoch [500/1000], Loss: 0.0003
Epoch [600/1000], Loss: 0.0001
Epoch [700/1000], Loss: 0.0001
Epoch [800/1000], Loss: 0.0000
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000
Learned parameter a: 2.0019
Learned parameter b: 0.9930


In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

# Example data: random features with random binary labels. The labels are drawn
# independently of X, so any drop in training loss below ln(2) ≈ 0.693 reflects
# memorisation of this sample rather than a learnable relationship.
# Seed the global RNG first so the data (and the model initialisation that follows
# in this cell) is the same on every run.
torch.manual_seed(0)
X = torch.randn(100, 5)  # 100 samples, 5 features
y = torch.randint(0, 2, (100,)).float()  # Binary target in {0., 1.}, shape (100,)

# Define the neural network
class FeedForwardNet(nn.Module):
    """Feed-forward network: two ReLU hidden layers followed by a sigmoid output.

    Args:
        input_size: Number of input features per sample.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        output_size: Number of sigmoid outputs per sample.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super().__init__()
        # Layers are registered in the order forward() applies them.
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size2, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply fc1 -> ReLU -> fc2 -> ReLU -> fc3 -> sigmoid and return the result."""
        hidden1 = self.relu1(self.fc1(x))
        hidden2 = self.relu2(self.fc2(hidden1))
        logits = self.fc3(hidden2)
        return self.sigmoid(logits)

# Build the network: 5 input features -> 10 -> 8 hidden units -> 1 output
model = FeedForwardNet(5, 10, 8, 1)

# Binary cross-entropy on the sigmoid outputs, minimised with full-batch SGD
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# The predictions have shape (100, 1), so give the targets a matching trailing axis.
# unsqueeze returns a view of y, so this can be done once outside the loop.
targets = y.unsqueeze(1)

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass over the whole dataset
    y_pred = model(X)
    loss = criterion(y_pred, targets)

    # Backward pass, parameter update, then reset gradients for the next epoch
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Report the loss on every 100th epoch (epoch is 0-based)
    if epoch % 100 == 99:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 0.6873
Epoch [200/1000], Loss: 0.6841
Epoch [300/1000], Loss: 0.6822
Epoch [400/1000], Loss: 0.6809
Epoch [500/1000], Loss: 0.6799
Epoch [600/1000], Loss: 0.6789
Epoch [700/1000], Loss: 0.6779
Epoch [800/1000], Loss: 0.6769
Epoch [900/1000], Loss: 0.6759
Epoch [1000/1000], Loss: 0.6749


In [44]:
import torch

n_heads = 2
input_dim = 2
key_dim = 5
batch_size = 2
graph_size = 4

# Sample weight tensor: (n_heads, input_dim, key_dim) = (2, 2, 5)
W = torch.ones(n_heads, input_dim, key_dim)

# Sample input tensor: (batch_size, graph_size, input_dim) = (2, 4, 2)
h = torch.ones(batch_size, graph_size, input_dim)

# Merge the first two axes: (batch_size * graph_size, input_dim) = (8, 2)
hflat = h.reshape(-1, input_dim)

# matmul broadcasts the 2-D hflat across W's leading axis, so the result keeps that
# axis: (n_heads, batch_size * graph_size, key_dim) = (2, 8, 5) -- it is NOT
# (batch_size * graph_size, key_dim).
out = torch.matmul(hflat, W)

In [85]:
# Display hflat: h with its first two axes merged, shape (batch_size * graph_size, input_dim) = (8, 2).
hflat

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])

In [86]:
# Display W: the all-ones tensor of shape (n_heads, input_dim, key_dim) = (2, 2, 5).
W

tensor([[[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]]])

In [133]:
# Softmax along the last axis of an all-ones tensor: every length-2 slice has equal
# entries, so each one becomes [0.5, 0.5].
torch.softmax(torch.ones(3,2,2,2), dim=-1)

tensor([[[[0.5000, 0.5000],
          [0.5000, 0.5000]],

         [[0.5000, 0.5000],
          [0.5000, 0.5000]]],


        [[[0.5000, 0.5000],
          [0.5000, 0.5000]],

         [[0.5000, 0.5000],
          [0.5000, 0.5000]]],


        [[[0.5000, 0.5000],
          [0.5000, 0.5000]],

         [[0.5000, 0.5000],
          [0.5000, 0.5000]]]])

In [129]:
# NOTE(review): x is not assigned in any cell visible here, so this cell depends on
# leftover kernel state. The saved output is a (3, 2, 2) float tensor.
x

tensor([[[-2.3554,  0.9251],
         [-0.7913, -1.1901]],

        [[-0.7913, -0.3347],
         [-0.0350, -0.0901]],

        [[ 0.4142, -0.3328],
         [-0.8271,  1.6290]]])

In [130]:
# Batched matmul of a (3, 2, 2) tensor filled with 2s against x: every output row is
# twice the column sums of the matching 2x2 slice of x.
# NOTE(review): relies on x, which is not assigned in any cell visible here.
(torch.ones(3,2,2)+1) @ (x)

tensor([[[-6.2935, -0.5299],
         [-6.2935, -0.5299]],

        [[-1.6527, -0.8495],
         [-1.6527, -0.8495]],

        [[-0.8258,  2.5922],
         [-0.8258,  2.5922]]])

In [123]:
# 1-D @ 2-D: the length-3 vector is treated as a row vector, so the result is the
# length-2 vector of column sums of the (3, 2) matrix.
(torch.ones(3) @ torch.ones(3,2))

tensor([3., 3.])

In [95]:
# 1-D @ 2-D: ones(2) @ ones(2, 2) sums each column, which evaluates to tensor([2., 2.]).
# NOTE(review): the saved output shows tensor([4., 4.]), which this expression cannot
# produce -- the cell was probably edited after it last ran. Re-run it.
torch.ones(2) @ (torch.ones(2,2))

tensor([4., 4.])

In [89]:
# hflat @ W has shape (n_heads, batch_size * graph_size, key_dim); the view splits the
# merged axis back into (batch_size, graph_size), giving (2, 2, 4, 5).
(hflat @ W).view(n_heads, batch_size, graph_size, -1)

tensor([[[[2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.]],

         [[2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.]]],


        [[[2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.]],

         [[2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.],
          [2., 2., 2., 2., 2.]]]])

In [45]:
# Shape of the matmul result: (n_heads, batch_size * graph_size, key_dim).
out.shape

torch.Size([2, 8, 5])

In [38]:
import torch

# Toy sizes for the projection example
n_heads, input_dim, key_dim = 2, 2, 5
batch_size, graph_size = 2, 4

# All-ones sample tensors:
#   W: (n_heads, input_dim, key_dim)
#   h: (batch_size, graph_size, input_dim)
W = torch.ones((n_heads, input_dim, key_dim))
h = torch.ones((batch_size, graph_size, input_dim))

# Merge the first two axes of h into one: (batch_size * graph_size, input_dim)
hflat = h.reshape(batch_size * graph_size, input_dim)

# Broadcasts over W's leading axis: (n_heads, batch_size * graph_size, key_dim)
out = hflat @ W


In [39]:
# Display h: the all-ones input of shape (batch_size, graph_size, input_dim) = (2, 4, 2).
h

tensor([[[1., 1.],
         [1., 1.],
         [1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.],
         [1., 1.],
         [1., 1.]]])

In [40]:
# Display W: the all-ones tensor of shape (n_heads, input_dim, key_dim) = (2, 2, 5).
W

tensor([[[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]]])

In [41]:
# Display hflat: h with its first two axes merged, shape (8, 2).
hflat

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.],
        [1., 1.]])

In [42]:
# Compare shapes side by side: the 2-D hflat is broadcast across W's leading axis, so
# out gains that axis and ends up as (n_heads, batch_size * graph_size, key_dim).
h.shape, hflat.shape, W.shape, out.shape

(torch.Size([2, 4, 2]),
 torch.Size([8, 2]),
 torch.Size([2, 2, 5]),
 torch.Size([2, 8, 5]))

In [43]:
# Display out: every entry is 2 because each one is a dot product of two all-ones
# vectors of length input_dim = 2.
out

tensor([[[2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.]],

        [[2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.]]])

In [3]:
# NOTE(review): reshaped_x is not assigned in any cell visible here, so this cell
# depends on leftover kernel state. The saved output is a (4, 3) float tensor.
reshaped_x

tensor([[ 1.1343, -0.1192,  0.0265],
        [-0.2524, -0.0422,  0.9785],
        [-0.5827, -1.7806,  1.7354],
        [ 0.2570,  1.4897,  1.8424]])

In [13]:
# Flatten x into a 1-D tensor.
# NOTE(review): x is not assigned in any cell visible here; the saved output has 6 elements.
x.view(-1)

tensor([-0.1683,  0.1338,  0.4927, -0.4967,  0.5698, -1.4790])

In [3]:
# NOTE(review): x is not assigned in any cell visible here, so this cell depends on
# leftover kernel state. The saved output is a (2, 3, 4) float tensor.
x

tensor([[[ 0.0161, -0.2674, -2.1117,  1.7751],
         [ 0.3761,  0.3804,  1.5195,  1.0489],
         [-1.3908, -0.8066, -0.4466, -0.7620]],

        [[ 0.5718, -0.6892, -1.2683,  0.7038],
         [ 0.2249, -0.5766, -0.2630,  0.0182],
         [ 0.2342, -1.4991, -0.8797, -0.1346]]])

In [4]:
# NOTE(review): reshaped_x is not assigned in any cell visible here. The saved output is
# (6, 4) and holds the same rows, in the same order, as the (2, 3, 4) x displayed in
# the cell above -- presumably x with its first two axes merged; confirm against the
# defining cell.
reshaped_x

tensor([[ 0.0161, -0.2674, -2.1117,  1.7751],
        [ 0.3761,  0.3804,  1.5195,  1.0489],
        [-1.3908, -0.8066, -0.4466, -0.7620],
        [ 0.5718, -0.6892, -1.2683,  0.7038],
        [ 0.2249, -0.5766, -0.2630,  0.0182],
        [ 0.2342, -1.4991, -0.8797, -0.1346]])

In [5]:
# NOTE(review): final_x is not assigned in any cell visible here. The saved output is
# identical to the (2, 3, 4) x displayed earlier -- presumably reshaped_x restored to
# its original shape; confirm against the defining cell.
final_x

tensor([[[ 0.0161, -0.2674, -2.1117,  1.7751],
         [ 0.3761,  0.3804,  1.5195,  1.0489],
         [-1.3908, -0.8066, -0.4466, -0.7620]],

        [[ 0.5718, -0.6892, -1.2683,  0.7038],
         [ 0.2249, -0.5766, -0.2630,  0.0182],
         [ 0.2342, -1.4991, -0.8797, -0.1346]]])