In [1]:
# Standard Library Imports
import os                       # Operating system functions
import subprocess               # Subprocess management
import random                   # Random number generation
from tqdm import tqdm           # Progress bars

# Data Manipulation and Visualization
import json                     # JSON data manipulation
import inspect                  # Inspection of live objects
import pandas as pd              # Data manipulation using DataFrames
import numpy as np               # Numerical operations
import matplotlib.pyplot as plt  # Plotting
import seaborn as sns            # Styling plots

# Image Processing
from PIL import Image            # Python Imaging Library for image processing

# Model Libraries
import timm                     # Used for pre-trained models such as EfficientNet, ViT

# PyTorch
import torch                    # PyTorch overall import
import torch.optim as optim     # Optimization of model parameters
import torch.nn as nn           # Specification of neural networks
from torch.utils.data import Dataset, DataLoader  # Data loading utilities
import torchvision.transforms as transforms      # Pre-processing data

  from .autonotebook import tqdm as notebook_tqdm


In [52]:
# Define the Transformer model
class TransformerModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_heads, output_size):
        super(TransformerModel, self).__init__()

        self.embedding = nn.Linear(input_size, hidden_size)
        self.transformer = nn.Transformer(
            d_model=hidden_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.embedding(x)
        x = x.permute(2, 0, 1)  # Permute to (sequence_length, batch_size, input_size)
        x = self.transformer(x, x)
        x = x.permute(1, 2, 0)  # Permute back to (batch_size, input_size, sequence_length)
        # x = x.mean(dim=1)  # Take the mean along the batch dimension (second dimension)
        x = self.fc(x)
        return x

# Set the input size, hidden size, number of layers, number of heads, and output size
input_size = 1536
hidden_size = 4
num_layers = 8
num_heads = 4
output_size = 10  # Adjust as needed

# Create an instance of the TransformerModel
model = TransformerModel(input_size, hidden_size, num_layers, num_heads, output_size)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Example input data with shape (batch_size, input_size, sequence_length)
input_data = torch.randn(32, 2*2, input_size)

# Example target data with shape (batch_size,)
target_data = torch.randint(0, 2, (32, output_size))

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(input_data)
    loss = criterion(outputs.squeeze(), target_data)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print loss for monitoring
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')

ValueError: Expected input batch_size (4) to match target batch_size (32).

In [47]:
print(input_data.shape)

torch.Size([32, 4, 1536])


In [50]:
outputs = model(input_data)
outputs = outputs.squeeze()
print(outputs)

tensor([[[ 0.0794, -0.3366, -0.3976,  ...,  0.2945,  1.1435,  1.6424],
         [ 0.4907, -0.0594, -0.2210,  ..., -0.0563,  0.9765,  0.8920],
         [ 0.0992, -0.2409,  0.9599,  ...,  0.0962, -0.2762, -0.3120],
         [ 0.4419,  0.0579,  0.8107,  ..., -0.0055, -0.1924, -0.3209]],

        [[ 0.2797, -0.2210, -0.5159,  ...,  0.1250,  1.2915,  1.5387],
         [ 0.2443, -0.2015, -0.0981,  ...,  0.1488,  0.8222,  1.0467],
         [ 0.3390, -0.0454,  0.8310,  ...,  0.0260, -0.1910, -0.2933],
         [ 0.2481, -0.1114,  0.9351,  ...,  0.0288, -0.2710, -0.3913]],

        [[ 0.2268, -0.2149, -0.4845,  ...,  0.2332,  1.2156,  1.6483],
         [ 0.3032, -0.2170, -0.1152,  ...,  0.0148,  0.8943,  0.8828],
         [ 0.3992, -0.0163,  0.8329,  ..., -0.0085, -0.1990, -0.3624],
         [ 0.1815, -0.1311,  0.9190,  ...,  0.0895, -0.2594, -0.2670]],

        ...,

        [[ 0.1542, -0.3247, -0.4799,  ...,  0.2060,  1.2558,  1.6114],
         [ 0.4325, -0.0451, -0.1532,  ...,  0.0289,  0.87

In [49]:
print(target_data)

tensor([[0, 0, 0, 1, 0, 1, 1, 0, 0, 1],
        [0, 0, 1, 0, 0, 1, 1, 1, 1, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0, 0, 0, 1, 0, 1],
        [1, 0, 1, 1, 1, 0, 0, 0, 1, 0],
        [0, 0, 1, 0, 0, 1, 0, 1, 0, 1],
        [0, 0, 1, 0, 1, 1, 0, 0, 1, 1],
        [1, 0, 1, 1, 0, 0, 0, 1, 0, 0],
        [0, 1, 1, 0, 1, 1, 1, 1, 1, 0],
        [1, 0, 0, 1, 0, 0, 1, 0, 0, 0],
        [0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 1, 0, 1],
        [0, 0, 0, 1, 1, 0, 0, 1, 0, 1],
        [1, 0, 1, 0, 1, 1, 0, 0, 1, 1],
        [0, 1, 0, 0, 1, 1, 0, 0, 0, 0],
        [1, 1, 0, 0, 1, 1, 1, 0, 0, 1],
        [0, 0, 0, 1, 1, 0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
        [1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 0, 0, 1, 0, 1, 0, 1],
        [0, 1, 0, 0, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 1, 1, 0, 0, 1],
        [1, 0, 0, 1, 0, 1, 1, 0, 1, 0],
        [1, 1, 0, 1, 1, 0, 0, 1, 1, 0],
