In [1]:
import torch

In [16]:
# Load the model
model = torch.load('Neuro-GPT/pretrained_model/pytorch_model.bin', map_location=torch.device('cpu'))

# List all keys in the state dictionary to find the correct key
print(model.keys())

# Extract embeddings from this model (replace 'transformer.tokens_embed.weight' with the correct key)
embeddings = model['correct_key_here']  # Replace 'correct_key_here' with the actual key

odict_keys(['encoder.patch_embedding.shallownet.0.weight', 'encoder.patch_embedding.shallownet.0.bias', 'encoder.patch_embedding.shallownet.1.weight', 'encoder.patch_embedding.shallownet.1.bias', 'encoder.patch_embedding.shallownet.2.weight', 'encoder.patch_embedding.shallownet.2.bias', 'encoder.patch_embedding.shallownet.2.running_mean', 'encoder.patch_embedding.shallownet.2.running_var', 'encoder.patch_embedding.shallownet.2.num_batches_tracked', 'encoder.patch_embedding.projection.0.weight', 'encoder.patch_embedding.projection.0.bias', 'encoder.transformer.0.0.fn.0.weight', 'encoder.transformer.0.0.fn.0.bias', 'encoder.transformer.0.0.fn.1.keys.weight', 'encoder.transformer.0.0.fn.1.keys.bias', 'encoder.transformer.0.0.fn.1.queries.weight', 'encoder.transformer.0.0.fn.1.queries.bias', 'encoder.transformer.0.0.fn.1.values.weight', 'encoder.transformer.0.0.fn.1.values.bias', 'encoder.transformer.0.0.fn.1.projection.weight', 'encoder.transformer.0.0.fn.1.projection.bias', 'encoder.tran

KeyError: 'correct_key_here'

In [None]:
# Define the model architecture (example architecture, replace with actual)
import torch.nn as nn

class Encoder(nn.Module):
	def __init__(self):
		super(Encoder, self).__init__()
		self.patch_embedding = nn.Sequential(
			nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
			nn.BatchNorm2d(64),
			nn.ReLU(inplace=True),
			nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
			nn.BatchNorm2d(128),
			nn.ReLU(inplace=True),
			nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
			nn.BatchNorm2d(256),
			nn.ReLU(inplace=True)
		)
		self.transformer = nn.ModuleList([
			nn.TransformerEncoderLayer(d_model=256, nhead=8) for _ in range(6)
		])

	def forward(self, x):
		x = self.patch_embedding(x)
		x = x.flatten(2).transpose(1, 2)
		for layer in self.transformer:
			x = layer(x)
		return x

class Decoder(nn.Module):
	def __init__(self):
		super(Decoder, self).__init__()
		self.transformer = nn.ModuleList([
			nn.TransformerDecoderLayer(d_model=256, nhead=8) for _ in range(6)
		])
		self.fc = nn.Linear(256, 10)

	def forward(self, x, memory):
		for layer in self.transformer:
			x = layer(x, memory)
		x = self.fc(x)
		return x

class ExampleModel(nn.Module):
	def __init__(self):
		super(ExampleModel, self).__init__()
		self.encoder = Encoder()
		self.decoder = Decoder()

	def forward(self, x):
		memory = self.encoder(x)
		x = self.decoder(x, memory)
		return x

# Initialize the model
model_architecture = ExampleModel()

# Load the state dictionary into the model
model_architecture.load_state_dict(model, strict=False)

# Find the number of parameters in the model
total_params = sum(p.numel() for p in model_architecture.parameters())
print(f'Total parameters: {total_params}')

# Find the architecture of the model
print(model_architecture)

Total parameters: 17744906
ExampleModel(
  (encoder): Encoder(
    (patch_embedding): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ReLU(inplace=True)
    )
    (transformer): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (linear1): Linear(in_features=256, out_features=2048, bias=True)
 

In [13]:
def analyze_input_layer(state_dict):
    # Get the first convolutional layer weights
    first_layer_weight = state_dict['encoder.patch_embedding.shallownet.0.weight']
    
    # Extract key dimensions
    out_channels, in_channels, kernel_height, kernel_width = first_layer_weight.shape
    
    print(f"First Layer Analysis:")
    print(f"Input Channels: {in_channels}")
    print(f"Output Channels: {out_channels}")
    print(f"Kernel Size: {kernel_height}x{kernel_width}")
    
    return {
        'in_channels': in_channels,
        'out_channels': out_channels,
        'kernel_size': (kernel_height, kernel_width)
    }

# Analyze the model
dims = analyze_input_layer(model)

First Layer Analysis:
Input Channels: 1
Output Channels: 40
Kernel Size: 1x25


In [14]:
analyze_input_layer(model)


First Layer Analysis:
Input Channels: 1
Output Channels: 40
Kernel Size: 1x25


{'in_channels': 1, 'out_channels': 40, 'kernel_size': (1, 25)}