### Base Bart Model

In [7]:
from transformers import BartModel, BartTokenizer

# Load tokenizer and base model
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartModel.from_pretrained("facebook/bart-base")

# Tokenize input text
inputs = tokenizer("Hello, how are you?", return_tensors="pt")

# Forward pass through the model
outputs = model(**inputs)

# Access encoder's last hidden state
last_hidden_state = outputs.last_hidden_state
print(last_hidden_state.shape)


torch.Size([1, 8, 768])


### Architecture of base Bart Model:

In [8]:
model

BartModel(
  (shared): BartScaledWordEmbedding(50265, 768, padding_idx=1)
  (encoder): BartEncoder(
    (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
    (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
    (layers): ModuleList(
      (0-5): 6 x BartEncoderLayer(
        (self_attn): BartSdpaAttention(
          (k_proj): Linear(in_features=768, out_features=768, bias=True)
          (v_proj): Linear(in_features=768, out_features=768, bias=True)
          (q_proj): Linear(in_features=768, out_features=768, bias=True)
          (out_proj): Linear(in_features=768, out_features=768, bias=True)
        )
        (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (activation_fn): GELUActivation()
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      )
   

In [3]:
from transformers import BartTokenizer, BartModel
import torch

# Load tokenizer and base BartModel
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartModel.from_pretrained("facebook/bart-base")

# Input text (for encoder)
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt")

# Decoder input (start token + optionally previous tokens)
decoder_input_ids = torch.tensor([[tokenizer.eos_token_id]])  # start with </s> token

# Forward pass through model
outputs = model(input_ids=inputs.input_ids, decoder_input_ids=decoder_input_ids)

# Extract decoder output (last hidden states of the decoder)
decoder_output = outputs.last_hidden_state

print("Decoder output shape:", decoder_output.shape)
# print("Decoder output:", decoder_output)


Decoder output shape: torch.Size([1, 1, 768])


In [9]:
from transformers import BartTokenizer, BartModel
import torch

tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartModel.from_pretrained("facebook/bart-base")

# 1️⃣ Encode input
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt")
print("Input shape:", inputs.input_ids.shape)   

encoder_outputs = model.encoder(input_ids=inputs.input_ids)
encoder_hidden_states = encoder_outputs.last_hidden_state  # shape: (batch, seq_len, hidden_size)

print("Encoder output shape:", encoder_hidden_states.shape)

# 2️⃣ Prepare decoder input (start token </s>)
decoder_input_ids = torch.tensor([[tokenizer.eos_token_id]])

# 3️⃣ Decode, passing encoder_hidden_states manually
decoder_outputs =model.decoder(
    input_ids=decoder_input_ids,
    encoder_hidden_states=encoder_hidden_states,
    encoder_attention_mask=inputs.attention_mask
)
decoder_hidden_states = decoder_outputs.last_hidden_state  # (batch, dec_seq_len, hidden_size)

print("Decoder output shape:", decoder_hidden_states.shape)


Input shape: torch.Size([1, 8])
Encoder output shape: torch.Size([1, 8, 768])
Decoder output shape: torch.Size([1, 1, 768])


In [10]:
from transformers import BartTokenizer, BartModel
import torch

tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartModel.from_pretrained("facebook/bart-base")

# 1️⃣ Encode input
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
print("Input shape:", inputs.input_ids.shape)

# Encoder attention mask (1 for real tokens, 0 for padding)
encoder_attention_mask = inputs.attention_mask

# Forward pass through encoder
encoder_outputs = model.encoder(input_ids=inputs.input_ids, attention_mask=encoder_attention_mask)
encoder_hidden_states = encoder_outputs.last_hidden_state  # shape: (batch, seq_len, hidden_size)

print("Encoder output shape:", encoder_hidden_states.shape)

# 2️⃣ Prepare decoder input (start token </s>)
decoder_input_ids = torch.tensor([[tokenizer.eos_token_id]])

# Decoder attention mask (for this case — all tokens are real, so 1)
decoder_attention_mask = torch.ones_like(decoder_input_ids)

# 3️⃣ Decode, passing encoder_hidden_states manually
decoder_outputs = model.decoder(
    input_ids=decoder_input_ids,
    attention_mask=decoder_attention_mask,                   # Self-attention mask for decoder
    encoder_hidden_states=encoder_hidden_states,
    encoder_attention_mask=encoder_attention_mask            # Cross-attention mask
)
decoder_hidden_states = decoder_outputs.last_hidden_state  # (batch, dec_seq_len, hidden_size)

print("Decoder output shape:", decoder_hidden_states.shape)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Input shape: torch.Size([1, 8])
Encoder output shape: torch.Size([1, 8, 768])
Decoder output shape: torch.Size([1, 1, 768])
