In [1]:
import re
import random
import torch
import torch.nn as nn

In [2]:
text='Science is the systematic pursuit of knowledge through observation, experimentation, and logical reasoning. It helps us understand the natural world, from the smallest particles to the vastness of the universe. Through scientific inquiry, humans uncover patterns, develop theories, and create technologies that improve daily life. Science encourages curiosity, critical thinking, and evidence-based decision-making. It evolves continuously as new discoveries challenge old ideas and expand our understanding. Whether exploring biological systems, studying energy, or investigating cosmic phenomena, science provides a reliable method for explaining how things work. Its progress shapes society and guides future innovation for the benefit of all.'

In [3]:
words = [token for token in re.split(r'([,.:;?_!"()\']|--|\s)', text) if token.strip() != '']
words

['Science',
 'is',
 'the',
 'systematic',
 'pursuit',
 'of',
 'knowledge',
 'through',
 'observation',
 ',',
 'experimentation',
 ',',
 'and',
 'logical',
 'reasoning',
 '.',
 'It',
 'helps',
 'us',
 'understand',
 'the',
 'natural',
 'world',
 ',',
 'from',
 'the',
 'smallest',
 'particles',
 'to',
 'the',
 'vastness',
 'of',
 'the',
 'universe',
 '.',
 'Through',
 'scientific',
 'inquiry',
 ',',
 'humans',
 'uncover',
 'patterns',
 ',',
 'develop',
 'theories',
 ',',
 'and',
 'create',
 'technologies',
 'that',
 'improve',
 'daily',
 'life',
 '.',
 'Science',
 'encourages',
 'curiosity',
 ',',
 'critical',
 'thinking',
 ',',
 'and',
 'evidence-based',
 'decision-making',
 '.',
 'It',
 'evolves',
 'continuously',
 'as',
 'new',
 'discoveries',
 'challenge',
 'old',
 'ideas',
 'and',
 'expand',
 'our',
 'understanding',
 '.',
 'Whether',
 'exploring',
 'biological',
 'systems',
 ',',
 'studying',
 'energy',
 ',',
 'or',
 'investigating',
 'cosmic',
 'phenomena',
 ',',
 'science',
 'pro

In [4]:
unique_words = sorted(list(set(words)))
vocab={
    '[CLS]':0,
    '[SEP]':1,
    '[MASK]':2,
    '[PAD]':3
}
for i, word in enumerate(unique_words):
  vocab[word]=i+4
vocab

{'[CLS]': 0,
 '[SEP]': 1,
 '[MASK]': 2,
 '[PAD]': 3,
 ',': 4,
 '.': 5,
 'It': 6,
 'Its': 7,
 'Science': 8,
 'Through': 9,
 'Whether': 10,
 'a': 11,
 'all': 12,
 'and': 13,
 'as': 14,
 'benefit': 15,
 'biological': 16,
 'challenge': 17,
 'continuously': 18,
 'cosmic': 19,
 'create': 20,
 'critical': 21,
 'curiosity': 22,
 'daily': 23,
 'decision-making': 24,
 'develop': 25,
 'discoveries': 26,
 'encourages': 27,
 'energy': 28,
 'evidence-based': 29,
 'evolves': 30,
 'expand': 31,
 'experimentation': 32,
 'explaining': 33,
 'exploring': 34,
 'for': 35,
 'from': 36,
 'future': 37,
 'guides': 38,
 'helps': 39,
 'how': 40,
 'humans': 41,
 'ideas': 42,
 'improve': 43,
 'innovation': 44,
 'inquiry': 45,
 'investigating': 46,
 'is': 47,
 'knowledge': 48,
 'life': 49,
 'logical': 50,
 'method': 51,
 'natural': 52,
 'new': 53,
 'observation': 54,
 'of': 55,
 'old': 56,
 'or': 57,
 'our': 58,
 'particles': 59,
 'patterns': 60,
 'phenomena': 61,
 'progress': 62,
 'provides': 63,
 'pursuit': 64,
 '

In [5]:
class Tokenizer:
  def __init__(self, vocab):
    self.str_to_int = vocab
    self.int_to_str = {idx: word for word, idx in vocab.items()}

  def encode(self,text):
    return [self.str_to_int[token] for token in re.split(r'([,.:;?_!"()\']|--|\s)', text) if token.strip() != '']

  def decode(self,token_ids):
    return [self.int_to_str[id] for id in token_ids]

In [6]:
tokenizer=Tokenizer(vocab)

ids=tokenizer.encode('Science is the systematic')
ids

[8, 47, 77, 73]

In [7]:
print(tokenizer.decode(ids))

['Science', 'is', 'the', 'systematic']


In [8]:
class BertDataset:
    def __init__(self, sentence_1, sentence_2, max_len=20, max_pred=10):
        self.tokenizer = Tokenizer(vocab)

        CLS = vocab['[CLS]']
        SEP = vocab['[SEP]']
        MASK = vocab['[MASK]']

        # tokenized sentences
        tokens1 = self.tokenizer.encode(sentence_1)
        tokens2 = self.tokenizer.encode(sentence_2)

        # build input
        input_ids = [CLS] + tokens1 + [SEP] + tokens2 + [SEP]

        # segment ids
        segment_ids = (
            [0] * (1 + len(tokens1) + 1) +
            [1] * (len(tokens2) + 1)
        )

        # ----- MLM MASKING -----
        cand_pos = [i for i, tid in enumerate(input_ids) if tid not in [CLS, SEP]]
        random.shuffle(cand_pos)

        mask_len = min(max_pred, max(1, int(len(input_ids) * 0.15)))

        masked_tokens = []
        masked_position = []

        for i in range(mask_len):
            pos = cand_pos[i]
            masked_tokens.append(input_ids[pos])
            masked_position.append(pos)

            prob = random.random()

            if prob < 0.8:
                input_ids[pos] = MASK  # 80% mask token
            elif prob < 0.9:
                rand_id = random.randint(0, len(vocab) - 1)
                input_ids[pos] = rand_id  # 10% random
            else:
                pass  # 10% keep original

        # pad input to max_len
        padding = max_len - len(input_ids)
        input_ids += [0] * padding
        segment_ids += [0] * padding

        # pad masked labels
        pad_mlm = max_pred - len(masked_tokens)
        masked_tokens += [0] * pad_mlm
        masked_position += [0] * pad_mlm

        # save
        self.input_ids = input_ids
        self.segment_ids = segment_ids
        self.masked_tokens = masked_tokens
        self.masked_position = masked_position
        self.is_next = True  # or False for NSP


In [9]:
d=BertDataset('Science is the systematic','knowledge through observation')
print('Input IDs: ',d.input_ids)
print('Segment IDs: ',d.segment_ids)
print('Mask Token: ',d.masked_tokens)
print('Mask Token Position: ',d.masked_position)


Input IDs:  [0, 8, 47, 77, 73, 1, 48, 81, 54, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Segment IDs:  [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Mask Token:  [48, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Mask Token Position:  [6, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [10]:
class SelfAttention(nn.Module):
  def __init__(self,d_in,d_out):
    super().__init__()
    self.d_out=d_out
    self.d_in=d_in


    self.w_k=nn.Linear(d_in,d_out)
    self.w_q=nn.Linear(d_in,d_out)
    self.w_v=nn.Linear(d_in,d_out)

  def forward(self,x):
    keys=self.w_k(x)
    values=self.w_v(x)
    queries=self.w_q(x)

    attn_scores=queries @ keys.transpose(-1, -2)
    scaled_weight=torch.softmax(attn_scores/torch.sqrt(torch.tensor(self.d_out)),dim=-1)
    attn_matrix=scaled_weight @ values
    return attn_matrix




In [11]:
class MultiHeadAttention(nn.Module):
  def __init__(self,num_heads,d_in,d_out):
    super().__init__()
    self.heads=nn.ModuleList(
        [SelfAttention(d_in,d_out) for _ in range(num_heads)]
    )
  def forward(self,x):
    return torch.cat([head(x) for head in self.heads],dim=-1)


In [12]:
class GELU(nn.Module):
  def __init__(self):
    super().__init__()

  def forward(self,x):
    return 0.5*x*(1+torch.tanh(torch.sqrt(torch.tensor(2/torch.pi))*(x+0.044715*x**3)))


In [13]:
class FeedForward(nn.Module):
  def __init__(self,emb_dim):
    super().__init__()
    self.layers=nn.Sequential(
        nn.Linear(emb_dim,4*emb_dim),
        GELU(),
        nn.Linear(4*emb_dim,emb_dim)
    )

  def forward(self,x):
    return self.layers(x)


In [14]:
class LayerNormalization(nn.Module):
  def __init__(self,emb_dim):
    super().__init__()
    self.scale = nn.Parameter(torch.ones(emb_dim))
    self.shift = nn.Parameter(torch.zeros(emb_dim))
    self.eps=1e-5

  def forward(self,x):
    mean=torch.mean(x,dim=-1,keepdim=True)
    variance=torch.var(x,dim=-1,keepdim=True, unbiased=False)
    norm=(x-mean)/torch.sqrt(variance+self.eps)
    return self.scale*norm+self.shift

In [15]:
class BERTEmbedding(nn.Module):
    def __init__(self, vocab_size, emb_dim, segment_token_type=2, max_token=512, dropout_prob=0.1):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, emb_dim)
        self.segment_embedding = nn.Embedding(segment_token_type, emb_dim)
        self.position_embedding = nn.Embedding(max_token, emb_dim)
        self.layer_norm = LayerNormalization(emb_dim)
        self.dropout = nn.Dropout(dropout_prob)
        self.register_buffer('position_ids', torch.arange(max_token).unsqueeze(0))
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)
        elif isinstance(module, LayerNormalization):
            nn.init.ones_(module.scale)
            nn.init.zeros_(module.shift)

    def forward(self, x, segment_ids=None, position_ids=None):
        batch_size, seq_length = x.shape
        token_embeds = self.token_embedding(x)

        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]
        position_embeds = self.position_embedding(position_ids)

        if segment_ids is None:
            segment_ids = torch.zeros_like(x, dtype=torch.long)
        segment_embeds = self.segment_embedding(segment_ids)

        embeddings = token_embeds + segment_embeds + position_embeds

        embeddings = self.layer_norm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

In [16]:
vocab_size = 30522
emb_dim = 768
max_seq_len = 512
bert_embedding = BERTEmbedding(vocab_size, emb_dim)

batch_size = 2
seq_len = 128

input_ids = torch.randint(0, vocab_size, (batch_size, seq_len))
# print(input_ids)

# Segment IDs (for sentence pairs)
segment_ids = torch.cat([
    torch.zeros(batch_size, seq_len // 2, dtype=torch.long),
    torch.ones(batch_size, seq_len // 2, dtype=torch.long)
], dim=1)
# print(segment_ids)

embeddings = bert_embedding(input_ids, segment_ids)

print("Input shape:", input_ids.shape)
print("Output embeddings shape:", embeddings.shape)
print("Embedding dimension:", emb_dim)

Input shape: torch.Size([2, 128])
Output embeddings shape: torch.Size([2, 128, 768])
Embedding dimension: 768


In [17]:
class BERTEncoder(nn.Module):
    def __init__(self, emb_dim, num_heads, dropout_prob=0.1):
        super().__init__()

        self.emb_dim = emb_dim
        self.num_heads = num_heads

        self.multihead_attention = MultiHeadAttention(num_heads, emb_dim, emb_dim // num_heads)
        self.layer_normalization = LayerNormalization(emb_dim)
        self.feed_forward = FeedForward(emb_dim)
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, x):
        residual = x
        attention_weight = self.multihead_attention(x)
        x = self.layer_normalization(residual + self.dropout(attention_weight))
        residual = x
        ff_output = self.feed_forward(x)
        output = self.layer_normalization(residual + self.dropout(ff_output))
        return output

In [18]:
encoder_block = BERTEncoder(emb_dim=768, num_heads=12, dropout_prob=0.1)

# Input tensor [batch_size, seq_len, emb_dim]
batch_size = 2
seq_len = 128
emb_dim = 768
x = torch.randn(batch_size, seq_len, emb_dim)

# Forward pass
output = encoder_block(x)
print(f"Input shape: {x.shape}")
print(f"Output shape: {output.shape}")

Input shape: torch.Size([2, 128, 768])
Output shape: torch.Size([2, 128, 768])


BERT for sentiment Analysis

In [19]:
from torch.utils.data import Dataset

In [20]:
class CustomSentimentDataset(Dataset):
  def __init__(self, texts, labels, tokenizer, max_length=128):
    super().__init__()
    self.texts = texts
    self.labels = labels
    self.tokenizer = tokenizer
    self.max_length = max_length

  def __len__(self):
    return len(self.texts)

  def __getitem__(self, idx):
    text = self.texts[idx]
    label = self.labels[idx]

    tokens = self.tokenizer.encode(text)

    if len(tokens) > self.max_length - 2:
        tokens = tokens[:self.max_length - 2]

    input_ids = (
        [self.tokenizer.str_to_int['[CLS]']] +
        tokens +
        [self.tokenizer.str_to_int['[SEP]']]
    )

    padding_length = self.max_length - len(input_ids)
    input_ids = input_ids + [self.tokenizer.str_to_int['[PAD]']] * padding_length
    segment_ids = [0] * self.max_length

    return {
        'input_ids': torch.tensor(input_ids, dtype=torch.long),
        'segment_ids': torch.tensor(segment_ids, dtype=torch.long),
        'label': torch.tensor(label, dtype=torch.long)
    }

In [21]:
from collections import Counter

In [22]:
def build_vocab_from_custom_data(texts, vocab_size=10000):


    token_counter = Counter()

    for text in texts:
        tokens = re.split(r'([,.:;?_!"()\']|--|\s)', text)
        tokens = [token for token in tokens if token.strip() != '']
        token_counter.update(tokens)

    print(f"Total unique tokens found: {len(token_counter)}")

    special_tokens = ['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]']
    vocab = {}

    for idx, token in enumerate(special_tokens):
        vocab[token] = idx

    # Add most common words from your data
    for idx, (token, count) in enumerate(token_counter.most_common(vocab_size - len(special_tokens))):
        vocab[token] = idx + len(special_tokens)

    print(f"Final vocabulary size: {len(vocab)}")
    print(vocab)

    return vocab

In [23]:
vocab=build_vocab_from_custom_data(text)

Total unique tokens found: 31
Final vocabulary size: 36
{'[PAD]': 0, '[UNK]': 1, '[CLS]': 2, '[SEP]': 3, '[MASK]': 4, 'e': 5, 'i': 6, 'n': 7, 's': 8, 't': 9, 'o': 10, 'a': 11, 'r': 12, 'c': 13, 'l': 14, 'h': 15, 'd': 16, 'u': 17, 'g': 18, 'p': 19, 'v': 20, 'm': 21, ',': 22, 'f': 23, 'y': 24, '.': 25, 'w': 26, 'b': 27, 'k': 28, 'x': 29, 'I': 30, 'S': 31, '-': 32, 'T': 33, 'q': 34, 'W': 35}


In [24]:
class SentimentModel(nn.Module):
  def __init__(self, vocab_size, num_classes, emb_dim, num_heads=4, dropout_prob=0.1):
    super().__init__()
    self.embedding = BERTEmbedding(vocab_size, emb_dim, dropout_prob=dropout_prob)
    self.encoder = BERTEncoder(emb_dim, num_heads, dropout_prob=dropout_prob)
    self.classifier = nn.Linear(emb_dim, num_classes)

  def forward(self, input_ids, segment_ids=None):
    embeddings = self.embedding(input_ids, segment_ids)
    encoder_output = self.encoder(embeddings)
    cls_output = encoder_output[:,0,:]
    logits = self.classifier(cls_output)
    return logits

In [25]:
print(torch.cuda.is_available())

True


In [26]:
device= 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [27]:
sample_data = [
    # Positive reviews
    ("This product is amazing! I love it.", 1),
    ("Excellent quality and fast delivery.", 1),
    ("Highly recommended, great value for money.", 1),
    ("Works perfectly, very satisfied with purchase.", 1),
    ("Outstanding service and product quality.", 1),
    ("Best purchase I've made this year!", 1),
    ("Fantastic product, exceeded my expectations.", 1),
    ("Great features and easy to use.", 1),
    ("Very happy with this product.", 1),
    ("Perfect! Exactly what I needed.", 1),

    # Negative reviews
    ("Terrible product, complete waste of money.", 0),
    ("Poor quality and stopped working after 2 days.", 0),
    ("Very disappointed with this purchase.", 0),
    ("Doesn't work as described, avoid this product.", 0),
    ("Worst product I've ever bought.", 0),
    ("Broken upon arrival, terrible quality.", 0),
    ("Not worth the money, very poor performance.", 0),
    ("Complete garbage, don't buy this.", 0),
    ("Extremely disappointed, faulty product.", 0),
    ("Awful experience, product doesn't work.", 0),

    # Neutral reviews
    ("The product is okay, nothing special.", 2),
    ("It works but could be better.", 2),
    ("Average product, does the job.", 2),
    ("Not bad, but not great either.", 2),
    ("Mediocre quality, expected more.", 2),
]

# Separate texts and labels
texts = [item[0] for item in sample_data]
labels = [item[1] for item in sample_data]

# print(texts)
# print(labels)


In [28]:
vocab = build_vocab_from_custom_data(texts, vocab_size=5000)
tokenizer = Tokenizer(vocab)

Total unique tokens found: 107
Final vocabulary size: 112
{'[PAD]': 0, '[UNK]': 1, '[CLS]': 2, '[SEP]': 3, '[MASK]': 4, '.': 5, ',': 6, 'product': 7, 'quality': 8, "'": 9, 'this': 10, 'I': 11, 'and': 12, '!': 13, 'money': 14, 'with': 15, 'purchase': 16, 't': 17, 'is': 18, 'great': 19, 'very': 20, 've': 21, 'Very': 22, 'disappointed': 23, 'work': 24, 'Not': 25, 'the': 26, 'but': 27, 'This': 28, 'amazing': 29, 'love': 30, 'it': 31, 'Excellent': 32, 'fast': 33, 'delivery': 34, 'Highly': 35, 'recommended': 36, 'value': 37, 'for': 38, 'Works': 39, 'perfectly': 40, 'satisfied': 41, 'Outstanding': 42, 'service': 43, 'Best': 44, 'made': 45, 'year': 46, 'Fantastic': 47, 'exceeded': 48, 'my': 49, 'expectations': 50, 'Great': 51, 'features': 52, 'easy': 53, 'to': 54, 'use': 55, 'happy': 56, 'Perfect': 57, 'Exactly': 58, 'what': 59, 'needed': 60, 'Terrible': 61, 'complete': 62, 'waste': 63, 'of': 64, 'Poor': 65, 'stopped': 66, 'working': 67, 'after': 68, '2': 69, 'days': 70, 'Doesn': 71, 'as': 72,

In [29]:
num_classes = len(set(labels))
print(f"Number of classes: {num_classes}")

Number of classes: 3


In [30]:
from torch.utils.data import DataLoader, random_split

In [35]:
epochs=1000

In [32]:
import torch.optim

In [36]:
def evaluate_model(model, data_loader, device):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            segment_ids = batch['segment_ids'].to(device)
            labels = batch['label'].to(device)

            logits = model(input_ids, segment_ids)
            _, predicted = torch.max(logits, 1)

            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    model.train()
    return 100 * correct / total if total > 0 else 0

In [37]:
dataset = CustomSentimentDataset(texts, labels, tokenizer)

# Split into train and validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

# Create model
model = SentimentModel(
    vocab_size=len(vocab),
    num_classes=num_classes,
    emb_dim=128,
    num_heads=4,
    dropout_prob=0.1
).to(device)

print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)

# Training loop
model.train()
best_accuracy = 0

for epoch in range(epochs):
    total_loss = 0
    correct = 0
    total = 0

    for batch_idx, batch in enumerate(train_loader):
        input_ids = batch['input_ids'].to(device)
        segment_ids = batch['segment_ids'].to(device)
        labels_batch = batch['label'].to(device)

        # Forward pass
        logits = model(input_ids, segment_ids)
        loss = criterion(logits, labels_batch)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        total_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(logits, 1)
        correct += (predicted == labels_batch).sum().item()
        total += labels_batch.size(0)

        if batch_idx % 5 == 0:
            batch_accuracy = 100 * (predicted == labels_batch).sum().item() / labels_batch.size(0)
            print(f'Epoch {epoch+1}, Batch {batch_idx}, Loss: {loss.item():.4f}, Acc: {batch_accuracy:.1f}%')

    epoch_accuracy = 100 * correct / total
    avg_loss = total_loss / len(train_loader)
    val_accuracy = evaluate_model(model, val_loader, device)

    print(f'\nEpoch [{epoch+1}/{epochs}] completed:')
    print(f'  Training Loss: {avg_loss:.4f}')
    print(f'  Training Accuracy: {epoch_accuracy:.2f}%')
    print(f'  Validation Accuracy: {val_accuracy:.2f}%')


    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        torch.save({
            'model_state_dict': model.state_dict(),
            'tokenizer_vocab': tokenizer.str_to_int,
            'vocab_size': len(vocab),
            'num_classes': num_classes,
            'label_mapping': {
                0: 'Negative',
                1: 'Positive',
                2: 'Neutral'
            }
        }, 'best_custom_model.pth')
        print(f'New best model saved! Validation Accuracy: {val_accuracy:.2f}%')

    print('-' * 60)

print(f'Training completed! Best validation accuracy: {best_accuracy:.2f}%')




Model parameters: 262,275
Epoch 1, Batch 0, Loss: 1.1148, Acc: 62.5%

Epoch [1/1000] completed:
  Training Loss: 1.1248
  Training Accuracy: 40.00%
  Validation Accuracy: 60.00%
New best model saved! Validation Accuracy: 60.00%
------------------------------------------------------------
Epoch 2, Batch 0, Loss: 1.2149, Acc: 12.5%

Epoch [2/1000] completed:
  Training Loss: 1.1084
  Training Accuracy: 30.00%
  Validation Accuracy: 20.00%
------------------------------------------------------------
Epoch 3, Batch 0, Loss: 1.0588, Acc: 50.0%

Epoch [3/1000] completed:
  Training Loss: 1.0419
  Training Accuracy: 50.00%
  Validation Accuracy: 20.00%
------------------------------------------------------------
Epoch 4, Batch 0, Loss: 1.0985, Acc: 37.5%

Epoch [4/1000] completed:
  Training Loss: 1.0661
  Training Accuracy: 45.00%
  Validation Accuracy: 20.00%
------------------------------------------------------------
Epoch 5, Batch 0, Loss: 1.1047, Acc: 25.0%

Epoch [5/1000] completed:
  