<a href="https://colab.research.google.com/github/tripathishiva0123/tripathishiva0123/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
pip install torch transformers




In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import random


In [6]:
class TextDataset(Dataset):
    def __init__(self, text, seq_length, vocab):
        self.vocab = vocab
        self.text = text
        self.seq_length = seq_length
        self.data = self.tokenize(text)

    def tokenize(self, text):
        tokenized_text = [self.vocab.get(c, self.vocab['<unk>']) for c in text]
        return tokenized_text

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        return (
            torch.tensor(self.data[idx:idx+self.seq_length], dtype=torch.long),
            torch.tensor(self.data[idx+1:idx+self.seq_length+1], dtype=torch.long)
        )

def build_vocab(text):
    chars = sorted(list(set(text)))
    vocab = {c: i for i, c in enumerate(chars)}
    vocab['<unk>'] = len(vocab)
    return vocab

# Example text data
text = "hello world. this is a simple example of training a language model."
vocab = build_vocab(text)
seq_length = 10
dataset = TextDataset(text, seq_length, vocab)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)


In [8]:
class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.RNN(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out.reshape(out.size(0) * out.size(1), out.size(2)))
        return out, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, hidden_size)

# Model parameters
vocab_size = len(vocab)
embed_size = 64
hidden_size = 128
num_layers = 1

model = RNNModel(vocab_size, embed_size, hidden_size, num_layers)


In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [11]:
# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    for inputs, targets in dataloader:
        hidden = model.init_hidden(batch_size=inputs.size(0))  # Initialize hidden state for each batch
        optimizer.zero_grad()

        # Forward pass
        outputs, hidden = model(inputs, hidden)

        # Compute loss
        loss = criterion(outputs, targets.view(-1))

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/10], Loss: 1.5865
Epoch [2/10], Loss: 1.0915
Epoch [3/10], Loss: 0.3911
Epoch [4/10], Loss: 0.4223
Epoch [5/10], Loss: 0.5110
Epoch [6/10], Loss: 0.3043
Epoch [7/10], Loss: 0.3627
Epoch [8/10], Loss: 0.1894
Epoch [9/10], Loss: 0.3158
Epoch [10/10], Loss: 0.1548


In [12]:
# Example where retain_graph=True might be used
loss.backward(retain_graph=True)


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [13]:
def train_model(model, dataloader, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        for inputs, targets in dataloader:
            hidden = model.init_hidden(batch_size=inputs.size(0))  # Initialize hidden state for each batch
            optimizer.zero_grad()

            # Forward pass
            outputs, hidden = model(inputs, hidden)

            # Compute loss
            loss = criterion(outputs, targets.view(-1))

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Usage
train_model(model, dataloader, criterion, optimizer, num_epochs=10)


Epoch [1/10], Loss: 0.3018
Epoch [2/10], Loss: 0.0418
Epoch [3/10], Loss: 0.2530
Epoch [4/10], Loss: 0.3210
Epoch [5/10], Loss: 0.0284
Epoch [6/10], Loss: 0.1126
Epoch [7/10], Loss: 0.1574
Epoch [8/10], Loss: 0.2739
Epoch [9/10], Loss: 0.0180
Epoch [10/10], Loss: 0.0153


In [14]:
class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNNModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.RNN(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out.reshape(out.size(0) * out.size(1), out.size(2)))
        return out, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(num_layers, batch_size, hidden_size)


In [15]:
print(f'Inputs: {inputs.size()}')
print(f'Outputs: {outputs.size()}')
print(f'Targets: {targets.size()}')


Inputs: torch.Size([1, 10])
Outputs: torch.Size([10, 21])
Targets: torch.Size([1, 10])


In [16]:
inputs = torch.tensor([[1, 2, 3, 4, 5]], dtype=torch.long)
targets = torch.tensor([[2, 3, 4, 5, 6]], dtype=torch.long)
hidden = model.init_hidden(batch_size=1)

outputs, hidden = model(inputs, hidden)
loss = criterion(outputs, targets.view(-1))
loss.backward()  # Ensure this runs without issues


In [17]:
class TextDataset(Dataset):
    def __init__(self, text, seq_length, vocab):
        self.vocab = vocab
        self.text = text
        self.seq_length = seq_length
        self.data = self.tokenize(text)

    def tokenize(self, text):
        tokenized_text = [self.vocab.get(c, self.vocab['<unk>']) for c in text]
        return tokenized_text

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        return (
            torch.tensor(self.data[idx:idx+self.seq_length], dtype=torch.long),
            torch.tensor(self.data[idx+1:idx+self.seq_length+1], dtype=torch.long)
        )


In [20]:
text = "hello world"
vocab = {'h': 0, 'e': 1, 'l': 2, 'o': 3, ' ': 4, 'w': 5, 'r': 6, 'd': 7}
seq_length = 5
dataset = TextDataset(text, seq_length, vocab)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# Initialize model, criterion, optimizer
model = RNNModel(vocab_size=len(vocab), embed_size=64, hidden_size=128, num_layers=1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train with a single batch
inputs, targets = next(iter(dataloader))
hidden = model.init_hidden(batch_size=inputs.size(0))
outputs, hidden = model(inputs, hidden)
loss = criterion(outputs, targets.view(-1))
loss.backward()
optimizer.step()

print(f'Loss: {loss.item()}')


KeyError: '<unk>'

In [21]:
vocab = {'h': 0, 'e': 1, 'l': 2, 'o': 3, ' ': 4, 'w': 5, 'r': 6, 'd': 7, '<unk>': 8}


In [22]:
class TextDataset(Dataset):
    def __init__(self, text, seq_length, vocab):
        self.vocab = vocab
        self.text = text
        self.seq_length = seq_length
        self.data = self.tokenize(text)

    def tokenize(self, text):
        tokenized_text = [self.vocab.get(c, self.vocab['<unk>']) for c in text]
        return tokenized_text

    def __len__(self):
        return len(self.data) - self.seq_length

    def __getitem__(self, idx):
        return (
            torch.tensor(self.data[idx:idx+self.seq_length], dtype=torch.long),
            torch.tensor(self.data[idx+1:idx+self.seq_length+1], dtype=torch.long)
        )


In [23]:
text = "hello world"
vocab = {'h': 0, 'e': 1, 'l': 2, 'o': 3, ' ': 4, 'w': 5, 'r': 6, 'd': 7, '<unk>': 8}
seq_length = 5
dataset = TextDataset(text, seq_length, vocab)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# Example of using DataLoader
for inputs, targets in dataloader:
    print("Inputs:", inputs)
    print("Targets:", targets)


Inputs: tensor([[4, 5, 3, 6, 2]])
Targets: tensor([[5, 3, 6, 2, 7]])
Inputs: tensor([[1, 2, 2, 3, 4]])
Targets: tensor([[2, 2, 3, 4, 5]])
Inputs: tensor([[2, 3, 4, 5, 3]])
Targets: tensor([[3, 4, 5, 3, 6]])
Inputs: tensor([[0, 1, 2, 2, 3]])
Targets: tensor([[1, 2, 2, 3, 4]])
Inputs: tensor([[3, 4, 5, 3, 6]])
Targets: tensor([[4, 5, 3, 6, 2]])
Inputs: tensor([[2, 2, 3, 4, 5]])
Targets: tensor([[2, 3, 4, 5, 3]])
