<a href="https://colab.research.google.com/github/jyotidabass/How-does-ChatGPT-works/blob/main/How_does_ChatGPT_works.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Step 1: Text Preprocessing
def preprocess_text(text):
    """Normalize ``text`` and split it into whitespace-separated tokens.

    The text is lowercased, every character that is neither alphanumeric
    nor whitespace is dropped, and the remainder is split on whitespace.

    Args:
        text: Input string.

    Returns:
        List of token strings (empty when nothing survives the filtering).
    """
    def _keep(ch):
        # Letters/digits form tokens; whitespace is kept so split() can
        # still find the token boundaries.
        return ch.isalnum() or ch.isspace()

    cleaned = ''.join(filter(_keep, text.lower()))
    return cleaned.split()

# Step 2: Embedding
class Embedding(nn.Module):
    """Thin wrapper around ``nn.Embedding`` for token-index lookup.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_dim: Size of each embedding vector.
    """

    def __init__(self, vocab_size, embedding_dim):
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
        )

    def forward(self, indices):
        """Return the embedding vectors selected by ``indices``."""
        return self.embedding(indices)

# Step 3: Encoder
class Encoder(nn.Module):
    """Two-layer feed-forward network: linear -> ReLU -> linear.

    Args:
        input_dim: Feature size accepted by the first linear layer.
        hidden_dim: Feature size between the two linear layers.
        output_dim: Feature size produced by the second linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, embeddings):
        """Apply ``fc1``, a ReLU, then ``fc2`` to ``embeddings``."""
        hidden = self.fc1(embeddings)
        activated = torch.relu(hidden)
        return self.fc2(activated)

# Step 4: Decoder
class Decoder(nn.Module):
    """Two-layer feed-forward network: linear -> ReLU -> linear.

    Args:
        input_dim: Feature size accepted by the first linear layer.
        hidden_dim: Feature size between the two linear layers.
        output_dim: Feature size produced by the second linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, outputs):
        """Return ``fc2(relu(fc1(outputs)))``."""
        return self.fc2(torch.relu(self.fc1(outputs)))

# Step 5: Training
def train(model, inputs, targets, epochs):
    """Fit ``model`` on one fixed batch with Adam and cross-entropy loss.

    Runs ``epochs`` full-batch optimization steps and prints the loss after
    each step.

    Args:
        model: Module whose ``parameters()`` are optimized; it is called as
            ``model(inputs)``.
        inputs: Batch fed to the model on every epoch. If it was produced by
            another module (i.e. it carries a ``grad_fn``), it is detached so
            that no gradient flows back into that module.
        targets: Targets given to ``nn.CrossEntropyLoss`` together with the
            model output.
        epochs: Number of optimization steps.

    Returns:
        None.
    """
    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Inputs computed outside this function by another module bring that
    # module's autograd graph along. Backpropagating through it on every
    # epoch used to require ``retain_graph=True`` and accumulated gradients
    # into parameters this optimizer never updates. Detaching once keeps
    # the values and lets each epoch build (and free) its own graph.
    if getattr(inputs, "grad_fn", None) is not None:
        inputs = inputs.detach()

    # Train the model
    for epoch in range(epochs):
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        # Calculate the loss
        loss = criterion(outputs, targets)
        # Backward pass and parameter update
        loss.backward()
        optimizer.step()
        # Print the loss
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Define the vocabulary
vocab = ['hello', 'world', 'how', 'are', 'you']

# Define the model
embedding = Embedding(len(vocab), 10)
# NOTE: the encoder is constructed here but is not part of the training call
# below, which optimizes the decoder only.
encoder = Encoder(10, 20, 10)
decoder = Decoder(10, 20, len(vocab))

# Define the inputs and targets
input_text = preprocess_text('hello world')
input_indices = torch.tensor([vocab.index(token) for token in input_text])
# Only the decoder's parameters are passed to the optimizer, so the embedded
# inputs are detached: the training loop then never backpropagates into the
# embedding table, and no stale autograd graph is reused across epochs.
input_embeddings = embedding(input_indices).detach()

# One target per input token: the target for 'hello' is 'how' and the target
# for 'world' is 'are', so targets has the same batch size as input_embeddings.
targets = torch.tensor([vocab.index('how'), vocab.index('are')])

# Train the model
train(decoder, input_embeddings, targets, epochs=10)

Epoch 1, Loss: 1.356130599975586
Epoch 2, Loss: 1.3400578498840332
Epoch 3, Loss: 1.3240325450897217
Epoch 4, Loss: 1.3080558776855469
Epoch 5, Loss: 1.2921299934387207
Epoch 6, Loss: 1.2762569189071655
Epoch 7, Loss: 1.2604382038116455
Epoch 8, Loss: 1.2446751594543457
Epoch 9, Loss: 1.2289683818817139
Epoch 10, Loss: 1.2133179903030396


In [11]:
# Build the test batch: tokenize, map tokens to vocabulary indices, embed
test_input_text = preprocess_text('hello world')
test_input_indices = torch.tensor(list(map(vocab.index, test_input_text)))
test_input_embeddings = embedding(test_input_indices)

# Run the decoder on the embedded test batch and show its raw outputs
test_outputs = decoder(test_input_embeddings)
print(test_outputs)

tensor([[ 0.0101, -0.2192,  0.5440,  0.0839, -0.1989],
        [ 0.0877, -0.4793,  0.0412,  0.3301, -0.3754]],
       grad_fn=<AddmmBackward0>)
