In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np


In [3]:
text = """
artificial intelligence is transforming modern society.
it is used in healthcare finance education and transportation.
machine learning allows systems to improve automatically with experience.
data plays a critical role in training intelligent systems.
large datasets help models learn complex patterns.
deep learning uses multi layer neural networks.
neural networks are inspired by biological neurons.
each neuron processes input and produces an output.
training a neural network requires optimization techniques.
gradient descent minimizes the loss function.

natural language processing helps computers understand human language.
text generation is a key task in nlp.
language models predict the next word or character.
recurrent neural networks handle sequential data.
lstm and gru models address long term dependency problems.
however rnn based models are slow for long sequences.

transformer models changed the field of nlp.
they rely on self attention mechanisms.
attention allows the model to focus on relevant context.
transformers process data in parallel.
this makes training faster and more efficient.
modern language models are based on transformers.

education is being improved using artificial intelligence.
intelligent tutoring systems personalize learning.
automated grading saves time for teachers.
online education platforms use recommendation systems.
technology enhances the quality of learning experiences.

ethical considerations are important in artificial intelligence.
fairness transparency and accountability must be ensured.
ai systems should be designed responsibly.
data privacy and security are major concerns.
researchers continue to improve ai safety.

text generation models can create stories poems and articles.
they are used in chatbots virtual assistants and content creation.
generated text should be meaningful and coherent.
evaluation of text generation is challenging.
human judgement is often required.

continuous learning is essential in the field of ai.
research and innovation drive technological progress.
students should build strong foundations in mathematics.
programming skills are important for ai engineers.
practical experimentation enhances understanding.
"""


In [4]:
def preprocessed(text):
    """Build a character-level vocabulary for ``text`` and encode it.

    The text is lower-cased first; every distinct character then receives an
    integer id, assigned in sorted character order.

    Args:
        text: Raw input string.

    Returns:
        A 4-tuple ``(encoded, char2idx, idx2char, vocab_size)``: the list of
        ids for the lower-cased text, the character -> id mapping, the
        id -> character mapping, and the number of distinct characters.
    """
    lowered = text.lower()

    # Sorting makes the id assignment independent of set iteration order.
    alphabet = sorted(set(lowered))
    idx2char = dict(enumerate(alphabet))
    char2idx = {symbol: index for index, symbol in idx2char.items()}

    encoded = list(map(char2idx.__getitem__, lowered))

    return encoded, char2idx, idx2char, len(alphabet)


In [12]:
def create_sequence(encoded_txt, seq_len):
    """Slice an encoded sequence into (context window, next item) pairs.

    Args:
        encoded_txt: Sequence of encoded items (e.g. a list of character ids).
        seq_len: Number of consecutive items in each context window.

    Returns:
        A pair ``(x, y)`` of equally long lists. ``x[k]`` is the slice
        ``encoded_txt[k:k + seq_len]`` and ``y[k]`` is the item that directly
        follows that slice. Both lists are empty when ``encoded_txt`` has no
        more than ``seq_len`` items.
    """
    # One window per start offset that still leaves a following item as target.
    window_starts = range(len(encoded_txt) - seq_len)

    x = [encoded_txt[start:start + seq_len] for start in window_starts]
    y = [encoded_txt[start + seq_len] for start in window_starts]

    return x, y

In [13]:
# Encode the corpus character by character (preprocessed() lower-cases it first).
encoded_text, char_to_idx, idx_to_char, vocab_size = preprocessed(text)
# Context window: the model sees seq_length characters and predicts the next one.
seq_length = 5
# inputs[k] is a window of seq_length ids; targets[k] is the id that follows it.
inputs, targets = create_sequence(encoded_text, seq_length)

In [14]:
# Sanity check: number of training windows, then the character vocabulary size.
print(len(inputs))
vocab_size

2211


29

In [15]:
class ScratchLSTM(nn.Module):
    """Single-layer LSTM next-token predictor with hand-written gates.

    Each input index is embedded into a ``hidden_size``-dimensional vector,
    concatenated with the previous hidden state and fed to four independent
    linear layers (forget, input, candidate and output gates). Only the hidden
    state after the last time step is projected to vocabulary logits, so the
    model emits one prediction per input sequence.

    Args:
        vocab_size: Number of distinct token indices; also the width of the
            output logits.
        hidden_size: Width of the embedding, the hidden state and the cell
            state.
    """

    def __init__(self, vocab_size, hidden_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

        # The embedding width equals hidden_size, so every gate receives
        # hidden_size (previous h) + hidden_size (embedded input) features.
        self.embedding = nn.Embedding(vocab_size, hidden_size)

        gate_in_features = hidden_size + hidden_size
        self.Wf = nn.Linear(gate_in_features, hidden_size)  # forget gate
        self.Wi = nn.Linear(gate_in_features, hidden_size)  # input gate
        self.Wc = nn.Linear(gate_in_features, hidden_size)  # candidate cell state
        self.Wo = nn.Linear(gate_in_features, hidden_size)  # output gate

        # Projects the final hidden state to one logit per vocabulary entry.
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Run the LSTM over ``x`` and return logits for the next token.

        Args:
            x: Integer tensor of token indices with shape
                ``(batch_size, seq_len)``.

        Returns:
            Tensor of shape ``(batch_size, vocab_size)`` holding unnormalised
            logits computed from the hidden state after the last time step.

        Raises:
            ValueError: If ``x`` is not 2-dimensional (the size unpacking
                below fails).
        """
        batch_size, seq_len = x.size()

        # Allocate the initial states with the parameters' device and dtype.
        # A bare torch.zeros(...) always yields CPU float32 tensors, which
        # makes torch.cat below fail once the model lives on another device.
        reference = self.embedding.weight
        h = torch.zeros(
            batch_size, self.hidden_size, device=reference.device, dtype=reference.dtype
        )
        c = torch.zeros(
            batch_size, self.hidden_size, device=reference.device, dtype=reference.dtype
        )

        for t in range(seq_len):
            x_embed = self.embedding(x[:, t])

            # Every gate looks at [previous hidden state, current input].
            combined = torch.cat((h, x_embed), dim=1)

            f = torch.sigmoid(self.Wf(combined))
            i = torch.sigmoid(self.Wi(combined))
            c_tilde = torch.tanh(self.Wc(combined))
            o = torch.sigmoid(self.Wo(combined))

            # Keep part of the old cell state, add part of the candidate.
            c = f * c + i * c_tilde
            h = o * torch.tanh(c)

        output = self.fc(h)
        return output


In [16]:
# Seed PyTorch's global RNG before the model is built so that the random
# weight initialisation is the same on every "Restart Kernel -> Run All".
torch.manual_seed(42)

hidden_size = 128  # width of the embedding, hidden state and cell state
model = ScratchLSTM(vocab_size, hidden_size)

# CrossEntropyLoss expects raw logits, which is what ScratchLSTM.forward returns.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)



In [17]:
epochs = 20

# Convert the dataset to tensors once; rebuilding two small tensors from
# Python lists on every optimisation step is pure overhead.
inputs_tensor = torch.tensor(inputs)    # (len(inputs), seq_length) token ids
targets_tensor = torch.tensor(targets)  # (len(inputs),) next-token ids

# Make sure the model is in training mode even if an earlier (possibly
# re-run) cell switched it to eval mode.
model.train()

for epoch in range(epochs):
    total_loss = 0

    # One optimisation step per sample (batch size 1), in dataset order.
    for i in range(len(inputs)):
        # Slicing instead of indexing keeps the leading batch dimension of 1.
        x = inputs_tensor[i:i + 1]
        y = targets_tensor[i:i + 1]

        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean per-sample loss of this epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(inputs):.4f}")


Epoch 1, Loss: 2.4862
Epoch 2, Loss: 2.0048
Epoch 3, Loss: 1.7596
Epoch 4, Loss: 1.5929
Epoch 5, Loss: 1.4642
Epoch 6, Loss: 1.3266
Epoch 7, Loss: 1.1743
Epoch 8, Loss: 1.0468
Epoch 9, Loss: 0.9659
Epoch 10, Loss: 0.8957
Epoch 11, Loss: 0.8446
Epoch 12, Loss: 0.7564
Epoch 13, Loss: 0.7006
Epoch 14, Loss: 0.6504
Epoch 15, Loss: 0.6244
Epoch 16, Loss: 0.5956
Epoch 17, Loss: 0.5544
Epoch 18, Loss: 0.5436
Epoch 19, Loss: 0.5524
Epoch 20, Loss: 0.5295


In [None]:
def generate_text(model, seed_text, seq_length, char_to_idx, idx_to_char, length=200):
    """Greedily extend ``seed_text`` one character at a time.

    At every step the last ``seq_length`` characters generated so far are
    encoded with ``char_to_idx`` and passed to ``model``; the index with the
    highest score is decoded with ``idx_to_char`` and appended.

    The model is switched to eval mode for the duration of the call and put
    back into whatever mode it was in before, so calling this between
    training runs does not silently leave the model in eval mode.

    Args:
        model: ``nn.Module`` mapping a ``(1, n)`` tensor of token ids to
            ``(1, vocab_size)`` scores.
        seed_text: Initial text; it is returned unchanged as the prefix of the
            result.
        seq_length: Number of trailing characters used as context.
        char_to_idx: Mapping from character to token id.
        idx_to_char: Mapping from token id to character.
        length: Number of characters to generate.

    Returns:
        ``seed_text`` followed by ``length`` generated characters.

    Raises:
        KeyError: If a character in the current context window is missing
            from ``char_to_idx`` (or a predicted id from ``idx_to_char``).
    """
    # Build inputs on the model's device; fall back to CPU for a model
    # without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    model.eval()
    generated = seed_text

    try:
        # No gradients are needed for inference.
        with torch.no_grad():
            for _ in range(length):
                context = generated[-seq_length:]
                context_ids = [char_to_idx[ch] for ch in context]
                input_tensor = torch.tensor(
                    [context_ids], dtype=torch.long, device=device
                )

                # argmax over raw scores equals argmax over their softmax,
                # so the normalisation step is unnecessary for greedy decoding.
                scores = model(input_tensor)
                predicted_idx = torch.argmax(scores, dim=1).item()

                generated += idx_to_char[predicted_idx]
    finally:
        # Restore the caller's train/eval mode even if decoding failed.
        model.train(was_training)

    return generated


In [29]:
# The seed is lower-case because preprocessed() lower-cases the corpus; a
# character missing from char_to_idx would raise KeyError in generate_text().
seed = "artificial intelligence is "
# Append 200 greedily decoded characters to the seed and show the result.
print(generate_text(model, seed, seq_length, char_to_idx, idx_to_char, 200))


artificial intelligence is research and learning articles.
they are important for ai engineers.
practical experimentation enhances understanding.
human judgement is of ai.
research and learning articles.
they are important for 


In [34]:
# Count the elements of every parameter tensor registered on the model.
total_params = sum(p.numel() for p in model.parameters())
print("Total Parameters:", total_params)


Total Parameters: 139037


In [39]:
%pip install torchinfo


Note: you may need to restart the kernel to use updated packages.


In [45]:
# Imported here rather than in the first cell because torchinfo is installed
# by the %pip cell directly above.
from torchinfo import summary

# One batch of seq_length random token ids in [0, vocab_size); the values do
# not matter, the tensor only gives summary() an input to pass to the model.
dummy_input = torch.randint(0, vocab_size, (1, seq_length), dtype=torch.long)

# forward() calls the same layers once per time step, so the table repeats
# them and marks the repeats as "(recursive)".
summary(model, input_data=dummy_input)



Layer (type:depth-idx)                   Output Shape              Param #
ScratchLSTM                              [1, 29]                   --
├─Embedding: 1-1                         [1, 128]                  3,712
├─Linear: 1-2                            [1, 128]                  32,896
├─Linear: 1-3                            [1, 128]                  32,896
├─Linear: 1-4                            [1, 128]                  32,896
├─Linear: 1-5                            [1, 128]                  32,896
├─Embedding: 1-6                         [1, 128]                  (recursive)
├─Linear: 1-7                            [1, 128]                  (recursive)
├─Linear: 1-8                            [1, 128]                  (recursive)
├─Linear: 1-9                            [1, 128]                  (recursive)
├─Linear: 1-10                           [1, 128]                  (recursive)
├─Embedding: 1-11                        [1, 128]                  (recursive)
├─Linear: 1-