In [5]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt

In [13]:
from nltk.tokenize import word_tokenize
import nltk
from collections import Counter

In [14]:
# Tokenization
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\srksr\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\srksr\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [15]:
device = 'cuda'

In [16]:
document = """Artificial Intelligence (AI) refers to the ability of machines to perform tasks that normally require human intelligence, such as learning, reasoning, problem-solving, and decision-making. The concept of AI was formally introduced in the mid-20th century, inspired by the question posed by Alan Turing: Can machines think? Since then,
AI has evolved from a theoretical idea into a powerful technology shaping modern life.
AI systems work by analyzing large amounts of data and identifying patterns using algorithms and models, particularly in areas like machine learning and deep learning. 
Today, AI is widely used in everyday applications such as voice assistants, recommendation systems, facial recognition, and autonomous vehicles.
In healthcare, AI helps doctors diagnose diseases, analyze medical images, and develop personalized treatment plans. In education, AI enables adaptive learning platforms that cater to individual student needs.
One of the major advantages of AI is efficiency. 
Machines can perform repetitive tasks faster and more accurately than humans, reducing errors and saving time.
AI also plays a crucial role in scientific research, finance, agriculture, and cybersecurity by processing complex data at a scale impossible for humans. 
Organizations like OpenAI are actively working to develop AI systems that are both powerful and beneficial to society.
However, the rapid growth of AI also raises concerns. 
Job displacement due to automation, data privacy issues, algorithmic bias, and ethical questions about decision-making by machines are significant challenges. 
There is a growing need for responsible AI development to ensure transparency, fairness, and accountability. Governments and researchers worldwide are working on regulations and ethical frameworks to guide the safe use of AI.
In conclusion, Artificial Intelligence is a transformative technology with immense potential to improve human life. 
While it offers numerous benefits across various sectors, careful and ethical implementation is essential. By balancing innovation with responsibility, AI can become a powerful tool for sustainable development and human progress.
The applications of AI are vast and continue to grow. In healthcare, AI assists in early disease detection, robotic surgeries, drug discovery, and patient monitoring. 
In transportation, AI powers self-driving cars, traffic prediction, and route optimization. In finance, AI is used for fraud detection, algorithmic trading, and credit risk assessment.
In agriculture, AI helps farmers predict crop yields, monitor soil health, and optimize irrigation, contributing to food security.
AI also plays a critical role in improving productivity and innovation. 
By automating repetitive and time-consuming tasks, AI allows humans to focus on creative, strategic, and analytical work. 
Industries benefit from increased efficiency, reduced costs, and improved accuracy. In scientific research, AI accelerates discoveries by analyzing massive datasets in fields such as climate science, physics, and biology.
Despite its advantages, AI presents serious challenges and ethical concerns. 
Automation threatens certain job roles, particularly those involving routine tasks, leading to fears of unemployment and economic inequality. 
Bias in AI algorithms can result in unfair decisions, especially when training data reflects social inequalities. 
Issues related to privacy, surveillance, and misuse of AI technologies further emphasize the need for strong governance and ethical standards.
Looking ahead, the future of AI depends on responsible development and human-centered design. 
Education systems must adapt to prepare individuals for an AI-driven world by emphasizing critical thinking, creativity, and digital literacy. 
Collaboration between governments, researchers, and industries is essential to ensure that AI technologies are used for social good rather than harm.
In conclusion, Artificial Intelligence is one of the most influential technologies of the modern era. 
It has the power to transform industries, enhance human capabilities, and solve complex global problems. 
While challenges exist, thoughtful regulation, ethical practices, and continuous learning can ensure that AI remains a force for positive and inclusive progress.
In business and finance, AI has moved beyond simple automation. 
Real-time fraud detection systems analyze millions of transactions per second to secure global markets. 
Meanwhile, in manufacturing, predictive maintenance powered by AI has halved equipment downtime, allowing "smart factories" to operate with near-perfect efficiency.
Social and Ethical Challenges
Despite these advancements, the "AI Revolution" is not without friction. 
Job displacement remains a primary concern as AI agents take over administrative and repetitive tasks. 
While this creates new roles for "AI Orchestrators" and "Prompt Engineers," it necessitates a massive global effort in workforce reskilling.
Furthermore, algorithmic bias continues to be a hurdle. 
Because AI learns from historical data, it can inadvertently perpetuate social prejudices in areas like hiring or law enforcement. 
The rise of deepfakes and synthetic media in 2026 has also created a "truth crisis," making it difficult to distinguish between authentic and AI-generated information.
Conclusion: The Future of Co-Intelligence
As we look toward the next decade, the goal of AI development is shifting from "replacing" human intelligence to "augmenting" it. 
The most successful implementations in 2026 are those that maintain a "human-in-the-loop" approach, combining the cold, data-driven precision of machines with the empathy, ethics, and creative judgment of people. 
To ensure AI remains a force for good, robust regulatory frameworks and "hallucination insurance" are becoming standard, aiming to protect the integrity of the digital world.
The Rise of Physical Intelligence
We have officially moved past the "software-only" era of AI. 
Through advancements in Embodied AI, large language models have been integrated into humanoid and industrial robotics. 
Unlike the rigid, pre-programmed robots of the past, today’s machines use "vision-language-action" models to navigate unpredictable environments like messy kitchens or busy construction sites. 
These robots learn through observation and simulation, meaning they can master a physical task in a virtual world millions of times before ever performing it in the physical one, drastically reducing the cost of manual labor.
The Alignment Challenge and Safety
Perhaps the most critical paragraph in the story of AI is the struggle for Alignment—the technical challenge of ensuring an AIs goals perfectly match human values. 
As AI systems become more autonomous, they may find "shortcuts" to achieve goals that are technically correct but practically harmful. In 2026, the focus has shifted from "capability" (making AI smarter) to "interpretability" (understanding how it thinks). 
Global regulators and labs are now focused on building "Constitutional AI," where models are governed by a set of hard-coded ethical principles that they cannot bypass, regardless of how complex the task becomes."""

In [17]:
document = document.lower()

In [18]:
#tokenize
tokens = word_tokenize(document.lower())

In [20]:
#building vocab
voca = {"<UNK>":0}
Counter(tokens) # counts no. of times each word occur

Counter({',': 92,
         '.': 56,
         'and': 48,
         'ai': 42,
         'the': 35,
         'of': 29,
         'in': 29,
         'to': 27,
         'a': 18,
         '``': 15,
         "''": 15,
         'by': 11,
         'is': 11,
         'for': 11,
         'are': 11,
         'that': 8,
         'can': 8,
         'ethical': 8,
         'it': 8,
         'intelligence': 6,
         'machines': 6,
         'human': 6,
         'as': 6,
         'has': 6,
         'systems': 6,
         'tasks': 5,
         'learning': 5,
         'from': 5,
         'data': 5,
         'models': 4,
         'like': 4,
         'also': 4,
         'challenges': 4,
         'development': 4,
         'ensure': 4,
         'on': 4,
         'with': 4,
         'social': 4,
         'global': 4,
         'artificial': 3,
         '(': 3,
         ')': 3,
         'such': 3,
         'powerful': 3,
         'used': 3,
         'one': 3,
         'efficiency': 3,
         'repetitive': 3,
  

In [21]:
for token in Counter(tokens).keys():
    if token not in voca.keys():
        voca[token] = len(voca)

In [23]:
def text_to_indices(text):
    indexed_text = []
    for token in text:
        if token in voca:
            indexed_text.append(voca[token])
        else :
            indexed_text.append(voca["<UNK>"])
    return indexed_text


In [24]:
#sentences 
sentences = document.split('\n')
numerical_sentences =[]
for sentence in sentences:
    numerical_sentences.append(text_to_indices(word_tokenize(sentence.lower())))

In [25]:
numerical_sentences

[[1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  7,
  12,
  13,
  14,
  15,
  16,
  17,
  2,
  18,
  19,
  20,
  21,
  18,
  22,
  18,
  23,
  18,
  24,
  25,
  26,
  8,
  27,
  10,
  4,
  28,
  29,
  30,
  31,
  8,
  32,
  33,
  18,
  34,
  35,
  8,
  36,
  37,
  35,
  38,
  39,
  40,
  41,
  11,
  42,
  43,
  44,
  45,
  18],
 [4, 46, 47, 48, 49, 50, 51, 52, 49, 53, 54, 55, 56, 57, 26],
 [4,
  58,
  59,
  35,
  60,
  61,
  62,
  10,
  63,
  24,
  64,
  65,
  66,
  67,
  24,
  68,
  18,
  69,
  31,
  70,
  71,
  72,
  21,
  24,
  73,
  21,
  26],
 [74,
  18,
  4,
  75,
  76,
  77,
  31,
  78,
  79,
  19,
  20,
  80,
  81,
  18,
  82,
  58,
  18,
  83,
  84,
  18,
  24,
  85,
  86,
  26],
 [31,
  87,
  18,
  4,
  88,
  89,
  90,
  91,
  18,
  92,
  93,
  94,
  18,
  24,
  95,
  96,
  97,
  98,
  26,
  31,
  99,
  18,
  4,
  100,
  101,
  21,
  102,
  14,
  103,
  7,
  104,
  105,
  106,
  26],
 [107, 10, 8, 108, 109, 10, 4, 75, 110, 26],
 [11,
  41,
  12,
  111,
  13,
  112,


In [26]:
sentences

['artificial intelligence (ai) refers to the ability of machines to perform tasks that normally require human intelligence, such as learning, reasoning, problem-solving, and decision-making. the concept of ai was formally introduced in the mid-20th century, inspired by the question posed by alan turing: can machines think? since then,',
 'ai has evolved from a theoretical idea into a powerful technology shaping modern life.',
 'ai systems work by analyzing large amounts of data and identifying patterns using algorithms and models, particularly in areas like machine learning and deep learning. ',
 'today, ai is widely used in everyday applications such as voice assistants, recommendation systems, facial recognition, and autonomous vehicles.',
 'in healthcare, ai helps doctors diagnose diseases, analyze medical images, and develop personalized treatment plans. in education, ai enables adaptive learning platforms that cater to individual student needs.',
 'one of the major advantages of a

In [27]:
training_seq = []
for sentence in numerical_sentences:
    for i in range(1,len(sentence)):
        training_seq.append(sentence[:i+1])

In [28]:
training_seq[10]

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7]

In [29]:
len_list = []

for sequence in training_seq:
  len_list.append(len(sequence))

maxi = max(len_list)

In [30]:
padded_training_seq = []
for i in range(len(training_seq)):
    padded_training_seq.append([0]*(maxi - len(training_seq[i]))+training_seq[i])


In [31]:
padded_training_seq[0]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 2]

In [32]:
padded_training_sequence = torch.tensor(padded_training_seq, dtype=torch.long)

In [33]:
padded_training_sequence

tensor([[  0,   0,   0,  ...,   0,   1,   2],
        [  0,   0,   0,  ...,   1,   2,   3],
        [  0,   0,   0,  ...,   2,   3,   4],
        ...,
        [  0,   0,   0,  ..., 131,   8, 485],
        [  0,   0,   0,  ...,   8, 485, 539],
        [  0,   0,   0,  ..., 485, 539,  26]])

In [34]:
padded_training_sequence.shape

torch.Size([1157, 59])

In [35]:
X = padded_training_sequence[:,:-1]
Y = padded_training_sequence[:,-1]

In [36]:
Y

tensor([  2,   3,   4,  ..., 485, 539,  26])

In [37]:
class CustomDataset(Dataset):
    def __init__(self,X,Y):
        self.X = torch.tensor(X,dtype=torch.long,device=device)
        self.Y = torch.tensor(Y,dtype=torch.long,device=device)
    def __len__(self):
        return self.X.shape[0]
    def __getitem__(self,index):
        return self.X[index] , self.Y[index]


In [38]:
dataset = CustomDataset(X,Y)

  self.X = torch.tensor(X,dtype=torch.long,device=device)
  self.Y = torch.tensor(Y,dtype=torch.long,device=device)


In [39]:
dataloader = DataLoader(dataset,batch_size=32,shuffle=True)

In [40]:
class MySimpleLSTM(nn.Module):
    def __init__(self,vocab_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size,embedding_dim=100)
        self.lstm = nn.LSTM(100,150,batch_first=True)
        self.out = nn.Linear(150,vocab_size)
    def forward(self, X):
        embeded_question = self.embedding(X)
        _ ,(final_hidden,finay_cell_state) = self.lstm(embeded_question)
        output = self.out(final_hidden).squeeze(0)
        return output

In [41]:
model = MySimpleLSTM(len(voca))
model = model.to(device=device)

In [42]:
lr = 0.001
epochs = 50

In [43]:
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=lr)

In [44]:
for epoche in range(epochs):
    total_loss = 0
    for batch_features , batch_labels in dataloader:
        
        batch_features , batch_labels = batch_features.to(device) , batch_labels.to(device)
        
        y_pred = model(batch_features)
        
        loss = loss_function(y_pred,batch_labels)
        
        total_loss += loss.item()
        
        optimizer.zero_grad()
        
        loss.backward()
        
        optimizer.step()
    print('Epoch: ', epoche+1," ---> ","loss :",total_loss)

Epoch:  1  --->  loss : 229.08243799209595
Epoch:  2  --->  loss : 200.31897354125977
Epoch:  3  --->  loss : 187.1659049987793
Epoch:  4  --->  loss : 172.97803735733032
Epoch:  5  --->  loss : 157.2741448879242
Epoch:  6  --->  loss : 142.44660329818726
Epoch:  7  --->  loss : 128.39274978637695
Epoch:  8  --->  loss : 115.2596583366394
Epoch:  9  --->  loss : 102.2876524925232
Epoch:  10  --->  loss : 90.19933843612671
Epoch:  11  --->  loss : 79.13307690620422
Epoch:  12  --->  loss : 69.02585852146149
Epoch:  13  --->  loss : 58.642227947711945
Epoch:  14  --->  loss : 51.69609045982361
Epoch:  15  --->  loss : 44.964842319488525
Epoch:  16  --->  loss : 38.12435817718506
Epoch:  17  --->  loss : 33.04110026359558
Epoch:  18  --->  loss : 28.746303498744965
Epoch:  19  --->  loss : 25.00350710749626
Epoch:  20  --->  loss : 21.462939769029617
Epoch:  21  --->  loss : 18.9222851395607
Epoch:  22  --->  loss : 16.601073533296585
Epoch:  23  --->  loss : 14.67015689611435
Epoch:  24 

In [45]:
def predict(model, text):

  # convert question to numbers
  tokens = word_tokenize(text.lower())
  
  numerical_question = text_to_indices(tokens)

  padded = [0]*(maxi - len(numerical_question)) + numerical_question
  
  # tensor
  question_tensor = torch.tensor(padded,device=device).unsqueeze(0)

  # send to model
  output = model(question_tensor)

  # convert logits to probs
  probs = torch.nn.functional.softmax(output, dim=1)

  # find index of max prob
  value, index = torch.max(probs, dim=1)
  
  return text + " " + list(voca.keys())[index]

In [46]:
text = "artificial intelligence"
p=10
while(p):
    next_text = (predict(model, text))
    print(next_text)
    text = next_text
    p-=1

artificial intelligence (
artificial intelligence ( ai
artificial intelligence ( ai )
artificial intelligence ( ai ) refers
artificial intelligence ( ai ) refers to
artificial intelligence ( ai ) refers to the
artificial intelligence ( ai ) refers to the ability
artificial intelligence ( ai ) refers to the ability of
artificial intelligence ( ai ) refers to the ability of machines
artificial intelligence ( ai ) refers to the ability of machines to
