In [1]:
!pip install nltk gensim scikit-learn torch requests python-dotenv groq weaviate-client langchain cohere




In [26]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk import word_tokenize, pos_tag

nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")
nltk.download("averaged_perceptron_tagger")

sample = "I'm testing basic NLP stuff: tokenizing, lemmatizing, and stemming!"

# tokens
words = word_tokenize(sample)
print("Tokens:", words)

# remove common stopwords
stops = set(stopwords.words("english"))
filtered = [w for w in words if w.lower() not in stops]
print("Without Stopwords:", filtered)

# stemming
stem = PorterStemmer()
stemmed = [stem.stem(word) for word in filtered]
print("Stemmed:", stemmed)

# lemmatizing
lem = WordNetLemmatizer()
lemmatized = [lem.lemmatize(w) for w in filtered]
print("Lemmatized:", lemmatized)


# text cleanup
def clean(text):
    text = text.lower()
    return re.sub(r"[^\w\s]", "", text)

print("Cleaned Text:", clean(sample))




Tokens: ['I', "'m", 'testing', 'basic', 'NLP', 'stuff', ':', 'tokenizing', ',', 'lemmatizing', ',', 'and', 'stemming', '!']
Without Stopwords: ["'m", 'testing', 'basic', 'NLP', 'stuff', ':', 'tokenizing', ',', 'lemmatizing', ',', 'stemming', '!']
Stemmed: ["'m", 'test', 'basic', 'nlp', 'stuff', ':', 'token', ',', 'lemmat', ',', 'stem', '!']
Lemmatized: ["'m", 'testing', 'basic', 'NLP', 'stuff', ':', 'tokenizing', ',', 'lemmatizing', ',', 'stemming', '!']
Cleaned Text: im testing basic nlp stuff tokenizing lemmatizing and stemming


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [28]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

docs = [
    "The phone is on the table.",
    "A cat sat under the table.",
    "Table and chair were dusty."
]

# Bag of Words
cv = CountVectorizer()
bow = cv.fit_transform(docs)
print("BoW:", bow.toarray())

# TF-IDF
tfidf = TfidfVectorizer()
tfidf_result = tfidf.fit_transform(docs)
print("TF-IDF:", tfidf_result.toarray())


BoW: [[0 0 0 0 1 1 1 0 1 2 0 0]
 [0 1 0 0 0 0 0 1 1 1 1 0]
 [1 0 1 1 0 0 0 0 1 0 0 1]]
TF-IDF: [[0.         0.         0.         0.         0.42024133 0.42024133
  0.42024133 0.         0.2482013  0.63920872 0.         0.        ]
 [0.         0.50461134 0.         0.         0.         0.
  0.         0.50461134 0.29803159 0.38376993 0.50461134 0.        ]
 [0.47952794 0.         0.47952794 0.47952794 0.         0.
  0.         0.         0.28321692 0.         0.         0.47952794]]


In [30]:
from gensim.models import Word2Vec

data = [["cat", "sits", "on", "mat"], ["dog", "plays", "with", "ball"]]

cbow = Word2Vec(vector_size=50, window=2, sg=0, min_count=1)
cbow.build_vocab(data)
cbow.train(data, total_examples=len(data), epochs=10)

print("CBOW vector for 'cat':", cbow.wv['cat'])


CBOW vector for 'cat': [ 0.00855287  0.00015212 -0.01916856 -0.01933109 -0.01229639 -0.00025714
  0.00399483  0.01886394  0.0111687  -0.00858139  0.00055663  0.00992872
  0.01539662 -0.00228845  0.00864684 -0.01162876 -0.00160838  0.0162001
 -0.00472013 -0.01932691  0.01155852 -0.00785964 -0.00244575  0.01996103
 -0.0045127  -0.00951413 -0.01065877  0.01396178 -0.01141774  0.00422733
 -0.01051132  0.01224143  0.00871461  0.00521271 -0.00298217 -0.00549213
  0.01798587  0.01043155 -0.00432504 -0.01894062 -0.0148521  -0.00212748
 -0.00158989 -0.00512582  0.01936544 -0.00091704  0.01174752 -0.01489517
 -0.00501215 -0.01109973]


In [9]:
pip install torch




In [31]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.rnn = nn.RNN(input_dim, hidden_dim, batch_first=True)

    def forward(self, x):
        return self.rnn(x)[0]

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)

    def forward(self, x):
        return self.lstm(x)[0]

x = torch.randn(2, 5, 10)
print("RNN shape:", RNN(10, 20)(x).shape)
print("LSTM shape:", LSTM(10, 20)(x).shape)


RNN shape: torch.Size([2, 5, 20])
LSTM shape: torch.Size([2, 5, 20])


In [32]:
import torch
import torch.nn as nn

model = nn.Transformer(d_model=128, nhead=4, num_encoder_layers=2, num_decoder_layers=2)

src = torch.rand((6, 2, 128))  # (src_len, batch, dim)
tgt = torch.rand((4, 2, 128))

out = model(src, tgt)
print("Transformer output:", out.shape)


Transformer output: torch.Size([4, 2, 128])




In [12]:
pip install requests




In [33]:
import requests

r = requests.get("https://api.github.com")
print("Status:", r.status_code)
print("Content type:", r.headers.get("Content-Type"))
print("API root:", r.json())

# Query string
params = {"q": "lang:python"}
res = requests.get("https://api.github.com/search/repositories", params=params)
print("Top Python repo:", res.json()["items"][0]["name"])

# POST test
data = {"message": "Hello"}
res_post = requests.post("https://httpbin.org/post", json=data)
print("Posted:", res_post.json()["json"])


Status: 200
Content type: application/json; charset=utf-8
API root: {'current_user_url': 'https://api.github.com/user', 'current_user_authorizations_html_url': 'https://github.com/settings/connections/applications{/client_id}', 'authorizations_url': 'https://api.github.com/authorizations', 'code_search_url': 'https://api.github.com/search/code?q={query}{&page,per_page,sort,order}', 'commit_search_url': 'https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}', 'emails_url': 'https://api.github.com/user/emails', 'emojis_url': 'https://api.github.com/emojis', 'events_url': 'https://api.github.com/events', 'feeds_url': 'https://api.github.com/feeds', 'followers_url': 'https://api.github.com/user/followers', 'following_url': 'https://api.github.com/user/following{/target}', 'gists_url': 'https://api.github.com/gists{/gist_id}', 'hub_url': 'https://api.github.com/hub', 'issue_search_url': 'https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}', 'issues

In [14]:
pip install pydantic




In [34]:
from pydantic import BaseModel

class Book(BaseModel):
    title: str
    price: float = 0.0

b1 = Book(title="Python Essentials", price=199.99)
print(b1)

# invalid data
try:
    b2 = Book(title="AI", price="cheap")
except Exception as err:
    print("Error:", err)




title='Python Essentials' price=199.99
Error: 1 validation error for Book
price
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='cheap', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/float_parsing


In [24]:
import asyncio

# Define async task
async def greet(name, delay):
    print(f" Waiting {delay}s for {name}...")
    await asyncio.sleep(delay)
    print(f" Hello, {name}!")

# Define main async function
async def main():
    await asyncio.gather(
        greet("Ashwin", 1),
        greet("Singh", 2),
        greet("Google", 3)
    )

# Run the async tasks (Colab-compatible)
await main()


 Waiting 1s for Ashwin...
 Waiting 2s for Singh...
 Waiting 3s for Google...
 Hello, Ashwin!
 Hello, Singh!
 Hello, Google!
