In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
## Linear Regression

In [4]:
from sklearn.datasets import fetch_california_housing

In [5]:
data = fetch_california_housing()

In [6]:
X, y = data.data, data.target



In [7]:
data.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [8]:
X.shape, y.shape

((20640, 8), (20640,))

In [9]:
X[0]

array([   8.3252    ,   41.        ,    6.98412698,    1.02380952,
        322.        ,    2.55555556,   37.88      , -122.23      ])

In [10]:
X = (X - X.mean(axis=0)) / X.std(axis=0)


In [11]:
y = y.reshape(-1,1)
y.shape

(20640, 1)

In [12]:
weights = np.random.randn(X.shape[1], 1) * 0.01
bias = np.zeros((1,1))




In [13]:
weights, bias

(array([[ 0.00624857],
        [ 0.00461895],
        [ 0.01858074],
        [-0.00168651],
        [ 0.00860636],
        [ 0.01038476],
        [-0.01024801],
        [-0.00038599]]),
 array([[0.]]))

In [14]:
m = X.shape[0]
learning_rate = 0.001

In [15]:
m, X.shape, weights.shape, bias.shape

(20640, (20640, 8), (8, 1), (1, 1))

In [16]:
for i in range(1000):
    y_pred = np.dot(X , weights) + bias
    loss = np.mean((y_pred - y)**2)

    if i % 100 == 0:
        print(loss)

    residual = y_pred - y
    
    dw = (1/m) *  np.dot(X.T, residual)
    db = np.sum(residual) / m   # FIXED
    
    weights -= learning_rate * dw
    bias -= learning_rate * db


5.590985746380969
4.691300657474584
3.95576373391659
3.354173533773975
2.8619414715895686
2.459032208903183
2.1291090629233493
1.8588432644327102
1.6373546065353484
1.4557577667771215


In [17]:
## Logistic Regression

In [18]:
from sklearn.datasets import load_breast_cancer

In [19]:
data = load_breast_cancer()

In [20]:
X, y= data.data, data.target

In [21]:
data.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [22]:
X.shape, y.shape

((569, 30), (569,))

In [23]:
X = (X - X.mean(axis = 0)) / X.std(axis =0) 

In [24]:
y = y.reshape(-1,1)

In [25]:
X[0], y[0]

(array([ 1.09706398, -2.07333501,  1.26993369,  0.9843749 ,  1.56846633,
         3.28351467,  2.65287398,  2.53247522,  2.21751501,  2.25574689,
         2.48973393, -0.56526506,  2.83303087,  2.48757756, -0.21400165,
         1.31686157,  0.72402616,  0.66081994,  1.14875667,  0.90708308,
         1.88668963, -1.35929347,  2.30360062,  2.00123749,  1.30768627,
         2.61666502,  2.10952635,  2.29607613,  2.75062224,  1.93701461]),
 array([0]))

In [26]:
weights = np.random.randn(X.shape[1],1) * 0.01
bias = np.zeros((1,1))


In [27]:
weights.shape, X.shape, bias.shape, y.shape

((30, 1), (569, 30), (1, 1), (569, 1))

In [28]:
m = X.shape[0]
learning_rate = 0.001

In [29]:
def binary_loss(y_pred, y):
    loss =  - y * np.log(y_pred) - (1-y) * np.log(1-y_pred)
    return np.mean(loss)

In [30]:
for i in range(1000):
    a = np.dot(X, weights) + bias
    y_pred = 1 / ( 1 + np.exp(-a))

    loss = binary_loss(y_pred, y)
    if i % 100 == 0:
        print("loss", loss)
        accuracy = (y_pred >= .5) == y
        print("accuracy", np.sum(accuracy)/m)
    
    
    error = y_pred - y

    dw = (1/m) * np.dot(X.T, error)
    db = (1/m) * np.sum(error)

    weights -= learning_rate * dw
    bias -= learning_rate * db
    



loss 0.7017529746007928
accuracy 0.29701230228471004
loss 0.5489705450817699
accuracy 0.929701230228471
loss 0.4608837957763806
accuracy 0.9349736379613357
loss 0.40388442107783795
accuracy 0.9367311072056239
loss 0.3637344122044971
accuracy 0.9402460456942003
loss 0.333718970191316
accuracy 0.9402460456942003
loss 0.31028701466753816
accuracy 0.9420035149384886
loss 0.29138654030146244
accuracy 0.9437609841827768
loss 0.2757483157231162
accuracy 0.9472759226713533
loss 0.26254414364336714
accuracy 0.9507908611599297


In [31]:
## Neural Network

In [32]:
from sklearn.datasets import fetch_california_housing

In [33]:
data = fetch_california_housing()

In [34]:
X, y = data.data, data.target

In [35]:
X = (X - X.mean(axis=0))/ X.std(axis=0)
y = y.reshape(-1,1)

In [36]:
X.shape, y.shape

((20640, 8), (20640, 1))

In [37]:
X[0], y[0]

(array([ 2.34476576,  0.98214266,  0.62855945, -0.15375759, -0.9744286 ,
        -0.04959654,  1.05254828, -1.32783522]),
 array([4.526]))

In [38]:
m = X.shape[0]
learning_rate = 0.001


In [112]:
prev = X.shape[1]

layers = [12,8,4,1]
weights = {}
bias = {}

for i in range(len(layers)):
    weights[f"W{i}"] = np.random.randn(layers[i], prev)
    bias[f"b{i}"] = np.zeros((layers[i], 1))
    prev = layers[i]
    

In [113]:
weights, bias

({'W0': array([[ 3.11831971, -0.6872158 ,  1.34358889, -0.97405175, -2.79606643,
          -0.53458966,  0.32873042, -0.0308331 ],
         [ 2.09228515, -0.58423135, -3.54251035, -0.96537798, -2.43242561,
           0.39983572, -0.29087657,  0.83912278],
         [ 0.25906262,  1.03533333, -1.18466681,  0.43616851,  0.88009366,
          -2.3724459 , -0.24306021,  0.59118237],
         [ 1.53861388, -0.31074631,  0.68048811, -0.52936416, -0.74551814,
           1.46277657,  1.33991791, -0.10841388],
         [ 0.21843953, -0.30272251,  0.75045008,  0.17495442, -0.33806442,
          -1.29877635, -0.31687994, -2.05784913],
         [ 1.80765351, -2.51661465,  0.85334123,  0.92841523,  2.16249836,
          -0.84373844, -0.67131784, -0.71227349],
         [-0.0233813 ,  1.19492663,  1.973216  , -0.7231188 , -0.3395483 ,
           0.48577486, -0.10932921,  0.40418105],
         [ 0.92407899,  0.26461296,  1.20279506,  1.14331123,  0.02984194,
           0.0112738 , -1.29715938, -1.17472

In [114]:
def relu(x):
    return np.maximum(0, x)


def relu_derivative(x):
    return (x > 0).astype(float)

def sigmoid(x):
    
    return 1 / (1 + np.exp(-x))

In [115]:
def forward(X):
    m = X.shape[1]
    cache = {}
    cache[f"A{-1}"] = X
    A = X

    for i in range(len(layers)-1):
        Z = np.dot(weights[f"W{i}"], A) + bias[f"b{i}"]
        A = relu(Z)

        
        cache[f"Z{i}"] = Z
        cache[f"A{i}"] = A

    l = len(layers) - 1

    Z = np.dot( weights[f"W{l}"], A) + bias[f"b{l}"]
    A = sigmoid(Z)
    cache[f"Z{l}"] = Z
    cache[f"A{l}"] = A
    return A, cache
    

In [116]:
def backward(cache, y):
    grad = {}
    m = y.shape[1]
    l = len(layers) - 1
    dZ = cache[f"A{l}"] - y
    grad[f"dW{l}"] = (1/m) * dZ.dot(cache[f"A{l-1}"].T)
    grad[f"db{l}"] = (1/m) * np.sum(dZ, axis =1 , keepdims = True)
    dA = np.dot(weights[f"W{l}"].T, dZ)

    for i in reversed(range(l)):
        dZ = dA * relu_derivative(cache[f"Z{i}"])
        grad[f"dW{i}"] = (1/m) * dZ.dot(cache[f"A{i-1}"].T)
        grad[f"db{i}"] = (1/m) * np.sum(dZ, axis = 1, keepdims = True)
        dA = np.dot(weights[f"W{i}"].T, dZ)

    return grad

In [117]:
def updated_weights(grad):
    learning_rate = 0.001
    for i in range(len(layers)):
        weights[f"W{i}"] -= learning_rate * grad[f"dW{i}"]
        
        bias[f"b{i}"] -= learning_rate * grad[f"db{i}"]
    

In [118]:
a, cache = forward(X.T)

In [119]:
grads = backward(cache, y.T)

In [120]:
def binary_loss1(z, y):
    z = np.clip(z, 1e-12, 1- 1e-12)
    return binary_loss(z, y)

In [121]:
for i in range(10):
    y_pred, cache = forward(X.T)
    loss = binary_loss1(y_pred.T, y)
    print(loss)
    grads = backward(cache, y.T)
    updated_weights(grads)
    

-9.468384086545871
-10.28343947690716
-11.0500125427598
-11.764267758031476
-12.438238920557076
-13.082570201573963
-13.693959881233875
-14.274542950944943
-14.83227133881689
-15.371034097691746


In [148]:
class BatchNormalization:
    def __init__(self, num_features):
        self.gamma = np.random.randn(num_features)
        self.beta = np.zeros(num_features)
        self.momentum= 0.9
        self.epsilon = 1e-12
        self.moving_mean = np.zeros(num_features)
        self.moving_var = np.ones(num_features)

    def forward(self, x, train= True):
        if train:
            x_mean = x.mean(axis = 0)
            x_var = x.var(axis = 0)

            x_scaled = (x - x_mean) / np.sqrt(x_var + self.epsilon)
            x = self.gamma * x_scaled + self.beta
            self.moving_mean = self.moving_mean * self.momentum + (1 - self.momentum) * x_mean
            self.movinb_var = self.moving_var * self.momentum + (1 - self.momentum) * x_var
            return x
        else:
            x_scaled = (x - self.moving_mean) / np.sqrt(self.moving_var + self.epsilon)
            x = self.gamma * x_scaled + self.beta
            return x
    
        

In [149]:
class LayerNorm:
    def __init__(self, num_layers):
        self.gamma = np.random.randn(num_layers)
        self.beta = np.zeros(num_layers)
        self.epsilon = 1e-12

    def forward(self, x):
        x_mean = np.mean(x, axis = 0, keepdims= True)
        x_var = np.var(x, axis = 0, keepdims = True)

        x_scaled = (x - x_mean)/ np.sqrt(x_var + self.epsilon)

        x = self.gamma * x_scaled + self.beta
        return x

In [150]:
x = np.random.randint(0, 10, size = (10,5))

In [151]:
x

array([[3, 7, 4, 4, 4],
       [8, 8, 7, 2, 9],
       [0, 1, 7, 7, 9],
       [1, 1, 5, 0, 5],
       [1, 3, 6, 4, 4],
       [6, 5, 9, 8, 8],
       [6, 0, 8, 4, 6],
       [7, 7, 5, 6, 0],
       [6, 0, 0, 4, 9],
       [5, 0, 2, 0, 6]], dtype=int32)

In [152]:
bnorm = BatchNormalization(5)

In [153]:
bnorm.forward(x)

array([[-0.43974236,  1.73412618,  0.19932357,  0.01643176,  0.01221521],
       [ 1.25157442,  2.19047517, -0.2606539 , -0.31220353, -0.01832282],
       [-1.45453244, -1.00396779, -0.2606539 ,  0.50938471, -0.01832282],
       [-1.11626908, -1.00396779,  0.04599775, -0.64083882,  0.00610761],
       [-1.11626908, -0.0912698 , -0.10732808,  0.01643176,  0.01221521],
       [ 0.57504771,  0.82142819, -0.56730555,  0.67370235, -0.01221521],
       [ 0.57504771, -1.46031678, -0.41397972,  0.01643176,  0.        ],
       [ 0.91331107,  1.73412618,  0.04599775,  0.34506706,  0.03664564],
       [ 0.57504771, -1.46031678,  0.81262686,  0.01643176, -0.01832282],
       [ 0.23678435, -1.46031678,  0.50597522, -0.64083882,  0.        ]])

In [154]:
lnorm = LayerNorm(5)

In [155]:
a = lnorm.forward(x)

In [156]:
a

array([[ 0.84195611, -1.86715284,  0.1567224 ,  0.02707515, -0.87292167],
       [-2.39633661, -2.35850885, -0.20494468, -0.51442783,  1.3093825 ],
       [ 2.78493173,  1.08098322, -0.20494468,  0.83932961,  1.3093825 ],
       [ 2.13727319,  1.08098322,  0.03616671, -1.0559308 , -0.43646083],
       [ 2.13727319,  0.0982712 , -0.08438899,  0.02707515, -0.87292167],
       [-1.10101952, -0.88444082, -0.44605607,  1.1100811 ,  0.87292167],
       [-1.10101952,  1.57233923, -0.32550038,  0.02707515,  0.        ],
       [-1.74867807, -1.86715284,  0.03616671,  0.56857813, -2.618765  ],
       [-1.10101952,  1.57233923,  0.63894518,  0.02707515,  1.3093825 ],
       [-0.45336098,  1.57233923,  0.39783379, -1.0559308 ,  0.        ]])

In [178]:
class Adam:
    def __init__(self, parameters, learning_rate = 0.001, beta1 = 0.9, beta2 = 0.99):
        self.params = parameters
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.m = [np.zeros_like(m) for m in parameters]
        self.v = [np.zeros_like(m) for m in parameters]
        self.t = 1
        self.epsilon = 1e-12

    def step(self, grads):

        for idx, grad in enumerate(grads):
            m = self.beta1 * self.m[idx] + (1- self.beta1) * grad
            v = self.beta2 * self.v[idx] + (1 - self.beta2) * grad ** 2
            m = m / (1 - self.beta1 ** self.t)
            v = v / (1 - self.beta2 ** self.t)

            self.m[idx] = m
            self.v[idx] = v
            self.params[idx] -= learning_rate * (m / np.sqrt(v + self.epsilon))

        self.t +=1
            

In [192]:
params = [np.random.randn(m) for m in [12,2,4]]

In [193]:
params_c = params.copy()

In [194]:
grads = [m * np.random.randn(1) * 0.01 for m in prams]

In [195]:
optimizer = Adam(prams)

In [196]:
optimizer.step(grads)

In [197]:
optimizer.params

[array([-0.44002423, -1.73868727,  0.43077806, -0.77118064,  1.10888205,
        -0.17105573, -0.52767842, -1.48297304, -0.77391791,  0.46315121,
        -0.09043118,  0.02298037]),
 array([ 0.4310773 , -1.31845386]),
 array([-1.0001559 , -1.3860749 , -1.01459203,  0.26044208])]

In [200]:
params_c

[array([ 0.11888624,  0.93871594,  1.2341828 ,  1.36895591, -0.07017109,
        -0.3781867 ,  0.15237563,  1.07547484,  0.68693854,  0.36210822,
        -0.68858785,  0.75941918]),
 array([-0.35653899,  0.44489863]),
 array([ 1.74649008, -0.42283103,  0.5197996 ,  0.70050476])]

In [204]:
# from langgraph.graph import StateGraph, MessagesState, START, END

# def mock_llm(state: MessagesState):
#     return {"messages": [{"role": "ai", "content": "hello world"}]}

# graph = StateGraph(MessagesState)
# graph.add_node(mock_llm)
# graph.add_edge(START, "mock_llm")
# graph.add_edge("mock_llm", END)
# graph = graph.compile()

# graph.invoke({"messages": [{"role": "user", "content": "hi!"}]})

In [212]:
import torch
from sklearn.datasets import fetch_california_housing
from torch import nn

In [207]:
data = fetch_california_housing()
X, y = data.data, data.target


In [208]:
X.shape, y.shape

((20640, 8), (20640,))

In [209]:
from sklearn.model_selection import train_test_split

In [210]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [211]:
X_train  = torch.tensor(X_train, dtype = torch.float32)
X_test  = torch.tensor(X_test, dtype = torch.float32)
y_train  = torch.tensor(y_train, dtype = torch.float32)
y_test  = torch.tensor(y_test, dtype = torch.float32)

In [227]:
class RegressionModel(nn.Module):
    def __init__(self, in_features, out_features, hidden_dim = 32):
        super().__init__()
        self.layers1 = nn.Linear(in_features,hidden_dim)
        self.layers2 = nn.Linear(hidden_dim, hidden_dim//2)
        self.layers3 = nn.Linear(hidden_dim//2, out_features)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.layers1(x)
        x = self.layers2(self.relu(x))
        x = self.layers3(self.relu(x))
        return x

In [228]:
model = RegressionModel(X.shape[1],1)

In [229]:
loss = nn.L1Loss()

In [233]:
loss_fn = nn.MSELoss()

In [232]:
optimizer = torch.optim.Adam(params =model.parameters(),lr = 0.001)

In [235]:
for i in range(10):
    model.train()
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    print(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    

13.607321739196777
16.382476806640625
30.710865020751953
43.13301467895508
44.281005859375
34.7808723449707
20.90658187866211
10.04626750946045
6.984131336212158
11.414653778076172


In [236]:
from sklearn.datasets import load_breast_cancer
import torch


In [263]:
data = load_breast_cancer()
X, y = data.data, data.target
y = y.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size = 0.2, random_state = 42)


In [264]:
X_train = torch.tensor(X_train, dtype = torch.float32)
X_test = torch.tensor(X_test, dtype = torch.float32)
y_train = torch.tensor(y_train, dtype = torch.float32)
y_test = torch.tensor(y_test, dtype = torch.float32)


In [265]:
class BinaryModel(nn.Module):
    def __init__(self, in_features, out_features, hidden_dim=32):
        super().__init__()
        self.layers1 = nn.Linear(in_features = in_features, out_features = hidden_dim)
        self.layers2 = nn.Linear(in_features = hidden_dim, out_features = hidden_dim // 2)
        self.layers3 = nn.Linear(hidden_dim // 2, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.layers1(x)
        x = self.layers2(self.relu(x))
        x = self.layers3(self.relu(x))

        return x
        
        

In [266]:
model = BinaryModel(X.shape[1], 1)

In [267]:
loss_fn = nn.BCEWithLogitsLoss()

In [268]:
optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.001)

In [272]:
for i in range(10):
    model.train()

    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
model.eval()
with torch.inference_mode():
    y_pred = model(X_test) 
    print(loss_fn(y_pred, y_test))

tensor(0.3977)


In [274]:
with torch.inference_mode():
    y_pred = model(X_test) 
    print(torch.sigmoid(y_pred[0]))

tensor([0.4676])


In [317]:
from sklearn.datasets import load_iris
data = load_iris()
X, y = data.data, data.target
y = y.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = .2)


In [318]:
X_train = torch.tensor(X_train, dtype = torch.float32)
X_test = torch.tensor(X_test, dtype = torch.float32)
y_train = torch.tensor(y_train, dtype = torch.long)
y_test = torch.tensor(y_test, dtype = torch.long)


In [319]:
class ClfModel(nn.Module):
    def __init__(self, in_features, out_features, hidden_dim=32):
        super().__init__()
        self.layers1 = nn.Linear(in_features = in_features, out_features = hidden_dim)
        self.layers2 = nn.Linear(in_features = hidden_dim, out_features = hidden_dim // 2)
        self.layers3 = nn.Linear(hidden_dim // 2, out_features)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.layers1(x)
        x = self.layers2(self.relu(x))
        x = self.layers3(self.relu(x))

        return x
        
        

In [320]:
model = ClfModel(X.shape[1], 3)

In [321]:
loss_fn = nn.CrossEntropyLoss()

In [322]:
optimizer = torch.optim.Adam(params = model.parameters(), lr= 0.001)

In [327]:
for i in range(100):
    model.train()
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train.squeeze(1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(loss)

tensor(0.9242, grad_fn=<NllLossBackward0>)
tensor(0.9177, grad_fn=<NllLossBackward0>)
tensor(0.9111, grad_fn=<NllLossBackward0>)
tensor(0.9044, grad_fn=<NllLossBackward0>)
tensor(0.8977, grad_fn=<NllLossBackward0>)
tensor(0.8909, grad_fn=<NllLossBackward0>)
tensor(0.8841, grad_fn=<NllLossBackward0>)
tensor(0.8772, grad_fn=<NllLossBackward0>)
tensor(0.8702, grad_fn=<NllLossBackward0>)
tensor(0.8632, grad_fn=<NllLossBackward0>)
tensor(0.8561, grad_fn=<NllLossBackward0>)
tensor(0.8489, grad_fn=<NllLossBackward0>)
tensor(0.8417, grad_fn=<NllLossBackward0>)
tensor(0.8344, grad_fn=<NllLossBackward0>)
tensor(0.8271, grad_fn=<NllLossBackward0>)
tensor(0.8196, grad_fn=<NllLossBackward0>)
tensor(0.8121, grad_fn=<NllLossBackward0>)
tensor(0.8046, grad_fn=<NllLossBackward0>)
tensor(0.7968, grad_fn=<NllLossBackward0>)
tensor(0.7891, grad_fn=<NllLossBackward0>)
tensor(0.7814, grad_fn=<NllLossBackward0>)
tensor(0.7739, grad_fn=<NllLossBackward0>)
tensor(0.7667, grad_fn=<NllLossBackward0>)
tensor(0.75

In [330]:
from torchvision import datasets
from torchvision.transforms import ToTensor

In [352]:
from torch.utils.data import DataLoader

In [334]:
train_data = datasets.FashionMNIST(
    root = "data",
    train = True,
    download = True,
    transform = ToTensor()
)

100%|█████████████████████████████████████████████████████████████████████████████| 26.4M/26.4M [00:20<00:00, 1.26MB/s]
100%|█████████████████████████████████████████████████████████████████████████████| 29.5k/29.5k [00:00<00:00, 98.4kB/s]
100%|██████████████████████████████████████████████████████████████████████████████| 4.42M/4.42M [00:05<00:00, 786kB/s]
100%|█████████████████████████████████████████████████████████████████████████████| 5.15k/5.15k [00:00<00:00, 7.92MB/s]


In [335]:
test_data = datasets.FashionMNIST(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor()
)

In [338]:
train_data, test_data

(Dataset FashionMNIST
     Number of datapoints: 60000
     Root location: data
     Split: Train
     StandardTransform
 Transform: ToTensor(),
 Dataset FashionMNIST
     Number of datapoints: 10000
     Root location: data
     Split: Test
     StandardTransform
 Transform: ToTensor())

In [344]:
train_data[0][0].shape

torch.Size([1, 28, 28])

In [361]:
class CNNModel(nn.Module):
    def __init__(self, in_channels, out_dim):
        super().__init__()
        self.layer1 = nn.Conv2d(in_channels, out_channels = 64, kernel_size = (3,3), stride = 1)
        
        self.layer2 = nn.Conv2d(64, out_channels = 32, kernel_size = (3,3), stride = 1)
        
        self.layer3 = nn.Conv2d(32, out_channels = 16, kernel_size = (3,3), stride = 1)
        self.max_pool = nn.MaxPool2d(kernel_size = (2,2))
        self.flatten = nn.Flatten()
        self.fc = nn.Linear(16 * 11 * 11, out_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(self.relu(x))
        x = self.layer3(self.relu(x))
        x = self.max_pool(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x

In [362]:
model = CNNModel(1,10)

In [363]:
optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.001)

In [364]:
criterion = nn.CrossEntropyLoss()

In [365]:
train_dataloader = DataLoader(train_data, batch_size = 32, shuffle = True)
test_dataloader = DataLoader(test_data, batch_size = 32, shuffle = True)

In [None]:
for i in range(1):
    model.train()
    for x,y in train_dataloader:
        y_pred = model(x)
        loss = criterion(y_pred, y)
        print(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
        

In [367]:
from torch.utils.data import DataLoader, Dataset

In [368]:
# Text data (features)
texts = [
    "I love this product",
    "This is the worst experience ever",
    "Absolutely fantastic service",
    "I am very disappointed",
    "Great quality and fast delivery",
    "Not worth the money",
    "I am happy with the purchase",
    "Terrible customer support"
]

# Binary labels
# 1 = Positive
# 0 = Negative
labels = [
    1,  # I love this product
    0,  # This is the worst experience ever
    1,  # Absolutely fantastic service
    0,  # I am very disappointed
    1,  # Great quality and fast delivery
    0,  # Not worth the money
    1,  # I am happy with the purchase
    0   # Terrible customer support
]


In [369]:
from collections import Counter

In [385]:
class TextData(Dataset):

    def __init__(self, data, labels, vocab_size = 50, sequence_length = 7):
        self.data = data
        self.labels = labels
        self.vocab_size = vocab_size
        self.sequence_length = sequence_length
        self.vacabulary = self.get_vacabulary(vocab_size)

    def get_vacabulary(self, vocab_size):
        texts = " ".join(self.data)
        counters = Counter(texts.split(" "))
        most_freq = counters.most_common(vocab_size)
        vocab = {k[0]: i + 1 for i, k in enumerate(most_freq)}
        vocab["UNK"] = 0
        return vocab

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        text = self.data[idx].split(" ")
        label = self.labels[idx]
        text = [self.vacabulary.get(t) for t in text]
        if len(text) < 7:
            text += [0] * (7 - len(text))
        else:
            text = text[:7]

        return torch.tensor(text), torch.tensor(label)
        
        

In [386]:
data = TextData(texts, labels)

In [443]:
train_dataloader = DataLoader(data, batch_size = 32, shuffle = True)

In [387]:
data[0]

(tensor([1, 4, 5, 6, 0, 0, 0]), tensor(1))

In [444]:
class RNNModel(nn.Module):
    def __init__(self,vocab_size = 50, embedding_dim=28, hidden_dim=32, output_dim=2):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings = vocab_size , embedding_dim= embedding_dim)
        self.rnn = nn.RNN(input_size = embedding_dim, hidden_size = hidden_dim, num_layers = 2,batch_first = True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.embedding(x)
        x_out, hidden = self.rnn(x)
        x = self.fc(hidden[-1])

        return x


        

In [445]:
model = RNNModel()

In [446]:
criterion = nn.CrossEntropyLoss()

In [447]:
optimizer = torch.optim.Adam(params = model.parameters(), lr = 0.001)

In [None]:
for i in range(100):
    model.train()

    for x, y in train_dataloader:
        y_pred = model(x)
        loss = criterion(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(loss.item())
    

In [25]:
s = "python.py"
print(s.startswith("py"))  # True
print(s.endswith(".py"))   # True


s = "  Python  "
print(s.strip())   # Python
print(s.lstrip())  # Python  
print(s.rstrip())  #   Python

s = "apple,banana,orange"
print(s.split(","))    # ['apple', 'banana', 'orange']

words = ["Hello", "World"]
print(" ".join(words)) # Hello World


s = "I like Java"
print(s.replace("Java", "Python"))  # I like Python


s = "banana"
print(s.find("a"))     # 1
print(s.rfind("a"))    # 5
print(s.count("a"))    # 3


s = "Python123"
print(s.isalpha())   # False
print(s.isdigit())   # False
print(s.isalnum())   # True
print(s.islower())   # False
print(s.isupper())   # False
print(s.isspace())   # False


s = "Python"
print(s.upper())     # PYTHON
print(s.lower())     # python
print(s.title())     # Python
print(s.capitalize())# Python
print(s.swapcase())  # pYTHON



True
True
Python
Python  
  Python
['apple', 'banana', 'orange']
Hello World
I like Python
1
5
3
False
False
True
False
False
False
PYTHON
python
Python
Python
pYTHON


In [420]:
!pip install langchain_huggingface

In [19]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="deepseek-ai/DeepSeek-R1-0528",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
    provider="auto",  # let Hugging Face choose the best provider for you
)

chat_model = ChatHuggingFace(llm=llm)

In [20]:
from langchain.tools import tool

In [21]:
@tool
def retrieve_info(query: str):
    """Retrieve information from the vector database
    Args:
        Query(str): Qeury for which you want to retrive
    """
    return "NO infomration"

In [22]:
llm = chat_model.bind_tools([retrieve_info])

In [23]:
res = llm.invoke("give the info about the latest sales data")

In [24]:
res.tool_calls[0].get("args")

{'query': 'latest sales data'}

In [13]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader



# load one or more text files
loader = TextLoader("your_text.txt", encoding = "utf-8")
documents = loader.load()

# split larger texts into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)



embeddings = HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2")


faiss_db = FAISS.from_documents(docs, embeddings)


query = "What is the key idea?"
results = faiss_db.similarity_search(query, k=5)
for doc in results:
    print(doc.page_content)




The medieval period brought significant cultural transformations with the arrival of Islamic rulers, leading to the Delhi Sultanate and later the Mughal Empire. The Mughals left a lasting legacy in architecture, administration, and art, epitomized by monuments like the Taj Mahal. European trading companies, particularly the British East India Company, gradually gained power, eventually leading to nearly two centuries of British colonial rule. India’s struggle for independence, marked by movements led by figures like Mahatma Gandhi and Jawaharlal Nehru, culminated in freedom in 1947.

Today, India’s history is celebrated not only through its monuments and literature but also through its diverse cultural traditions, languages, and philosophies, reflecting a civilization that has continuously evolved while preserving its ancient heritage.

---

If you want, I can also write a **shorter, more concise version** suitable for exams or quick reading. Do you want me to do that?
Here’s a well-ro

In [17]:
from langchain_core.documents import Document

docs = [
    Document(page_content="Text of doc1", metadata={"source": "doc1.txt"}),
    Document(page_content="Text of doc2", metadata={"source": "doc2.txt"}),
]
faiss_db = FAISS.from_documents(docs, embeddings)
