# ML Lab Endsem

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Simple Linear Rgression 

In [2]:
x = np.array([1, 2, 3, 4, 5])  
y = np.array([5, 7, 9, 11, 13]) 

x_mean = np.mean(x)
y_mean = np.mean(y)
n = len(x)  

B1 = np.sum((x - x_mean) * (y - y_mean)) / np.sum((x - x_mean) ** 2)
B0 = y_mean - B1 * x_mean

y_pred_calc = B1 * x + B0

mse = np.mean((y-y_pred_calc)**2)
rmse = np.sqrt (mse)


print(f'Calculus Method\nB0 intercept : {B0} \nB1 slope : {B1}\nRMSE : {rmse}')
print(f'Predicted values : {y_pred_calc}')

Calculus Method
B0 intercept : 3.0 
B1 slope : 2.0
RMSE : 0.0
Predicted values : [ 5.  7.  9. 11. 13.]


### Multiple Linear Regression

In [3]:
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])  
y = np.array([5, 7, 9, 11, 13])  

X = np.c_[np.ones(X.shape[0]), X[:, 0]] 

# B = (X^T * X)^-1 * X^T * y  -->  Importanat
B = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)

B0 = B[0]  
B_rest = B[1:]  

y_pred_calc = X.dot(B)

rmse_calc = np.sqrt(((y - y_pred_calc) ** 2).mean())

print(f'B0 intercept : {B0}')

for i, coef in enumerate(B_rest):
    print(f'B{i+1} slope (for feature X{i+1}) : {coef}')

print(f'RMSE : {rmse_calc}')
print(f'Predicted values : {y_pred_calc}')

B0 intercept : 3.0000000000000084
B1 slope (for feature X1) : 2.0000000000000004
RMSE : 9.985590539369011e-15
Predicted values : [ 5.  7.  9. 11. 13.]


### Polynomial Regression

In [4]:
x = np.array([1, 2, 3, 4, 5])  
y = np.array([1, 4, 9, 16, 25])

degree = 2
X_poly = np.array([x**i for i in range(degree+1)]).T

B = np.linalg.inv(X_poly.T.dot(X_poly)).dot(X_poly.T).dot(y)

B0 = B[0]
B1 = B[1:]

y_pred = X_poly.dot(B)

mse = np.mean((y-y_pred)**2)
rmse = np.sqrt(mse)

print(f"Intercept (B0): {B0}")
print(f"Coefficients (B1, B2, ...): {B1}")
print(f"Predicted values: {y_pred}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")

Intercept (B0): -5.684341886080802e-14
Coefficients (B1, B2, ...): [7.01660952e-14 1.00000000e+00]
Predicted values: [ 1.  4.  9. 16. 25.]
MSE: 2.443427628592877e-27
RMSE: 4.943103912111172e-14


### Logistic Regression and Gradient Descent

In [5]:
X = np.array([1, 2, 3, 4, 5])  
y = np.array([0, 0, 0, 1, 1])

def sigmoid(z):
    return 1/(1+np.exp(-z))

B0 = 0
B1 = 0
learning_rate=0.01
threshold = 0.5
n = len(X)
epochs = 1000

for epoch in range(epochs):
    z = B0 + B1*X
    y_pred = sigmoid(z)

    gradient_b0 = (1/n) * np.sum(y-y_pred)
    gradient_b1 = (1/n) * np.sum((y-y_pred)*X)

    B0 -= learning_rate * gradient_b0
    B1 -= learning_rate * gradient_b1

y_pred = B0 + B1*x
y_pred_class = (y_pred  >= threshold).astype(int)

print(f'B0 intercept : {B0} \nB1 slope : {B1}')
print(f'Predicted probabilities: {y_pred}')
print(f'Predicted classes: {y_pred_class}')

B0 intercept : -3.6652656746306485 
B1 slope : -17.152090029546965
Predicted probabilities: [-20.8173557  -37.96944573 -55.12153576 -72.27362579 -89.42571582]
Predicted classes: [0 0 0 0 0]


### Cost Function

In [6]:
X = np.array([1, 2, 3, 4, 5])  
y = np.array([0, 0, 0, 1, 1])

def sigmoid(z):
    return 1/(1+np.exp(-z))

B0 = 0
B1 = 0
learning_rate=0.01
threshold = 0.5
n = len(X)
epochs = 1000

def cost_function(y,y_pred):
    return -(1/n) * np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

for epoch in range(epochs):
    z = B0 + B1*X
    y_pred = sigmoid(z)

    gradient_b0 = (1/n) * np.sum(y-y_pred)
    gradient_b1 = (1/n) * np.sum((y-y_pred)*X)

    B0 -= learning_rate * gradient_b0
    B1 -= learning_rate * gradient_b1

    cost = cost_function(y,y_pred)

print(f'Cost:{cost}')


Cost:32.30586832322698


### Naive Bayes

In [16]:
import numpy as np

X = np.array([[1, 20], [2, 21], [3, 22], [4, 23], [5, 24]])
y = np.array([0, 0, 1, 1, 1])

class0 = X[y == 0]
class1 = X[y == 1]

mean0, var0 = class0.mean(axis=0), class0.var(axis=0)
mean1, var1 = class1.mean(axis=0), class1.var(axis=0)

def gaussian(x, mean, var):
    return (1 / np.sqrt(2 * np.pi * var)) * np.exp(-((x - mean) ** 2) / (2 * var))

def predict(X):
    predictions = []
    for x in X:
        p0 = np.log(1/len(X)) + np.sum(np.log(gaussian(x, mean0, var0)))
        p1 = np.log(1/len(X)) + np.sum(np.log(gaussian(x, mean1, var1)))
        predictions.append(0 if p0 > p1 else 1)
    return np.array(predictions)

y_pred = predict(X)
print("Predicted classes:", y_pred)

Predicted classes: [0 0 1 1 1]


### SVM

In [19]:
X = np.array([[1, 2], [2, 3], [3, 4], [5, 6], [6, 7]])
y = np.array([-1, -1, 1, 1, 1])

def svm(X, y, learning_rate=0.01, epochs=1000, lambda_param=0.01):
    n, m = X.shape
    w = np.zeros(m)
    b = 0
    
    for epoch in range(epochs):
        for i, x in enumerate(X):
            condition = y[i] * (np.dot(w, x) - b) >= 1
            if condition:
                w -= learning_rate * (2 * lambda_param * w)
            else:
                w -= learning_rate * (2 * lambda_param * w - np.dot(x, y[i]))
                b -= learning_rate * y[i]
                
    return w, b

w, b = svm(X, y)
print("Weights:", w)
print("Bias:", b)

Weights: [ 1.89054606 -0.19521501]
Bias: 3.7199999999999647


### KNN 

In [17]:
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

def knn(X_train, y_train, X_test, k=3):
    y_pred = []
    for test_point in X_test:
        distances = [euclidean_distance(test_point, x) for x in X_train]
        k_neighbors = np.argsort(distances)[:k]
        k_labels = [y_train[i] for i in k_neighbors]
        y_pred.append(np.bincount(k_labels).argmax())
    return np.array(y_pred)

X_train = np.array([[1, 2], [2, 3], [3, 4], [6, 7], [7, 8]])
y_train = np.array([0, 0, 0, 1, 1])
X_test = np.array([[4, 5]])

print("Predicted class:", knn(X_train, y_train, X_test))

Predicted class: [0]


### K-Means

In [18]:
def kmeans(X, k=2, epochs=10):
    centroids = X[np.random.choice(range(X.shape[0]), k, replace=False)]
    
    for epoch in range(epochs):
        clusters = [[] for _ in range(k)]
        
        for x in X:
            distances = [np.linalg.norm(x - centroid) for centroid in centroids]
            cluster = np.argmin(distances)
            clusters[cluster].append(x)
        
        for i in range(k):
            centroids[i] = np.mean(clusters[i], axis=0) if clusters[i] else centroids[i]
    
    return centroids, clusters

X = np.array([[1, 2], [2, 3], [3, 4], [8, 9], [9, 10]])
centroids, clusters = kmeans(X)
print("Centroids:", centroids)
print("Clusters:", clusters)

Centroids: [[2 3]
 [8 9]]
Clusters: [[array([1, 2]), array([2, 3]), array([3, 4])], [array([8, 9]), array([ 9, 10])]]


### Decison Tree 

In [20]:
from collections import Counter
import numpy as np

def entropy(y):
    hist = np.bincount(y)
    ps = hist / len(y)
    return -np.sum([p * np.log2(p) for p in ps if p > 0])

def information_gain(X, y, feature):
    unique_vals = np.unique(X[:, feature])
    weighted_entropy = 0
    for val in unique_vals:
        subset_y = y[X[:, feature] == val]
        weighted_entropy += len(subset_y) / len(y) * entropy(subset_y)
    return entropy(y) - weighted_entropy

def best_feature_to_split(X, y):
    return np.argmax([information_gain(X, y, i) for i in range(X.shape[1])])

class Node:
    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        self.feature = feature
        self.threshold = threshold
        self.left = left
        self.right = right
        self.value = value

def build_tree(X, y):
    if len(set(y)) == 1:
        return Node(value=y[0])
    
    feature = best_feature_to_split(X, y)
    thresholds = np.unique(X[:, feature])
    
    if len(thresholds) == 1:
        return Node(value=Counter(y).most_common(1)[0][0])
    
    threshold = thresholds[len(thresholds) // 2]
    left_idx, right_idx = X[:, feature] <= threshold, X[:, feature] > threshold
    left, right = build_tree(X[left_idx], y[left_idx]), build_tree(X[right_idx], y[right_idx])
    
    return Node(feature, threshold, left, right)

def predict_tree(node, x):
    if node.value is not None:
        return node.value
    if x[node.feature] <= node.threshold:
        return predict_tree(node.left, x)
    return predict_tree(node.right, x)

# Example
X = np.array([[2, 3], [3, 2], [1, 1], [6, 5], [7, 8]])
y = np.array([0, 0, 0, 1, 1])

tree = build_tree(X, y)
predictions = [predict_tree(tree, x) for x in X]
print("Predictions:", predictions)

Predictions: [0, 0, 0, 1, 1]
