In [3]:
# Q1. Implementing logistic regression using Python (both from scratch and with scikit-learn).

# Importing numpy library
import numpy as np

# Logistic Regression from scratch

class Logistic_Regression():
  """Binary logistic regression trained with batch gradient descent.

  Attributes created by ``fit``:
    X, Y: training features (m, n) and labels (m,) as float arrays.
    m, n: number of training rows and number of input features.
    w:    weight vector with one entry per feature, initialised to zeros.
    b:    scalar bias, initialised to 0.
  """

  # declaring learning rate & number of iterations (hyperparameters)
  def __init__(self, learning_rate, no_of_iterations):
    """Store the hyperparameters.

    Args:
      learning_rate: step size applied to each gradient update.
      no_of_iterations: number of gradient-descent updates run by ``fit``.
    """
    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations

  # fit function to train the model with dataset
  def fit(self, X, Y):
    """Train the weights and bias on ``X`` / ``Y``.

    Args:
      X: 2-D array-like of shape (m, n) holding the input features.
      Y: array-like with m binary labels (0 or 1); a column vector of
        shape (m, 1) is accepted and flattened.

    Returns:
      The fitted model itself, so calls can be chained.

    Raises:
      ValueError: if ``X`` is not 2-D or the number of labels differs from
        the number of rows in ``X``.
    """
    # Accept lists / DataFrames as well as arrays. Labels are flattened
    # because an (m, 1) Y would broadcast against the (m,) predictions into
    # an (m, m) matrix and silently corrupt the gradients.
    self.X = np.asarray(X, dtype=float)
    self.Y = np.asarray(Y, dtype=float).ravel()

    # number of data points (rows) --> m, number of features (columns) --> n
    self.m, self.n = self.X.shape
    if self.Y.shape[0] != self.m:
      raise ValueError(
          "X has {} rows but Y has {} labels".format(self.m, self.Y.shape[0]))

    # initiating weight & bias value
    self.w = np.zeros(self.n)
    self.b = 0.0

    # implementing Gradient Descent for Optimization
    for _ in range(self.no_of_iterations):
      self.update_weights()

    return self

  def update_weights(self):
    """Apply one gradient-descent step to ``w`` and ``b``."""
    # Y_cap: predicted probability for every training row
    Y_cap = self._sigmoid(self.X.dot(self.w) + self.b)
    error = Y_cap - self.Y

    # derivatives of the mean log-loss with respect to w and b
    dw = self.X.T.dot(error) / self.m
    db = np.sum(error) / self.m

    # updating the weights & bias using gradient descent
    self.w = self.w - self.learning_rate * dw
    self.b = self.b - self.learning_rate * db

  @staticmethod
  def _sigmoid(z):
    """Return 1 / (1 + exp(-z)) element-wise without overflowing."""
    # exp(-log(1 + exp(-z))) equals the sigmoid, but logaddexp stays finite
    # where a direct np.exp(-z) overflows for strongly negative z.
    return np.exp(-np.logaddexp(0.0, -z))

  # Sigmoid Equation & Decision Boundary
  def predict(self, X):
    """Predict class labels for ``X``.

    Args:
      X: 2-D array-like of shape (rows, n) with the same features as training.

    Returns:
      Integer array with 1 where the predicted probability exceeds 0.5 and
      0 otherwise.
    """
    probabilities = self._sigmoid(np.asarray(X, dtype=float).dot(self.w) + self.b)
    return np.where(probabilities > 0.5, 1, 0)


# Logistic Regression using sklearn
#
# These statements must sit at column 0: when indented under the class they
# are parsed as part of the preceding method body, after its `return`, and
# therefore never execute.

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler


# Load the dataset
data = pd.read_csv("Social_Network_Ads.csv")

# Select features and target variable
X = data.iloc[:, [2, 3]].values  # Assuming columns 2 and 3 are the relevant features
y = data.iloc[:, 4].values  # Assuming column 4 is the target variable

# Split the dataset into training and testing sets (70:30 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=34)

# Baseline: logistic regression on the raw (unscaled) features
model = LogisticRegression(random_state=34)
model.fit(X_train, y_train)

# Make predictions
y_pred_sklearn = model.predict(X_test)

# Evaluate accuracy
accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)

print("Accuracy (Logistic Regression using scikit-learn): {:.2f}%".format(accuracy_sklearn * 100))

# Normalize the data (each scaler is fitted on the training split only and
# then applied to the test split, so no test information leaks into training)
minmax_scaler = MinMaxScaler()
X_train_normalized = minmax_scaler.fit_transform(X_train)
X_test_normalized = minmax_scaler.transform(X_test)

# Standardize the data (separate scaler object, so the normalizer above stays
# available instead of being overwritten)
standard_scaler = StandardScaler()
X_train_standardized = standard_scaler.fit_transform(X_train)
X_test_standardized = standard_scaler.transform(X_test)

# Logistic Regression using scikit-learn with normalized data
model_normalized = LogisticRegression(random_state=34)
model_normalized.fit(X_train_normalized, y_train)

# Logistic Regression using scikit-learn with standardized data
model_standardized = LogisticRegression(random_state=34)
model_standardized.fit(X_train_standardized, y_train)

# Make predictions
y_pred_normalized = model_normalized.predict(X_test_normalized)
y_pred_standardized = model_standardized.predict(X_test_standardized)

# Evaluate accuracy
accuracy_normalized = accuracy_score(y_test, y_pred_normalized)
accuracy_standardized = accuracy_score(y_test, y_pred_standardized)

print("Accuracy (Logistic Regression with Normalized Data): {:.2f}%".format(accuracy_normalized * 100))
print("Accuracy (Logistic Regression with Standardized Data): {:.2f}%".format(accuracy_standardized * 100))



In [None]:
# Q2. Implementing k-NN using Python (both from scratch and with scikit-learn)

# Implementing k-NN from scratch
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
# datasets and train_test_split are used just below, so they are imported
# here; relying on imports that appear later in the cell raises NameError
# on a fresh kernel.
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load Iris dataset
iris = datasets.load_iris()
X, y = iris.data, iris.target


# Plot the dataset (first two feature columns, coloured by class label)
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', edgecolor='k')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.title('Iris Dataset')
plt.show()

# Split the dataset into train and test sets (70:30 ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between points ``x1`` and ``x2``.

    The difference ``x1 - x2`` is squared element-wise, summed over all
    elements, and the square root of that total is returned.
    """
    delta = x1 - x2
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)

class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote)."""

    def __init__(self, k=3):
        """Create the classifier.

        Args:
            k: number of nearest training points that vote on each prediction.

        Raises:
            ValueError: if ``k`` is smaller than 1 (an empty vote has no winner).
        """
        if k < 1:
            raise ValueError("k must be a positive integer, got {!r}".format(k))
        self.k = k

    def fit(self, X, y):
        """Memorise the training data.

        Args:
            X: array-like of training points, one row per sample.
            y: array-like with one label per row of ``X``.

        Raises:
            ValueError: if ``X`` and ``y`` contain a different number of samples.
        """
        # Convert to arrays so rows and labels are always addressed by
        # position: iterating a DataFrame yields column names, and indexing a
        # shuffled Series with an integer looks up by index label instead.
        self.X_train = np.asarray(X, dtype=float)
        self.y_train = np.asarray(y)
        if self.X_train.shape[0] != self.y_train.shape[0]:
            raise ValueError(
                "X has {} samples but y has {} labels".format(
                    self.X_train.shape[0], self.y_train.shape[0]))

    def predict(self, X):
        """Return a list with the predicted label for every row of ``X``."""
        return [self._predict(x) for x in np.asarray(X, dtype=float)]

    def _predict(self, x):
        """Predict the label of a single point ``x`` by majority vote."""
        # compute the Euclidean distance to every training point at once;
        # flattening per sample keeps this valid for 1-D feature arrays too
        deltas = (self.X_train - x).reshape(self.X_train.shape[0], -1)
        distances = np.sqrt(np.sum(deltas ** 2, axis=1))

        # get the closest k (stable sort: equidistant points keep training order)
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = self.y_train[k_indices]

        # majority vote; on a tied count the label seen first (nearest) wins
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]


# Implement k-NN using sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from matplotlib.colors import ListedColormap
# The from-scratch KNN class defined earlier in this cell is used directly.
# The former `from KNN import KNN` was dropped: it fails unless a separate
# KNN module exists, and would replace the class above if one does.

k_values = range(1, 21)
# One accuracy per k for each implementation; both lists must be filled so
# they line up with k_values when plotted.
accuracy_scores_scratch = []
accuracy_scores_sklearn = []

for k in k_values:
    # from-scratch implementation
    knn_scratch = KNN(k=k)
    knn_scratch.fit(X_train, y_train)
    y_pred_scratch = knn_scratch.predict(X_test)
    accuracy_scores_scratch.append(accuracy_score(y_test, y_pred_scratch))

    # scikit-learn implementation
    knn_classifier = KNeighborsClassifier(n_neighbors=k)
    knn_classifier.fit(X_train, y_train)
    y_pred_sklearn = knn_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred_sklearn)
    accuracy_scores_sklearn.append(accuracy)

# Plot accuracy vs k value
plt.figure(figsize=(8, 6))
plt.plot(k_values, accuracy_scores_scratch, marker='o', label='k-NN (Scratch)')
plt.plot(k_values, accuracy_scores_sklearn, marker='o', label='k-NN (scikit-learn)')
plt.xlabel('k Value')
plt.ylabel('Accuracy')
plt.title('Accuracy vs k Value for k-NN')
plt.legend()
plt.show()


