# **CLASSIFICATION**

In [1]:
import numpy as np
import pandas as pd

In [2]:
#For more information about the dataset visit: https://archive.ics.uci.edu/dataset/451/breast+cancer+coimbra
cancer_dataset = pd.read_csv('/content/dataR2.csv')

In [3]:
cancer_dataset.head()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,48,23.5,70,2.707,0.467409,8.8071,9.7024,7.99585,417.114,0
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,0
2,82,23.12467,91,4.498,1.009651,17.9393,22.43204,9.27715,554.697,0
3,68,21.367521,77,3.226,0.612725,9.8827,7.16956,12.766,928.22,0
4,86,21.111111,92,3.549,0.805386,6.6994,4.81924,10.57635,773.92,0


In [4]:
cancer_dataset.tail()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
111,45,26.85,92,3.33,0.755688,54.68,12.1,10.96,268.23,1
112,62,26.84,100,4.53,1.1174,12.45,21.42,7.32,330.16,1
113,65,32.05,97,5.73,1.370998,61.48,22.54,10.33,314.05,1
114,72,25.59,82,2.82,0.570392,24.96,33.75,3.27,392.46,1
115,86,27.18,138,19.91,6.777364,90.28,14.11,4.35,90.09,1


Logistic Regression

In [7]:
#Write Code for Logistic Regression here

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Logistic regression model
class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations

    def fit(self, X, y):
        # Initialize parameters
        self.m, self.n = X.shape
        self.X = X
        self.y = y
        self.theta = np.zeros(self.n)
        self.bias = 0

        # Gradient descent
        for _ in range(self.num_iterations):
            self.update_weights()

    def update_weights(self):
        # Compute linear model
        linear_model = np.dot(self.X, self.theta) + self.bias
        # Compute prediction using sigmoid
        y_pred = sigmoid(linear_model)

        # Compute gradients
        d_theta = (1 / self.m) * np.dot(self.X.T, (y_pred - self.y))
        d_bias = (1 / self.m) * np.sum(y_pred - self.y)

        # Update parameters
        self.theta -= self.learning_rate * d_theta
        self.bias -= self.learning_rate * d_bias

    def predict(self, X):
        linear_model = np.dot(X, self.theta) + self.bias
        y_pred = sigmoid(linear_model)
        return np.where(y_pred > 0.5, 1, 0)

X = cancer_dataset.iloc[:, :-1].values
y = cancer_dataset.iloc[:, -1].values

# Split the data into training and testing sets
def train_test_split(X, y, test_size=0.3, random_state=42):
    np.random.seed(random_state)
    indices = np.random.permutation(X.shape[0])
    test_size = int(X.shape[0] * test_size)
    test_indices = indices[:test_size]
    train_indices = indices[test_size:]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the logistic regression model
log_regr = LogisticRegression(learning_rate=0.01, num_iterations=1000)
log_regr.fit(X_train, y_train)

# Make predictions on the test set
y_pred = log_regr.predict(X_test)

# Evaluate the model
def confusion_matrix(y_true, y_pred):
    TP = np.sum((y_true == 1) & (y_pred == 1))
    TN = np.sum((y_true == 0) & (y_pred == 0))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))
    return np.array([[TP, FP], [FN, TN]])

def classification_report(y_true, y_pred):
    TP = np.sum((y_true == 1) & (y_pred == 1))
    TN = np.sum((y_true == 0) & (y_pred == 0))
    FP = np.sum((y_true == 0) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == 0))
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1_score = 2 * (precision * recall) / (precision + recall)
    return f"Precision: {precision:.2f}\nRecall: {recall:.2f}\nF1 Score: {f1_score:.2f}"

def accuracy_score(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nAccuracy Score:")
print(accuracy_score(y_test, y_pred))


Confusion Matrix:
[[ 1  0]
 [16 17]]

Classification Report:
Precision: 1.00
Recall: 0.06
F1 Score: 0.11

Accuracy Score:
0.5294117647058824


  return 1 / (1 + np.exp(-z))


SVM(Support Vector Machines)

In [8]:
# SVM Model
class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.num_iterations = num_iterations

    def fit(self, X, y):
        self.m, self.n = X.shape
        self.X = X
        self.y = y
        self.w = np.zeros(self.n)
        self.b = 0

        # Gradient descent
        for _ in range(self.num_iterations):
            self.update_weights()

    def update_weights(self):
        for i in range(self.m):
            if self.y[i] * (np.dot(self.X[i], self.w) - self.b) >= 1:
                self.w -= self.learning_rate * (2 * self.lambda_param * self.w)
            else:
                self.w -= self.learning_rate * (2 * self.lambda_param * self.w - np.dot(self.X[i], self.y[i]))
                self.b -= self.learning_rate * self.y[i]

    def predict(self, X):
        linear_output = np.dot(X, self.w) - self.b
        return np.sign(linear_output)

X = cancer_dataset.iloc[:, :-1].values
y = cancer_dataset.iloc[:, -1].values

# Split the data into training and testing sets
def train_test_split(X, y, test_size=0.3, random_state=42):
    np.random.seed(random_state)
    indices = np.random.permutation(X.shape[0])
    test_size = int(X.shape[0] * test_size)
    test_indices = indices[:test_size]
    train_indices = indices[test_size:]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the SVM model
svm_model = SVM(learning_rate=0.001, lambda_param=0.01, num_iterations=10000)
svm_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_model.predict(X_test)

# Evaluate the model
def confusion_matrix(y_true, y_pred):
    TP = np.sum((y_true == 1) & (y_pred == 1))
    TN = np.sum((y_true == -1) & (y_pred == -1))
    FP = np.sum((y_true == -1) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == -1))
    return np.array([[TP, FP], [FN, TN]])

def classification_report(y_true, y_pred):
    TP = np.sum((y_true == 1) & (y_pred == 1))
    TN = np.sum((y_true == -1) & (y_pred == -1))
    FP = np.sum((y_true == -1) & (y_pred == 1))
    FN = np.sum((y_true == 1) & (y_pred == -1))
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1_score = 2 * (precision * recall) / (precision + recall)
    return f"Precision: {precision:.2f}\nRecall: {recall:.2f}\nF1 Score: {f1_score:.2f}"

def accuracy_score(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nAccuracy Score:")
print(accuracy_score(y_test, y_pred))


Confusion Matrix:
[[17  0]
 [ 0  0]]

Classification Report:
Precision: 1.00
Recall: 1.00
F1 Score: 1.00

Accuracy Score:
0.5




---



# **LINEAR REGRESSION**

In [10]:
#For more information for the dataser refer: https://archive.ics.uci.edu/dataset/477/real+estate+valuation+data+set
real_estate_dataset = pd.read_excel('/content/Real estate valuation data set.xlsx')

In [11]:
real_estate_dataset.head()

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,1,2012.916667,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.916667,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583333,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833333,5.0,390.5684,5,24.97937,121.54245,43.1


In [12]:
real_estate_dataset.tail()

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
409,410,2013.0,13.7,4082.015,0,24.94155,121.50381,15.4
410,411,2012.666667,5.6,90.45606,9,24.97433,121.5431,50.0
411,412,2013.25,18.8,390.9696,7,24.97923,121.53986,40.6
412,413,2013.0,8.1,104.8101,5,24.96674,121.54067,52.5
413,414,2013.5,6.5,90.45606,9,24.97433,121.5431,63.9


In [15]:
# Linear Regression model
class LinearRegression:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations

    def fit(self, X, y):
        self.m, self.n = X.shape
        self.X = X
        self.y = y
        self.theta = np.zeros(self.n)
        self.bias = 0

        # Gradient descent
        for _ in range(self.num_iterations):
            self.update_weights()

    def update_weights(self):
        y_pred = np.dot(self.X, self.theta) + self.bias
        d_theta = -(2 / self.m) * np.dot(self.X.T, (self.y - y_pred))
        d_bias = -(2 / self.m) * np.sum(self.y - y_pred)

        self.theta -= self.learning_rate * d_theta
        self.bias -= self.learning_rate * d_bias

    def predict(self, X):
        return np.dot(X, self.theta) + self.bias

X = real_estate_dataset.iloc[:, :-1].values
y = real_estate_dataset.iloc[:, -1].values

# Normalize features
X_mean = np.mean(X, axis=0)
X_std = np.std(X, axis=0)
X = (X - X_mean) / X_std

# Split the data into training and testing sets
def train_test_split(X, y, test_size=0.3, random_state=42):
    np.random.seed(random_state)
    indices = np.random.permutation(X.shape[0])
    test_size = int(X.shape[0] * test_size)
    test_indices = indices[:test_size]
    train_indices = indices[test_size:]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the Linear Regression model
linear_regr = LinearRegression(learning_rate=0.01, num_iterations=10000)
linear_regr.fit(X_train, y_train)

# Make predictions on the test set
y_pred = linear_regr.predict(X_test)

# Evaluate the model
def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def r2_score(y_true, y_pred):
    total_sum_of_squares = np.sum((y_true - np.mean(y_true)) ** 2)
    residual_sum_of_squares = np.sum((y_true - y_pred) ** 2)
    return 1 - (residual_sum_of_squares / total_sum_of_squares)

print("Mean Squared Error:")
print(mean_squared_error(y_test, y_pred))

print("\nR2 Score:")
print(r2_score(y_test, y_pred))


Mean Squared Error:
75.31184411393879

R2 Score:
0.5532406945054662
