In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# 1. Linear Regression

- Features (inputs): we'll call these $x$ (or $\mathbf{x}$ if vectors).
- Training examples: many $x^{(i)}$ for which $y^{(i)}$ is known (e.g., many movies for which we know the rating).
- A model: a function that represents the relationship between $x$ and $y$.
- A loss/cost/objective function, which tells us how well our model approximates the training examples.
- Optimization: a way of finding the parameters of our model that minimize the loss function.


## 1.1. Classification Metrics (Accuracy, Precision, Recall, F1)

In [2]:
class Metric():
    """Binary classification metrics for a pair of label/prediction arrays.

    Every value is first reduced to a 0/1 label by clipping it to ``[0, 1]``
    and rounding: values above 0.5 become 1 (NumPy rounds exactly 0.5 down
    to 0), so continuous regression outputs are thresholded and any label
    greater than 1 is folded into the positive class.

    Attributes (``None`` until the matching ``*_m`` method has run):
        accuracy, precision, recall, f1
    """

    def __init__(self,y_true, y_pred):
        """Store the ground-truth labels and the predictions as NumPy arrays."""
        self.y_true = np.array(y_true)
        self.y_pred = np.array(y_pred)
        self.accuracy = None
        self.precision = None
        self.recall = None
        self.f1 = None

    @staticmethod
    def _binarize(values):
        """Clip to [0, 1] and round, turning arbitrary scores into 0/1 labels."""
        return np.round(np.clip(values, 0, 1))

    def _true_positives(self):
        """Count samples that are positive in both the truth and the prediction.

        Both arrays are thresholded *before* being multiplied; multiplying
        the raw values first would let a label such as 2 turn a prediction
        of 0.3 into a "positive" (2 * 0.3 = 0.6 rounds to 1).
        """
        return np.sum(self._binarize(self.y_true) * self._binarize(self.y_pred))

    def apply(self):
        """Compute all four metrics and print them."""
        self.accuracy_m()
        self.recall_m()
        self.precision_m()
        self.f1_m()
        print(f"Accuracy: {self.accuracy}\nPrecision: {self.precision}\nRecall: {self.recall}\nF1: {self.f1}")

    def accuracy_m(self):
        """Set ``self.accuracy`` to the fraction of matching 0/1 labels.

        An empty input yields 0.0 instead of dividing by zero.
        """
        total = len(self.y_true)
        if total == 0:
            self.accuracy = 0.0
            return
        correct_predictions = np.sum(self._binarize(self.y_true) == self._binarize(self.y_pred))
        self.accuracy = correct_predictions / total

    def recall_m(self):
        """Set ``self.recall`` to true positives / actual positives."""
        possible_positives = np.sum(self._binarize(self.y_true))
        # Machine epsilon keeps the division defined when there are no positives.
        self.recall = self._true_positives() / (possible_positives + np.finfo(float).eps)

    def precision_m(self):
        """Set ``self.precision`` to true positives / predicted positives."""
        predicted_positives = np.sum(self._binarize(self.y_pred))
        # Machine epsilon keeps the division defined when nothing is predicted positive.
        self.precision = self._true_positives() / (predicted_positives + np.finfo(float).eps)

    def f1_m(self):
        """Set ``self.f1`` to the harmonic mean of precision and recall.

        Precision and recall are recomputed here so the method can be called
        on its own, without a prior ``precision_m()`` / ``recall_m()`` call.
        """
        self.precision_m()
        self.recall_m()
        f1_score = 2 * ((self.precision * self.recall) / (self.precision + self.recall + np.finfo(float).eps))
        self.f1 = f1_score

## 1.2. Feature Scaling

In [3]:
class FeatureScaling:
    """Preprocessing helpers: integer label encoding and mean normalization."""

    def __init__(self):
        # Both lookup tables are filled by fit().
        self.class_to_index = {}
        self.index_to_class = {}

    def fit(self, y):
        """Build the class <-> index lookup tables from the labels in ``y``.

        Indices follow the sorted order returned by ``np.unique``.
        """
        unique_classes = np.unique(y)
        self.class_to_index = {cls: idx for idx, cls in enumerate(unique_classes)}
        self.index_to_class = {idx: cls for idx, cls in enumerate(unique_classes)}

    def transform(self, y):
        """Map each label in ``y`` to its integer index.

        Raises:
            KeyError: if a label was not present when ``fit`` was called.
        """
        return np.array([self.class_to_index[cls] for cls in y])

    def label_fit_transform(self, y):
        """Fit the encoder on ``y`` and return the encoded labels.

        ``y`` may be any 1-D array-like (list, tuple, NumPy array, pandas
        Series); it is converted with ``np.asarray`` so inputs without a
        ``tolist`` method are accepted too.
        """
        y = np.asarray(y).tolist()
        self.fit(y)
        return self.transform(y)

    def label_inverse_transform(self, y):
        """Map integer indices back to the original class labels.

        Accepts any 1-D array-like of indices.

        Raises:
            KeyError: if an index was not produced by ``fit``.
        """
        y = np.asarray(y).tolist()
        return np.array([self.index_to_class[idx] for idx in y])

    def mean_normalize(self,df):
        """Return a copy of ``df`` with every column scaled to (x - mean) / (max - min).

        The input frame is left untouched.
        """
        normalized_df = df.copy()
        for column in normalized_df.columns:
            col_mean = df[column].mean()
            col_range = df[column].max() - df[column].min()
            if col_range == 0:
                col_range = 1  # Avoid division by zero for constant columns
            normalized_df[column] = (df[column] - col_mean) / col_range
        return normalized_df


## 1.3. Linear Model

### 1.3.1. Single Variable

![My Image](https://drive.google.com/uc?id=1gC0Oieo2M29W320s8oijYAiFREVup9Hi)
![2](https://drive.google.com/uc?id=1_kWSSv7raUkVj7qcpuLnTiTo2QxudjT3)

### 1.3.2. Multiple Variables

![3](https://drive.google.com/uc?id=1safWBCF1d_E54TUmUF3_KPvKC46UaP9w)

## 1.5. Implementation

In [4]:
class LinearRegression():
    """Multiple linear regression trained with batch gradient descent.

    The model is ``y_hat = w0 + w1*x1 + ... + wk*xk``. A constant column
    ``x0 = 1`` is prepended to the features so the bias ``w0`` is learned
    like any other weight. All weights start at 1.

    Attributes set by ``fit``:
        input: DataFrame of training features with the leading ``x0`` column.
        target: the training targets as passed in.
        weights: 1-D NumPy array of length ``num_features + 1``.
        num_row, num_features: shape of the training data (without ``x0``).
    """

    def __init__(self,learning_rate,epochs):
        """Store the gradient-descent step size and the number of epochs."""
        self.learning_rate = learning_rate
        self.epochs=epochs
        self.input = None
        self.target = None
        self.weights=None

    def _residuals(self, design):
        """Return prediction minus target for every training row.

        ``design`` is the training matrix including the ``x0`` column. The
        target is flattened so a column-shaped target cannot broadcast the
        difference into an (n, n) matrix.
        """
        targets = np.asarray(self.target, dtype=float).ravel()
        return design @ np.asarray(self.weights, dtype=float) - targets

    def CostFunction(self):
        """Return half the mean squared error on the training data: sum(err^2) / (2n)."""
        design = self.input.to_numpy(dtype=float)
        return np.sum(np.square(self._residuals(design))) / (2 * self.num_row)

    def predict(self,x):
        """Predict targets for the feature rows in ``x``.

        Args:
            x: DataFrame or 2-D array-like with the same feature columns as
               the training data (without the ``x0`` column). It is not
               modified.

        Returns:
            A list with one prediction per row.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("fit() must be called before predict()")
        goal = pd.DataFrame(x).copy()
        goal.insert(0, 'x0', 1)
        return list(goal.to_numpy(dtype=float) @ np.asarray(self.weights, dtype=float))

    def fit_transform(self,x):
        """Return the prediction for one row ``x`` that already includes the leading ``x0`` term."""
        return np.sum(np.multiply(np.array(x), self.weights))

    def update_weights(self):
        """Perform one batch gradient-descent step on all weights simultaneously.

        gradient_j = (1/n) * sum_i (y_hat_i - y_i) * x_ij, computed for every
        weight at once from a single pass over the data.
        """
        design = self.input.to_numpy(dtype=float)
        gradient = design.T @ self._residuals(design) / self.num_row
        # Keep weights as an ndarray so their type does not change between epochs.
        self.weights = np.asarray(self.weights, dtype=float) - self.learning_rate * gradient

    def fit(self,input,target,verbose=True):
        """Train the weights on ``input`` / ``target``.

        Args:
            input: DataFrame or 2-D array-like of features, one row per sample.
            target: 1-D (or column-shaped) array-like with one value per row.
            verbose: when True (default) print the weights and cost after
                every epoch; pass False to train silently.

        Raises:
            ValueError: if ``input`` has no rows or ``target`` has a
                different number of values than ``input`` has rows.
        """
        features = pd.DataFrame(input).copy()
        self.num_features = features.shape[1]
        self.num_row = features.shape[0]
        if self.num_row == 0:
            raise ValueError("cannot fit on an empty dataset")
        if np.asarray(target).size != self.num_row:
            raise ValueError("input and target must contain the same number of samples")
        # Constant column so that weights[0] acts as the intercept.
        features.insert(0, 'x0', 1)
        self.input = features
        self.target = target
        self.weights=np.ones(self.num_features + 1)
        for i in range(self.epochs):
            self.update_weights()
            if verbose:
                cost = self.CostFunction()
                print(f"Epoch {i+1}: Weight = {self.weights}, Learning Rate = {self.learning_rate}\nCost = {cost}")

## 1.6. Training

In [5]:
# Disabled alternative: a dataset read from a URL, using the columns
# 'Diện tích' (area) as input and 'Giá' (price) as target.
# url = 'https://raw.githubusercontent.com/nttuan8/DL_Tutorial/master/L1/data_linear.csv'
# data = pd.read_csv(url)
# input = np.array(data['Diện tích'])
# target = np.array(data['Giá'])

# Load the Iris CSV from the Kaggle input directory and preview the first rows.
input_path = '/kaggle/input/iris/Iris.csv'
df = pd.read_csv(input_path)
df.head(5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [6]:
# Use the four measurement columns as features and the species name as the label.
# NOTE(review): `input` shadows Python's built-in input(); the name is kept
# because the following cell reads it.
input = df[['SepalLengthCm','SepalWidthCm','PetalLengthCm','PetalWidthCm']]
target = df['Species']
# Helper object providing mean normalization and label encoding.
fs = FeatureScaling()

In [7]:
# Scale every feature column to (value - mean) / (max - min).
X = fs.mean_normalize(input)
# Encode the species names as integer class indices (in np.unique order).
y = fs.label_fit_transform(target)

In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Train the hand-written model with learning rate 1e-4 for 50 epochs.
# NOTE(review): the log below shows each weight moving only ~1e-6 per epoch
# away from its initial value of 1, so the model is barely trained with these
# settings — consider a larger learning rate and/or more epochs.
LRS = LinearRegression(0.0001,50)
LRS.fit(X_train,y_train)

Epoch 1: Weight = [1.000001171767106, 0.9999978114708664, 0.9999948231554392, 1.0000015986928468, 1.0000018130278845], Learning Rate = 0.0001
Cost = 0.07667250508589715
Epoch 2: Weight = [1.0000023434177792, 0.9999956229324427, 0.9999896463350246, 1.0000031973569532, 1.0000036260263876], Learning Rate = 0.0001
Cost = 0.07667211704134025
Epoch 3: Weight = [1.0000035149520308, 0.9999934343847291, 0.9999844695387561, 1.0000047959923202, 1.0000054389955098], Learning Rate = 0.0001
Cost = 0.07667172900358873
Epoch 4: Weight = [1.0000046863698726, 0.9999912458277259, 0.9999792927666334, 1.000006394598948, 1.0000072519352516], Learning Rate = 0.0001
Cost = 0.07667134097264201
Epoch 5: Weight = [1.000005857671316, 0.9999890572614334, 0.9999741160186565, 1.0000079931768373, 1.0000090648456137], Learning Rate = 0.0001
Cost = 0.07667095294849945
Epoch 6: Weight = [1.0000070288563727, 0.9999868686858517, 0.999968939294825, 1.0000095917259886, 1.0000108777265966], Learning Rate = 0.0001
Cost = 0.07

In [10]:
# Score the held-out predictions.
# NOTE: Metric clips both arrays to [0, 1] and rounds them, so the continuous
# regression outputs are thresholded and any label above 1 is treated as the
# positive class — the reported scores are binary, not per-class.
y_predict = LRS.predict(X_test)
metric = Metric(y_test,y_predict)
metric.apply()

Accuracy: 0.9666666666666667
Precision: 0.9523809523809523
Recall: 1.0
F1: 0.9756097560975608


In [11]:
# Compare against scikit-learn's reference implementation.
# The import is aliased so it does not overwrite the notebook's own
# LinearRegression class; without the alias, re-running the training cell
# after this one would pick up scikit-learn's class instead.
from sklearn.linear_model import LinearRegression as SklearnLinearRegression
regr = SklearnLinearRegression()
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)
metric = Metric(y_test,y_pred)
metric.apply()

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1: 1.0
