<a href="https://colab.research.google.com/github/lmquann/leminhquan/blob/main/HQTT_min_max_normalization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
from matplotlib import pyplot as plt
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
def readData(folder, filename):
    """Load a comma-delimited numeric file and split it into features and target.

    Every column except the last is treated as a feature; the last column is
    the target. A leading column of ones (the bias/intercept term) is
    prepended to the feature matrix.

    Parameters
    ----------
    folder : str
        Directory containing the data file.
    filename : str
        Name of the comma-delimited text file inside ``folder``.

    Returns
    -------
    X : numpy.ndarray
        Array of shape (m, 1 + number_of_feature_columns) whose first column
        is all ones.
    y : numpy.ndarray
        Target values reshaped to a column of shape (m, 1).
    """
    # ndmin=2 keeps the array two-dimensional even when the file holds a
    # single row; without it the column slicing below raises IndexError.
    data = np.loadtxt(os.path.join(folder, filename), delimiter=',', ndmin=2)
    print('Original data shape', data.shape)
    X = data[:, :-1]
    print('X shape: ', X.shape)
    y = data[:, -1]
    print('y shape: ', y.shape)
    m = X.shape[0]
    print('Number of training examples m = ', m)
    # Prepend the bias column of ones.
    x0 = np.ones((m, 1))
    X = np.hstack([x0, X])
    print('Modified X shape: ', X.shape)
    # Turn the flat target vector into an (m, 1) column.
    y = np.reshape(y, (m, 1))
    print('Modified y shape: ', y.shape)
    return X, y

In [3]:
def featureVectorScaling(data):
    """Min-max normalise ``data`` using its global minimum and maximum.

    Each value is mapped to ``(value - min) / (max - min)``, so the smallest
    entry becomes 0 and the largest becomes 1.

    Parameters
    ----------
    data : array_like
        Values to scale. The minimum and maximum are taken over the whole
        array, regardless of its dimensionality.

    Returns
    -------
    numpy.ndarray
        Scaled array with the same shape as ``data``. When every entry is
        identical (max == min) an array of zeros is returned instead of the
        NaNs a 0/0 division would produce.
    """
    data = np.asarray(data)
    snn = data.min()  # smallest value
    sln = data.max()  # largest value
    value_range = sln - snn
    if value_range == 0:
        # Constant input: dividing would yield NaN for every entry.
        data_scl = np.zeros_like(data, dtype=float)
    else:
        data_scl = (data - snn) / value_range
    # Diagnostic echo of the second entry; skipped when there is none so a
    # one-element input does not raise IndexError.
    if data_scl.ndim > 0 and data_scl.shape[0] > 1:
        print(data_scl[1])
    return data_scl

In [4]:
def scaleData(X, y):
    """Min-max scale every non-bias feature column of ``X`` and the target ``y``.

    Column 0 of ``X`` is copied through unchanged; each remaining column is
    passed to ``featureVectorScaling`` on its own. ``y`` is scaled as a whole
    by the same helper.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array whose first column is left unscaled.
    y : numpy.ndarray
        2-D target array (it is indexed as ``y_scl[1, :]`` for the
        diagnostic print).

    Returns
    -------
    X_scl : numpy.ndarray
        2-D array with the same number of rows and columns as ``X``.
    y_scl : numpy.ndarray
        Scaled target with the same shape as ``y``.
    """
    # Gather the columns and stack once. This keeps X_scl two-dimensional
    # even when X holds only its first column, and avoids rebuilding the
    # matrix on every loop iteration.
    columns = [X[:, 0]]
    for i in range(1, X.shape[1]):
        columns.append(featureVectorScaling(X[:, i]))
    X_scl = np.column_stack(columns)
    y_scl = featureVectorScaling(y)
    has_second_row = X_scl.shape[0] > 1
    print('X_scl shape: ', X_scl.shape)
    if has_second_row:
        print(X_scl[1,:])
    print('y scl shape: ', y_scl.shape)
    if has_second_row:
        print(y_scl[1,:])
    return X_scl, y_scl

In [5]:
def computeLoss(X, y, w):
    """Return the halved mean squared error of the linear model ``X . w``.

    The value is ``1 / (2 * m)`` times the sum of squared differences between
    the predictions ``np.dot(X, w)`` and ``y``, where ``m`` is ``y.shape[0]``.
    """
    sample_count = y.shape[0]
    residual = np.dot(X, w) - y
    squared_error_total = np.sum(np.square(residual))
    return (1/(2*sample_count))*squared_error_total

def gradientDescent(X, y, w, alpha, n):
    """Run ``n`` batch gradient-descent updates for linear regression.

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix.
    y : numpy.ndarray
        Targets; ``y.shape[0]`` is used as the number of examples.
    w : numpy.ndarray
        Starting weights. A copy is updated, so the caller's array is left
        untouched.
    alpha : float
        Learning rate.
    n : int
        Number of update steps.

    Returns
    -------
    tuple
        ``(weights, losses)`` where ``losses`` is a list holding the value of
        ``computeLoss`` after each update.
    """
    num_examples = y.shape[0]
    losses = []
    weights = w.copy()
    print('w_optimal shape: ', weights.shape)
    for _ in range(n):
        # Prediction error under the current weights.
        residual = np.dot(X, weights) - y
        # Step against the gradient, averaged over the examples.
        weights = weights - (alpha/num_examples)*np.dot(X.T, residual)
        losses.append(computeLoss(X, y, weights))
    return weights, losses

In [6]:
def visualizeDataAndModel(X, y, w_optimal):
    """Plot the observations and the fitted line against the column ``X[:, 1]``.

    The targets ``y`` are drawn with the ``'g^'`` marker style and the model
    output ``np.dot(X, w_optimal)`` with the ``'r-'`` line style, both over
    the second column of ``X``. The figure is shown and nothing is returned.
    """
    plt.figure()
    feature = X[:, 1]
    plt.plot(feature, y, 'g^')
    fitted = np.dot(X, w_optimal)
    plt.plot(feature, fitted, 'r-')
    plt.legend(['Raw Data', 'Linear regression'])
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000')
    plt.show()

In [7]:
def main(folder="/content/drive/MyDrive/học máy 1", filename="ex1data1.txt",
         alpha=0.01, n=1500):
    """Load the data, min-max scale it, and fit a linear model by gradient descent.

    Parameters
    ----------
    folder : str, optional
        Directory holding the data file. Defaults to the Google Drive folder
        this notebook was written against.
    filename : str, optional
        Name of the comma-delimited data file.
    alpha : float, optional
        Learning rate passed to ``gradientDescent``.
    n : int, optional
        Number of gradient-descent iterations.

    Returns
    -------
    tuple
        ``(w, J_history)``: the fitted weights and the per-iteration loss
        values. Both refer to the min-max scaled data, not the original units.
    """
    X, y = readData(folder, filename)
    X_scl, y_scl = scaleData(X, y)
    print('X scl: ', X_scl[1,:])
    print('y scl: ', y_scl[1])
    # Start from all-zero weights, one per column of the scaled design matrix.
    w = np.zeros((X_scl.shape[1], 1))
    w, J_history = gradientDescent(X_scl, y_scl, w, alpha, n)
    print("Optimal weights are: ", w)
    # With n == 0 no loss was recorded, so evaluate it at the returned weights.
    final_loss = J_history[-1] if J_history else computeLoss(X_scl, y_scl, w)
    print("Loss function: ", final_loss)
    return w, J_history

if __name__ == '__main__':
    main()

Original data shape (97, 2)
X shape:  (97, 1)
y shape:  (97,)
Number of training examples m =  97
Modified X shape:  (97, 2)
Modified y shape:  (97, 1)
0.02915679345136556
[0.44025019]
X_scl shape:  (97, 2)
[1.         0.02915679]
y scl shape:  (97, 1)
[0.44025019]
X scl:  [1.         0.02915679]
y scl:  [0.44025019]
w_optimal shape:  (2, 1)
Optimal weights are:  [[0.24338406]
 [0.42408963]]
Loss function:  0.009124713330402054
