<a href="https://colab.research.google.com/github/nguyentantan141/MACHINE-LEARNING/blob/Machine-learning/Metrics_mo_hinh_phan_lop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [270]:
import numpy as np
import os
from sklearn import preprocessing
from scipy import optimize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import top_k_accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

In [271]:
# Mount Google Drive inside the Colab runtime at /content/drive/ so that
# data files stored on Drive can be opened through ordinary file paths.
from google.colab import drive
drive.mount ('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


# Đọc dữ liệu

In [311]:
def readData(pathfolder: str, filename: str):
  """Load a comma-separated numeric file and split it into features and labels.

  The last column of the file is taken as the label; every other column is a
  feature.

  Args:
    pathfolder: Directory that contains the data file.
    filename: Name of the data file inside ``pathfolder``.

  Returns:
    A tuple ``(X, y)`` where ``X`` has shape ``(m, n)`` and ``y`` has shape
    ``(m, 1)``, with ``m`` rows in the file and ``n`` feature columns.
  """
  # ndmin=2 keeps the array two-dimensional even when the file has a single
  # row; without it np.loadtxt returns a 1-D array and the column slicing
  # below raises IndexError.
  data = np.loadtxt(os.path.join(pathfolder, filename), delimiter=',', ndmin=2)
  X = data[:, :-1]
  # Labels are returned as a column vector so they line up with (n, 1) weights.
  y = data[:, -1].reshape(-1, 1)
  return X, y

# Chuẩn hóa dữ liệu

In [312]:
def scaling_minmax_norm(data):
  """Rescale ``data`` with a freshly fitted scikit-learn ``MinMaxScaler``.

  The scaler is created with its default settings, fitted on ``data`` and then
  applied to that same ``data``.

  Args:
    data: Array-like accepted by ``MinMaxScaler``.

  Returns:
    The transformed array produced by the scaler.
  """
  # fit_transform learns the scaling parameters and applies them in one call,
  # which is the same as calling fit() followed by transform() on the same data.
  return preprocessing.MinMaxScaler().fit_transform(data)

In [313]:
def scaling_X(X):
    """Min-max scale the features and prepend a column of ones.

    Args:
        X: Feature matrix with one row per sample.

    Returns:
        Array with one more column than ``X``: a leading column of ones
        (the intercept term) followed by the scaled features.
    """
    scaled = scaling_minmax_norm(X)
    # One bias entry per sample, stacked to the left of the scaled features.
    bias = np.ones((X.shape[0], 1))
    return np.hstack((bias, scaled))

In [314]:
def split_data(X, y, test_size=0.30, random_state=15):
  """Split features and labels into train and test subsets.

  Args:
    X: Feature matrix.
    y: Labels aligned row-wise with ``X``.
    test_size: Share of the samples placed in the test subset. Defaults to
      0.30, the value previously hard-coded here.
    random_state: Seed forwarded to ``train_test_split`` so the split is
      reproducible. Defaults to 15, the value previously hard-coded here.

  Returns:
    The tuple ``(X_train, X_test, y_train, y_test)``.
  """
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state)
  return X_train, X_test, y_train, y_test

# Logistic Regression

In [315]:
def sigmoid(X, w):
  """Apply the logistic function to the linear scores ``X · w``.

  Args:
    X: Design matrix (or any operand accepted by ``np.dot``).
    w: Weight vector/matrix compatible with ``X`` under ``np.dot``.

  Returns:
    ``1 / (1 + exp(-X·w))`` evaluated element-wise.
  """
  scores = np.dot(X, w)
  denominator = 1 + np.exp(-scores)
  return 1 / denominator

def predict(y_hat):
    """Convert predicted probabilities to hard labels by rounding.

    Uses ``np.rint``, which rounds to the nearest integer and sends exact
    halves to the nearest even value (so 0.5 becomes 0.0).

    NOTE(review): a function with the same name and body is defined again
    further down in this notebook; the later definition replaces this one.

    Args:
        y_hat: Array of predicted probabilities.

    Returns:
        Array of the same shape holding the rounded values.
    """
    labels = np.rint(y_hat)
    return labels

def loss(X, y, w):
  """Mean binary cross-entropy of a logistic model with weights ``w``.

  The log-probabilities are computed directly from the linear scores
  ``z = X · w`` with ``np.logaddexp``:

      log(sigmoid(z))     = -logaddexp(0, -z)
      log(1 - sigmoid(z)) = -logaddexp(0,  z)

  This avoids evaluating ``log(0)`` when the sigmoid saturates at 0 or 1,
  which would otherwise turn the loss into NaN/inf.

  Args:
    X: Design matrix of shape ``(m, n)``.
    y: Labels of shape ``(m, 1)`` with values 0 or 1.
    w: Weights of shape ``(n, 1)``.

  Returns:
    The average cross-entropy over the ``m`` samples as a scalar.
  """
  m = y.shape[0]
  z = np.dot(X, w)
  # Stable log-likelihood terms; both stay finite for any finite score.
  log_h = -np.logaddexp(0, -z)
  log_one_minus_h = -np.logaddexp(0, z)
  result = (-1 / m) * np.sum(np.dot(y.T, log_h) + np.dot((1 - y).T, log_one_minus_h))
  return result

def gradient(X, y, w):
    """Gradient of the mean cross-entropy loss with respect to ``w``.

    Args:
        X: Design matrix with one row per sample.
        y: Labels aligned with the rows of ``X``.
        w: Current weights.

    Returns:
        ``(1/m) * X^T (sigmoid(X, w) - y)`` where ``m`` is the number of rows
        of ``X``.
    """
    n_samples = X.shape[0]
    # Difference between predicted probabilities and the true labels.
    residual = sigmoid(X, w) - y
    return (1/n_samples)*np.dot(X.T, residual)

def gradientDescent(X, y, w, alpha, n_iters):
    """Run batch gradient descent for a fixed number of iterations.

    Args:
        X: Design matrix.
        y: Labels aligned with the rows of ``X``.
        w: Initial weights; the array is copied, not modified.
        alpha: Learning rate that scales each gradient step.
        n_iters: Number of update steps to perform.

    Returns:
        A tuple ``(weights, history)`` with the weights after the last step
        and a list holding the loss recorded after every step.
    """
    weights = w.copy()
    history = []
    for _ in range(n_iters):
        step = alpha*gradient(X, y, weights)
        weights = weights - step
        # Record the loss at the updated weights.
        history.append(loss(X, y, weights))
    return weights, history


# Độ đo

## Accuracy 

In [316]:
def acc_score(y, y_hat):
  """Fraction of predictions that equal the true label.

  Both inputs are flattened before comparison. Comparing an ``(m, 1)`` array
  with an ``(m,)`` array directly would broadcast to an ``(m, m)`` matrix and
  give a meaningless count (possibly an "accuracy" above 1).

  Args:
    y: True labels, shape ``(m,)`` or ``(m, 1)``.
    y_hat: Predicted labels, shape ``(m,)`` or ``(m, 1)``.

  Returns:
    The share of positions where ``y`` and ``y_hat`` agree.

  Raises:
    ValueError: If the two inputs do not contain the same number of labels.
    ZeroDivisionError: If the inputs are empty.
  """
  y_true = np.asarray(y).ravel()
  y_pred = np.asarray(y_hat).ravel()
  if y_true.size != y_pred.size:
    raise ValueError(
        "y and y_hat must hold the same number of labels, got %d and %d"
        % (y_true.size, y_pred.size))
  m = y_true.size
  result = (1 / m) * np.sum(y_true == y_pred)
  return result


## Top-k accuracy

In [317]:
def top_k_acc(y,f_hat):
  """Fraction of samples whose true label appears in its candidate row.

  Args:
    y: True labels, indexable by sample position.
    f_hat: 2-D array whose row ``i`` holds the candidate labels for sample
      ``i`` (presumably the k highest-scored classes; confirm with the caller).

  Returns:
    Number of rows ``i`` with ``y[i]`` contained in ``f_hat[i]``, divided by
    the number of rows of ``f_hat``.
  """
  m = f_hat.shape[0]
  # Count the rows in which the true label is among the candidates.
  hits = sum(1 for i in range(m) if y[i] in f_hat[i])
  return hits / m



## Balanced accuracy

In [318]:
def balanced_accuracy(y,y_hat):
  """Average of the true-positive rate and the true-negative rate.

  The four counts are unpacked from the flattened confusion matrix, so this
  only works when that matrix is 2x2 (a binary problem).

  Args:
    y: True labels.
    y_hat: Predicted labels.

  Returns:
    ``0.5 * (tp / (tp + fn) + tn / (tn + fp))``.
  """
  matrix = confusion_matrix(y, y_hat)
  true_neg, false_pos, false_neg, true_pos = matrix.ravel()
  sensitivity = true_pos/(true_pos+false_neg)
  specificity = true_neg/(true_neg+false_pos)
  return 0.5*(sensitivity+specificity)

## Precision

In [319]:
def precision(y,y_hat):
  """Share of predicted positives that are actually positive.

  The counts are unpacked from the flattened confusion matrix, so this only
  works when that matrix is 2x2 (a binary problem).

  Args:
    y: True labels.
    y_hat: Predicted labels.

  Returns:
    ``tp / (tp + fp)``.
  """
  _, false_pos, _, true_pos = confusion_matrix(y, y_hat).ravel()
  predicted_pos = true_pos+false_pos
  return true_pos/predicted_pos

## Recall

In [320]:
def recall(y,y_hat):
  """Share of actual positives that the model predicted as positive.

  The counts are unpacked from the flattened confusion matrix, so this only
  works when that matrix is 2x2 (a binary problem).

  Args:
    y: True labels.
    y_hat: Predicted labels.

  Returns:
    ``tp / (tp + fn)``.
  """
  _, _, false_neg, true_pos = confusion_matrix(y, y_hat).ravel()
  actual_pos = true_pos+false_neg
  return true_pos/actual_pos

## F1 (F-measure)

In [321]:
def f1(y,y_hat):
  """Harmonic mean of precision and recall for a binary problem.

  Zero denominators are handled explicitly: a precision or recall whose
  denominator is zero is treated as 0.0, and the score is 0.0 when both are
  zero. Without these guards the result would be NaN (0/0) whenever the model
  produces no true positives.

  The counts are unpacked from the flattened confusion matrix, so this only
  works when that matrix is 2x2.

  Args:
    y: True labels.
    y_hat: Predicted labels.

  Returns:
    ``2 * precision * recall / (precision + recall)``, or 0.0 when that
    expression is undefined.
  """
  tn, fp, fn, tp = confusion_matrix(y, y_hat).ravel()
  # Local names differ from the module-level precision()/recall() functions
  # so those are not shadowed inside this body.
  prec = tp/(tp+fp) if (tp+fp) > 0 else 0.0
  rec = tp/(tp+fn) if (tp+fn) > 0 else 0.0
  if prec+rec == 0:
    return 0.0
  result=2*((prec*rec)/(prec+rec))
  return result

In [322]:
def predict(y_pred):
    """Convert predicted probabilities to hard labels by rounding.

    Uses ``np.rint``, which rounds to the nearest integer and sends exact
    halves to the nearest even value (so 0.5 becomes 0.0).

    NOTE(review): this redefines a ``predict`` function with the same body
    that appears earlier in the notebook; this later definition is the one
    in effect once the cell has run.

    Args:
        y_pred: Array of predicted probabilities.

    Returns:
        Array of the same shape holding the rounded values.
    """
    rounded = np.rint(y_pred)
    return rounded

# Đánh giá mô hình


In [323]:
def main():
    """Train the logistic-regression model and report its test accuracy.

    Reads ``ex2data1.txt`` from the mounted Drive folder, scales the features,
    splits the data, fits the weights with gradient descent and prints the
    learned weights along with the accuracy from ``acc_score`` and from
    scikit-learn's ``accuracy_score``.
    """
    features, labels = readData('/content/drive/MyDrive/Data/data/','ex2data1.txt')
    # NOTE(review): scaling is applied to the full data set before the split,
    # so the test rows influence the scaling parameters.
    features_scaled = scaling_X(features)
    X_train, X_test, y_train, y_test = split_data(features_scaled, labels)
    print('Huấn luyện mô hình trên tập dữ liệu train')

    # Start from all-zero weights, one per column (bias column included).
    n_params = features_scaled.shape[1]
    w_init = np.zeros((n_params, 1))
    learning_rate = 0.01
    n_steps = 2000
    # The per-step loss history is returned as well but is not used here.
    w_opt, _ = gradientDescent(X_train, y_train, w_init, learning_rate, n_steps)

    print("Ket qua huan luyen mo hinh la: ")
    print('\t\tTrong so w toi uu la:\n ', w_opt)
    print('Ket qua du doan cua mo hinh')
    test_probabilities = sigmoid(X_test, w_opt)
    y_hat = predict(test_probabilities)
    print('\t\tChỉ số Accuracy: ', acc_score(y_test, y_hat))
    print('\t\tSử dụng sklearn, Acc: ', accuracy_score(y_test, y_hat))

# Run the training/evaluation pipeline when this cell (or script) is executed
# directly rather than imported.
if __name__ == "__main__":
    main()

Huấn luyện mô hình trên tập dữ liệu train
Ket qua huan luyen mo hinh la: 
		Trong so w toi uu la:
  [[-0.54745078]
 [ 0.99213425]
 [ 1.01929404]]
Ket qua du doan cua mo hinh
		Chỉ số Accuracy:  0.7333333333333333
		Sử dụng sklearn, Acc:  0.7333333333333333
