### Student id: 20237369
### Student name: Le Quoc Trung

In [28]:
import map_feature

import json
import pandas as pd
import numpy as np

In [29]:
def read_data(filename):
    """Load the labelled training set and expand its two raw features.

    The file is parsed as header-less CSV with three columns: two feature
    columns followed by the label column.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(X_train, Y_train)`` tuple. ``X_train`` is whatever
        ``map_feature.map_feature`` returns for the two feature columns;
        ``Y_train`` is the label column as a NumPy array.
    """
    # Read the file the caller asked for rather than a hard-coded path.
    df = pd.read_csv(filename, names=['0', '1', '2'])
    X = df[['0', '1']].to_numpy()
    X_train = map_feature.map_feature(X[:, 0], X[:, 1])
    Y_train = df['2'].to_numpy()

    return X_train, Y_train

In [30]:
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    L2 regularisation of strength ``lamda`` is applied to every parameter
    except ``theta[0]``, which multiplies the bias column of ones.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so the
    # cost stays finite when the sigmoid saturates to exactly 0 or 1.
    _EPS = 1e-15

    def __init__(self, alpha=0.01, iters=1000, lamda=0.0, verbose=False):
        """Store the hyperparameters; the model is untrained until ``fit``.

        Args:
            alpha: Gradient-descent step size.
            iters: Number of gradient-descent iterations.
            lamda: L2 regularisation strength.
            verbose: When true, print the sample count and per-iteration cost.
        """
        self.alpha = alpha
        self.iters = iters
        self.lamda = lamda
        self.verbose = verbose
        self.theta = None

    @staticmethod
    def _add_bias(X):
        """Return ``X`` with a leading column of ones prepended."""
        return np.c_[np.ones(len(X)), X]

    def sigmoid(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``."""
        return 1 / (1 + np.exp(-z))

    def compute_cost(self, X, y, theta=None):
        """Return the regularised cross-entropy cost.

        Args:
            X: Design matrix that already contains the bias column (it is
                multiplied directly with ``theta``).
            y: Array of 0/1 labels.
            theta: Parameter vector to evaluate; defaults to ``self.theta``.

        Returns:
            The scalar cost, with the L2 penalty skipping ``theta[0]``.
        """
        if theta is None:
            theta = self.theta
        m = len(y)
        h_theta = self.sigmoid(np.dot(X, theta))
        # Avoid log(0), which would turn the cost into inf/nan.
        h_theta = np.clip(h_theta, self._EPS, 1 - self._EPS)
        J = -1/m * np.sum(y * np.log(h_theta) + (1 - y) * np.log(1 - h_theta))
        reg_term = (self.lamda / (2 * m)) * np.sum(theta[1:]**2)
        J += reg_term
        return J

    def compute_gradient(self, X, y, theta):
        """Return the gradient of the regularised cost with respect to ``theta``.

        Args:
            X: Design matrix that already contains the bias column.
            y: Array of 0/1 labels.
            theta: Parameter vector at which to evaluate the gradient.
        """
        m = len(y)
        h_theta = self.sigmoid(np.dot(X, theta))
        loss = h_theta - y
        dJ = np.dot(X.T, loss) / m

        # The bias parameter theta[0] is not regularised.
        reg_term = (self.lamda / m) * theta[1:]
        dJ[1:] += reg_term

        return dJ

    def gradient_descent(self, X, y):
        """Fit ``self.theta`` from zeros with ``self.iters`` descent steps.

        Args:
            X: Feature matrix without a bias column; one is prepended here.
            y: Array of 0/1 labels.
        """
        X = self._add_bias(X)
        num_features = X.shape[1]
        self.theta = np.zeros(num_features)

        if self.verbose:
            print(f'The total of training samples: {len(y)}')

        for i in range(self.iters):
            dJ = self.compute_gradient(X, y, self.theta)
            self.theta -= self.alpha * dJ

            if self.verbose:
                cost = self.compute_cost(X, y, self.theta)
                print(f'Iter {i + 1}, cost = {cost}')

    def fit(self, X, y):
        """Train the model on ``X`` (no bias column) and labels ``y``."""
        self.gradient_descent(X, y)

    def predict(self, X):
        """Return 0/1 predictions for ``X`` (no bias column).

        A sample is labelled 1 when its predicted probability is >= 0.5.
        """
        X_with_bias = self._add_bias(X)
        probabilities = self.sigmoid(np.dot(X_with_bias, self.theta))
        predictions = (probabilities >= 0.5).astype(int)
        return predictions

    def evaluate(self, X, y):
        """Return ``(accuracy, precision, recall, f1_score)`` on ``X``/``y``.

        A metric whose denominator is zero (no predicted positives, no actual
        positives, or precision + recall == 0) is reported as 0.0 rather than
        NaN.
        """
        predictions = self.predict(X)
        y = np.asarray(y)

        true_positives = np.sum((predictions == 1) & (y == 1))
        predicted_positives = np.sum(predictions == 1)
        actual_positives = np.sum(y == 1)

        accuracy = float(np.mean(predictions == y))
        precision = (
            float(true_positives / predicted_positives)
            if predicted_positives else 0.0
        )
        recall = (
            float(true_positives / actual_positives)
            if actual_positives else 0.0
        )
        denominator = precision + recall
        f1_score = 2 * (precision * recall) / denominator if denominator else 0.0
        return accuracy, precision, recall, f1_score

In [31]:
def read_config(file_path):
    """Read the training hyperparameters from a JSON file.

    Args:
        file_path: Path of a JSON document containing the keys ``"Alpha"``,
            ``"Lambda"`` and ``"NumIter"``.

    Returns:
        The values stored under those three keys, as the tuple
        ``(alpha, lamda, num_iterations)``.
    """
    with open(file_path, mode='r') as handle:
        settings = json.load(handle)

    return settings["Alpha"], settings["Lambda"], settings["NumIter"]

In [32]:
def save_model(model, file_path):
    """Write ``model.theta`` to ``file_path`` as a JSON array.

    Args:
        model: Object whose ``theta`` attribute provides ``tolist()``.
        file_path: Destination path; the file is opened in write mode.
    """
    with open(file_path, mode='w') as sink:
        weights = model.theta.tolist()
        json.dump(weights, sink)

def save_report(report, file_path):
    """Serialise ``report`` as JSON into ``file_path``.

    Args:
        report: JSON-serialisable object to store.
        file_path: Destination path; the file is opened in write mode.
    """
    with open(file_path, mode='w') as sink:
        json.dump(obj=report, fp=sink)

In [36]:
# Input locations for this run.
config_file = 'config.json'
data_file = 'training_data.txt'

# Load the hyperparameters and the training set; these names are reused by
# the cells that follow.
alpha, lamda, iters = read_config(config_file)
X_train, Y_train = read_data(data_file)

In [37]:
# Train with the hyperparameters read from the config file; ``verbose`` is
# left at its default (False), so no per-iteration output is printed.
model = LogisticRegression(alpha=alpha, iters=iters, lamda=lamda)
model.fit(X_train, Y_train)

In [38]:
# Persist the learned parameter vector.
model_file_path = 'model.json'
save_model(model, model_file_path)

# Metrics are computed on the same data the model was fitted on, so they are
# training-set scores rather than held-out estimates.
accuracy, precision, recall, f1_score = model.evaluate(X_train, Y_train)

# Collect the metrics under the keys written to the report file.
classification_report = {
    "Accuracy": accuracy,
    "Precision": precision,
    "Recall": recall,
    "F1-score": f1_score
}
classification_report_file_path = 'classification_report.json'
save_report(classification_report, classification_report_file_path)

# Echo the same metrics to stdout.
print("Training Accuracy:", accuracy)
print("Training Precision:", precision)
print("Training Recall:", recall)
print("Training F1-score:", f1_score)

Training Accuracy: 0.8305084745762712
Training Precision: 0.7794117647058824
Training Recall: 0.9137931034482759
Training F1-score: 0.8412698412698414
