__<h1 align = "center">STATIC LEARNING</h1>__

__<h1 align = "center">Lab assignment 2 – Logistic regression</h1>__

---

## Import library

In [44]:
import json
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

## Read files

In [45]:
file_path = 'training_data.txt'
columns = ['feature 1', 'feature 2', 'label']
df = pd.read_csv(file_path, header = None, names = columns)
df

Unnamed: 0,feature 1,feature 2,label
0,0.051267,0.699560,1
1,-0.092742,0.684940,1
2,-0.213710,0.692250,1
3,-0.375000,0.502190,1
4,-0.513250,0.465640,1
...,...,...,...
113,-0.720620,0.538740,0
114,-0.593890,0.494880,0
115,-0.484450,0.999270,0
116,-0.006336,0.999270,0


In [46]:
import map_feature

ar_feature = map_feature.map_feature(df['feature 1'], df['feature 2'])
print(len(ar_feature), len(ar_feature[0]))

118 28


In [47]:
with open('config.json',) as f:
    configs = json.load(f)

print(configs)

{'Alpha': 0.5, 'Lambda': 1, 'NumIter': 10000}


## Helper functions

In [48]:
class LogisticRegression:
    def __init__(self, learning_rate = 0.01, num_iterations = 1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.theta = None

    def sigmoid_activation(self, z):
        return 1 / (1 + np.exp(-z))

    def compute_cost(self, X, y):
        m = len(y)
        h = self.sigmoid_activation(X.dot(self.theta))
        cost = (1 / m) * np.sum( - y * np.log(h) - (1 - y) * np.log(1 - h))
        return cost

    def compute_gradient(self, X, y):
        m = len(y)
        h = self.sigmoid_activation(X.dot(self.theta))
        gradient = (1 / m) * X.T.dot(h - y)
        return gradient

    def gradient_descent(self, X, y):
        self.theta = np.zeros(X.shape[1])
        for _ in range(self.num_iterations):
            gradient = self.compute_gradient(X, y)
            self.theta -= self.learning_rate * gradient

    def predict(self, X):
        h = self.sigmoid_activation(X.dot(self.theta))
        results = np.where(h >= 0.5, 1, 0)
        return results
    
    def evaluate(self, X, y):
        predictions = self.predict(X)

        # Calculate evaluation metrics
        accuracy = np.mean(predictions == y)
        precision = np.sum(predictions * y) / np.sum(predictions)
        recall = np.sum(predictions * y) / np.sum(y)
        f1_score = (2 * precision * recall) / (precision + recall)

        return accuracy, precision, recall, f1_score

## Train model

In [49]:
X = ar_feature
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.gradient_descent(X_train, y_train)

cost = model.compute_cost(X_train, y_train)
print("Cost:", cost)

predictions = model.predict(X_test)
print("Predictions:", predictions)

accuracy, precision, recall, f1_score = model.evaluate(X_test, y_test)
print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1_score)

Cost: 0.6316012104969425
Predictions: [1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 1]
Accuracy: 0.7083333333333334
Precision: 0.8571428571428571
Recall: 0.5
F1-score: 0.631578947368421


## Save results

In [50]:
with open('model.json', 'w') as f:
    json.dump({'Predictions: ': model.predict(X_test).tolist()}, f)

with open('classification_report.json', 'w') as f:
    result = {
        'Accuracy: ': accuracy,
        'Precision: ': precision,
        'Recall: ': recall,
        'F1-score:': f1_score
    }
    json.dump(result, f)