In [1]:
import numpy as np
import csv
from sklearn.model_selection import train_test_split

def sigmoid(x):
    return 1/(1 + np.exp(-x))

class LogisticRegression():
    def __init__(self, rate=1e-5, epochs=20000):
        self.rate = rate
        self.epochs = epochs
        self.weights = None
        self.bias = 0
            
    def train(self, feats, ys):
        n_samples, n_feats = feats.shape
        self.weights = np.zeros(n_feats)
        self.bias = 0

        for _ in range(self.epochs):
            self.descent(feats, ys, n_samples)

    def descent(self, feats, ys, n_samples):
        # Check the outcome first
        linear_preds = np.dot(feats, self.weights) + self.bias
        # and then make on a scale from 0.0 to 1.0
        predictions = sigmoid(linear_preds)

        d_weights = (1/n_samples) * np.dot(feats.T, (predictions - ys))
        d_bias = (1/n_samples) * np.sum(predictions - ys)

        self.weights = self.weights - d_weights * self.rate
        self.bias = self.bias - d_bias * self.rate

    def predict(self, feats):
        linear_preds = np.dot(feats, self.weights) + self.bias
        predictions = sigmoid(linear_preds)
        class_preds = []

        # This may be an error
        for i in range(len(predictions)): 
            if predictions[i] <= 0.5:
                class_preds.append(0)
            else:
                class_preds.append(1)
                
        return class_preds

def accuracy(tests, preds):
    return np.sum(preds == tests) / len(tests)

In [2]:
def main():
    with open("/home/ln/src/ml/data/water_potability.csv") as file:
        next(file)
        reader = csv.reader(file)
        data_csv = list(reader)
        
        for row in data_csv:
            for i, x in enumerate(row):
                if len(x)< 1:
                    x = row[i] = "0.0"

    data = np.array(data_csv)
    data = data.astype(float)
    
    feats = data[:, :-1]
    ys = data[:, -1]

    features_train, features_test, ys_train, ys_test = train_test_split(feats, ys, test_size=0.2, random_state=1234)                                                                                                                                                   

    clf = LogisticRegression(rate=0.001, epochs=1000)
    clf.train(features_train, ys_train)

    ys_pred = clf.predict(features_test)
    a = accuracy(ys_test, ys_pred) * 100
    print("Accuracy: " + "{:.3f}%".format(a))

if __name__ == "__main__":
    main()

Accuracy: 59.146%


  return 1/(1 + np.exp(-x))
