In [101]:
import csv
from sklearn.model_selection import train_test_split
import random
import numpy as np
from sklearn.linear_model import LogisticRegression


# Assumes that the csv file has no header.
class LogisticBinaryClassification:
    """Train a logistic-regression classifier on a class-balanced CSV dataset.

    The CSV file is expected to have no header row, numeric feature columns,
    and the class label in the last column of every row.

    Typical use: ``read_data(path)`` -> ``format_data()`` -> ``train()`` ->
    ``predict(features)``.
    """

    def __init__(self, positive_class_label='1', negative_class_label='0'):
        """Create an empty classifier.

        Args:
            positive_class_label: Label string of the positive class.
            negative_class_label: Label string of the negative class. Rows
                whose last column equals this value go to the negative
                group; every other row is treated as positive.
        """
        # Raw CSV rows (lists of strings), grouped by class.
        self.negative_class = []
        self.positive_class = []

        # Balanced selection of rows and its numpy feature/label views.
        self.dataset = []
        self.X_data = []
        self.Y_data = []

        self.positive_class_label = positive_class_label
        # Historical misspelling, kept as an alias so existing code that
        # reads ``postive_class_label`` keeps working.
        self.postive_class_label = positive_class_label
        self.negative_class_label = negative_class_label

        # Every non-blank row read so far, in file order.
        self.csv_as_list = []

        # Fitted sklearn model; stays None until train() has run.
        self.lgr = None

    def read_data(self, path):
        """Read a header-less CSV file and build a class-balanced dataset.

        Rows are appended to ``csv_as_list`` and to the per-class lists, so
        calling this method again adds to the data already loaded. Blank
        lines in the file are ignored.

        Args:
            path: Path of the CSV file to read.
        """
        # newline='' is what the csv module asks for so that it can handle
        # line endings (including ones embedded in quoted fields) itself.
        with open(path, 'r', encoding='utf-8', newline='') as csv_file:
            for row in csv.reader(csv_file):
                if not row:
                    # csv.reader yields [] for a blank line; indexing
                    # row[-1] on it would raise IndexError.
                    continue
                self.csv_as_list.append(row)
                if row[-1] == self.negative_class_label:
                    self.negative_class.append(row)
                else:
                    # Anything that is not the negative label counts as
                    # positive.
                    self.positive_class.append(row)

        # Shuffle each group so the rows kept below are a random sample.
        random.shuffle(self.negative_class)
        random.shuffle(self.positive_class)

        # Balance the classes: keep as many rows of each class as the
        # smaller class has.
        size_dataset = min(len(self.negative_class), len(self.positive_class))
        self.dataset = (
            self.negative_class[:size_dataset]
            + self.positive_class[:size_dataset]
        )

    def format_data(self):
        """Split the balanced dataset into X (features) and Y (labels).

        ``X_data`` becomes a float array built from every column except the
        last; ``Y_data`` becomes an array of the last-column label strings.

        Raises:
            ValueError: If a feature value cannot be converted to float.
        """
        self.X_data = np.array([row[:-1] for row in self.dataset], dtype='float')
        self.Y_data = np.array([row[-1] for row in self.dataset])

    def train(self):
        """Fit the model on a stratified train/test split and print scores.

        The first printed score is measured on the training part, the
        second on the held-out test part. The fitted model is stored in
        ``self.lgr``.
        """
        # stratify keeps the class proportions equal in both parts.
        X_train, X_test, y_train, y_test = train_test_split(
            self.X_data, self.Y_data, stratify=self.Y_data
        )

        self.lgr = LogisticRegression(max_iter=10000, C=0.1).fit(X_train, y_train)

        # Training score first, test score second.
        print("Score: {:.2f}".format(self.lgr.score(X_train, y_train)))
        print("Score: {:.2f}".format(self.lgr.score(X_test, y_test)))

    def predict(self, data):
        """Predict the class label of a single sample.

        Args:
            data: Feature values of one sample (numbers or numeric
                strings), in the same column order as the training data.

        Returns:
            The predicted label, taken from the labels seen in training.

        Raises:
            RuntimeError: If called before ``train()``.
        """
        if self.lgr is None:
            raise RuntimeError("Model is not trained; call train() before predict().")
        # The model expects a 2-D array: one row holding all the features.
        features = np.array(data, dtype='float').reshape(1, -1)
        return self.lgr.predict(features)[0]
      
 
# Example 1: banknote authentication data, using the default '1'/'0' labels.
lbc = LogisticBinaryClassification()
lbc.read_data('data/data_banknote_authentication.csv')
lbc.format_data()               
lbc.train()  # prints two scores (training part, then test part)
# Classify one hand-written sample of four feature values. The returned
# label is not stored or printed here.
lbc.predict([1.6426, 3.0149, 0.22849, -0.147])


# Example 2: diabetes data, same pipeline on a fresh instance.
lbc1 = LogisticBinaryClassification()
lbc1.read_data('data/diabetes.csv')
lbc1.format_data()               
lbc1.train()  # prints two more scores
# Re-use the fifth row read from the CSV (index 4) with its label column
# stripped as the sample to classify.
# NOTE(review): presumably this is the last expression of a notebook cell,
# so its value is what the notebook displays - confirm.
lbc1.predict(lbc1.csv_as_list[4][:-1]) 

Score: 0.98
Score: 0.99
Score: 0.73
Score: 0.76


'1'