In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score



### Data collection and data processing

In [2]:
# Read the raw dataset. header=None tells pandas the file has no header row,
# so the columns are labelled with integers.
sonar_data = pd.read_csv(filepath_or_buffer='./data.csv', header=None)

In [3]:
# Every column except the last is a feature; the last column is used as the target.
X, y = sonar_data.iloc[:, :-1], sonar_data.iloc[:, -1]


### Training and testing

In [4]:
# Hold out 10% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=1,
)

In [5]:
class Logistic_Regression():
    """Binary logistic regression fitted with full-batch gradient descent.

    A constant bias column is appended to the features, so ``theta`` holds one
    weight per feature followed by the intercept. The two class labels found
    in ``y`` are mapped to 0 and 1 in sorted order and mapped back in
    ``predict``.

    Parameters
    ----------
    max_iter : int
        Number of gradient-descent steps to run.
    learning_rate : float
        Step size applied to the gradient at every iteration.
    """

    def __init__(self, max_iter, learning_rate):
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        # Neither attribute is read anywhere in this class; both are kept so
        # existing code that accesses them keeps working.
        self.z = None
        self.lambda_param = 0.01

    @staticmethod
    def _add_bias(X):
        """Return X as a float array with a trailing column of ones (the intercept)."""
        X = np.asarray(X, dtype=float)
        return np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)

    def fit(self, X, y):
        """Fit the model on features ``X`` and binary targets ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Must contain exactly two distinct values.

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly two distinct classes.
        """
        y = np.asarray(y)
        self.labels = np.unique(y)
        if self.labels.size != 2:
            # More (or fewer) classes would be silently collapsed into a
            # first-vs-rest problem and produce misleading predictions.
            raise ValueError(
                "Logistic_Regression supports exactly 2 classes, got %d"
                % self.labels.size
            )
        self.X_train = self._add_bias(X)
        self.y_train = self.covert_y(y)
        self.theta = np.zeros(self.X_train.shape[1])
        self.training()

    def covert_y(self, y):
        """Encode labels as 0/1: the smallest label becomes 0, anything else 1.

        The method name (sic) is kept unchanged for backward compatibility.
        """
        # Converting first makes the comparison element-wise for plain lists too.
        y = np.asarray(y)
        return np.where(y == np.unique(y)[0], 0, 1)

    def sigmoid(self, z):
        """Numerically stable logistic function 1 / (1 + exp(-z)).

        Only exp(-|z|) is evaluated, so large-magnitude inputs cannot overflow.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z))
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def cost_function(self):
        """Return the mean binary cross-entropy on the training data."""
        eps = np.finfo(float).eps
        # Keep probabilities strictly inside (0, 1) so log() never sees 0,
        # which would turn the cost into inf/NaN once predictions saturate.
        y_hat = np.clip(self.sigmoid(self.X_train @ self.theta), eps, 1.0 - eps)
        return -np.mean(
            self.y_train * np.log(y_hat) + (1 - self.y_train) * np.log(1 - y_hat)
        )

    def training(self):
        """Run ``max_iter`` gradient-descent steps and return the fitted ``theta``.

        ``self.cost`` holds the cost of the initial weights while training runs
        and the cost of the final weights once it finishes.
        """
        self.cost = self.cost_function()
        n_samples = len(self.y_train)

        for _ in range(self.max_iter):
            y_pred = self.sigmoid(self.X_train @ self.theta)
            gradient = self.X_train.T @ (y_pred - self.y_train) / n_samples
            self.theta -= self.learning_rate * gradient

        # The cost is not used by the update rule, so it is evaluated once
        # after the loop instead of on every iteration.
        self.cost = self.cost_function()
        return self.theta

    def predict(self, X):
        """Return the predicted class label for every row of ``X``."""
        probabilities = self.sigmoid(self._add_bias(X) @ self.theta)
        # Probability >= 0.5 selects the second (larger) label.
        return self.labels[(probabilities >= 0.5).astype(int)]

    def accuracy(self, X, y):
        """Return the fraction of rows in ``X`` whose prediction equals ``y``."""
        return accuracy_score(y, self.predict(X))
    
# Train the from-scratch model on the training split.
ls_model = Logistic_Regression(learning_rate=0.001, max_iter=30000)
ls_model.fit(X=X_train, y=y_train)

In [6]:
# Accuracy of the from-scratch model on the held-out test split
ls_model.accuracy(X=X_test, y=y_test)

0.8095238095238095

In [7]:
# Accuracy on the training split, for comparison with the test score
ls_model.accuracy(X=X_train, y=y_train)

0.7700534759358288

In [8]:
from sklearn.linear_model import LogisticRegression

# Baseline: scikit-learn's LogisticRegression with default settings, fitted on
# the same training split as the from-scratch model.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

LogisticRegression()

In [14]:
import pickle

# Persist the fitted scikit-learn model. The context manager flushes and
# closes the file, so the complete pickle is on disk before it is read back.
with open("model_flask.pkl", "wb") as pickle_out:
    pickle.dump(model, pickle_out)

# Reload the model to check the round-trip.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open("model_flask.pkl", "rb") as pickle_in:
    load_model = pickle.load(pickle_in)