In [14]:
import pandas as pd
import numpy as np
from sklearn import datasets

In [188]:
class my_logistic_regression:
    def __init__(self, df, alpha, max_iter, convergence_value):
        self.df = df
        self.alpha = alpha
        self.y = np.matrix(df['target']).T
        
        cols = df.columns.to_list()
        cols.remove('target')
        self.X = np.matrix(df[cols].to_numpy())
        
        self.theta = np.matrix(np.zeros(len(cols))).T
        
        self.training_columns = cols
        self.max_iter = max_iter
        self.m = df.shape[0]
        self.convergence_value = convergence_value
        
    def cost_function(self):
        
        J = 0
        for index, row in self.df.iterrows():
            J += -row['target']*np.log(self.logistic_function(self.theta, row[self.training_columns])) - (1 - row['target'])*np.log(self.logistic_function(self.theta, row[self.training_columns]))
        
        return J
                
    def logistic_function(self, theta, X):
        return 1/(1+np.exp(-np.matmul(X, theta)))
    
    def fit(self):
        iterations = 0
        
        while True:
            iterations += 1 
            self.theta = self.theta - (self.alpha/self.m)*self.X.T.dot(self.logistic_function(self.theta, self.X)-self.y)
            cost = self.cost_function()
            print(cost)
            if abs(cost) <= self.convergence_value:
                break
            if iterations >= self.max_iter:
                break
        
        return self.theta

In [189]:
def popular_logistic_regression(df):
    from sklearn.linear_model import LogisticRegression
    
    y = np.matrix(df['target']).T

    cols = df.columns.to_list()
    cols.remove('target')
    X = np.matrix(df[cols].to_numpy())

    clf = LogisticRegression(random_state=0, fit_intercept=False).fit(X, y)
    
    return clf.coef_
       
    

In [196]:
data = datasets.load_iris()
y = data['target']
X = data['data']
df = pd.DataFrame({'target': y})
df[['t1','t2','t3','t4']] = X

In [197]:
log_reg = my_logistic_regression(df=df, alpha=0.01, max_iter=100000, convergence_value = 0.005)
coefs = log_reg.fit()

[78.32129629]
[60.85598232]
[48.70315492]
[39.97271176]
[33.49368357]
[28.54214371]
[24.66069304]
[21.55163197]
[19.01515917]
[16.91322534]
[15.1478886]
[13.64799176]
[12.36073069]
[11.24617441]
[10.27361443]
[9.41907679]
[8.66359103]
[7.9919625]
[7.39188671]
[6.85330007]
[6.36789706]
[5.92876637]
[5.53011326]
[5.1670454]
[4.83540589]
[4.53164185]
[4.2527]
[3.99594308]
[3.75908239]
[3.54012297]
[3.33731868]
[3.14913525]
[2.97421968]
[2.81137472]
[2.65953747]
[2.51776148]
[2.38520146]
[2.26110045]
[2.14477875]
[2.03562454]
[1.9330858]
[1.83666325]
[1.74590441]
[1.6603983]
[1.57977089]
[1.50368111]
[1.43181741]
[1.36389463]
[1.29965135]
[1.23884754]
[1.18126238]
[1.12669249]
[1.07495022]
[1.0258622]
[0.97926803]
[0.93501912]
[0.89297761]
[0.85301547]
[0.81501363]
[0.77886125]
[0.74445498]
[0.71169843]
[0.68050153]
[0.65078007]
[0.62245523]
[0.59545316]
[0.56970463]
[0.54514464]
[0.52171214]
[0.49934975]
[0.47800345]
[0.45762241]
[0.4381587]
[0.41956716]
[0.40180514]
[0.38483239]
[0.36861

In [198]:
pred = []
for index, row in df.iterrows():
    p = 1/(1+np.exp(np.dot(row[['t1','t2','t3','t4']],coefs)))
    if p>0.5:
        pred.append(1)
    else:
        pred.append(0)

In [199]:
df['pred'] = pred

In [200]:
df[['target','pred']]

Unnamed: 0,target,pred
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
145,2,0
146,2,0
147,2,0
148,2,0
