In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the header-less Iris file and clean it in one pass.
# The '?' -> NaN replacement has to feed into dropna(); binding it to a
# different name would silently leave the placeholder rows in `data`.
data = (
    pd.read_csv('iris.data', header=None)
    .replace('?', np.nan)      # mark '?' placeholders as missing
    .dropna(how='any')         # drop rows with any missing field
    .drop_duplicates()         # drop exact duplicate rows
)

In [3]:
# Replace the species strings in column 4 with integer class labels.
species_to_label = {
    'Iris-versicolor': 0,
    'Iris-setosa': 1,
    'Iris-virginica': 2,
}
data[4] = data[4].map(species_to_label)

In [4]:
# Select the rows labelled 0 and 1, then shuffle each class with a fixed
# seed so the split is reproducible.
t1 = data.loc[data[4] == 0]
t2 = data.loc[data[4] == 1]
t1 = t1.sample(n=len(t1), random_state=0)
t2 = t2.sample(n=len(t2), random_state=0)

# Per class: the first 40 shuffled rows are used for training, the rest for
# testing. The last column is the label; every other column is a feature.
train_rows = slice(None, 40)
test_rows = slice(40, None)
train_X = pd.concat([part.iloc[train_rows, :-1] for part in (t1, t2)], axis=0)
train_y = pd.concat([part.iloc[train_rows, -1] for part in (t1, t2)], axis=0)
test_X = pd.concat([part.iloc[test_rows, :-1] for part in (t1, t2)], axis=0)
test_y = pd.concat([part.iloc[test_rows, -1] for part in (t1, t2)], axis=0)


In [8]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient updates.

    Parameters
    ----------
    alpha : float
        Step size applied to every weight update.
    times : int
        Number of passes over the training data performed by ``fit``.

    Attributes set by ``fit``
    -------------------------
    w_ : numpy.ndarray
        Weight vector of length ``1 + n_features``; ``w_[0]`` is the
        intercept and ``w_[1:]`` are the per-feature coefficients.
    loss_ : list
        Summed cross-entropy loss recorded at the start of each pass.
    """

    def __init__(self, alpha, times):
        self.alpha = alpha
        self.times = times

    def sigmoid(self, z):
        """Return ``1 / (1 + exp(-z))`` element-wise."""
        z = np.asarray(z, dtype=float)
        # exp(-z) overflows to inf for very negative z; the quotient is then
        # correctly 0.0, so only the spurious overflow warning is suppressed.
        with np.errstate(over='ignore'):
            return 1.0 / (1.0 + np.exp(-z))

    def fit(self, X, y):
        """Learn the weights from samples ``X`` and 0/1 targets ``y``.

        ``X`` is converted to a 2-D float array (one row per sample) and
        ``y`` to a float array with one entry per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        self.w_ = np.zeros(1 + X.shape[1])
        self.loss_ = []
        for _ in range(self.times):
            z = np.dot(X, self.w_[1:]) + self.w_[0]
            p = self.sigmoid(z)
            # Cross-entropy -sum(y*log(p) + (1-y)*log(1-p)) rewritten as
            # sum(log(1 + exp(z)) - y*z). The two forms are algebraically
            # equal, but this one does not evaluate log(0) when the sigmoid
            # saturates, so the recorded loss does not turn into nan.
            cost = np.sum(np.logaddexp(0.0, z) - y * z)
            self.loss_.append(cost)

            residual = y - p
            self.w_[0] += self.alpha * np.sum(residual)
            self.w_[1:] += self.alpha * np.dot(X.T, residual)

    def predict_proba(self, X):
        """Return an ``(n_samples, 2)`` array: column 0 is ``1 - p``, column 1 is ``p``."""
        X = np.asarray(X, dtype=float)
        z = np.dot(X, self.w_[1:]) + self.w_[0]
        p = self.sigmoid(z).reshape(-1, 1)
        return np.concatenate([1 - p, p], axis=1)

    def predict(self, X):
        """Return, per sample, the index (0 or 1) of the larger probability."""
        return np.argmax(self.predict_proba(X), axis=1)

In [9]:
# Fit the classifier on the training split, then compute the per-class
# probabilities for the held-out test rows.
learning_rate, n_iterations = 0.01, 20
lr = LogisticRegression(alpha=learning_rate, times=n_iterations)
lr.fit(X=train_X, y=train_y)
result = lr.predict_proba(X=test_X)

array([[0.99556826, 0.00443174],
       [0.98410068, 0.01589932],
       [0.98776425, 0.01223575],
       [0.98963123, 0.01036877],
       [0.99280127, 0.00719873],
       [0.99069063, 0.00930937],
       [0.99491877, 0.00508123],
       [0.99324116, 0.00675884],
       [0.99102776, 0.00897224],
       [0.99371804, 0.00628196],
       [0.00933381, 0.99066619],
       [0.00702376, 0.99297624],
       [0.02071892, 0.97928108],
       [0.07037915, 0.92962085],
       [0.01714725, 0.98285275],
       [0.02642633, 0.97357367],
       [0.00813116, 0.99186884],
       [0.00819442, 0.99180558]])

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Compare predicted class ids with the true labels for each test sample.
# `result` is the two-column probability matrix from predict_proba; plotting
# it would draw probabilities, not class ids, against the 'Type' axis, so the
# hard predictions from predict() are used instead.
predicted = lr.predict(test_X)
plt.plot(predicted,'ro',ms=12,label='PD')
plt.plot(test_y.values,'go',label='TD')
plt.title('LogisticRegression')
plt.xlabel('CaseID')
plt.ylabel('Type')
plt.legend()  # without this the label= arguments above are never displayed
plt.show()
