참고 : https://leechamin.tistory.com/79

In [44]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
# Load the Iris dataset through scikit-learn's `datasets` module.
iris = datasets.load_iris()

In [45]:
# Targets are the class labels; features are petal length and petal width
# (columns 2 and 3 of the data matrix).
Y = iris['target']
X = iris['data'][:, (2, 3)]
# Prepend a column of ones so that the first row of theta acts as the bias term.
X = np.hstack([np.ones((len(X), 1)), X])

In [46]:
# Sanity check: one row per sample, with a bias column plus the two petal features.
X.shape

(150, 3)

In [47]:
from sklearn.model_selection import train_test_split
# First split: hold out 20% of all samples as the test set.
x, test_x, y, test_y = train_test_split(
    X, Y, test_size=0.2, random_state=42
)
# Second split: hold out 20% of the remaining samples as the validation set.
train_x, val_x, train_y, val_y = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [48]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the integer class labels.
# The encoder is fitted ONCE, on the training labels only; validation and test
# labels are encoded with that same fitted mapping via `transform`. Re-fitting on
# the validation labels (as `fit_transform` would) can silently produce a matrix
# with fewer columns when a class happens to be missing from that split, and
# would also change the mapping later applied to the test labels.
onehot = OneHotEncoder()
train_y = onehot.fit_transform(train_y.reshape(train_y.shape[0], 1)).toarray()
val_y = onehot.transform(val_y.reshape(val_y.shape[0], 1)).toarray()
test_y = onehot.transform(test_y.reshape(test_y.shape[0], 1)).toarray()

In [49]:
def softmaxscore(x,theta):
  """Return the raw class scores (logits) for each sample.

  Args:
    x: feature matrix; its `dot` method is called with `theta`.
    theta: parameter matrix with one column per class.

  Returns:
    The matrix product of `x` and `theta`.
  """
  logits = x.dot(theta)
  return logits

def softmax(logits):
  """Convert a 2-D array of logits into row-wise probabilities.

  Args:
    logits: array of shape (n_samples, n_classes).

  Returns:
    Array of the same shape in which every row is non-negative and sums to 1.
  """
  # Subtracting each row's maximum does not change the softmax mathematically,
  # but it keeps np.exp from overflowing to inf (which would give inf/inf = nan).
  shifted = logits - np.max(logits, axis=1, keepdims=True)
  exps = np.exp(shifted)
  exp_sums = np.sum(exps, axis=1, keepdims=True)
  return exps / exp_sums

In [122]:
def gradientdescent(x,y,lr,n_estimate):
  """Fit softmax-regression parameters with batch gradient descent.

  Args:
    x: feature matrix of shape (m, n_features); a bias column must already be
      included by the caller if a bias is wanted.
    y: one-hot target matrix of shape (m, n_classes).
    lr: learning rate (step size of each update).
    n_estimate: number of gradient-descent iterations to run.

  Returns:
    theta: parameter matrix of shape (n_features, n_classes).

  Side effects:
    Re-seeds NumPy's global random generator with 42 and prints the training
    cross-entropy loss every 10000 iterations.
  """
  np.random.seed(42)
  m=len(x)
  # One parameter column per class. Taken from the width of the one-hot matrix
  # rather than from the number of distinct rows, so that theta still matches
  # `y` when some class does not occur in it.
  n_classes=y.shape[1]
  theta=np.random.randn(x.shape[1],n_classes)
  # Smallest positive normal float: lower bound for probabilities before the
  # log, so an underflowed probability cannot turn the loss into nan/inf.
  tiny=np.finfo(float).tiny

  for i in range(n_estimate):
    y_prob=softmax(softmaxscore(x,theta))
    if i % 10000 == 0:
      # The loss is only needed for reporting, so compute it only when printed.
      loss=-np.mean(np.sum(y*np.log(np.clip(y_prob,tiny,None)), axis=1))
      print(i,loss)
    # Gradient of the mean cross-entropy w.r.t. theta: x^T (p - y) / m.
    error=y_prob-y
    gradients=1/m*x.T.dot(error)
    theta=theta-lr*gradients
  return theta

In [123]:
def predict(x,y,theta):
  """Return the classification accuracy of `theta` on (x, y).

  Args:
    x: feature matrix passed to `softmaxscore` together with `theta`.
    y: target matrix; the true class of each row is taken as its argmax.
    theta: parameter matrix.

  Returns:
    Fraction of rows whose most probable class equals the true class.
  """
  probabilities = softmax(softmaxscore(x, theta))
  predicted_labels = np.argmax(probabilities, axis=1)
  true_labels = np.argmax(y, axis=1)
  hits = predicted_labels == true_labels
  return np.mean(hits)

In [124]:
# Train on the training split (learning rate 0.1, 500001 iterations),
# then display the learned parameter matrix.
theta = gradientdescent(train_x, train_y, lr=0.1, n_estimate=500001)
theta

0 7.727114466833609
10000 0.13517000587418598
20000 0.1110393182911033
30000 0.10141374957882815
40000 0.09619988745021706
50000 0.09294882925255843
60000 0.09074761406144653
70000 0.08917371455917734
80000 0.08800364217309546
90000 0.08710786367880675
100000 0.08640611313740458
110000 0.08584604788035721
120000 0.08539213018651459
130000 0.08501943104273106
140000 0.08470998479551754
150000 0.08445054702494707
160000 0.08423116324677304
170000 0.08404422591262885
180000 0.08388383602410848
190000 0.08374536064418876
200000 0.0836251197785316
210000 0.08352016071112152
220000 0.08342809269672095
230000 0.08334696408650961
240000 0.08327516978587289
250000 0.08321138072221847
260000 0.08315448950311444
270000 0.0831035681326781
280000 0.08305783481137505
290000 0.0830166276500897
300000 0.08297938369817548
310000 0.08294562209202504
320000 0.08291493042515048
330000 0.0828869536562395
340000 0.08286138503096989
350000 0.08283795861228872
360000 0.08281644310343539
370000 0.0827966367160

array([[ 26.36917275,   7.72298338, -33.08601774],
       [ -4.85726959,   0.1404904 ,   5.77151872],
       [ -7.84016161,   0.67626342,   9.04107135]])

In [125]:
# Accuracy of the trained parameters on the validation split.
predict(val_x, val_y, theta)

0.9583333333333334

In [126]:
# Gradient descent with early stopping added
def gradientdescentEarlystop(x,y,val_x,val_y,lr,n_estimate):
  """Fit softmax-regression parameters, stopping when validation loss stops improving.

  After every update the cross-entropy loss on the validation set is evaluated.
  Training stops at the first iteration whose validation loss is not strictly
  lower than the best one seen so far, and the parameters that achieved that
  best validation loss are returned.

  Args:
    x: training feature matrix of shape (m, n_features).
    y: one-hot training targets of shape (m, n_classes).
    val_x: validation feature matrix with the same number of columns as `x`.
    val_y: one-hot validation targets with the same number of columns as `y`.
    lr: learning rate (step size of each update).
    n_estimate: maximum number of gradient-descent iterations.

  Returns:
    Parameter matrix of shape (n_features, n_classes) with the lowest
    validation loss observed.

  Side effects:
    Re-seeds NumPy's global random generator with 42, prints the training loss
    every 10000 iterations, and prints the best/last validation loss when
    stopping early.
  """
  np.random.seed(42)
  m=len(x)
  min_loss=float("inf")
  # One parameter column per class, taken from the width of the one-hot matrix
  # so theta still matches `y` when some class does not occur in it.
  n_classes=y.shape[1]
  theta=np.random.randn(x.shape[1],n_classes)
  # Parameters with the best validation loss so far. Each update below builds
  # a new array, so keeping a reference (no copy) is safe.
  best_theta=theta
  # Lower bound for probabilities before the log: avoids 0*log(0) = nan, which
  # would otherwise trigger a spurious early stop.
  tiny=np.finfo(float).tiny

  for i in range(n_estimate):
    y_prob=softmax(softmaxscore(x,theta))
    if i % 10000 == 0:
      # The training loss is only reported, so compute it only when printed.
      loss=-np.mean(np.sum(y*np.log(np.clip(y_prob,tiny,None)), axis=1))
      print(i,loss)
    error=y_prob-y
    gradients=1/m*x.T.dot(error)
    theta=theta-lr*gradients

    # Evaluate the freshly updated parameters on the validation set.
    val_prob=softmax(softmaxscore(val_x,theta))
    val_loss = -np.mean(np.sum(val_y*np.log(np.clip(val_prob,tiny,None)), axis=1))
    if val_loss < min_loss:
      min_loss = val_loss
      best_theta = theta
    else:
      print(i-1, min_loss)
      print(i,val_loss,"Early Stopping!")
      # Return the best parameters, not the ones that just made things worse.
      return best_theta
  return best_theta

In [127]:
# Retrain with the same learning rate and iteration budget, this time passing
# the validation split so training can stop early.
theta = gradientdescentEarlystop(
    train_x, train_y, val_x, val_y, lr=0.1, n_estimate=500001
)

0 7.727114466833609
10000 0.13517000587418598
20000 0.1110393182911033
30000 0.10141374957882815
40000 0.09619988745021706
50000 0.09294882925255843
60000 0.09074761406144653
70000 0.08917371455917734
80000 0.08800364217309546
90000 0.08710786367880675
100000 0.08640611313740458
110000 0.08584604788035721
120000 0.08539213018651459
130000 0.08501943104273106
140000 0.08470998479551754
150000 0.08445054702494707
160000 0.08423116324677304
170000 0.08404422591262885
177441 0.09031374646038946
177442 0.09031374646039035 Early Stopping!


In [128]:
# Accuracy of the early-stopped parameters on the validation split.
predict(val_x, val_y, theta)

0.9583333333333334

In [129]:
# Accuracy of the early-stopped parameters on the held-out test split.
predict(test_x, test_y, theta)

1.0