<a href="https://colab.research.google.com/github/sonicchomp/Logstic/blob/master/logistic_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [138]:
# Mount Google Drive at /content/drive; main() reads its dataset from a
# path under this mount point.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import OrderedDict
import theano
import theano.tensor as T
from sklearn.metrics import accuracy_score

In [0]:
class Optimizer(object):
  """Base class for gradient-based optimizers.

  Holds the list of parameters to optimize and, on `updates`, computes the
  gradient of a loss with respect to each of them via `T.grad`. Subclasses
  turn those gradients into an actual update rule.
  """

  def __init__(self, params=None):
    """Store the parameters to optimize.

    Args:
      params: iterable of parameters; each one is later passed to `T.grad`
        as the variable to differentiate with respect to.

    Raises:
      NotImplementedError: if `params` is None.
    """
    if params is None:
      # Must be `raise`: returning a value from __init__ is itself a
      # TypeError, which hid the intended exception.
      raise NotImplementedError()
    self.params = params

  def updates(self, loss=None):
    """Reset the update table and compute one gradient per parameter.

    After this call `self.updates` is an empty OrderedDict and
    `self.gparams` is a list of gradients aligned with `self.params`.

    NOTE: assigning `self.updates` shadows this method on the instance, so
    it can only be invoked once per instance. The attribute name is kept
    because the SGD subclass in this file reads and returns it.

    Args:
      loss: expression to differentiate.

    Raises:
      NotImplementedError: if `loss` is None.
    """
    if loss is None:
      # Raise instead of returning the exception object, so a missing loss
      # fails here rather than later on a missing `gparams` attribute.
      raise NotImplementedError()
    self.updates = OrderedDict()
    self.gparams = [T.grad(loss, param) for param in self.params]



In [0]:
class SGD(Optimizer):
    """Plain gradient descent: each parameter moves against its gradient.

    The update rule is `param - learning_rate * gradient`.
    """

    def __init__(self, learning_rate=0.01, params=None):
        """Create the optimizer.

        Args:
            learning_rate: step size multiplying each gradient.
            params: parameters to optimize, forwarded to `Optimizer`.
        """
        super(SGD, self).__init__(params=params)
        # Use the caller's value; this was previously pinned to 0.01 and the
        # argument was silently ignored.
        self.learning_rate = learning_rate

    def updates(self, loss=None):
        """Build the parameter -> new-value mapping for one descent step.

        Args:
            loss: expression whose gradients drive the update.

        Returns:
            The OrderedDict stored on `self.updates`, mapping every
            parameter to `param - learning_rate * gradient`.
        """
        # The base class replaces `self.updates` with a fresh OrderedDict and
        # fills `self.gparams` with one gradient per parameter.
        super(SGD, self).updates(loss=loss)

        for param, gparam in zip(self.params, self.gparams):
            self.updates[param] = param - self.learning_rate * gparam

        return self.updates


In [0]:
def model(X, y, lambd, w_init):
    """Compile the training step of an L2-regularised logistic regression.

    Args:
        X: not read; a symbolic matrix named "X" is created internally.
            Kept so existing callers continue to work.
        y: not read; a symbolic vector named "y" is created internally.
        lambd: coefficient applied to half the squared L2 norm of the weights.
        w_init: initial value for the shared weight vector.

    Returns:
        A compiled Theano function taking `(X, y)` and returning
        `[loss, w]`; the SGD updates are attached, so each call also
        modifies the shared weights.
    """
    features = T.matrix(name="X")
    targets = T.vector(name="y")
    weights = theano.shared(w_init, name="w")

    # Probability of the positive class: logistic sigmoid of the linear score.
    score = T.dot(features, weights)
    p_1 = 1 / (1 + T.exp(-score))

    # Per-sample binary cross-entropy, written as the difference of two terms.
    positive_term = -targets * T.log(p_1)
    negative_term = (1 - targets) * T.log(1 - p_1)
    xent = positive_term - negative_term

    l2_penalty = lambd * (weights ** 2).sum() / 2
    loss = xent.mean() + l2_penalty

    optimizer = SGD(params=[weights])
    step_updates = optimizer.updates(loss)

    print('start: compile model')

    train = theano.function(
        inputs=[features, targets],
        outputs=[loss, weights],
        updates=step_updates,
    )

    print('complete: compile model')

    return train

In [0]:
def test_model(X,y,lambd,w):
    """Compile an evaluation function for the logistic-regression model.

    Args:
        X: not read; a symbolic matrix named "X" is created internally.
        y: not read; a symbolic vector named "y" is created internally.
        lambd: coefficient applied to half the squared L2 norm of the weights.
        w: weight values to evaluate; wrapped in a Theano shared variable.

    Returns:
        A compiled Theano function taking `(X, y)` and returning
        `[loss, prediction]`, where `prediction` is the predicted
        probability thresholded at 0.5. No updates are attached.
    """
    features = T.matrix(name="X")
    targets = T.vector(name="y")
    weights = theano.shared(w, name="w")

    # Probability of the positive class: logistic sigmoid of the linear score.
    score = T.dot(features, weights)
    p_1 = 1 / (1 + T.exp(-score))

    # Same regularised objective as the one used for training.
    positive_term = -targets * T.log(p_1)
    negative_term = (1 - targets) * T.log(1 - p_1)
    xent = positive_term - negative_term
    l2_penalty = lambd * (weights ** 2).sum() / 2
    loss = xent.mean() + l2_penalty

    prediction = p_1 > 0.5

    print("start: compile model")
    test = theano.function(inputs=[features, targets],
                           outputs=[loss, prediction])
    print('complete: compile model')
    return test


In [0]:
class confusion_matrix(object):
    """Binary confusion-matrix counts and the metrics derived from them.

    Predictions and true labels are paired positionally with `zip`. A
    prediction counts as positive when it equals True, and a label counts
    as positive when it equals 1 (negative when it equals 0).

    Attributes set on construction: `tp`, `tn`, `fp`, `fn`.
    """

    def __init__(self, prediction, true_label):
        """Store both sequences and compute the four counts.

        Args:
            prediction: sequence of predicted labels (True/False or 1/0).
            true_label: sequence of ground-truth labels (1/0).
        """
        self.predictions = prediction
        self.true_label = true_label
        self.tp = 0
        self.TP()
        self.tn = 0
        self.TN()
        self.fp = 0
        self.FP()
        self.fn = 0
        self.FN()

    def _count(self, predicted, actual):
        """Count pairs whose prediction equals `predicted` and label equals `actual`."""
        return sum(
            1
            for pred, true in zip(self.predictions, self.true_label)
            if pred == predicted and true == actual
        )

    # The four methods below assign rather than accumulate, so calling one
    # again recomputes the count instead of double-counting.

    def TP(self):
        """Recompute `self.tp`: predicted positive, actually positive."""
        self.tp = self._count(True, 1)

    def TN(self):
        """Recompute `self.tn`: predicted negative, actually negative."""
        self.tn = self._count(False, 0)

    def FP(self):
        """Recompute `self.fp`: predicted positive, actually negative."""
        self.fp = self._count(True, 0)

    def FN(self):
        """Recompute `self.fn`: predicted negative, actually positive."""
        self.fn = self._count(False, 1)

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or 0.0 when the denominator is zero."""
        if not denominator:
            return 0.0
        return numerator / denominator

    def accuracy(self):
        """Fraction of counted samples classified correctly (0.0 if none were counted)."""
        total = self.tp + self.tn + self.fp + self.fn
        return self._ratio(self.tp + self.tn, total)

    def error(self):
        """Complement of `accuracy()`."""
        # Previously called an undefined name `accu`, raising NameError.
        return 1 - self.accuracy()

    def precision(self):
        """tp / (tp + fp); 0.0 when nothing was predicted positive."""
        return self._ratio(self.tp, self.tp + self.fp)

    def recall(self):
        """tp / (tp + fn); 0.0 when there are no positive labels."""
        return self._ratio(self.tp, self.tp + self.fn)
    
        
        

In [0]:
def main(data_path='/content/drive/My Drive/ロジスティック回帰/duke-breast-cancer.txt',
         lambd=0.01, training_epochs=10, seed=38):
    """Train and evaluate the logistic-regression model on a tabular dataset.

    The file is read with `pd.read_table(header=None)`; the first column is
    used as the label and the remaining columns as features. The weights
    that produced the lowest training loss are evaluated on the held-out
    split, and the test loss and accuracy are printed.

    Args:
        data_path: path of the dataset file.
        lambd: L2 regularisation coefficient; also reused as the standard
            deviation of the initial weights, as in the original code.
        training_epochs: number of full-batch training steps.
        seed: seed for both the train/test split and weight initialisation.
    """
    data = pd.read_table(data_path, header=None)
    X = data.drop(data.columns[0], axis=1)
    y = data[data.columns[0]]

    # Pin the split: without random_state it draws from the global NumPy RNG,
    # which was only seeded *after* the split, so runs were not reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, shuffle=True, random_state=seed)

    np.random.seed(seed=seed)
    w_init = np.random.normal(loc=0.0, scale=lambd, size=X_train.shape[1])
    train = model(X_train, y_train, lambd, w_init)

    # Start from the initial weights and an infinite loss. The old
    # `np.empty_like` / 999 pair could leave uninitialised memory as the
    # "best" weights if the loss never dropped below 999 or was NaN.
    min_w = w_init.copy()
    min_loss = np.inf
    print("epochs: {}".format(training_epochs))

    # Integer cadence: the former float modulo `t % (training_epochs/10)`
    # skipped most log lines whenever training_epochs was not a multiple of 10.
    log_every = max(1, training_epochs // 10)

    for t in range(training_epochs):
        loss, w = train(X_train, y_train)
        if t % log_every == 0:
            print('{}: loss:{}'.format(t, loss))
        if loss < min_loss:
            min_w = w
            min_loss = loss

    print("-"*20)
    test = test_model(X_test, y_test, lambd, min_w)
    test_loss, prediction = test(X_test, y_test)
    print("test loss: {}".format(test_loss))
    cm = confusion_matrix(prediction, y_test)
    print("Accuracy: {}".format(cm.accuracy()))

In [157]:
# Run the full pipeline: load the data, train, then report test loss and accuracy.
main()

  This is separate from the ipykernel package so we can avoid doing imports until


start: compile model
complete: compile model
epochs: 10
0: loss:0.6819969058338851
1: loss:0.36497589066659447
2: loss:0.5423279054020109
3: loss:0.637281733491918
4: loss:0.19097024089898795
5: loss:0.0957232419571012
6: loss:0.07492028781569349
7: loss:0.06846651689312289
8: loss:0.06320234599524963
9: loss:0.05879130951446321
--------------------
start: compile model
complete: compile model
test loss: 0.1462203133831545
Accuracy: 0.9090909090909091
