<a href="https://colab.research.google.com/github/linyuehzzz/5523_project/blob/main/sgd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##**Stochastic Gradient Descent for Logistic Regression**
This code implements and tests the SGD algorithm for logistic regression
in different scenarios.  
Yue Lin (lin.3326 at osu.edu)  
Created: 11/12/2020

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

#### **Set up libraries**

In [93]:
import numpy as np
import random
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler

#### **Prepare data**

##### Generate training data

In [86]:
def train_data(n_epoch, train_bs):
  train_x = np.random.uniform(-1, 1, (train_bs * n_epoch, 4))
  b = np.ones((train_bs * n_epoch, 1))
  train_x = np.append(train_x, b, axis=1)
  train_y = np.array([random.randrange(-1, 2, 2) for i in range(train_bs * n_epoch)])
  # print(train_x, train_y)
  return train_x, train_y

##### Generate test data

In [87]:
def test_data(test_n):
  test_x = np.random.uniform(-1, 1, (test_n, 4))
  b = np.ones((test_n, 1))
  test_x = np.append(test_x, b, axis=1)
  test_y = np.array([random.randrange(-1, 2, 2) for i in range(test_n)])
  # print(test_x, test_y)
  return test_x, test_y

#### **Train**
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html

https://machinelearningmastery.com/implement-logistic-regression-stochastic-gradient-descent-scratch-python/

##### Predict using logistic regression

In [94]:
# Make a prediction with coefficients
def pred(x, w):
  yhat = w[-1]
  for i in range(len(x) - 1):
    yhat += w[i] * x[i]
  return 1.0 / (1.0 + np.exp(-yhat))

In [None]:
def log_loss():
  

##### Estimate weight vector using SGD

In [96]:
def train_sgd(train_x, train_y, l_rate, n_epoch, bs):
  w = np.zeros(len(train_x[0]))
  risk = np.zeros(n_epoch)
  cls_err = np.zeros(n_epoch)
  sum_err = 0.
  for epoch in range(n_epoch):
    for idx in range(epoch * bs, (epoch + 1) * bs):
      x = train_x[idx]
      y = train_y[idx]
      yhat = pred(x, w)
      # print(yhat)
      err = y - yhat
      sum_err += err
      w[-1] = w[-1] + l_rate * err * yhat * (1.0 - yhat)
      for i in range(len(x) - 1):
        w[i] = w[i] + l_rate * err * yhat * (1.0 - yhat) * x[i]
    print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_err))
  return w

#### **Test**
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html

In [None]:
def test(test_x, test_y):

  for row in test_x:
    yhat = pred(row, w)

#### **Wrapper**

In [97]:
n_epoch = 30
train_bs = 50
test_n = 400
l_rate = 0.001

# Generate training data
train_x, train_y = train_data(n_epoch, train_bs)

# Generate test data
test_x, test_y = test_data(test_n)

# Train
w = train_sgd(train_x, train_y, l_rate, n_epoch, train_bs)
print(w)


>epoch=0, lrate=0.001, error=-26.948
>epoch=1, lrate=0.001, error=-41.835
>epoch=2, lrate=0.001, error=-68.659
>epoch=3, lrate=0.001, error=-91.413
>epoch=4, lrate=0.001, error=-126.067
>epoch=5, lrate=0.001, error=-158.617
>epoch=6, lrate=0.001, error=-175.112
>epoch=7, lrate=0.001, error=-191.538
>epoch=8, lrate=0.001, error=-221.898
>epoch=9, lrate=0.001, error=-256.173
>epoch=10, lrate=0.001, error=-262.346
>epoch=11, lrate=0.001, error=-286.491
>epoch=12, lrate=0.001, error=-312.569
>epoch=13, lrate=0.001, error=-342.547
>epoch=14, lrate=0.001, error=-364.446
>epoch=15, lrate=0.001, error=-380.274
>epoch=16, lrate=0.001, error=-412.033
>epoch=17, lrate=0.001, error=-433.721
>epoch=18, lrate=0.001, error=-459.339
>epoch=19, lrate=0.001, error=-488.850
>epoch=20, lrate=0.001, error=-516.294
>epoch=21, lrate=0.001, error=-547.631
>epoch=22, lrate=0.001, error=-576.875
>epoch=23, lrate=0.001, error=-600.048
>epoch=24, lrate=0.001, error=-619.165
>epoch=25, lrate=0.001, error=-646.195
