<a href="https://colab.research.google.com/github/lepickle/logistic-regression-in-tensorflow/blob/master/Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Overview

This project implements a logistic regression classifier in TensorFlow 2.0.0 alpha.

# Setup

Install TensorFlow 2.0.0 alpha and import the required libraries.

In [242]:
#Force installing tensorflow 2.0.0 alpha for this notebook.

!pip install tensorflow==2.0.0alpha 

import tensorflow as tf
import time
from sklearn.datasets import load_breast_cancer

print(tf.__version__)

2.0.0-alpha0


# Model

Define the hyperparameters and the logistic regression model class.

In [0]:
# Training hyperparameters shared by every experiment in this notebook.

learning_rate = 1e-4  # step size handed to the Adam optimizer
batch_size = 128      # number of examples per mini-batch
epochs = 15           # number of full passes over each training set

class LogisticRegressionModel():
  """Linear classifier trained with sigmoid cross-entropy and the Adam optimizer.

  Each class gets its own weight column and bias, so the model produces one
  independent logit per class. Call `initialize_params` before `predict` or
  `train_step`.
  """

  def __init__(self):
    self.W = None      # weights, created by initialize_params
    self.b = None      # bias, created by initialize_params
    self.logit = None  # unused placeholder, kept for backward compatibility
    # `learning_rate` is read from the notebook-level hyperparameters.
    self.optimizer = tf.optimizers.Adam(learning_rate)

  def initialize_params(self, number_of_features, number_of_classes):
    """Create randomly initialised weights and bias.

    Args:
      number_of_features: size of each input feature vector.
      number_of_classes: number of output logits (one per class).
    """
    # One weight column per class; a single shared column would give every
    # class the same linear score and leave only the bias to tell them apart.
    self.W = tf.Variable(tf.random.normal(shape=[number_of_features, number_of_classes])) #weights
    self.b = tf.Variable(tf.random.normal(shape=[1, number_of_classes])) #bias

  def loss_fn(self, logits, labels):
    """Return the mean sigmoid cross-entropy.

    `logits` must be raw (pre-sigmoid) scores: the TensorFlow op applies the
    sigmoid internally.
    """
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

  def predict(self, batch_features): #hypothesis
    """Return the raw logits `batch_features @ W + b` (no sigmoid applied)."""
    batch_features = tf.cast(batch_features, tf.float32)
    return tf.matmul(batch_features, self.W) + self.b

  def train_step(self, batch_features, batch_labels):
    """Run one optimisation step on a mini-batch and return its loss.

    Prints the element-wise accuracy of the batch (rounded sigmoid outputs
    compared against every label entry) as a side effect.

    Args:
      batch_features: batch of feature vectors; cast to float32 by `predict`.
      batch_labels: labels with the same shape as the logits
        (the notebook passes one-hot rows).

    Returns:
      Scalar tensor holding the mean sigmoid cross-entropy of the batch.
    """
    batch_labels = tf.cast(batch_labels, tf.float32)

    with tf.GradientTape() as tape:
      logits = self.predict(batch_features)
      # Feed raw logits to the loss; squashing them first would apply the
      # sigmoid twice.
      loss = self.loss_fn(logits, batch_labels)

    #calculating accuracy for this step
    prediction = tf.round(tf.sigmoid(logits))
    correct = tf.cast(tf.equal(prediction, batch_labels), dtype=tf.float32)
    accuracy = tf.reduce_mean(correct)
    print(accuracy)

    # Differentiate the loss (not the outputs) and update both parameters.
    variables = [self.W, self.b]
    gradients = tape.gradient(loss, variables)

    self.optimizer.apply_gradients(zip(gradients, variables))

    return loss
        

# Dataset

We use three built-in datasets: MNIST and Fashion-MNIST from `tf.keras.datasets`, and the Wisconsin Breast Cancer dataset from scikit-learn.



In [0]:
# Load MNIST through Keras as (features, labels) pairs for the train and test splits.
(xtr, ytr), (xte, yte) = tf.keras.datasets.mnist.load_data()

# Training

We train the model on each dataset in turn, starting with MNIST.

In [245]:
# Flatten every image into a 784-element vector and divide by 255 to rescale
# the pixel intensities.
xtr = xtr.reshape(-1, 784) / 255.
xte = xte.reshape(-1, 784) / 255.

no_of_features = xtr.shape[1]

# One-hot encode the labels over the 10 digit classes.
ytr = tf.one_hot(ytr, 10)
yte = tf.one_hot(yte, 10)

print(xtr.shape)

dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr))
# Shuffle individual examples, group them into fixed-size batches, and prefetch
# last so the next batch is prepared while the current one is being trained on.
dataset = dataset.shuffle(1024).batch(batch_size, drop_remainder=True).prefetch(1)

model = LogisticRegressionModel()
model.initialize_params(no_of_features, 10)

(60000, 784)


In [0]:
# Mean training loss of each epoch, in order.
loss_trace = []
for epoch in range(epochs):
  epoch_losses = []
  for batch_features, batch_labels in dataset:
    train_loss = model.train_step(batch_features, batch_labels)
    epoch_losses.append(float(train_loss.numpy()))
  # Guard against an empty dataset so the average never divides by zero.
  if epoch_losses:
    loss_trace.append(sum(epoch_losses) / len(epoch_losses))
    print("epoch {}/{} - mean loss {:.4f}".format(epoch + 1, epochs, loss_trace[-1]))

tf.Tensor(0.75703126, shape=(), dtype=float32)
tf.Tensor(0.73828125, shape=(), dtype=float32)
tf.Tensor(0.7601563, shape=(), dtype=float32)
tf.Tensor(0.753125, shape=(), dtype=float32)
tf.Tensor(0.7601563, shape=(), dtype=float32)
tf.Tensor(0.7890625, shape=(), dtype=float32)
tf.Tensor(0.7421875, shape=(), dtype=float32)
tf.Tensor(0.79765624, shape=(), dtype=float32)
tf.Tensor(0.753125, shape=(), dtype=float32)
tf.Tensor(0.7859375, shape=(), dtype=float32)
tf.Tensor(0.721875, shape=(), dtype=float32)
tf.Tensor(0.71484375, shape=(), dtype=float32)
tf.Tensor(0.7859375, shape=(), dtype=float32)
tf.Tensor(0.75078124, shape=(), dtype=float32)
tf.Tensor(0.71796876, shape=(), dtype=float32)
tf.Tensor(0.70703125, shape=(), dtype=float32)
tf.Tensor(0.746875, shape=(), dtype=float32)
tf.Tensor(0.7609375, shape=(), dtype=float32)
tf.Tensor(0.7367188, shape=(), dtype=float32)
tf.Tensor(0.753125, shape=(), dtype=float32)
tf.Tensor(0.7609375, shape=(), dtype=float32)
tf.Tensor(0.74921876, shape=(), 

In [0]:
# Load Fashion-MNIST through Keras as (features, labels) pairs for both splits.
(xfr, yfr), (xfe, yfe) = tf.keras.datasets.fashion_mnist.load_data()

# Flatten every image into a 784-element vector and divide by 255 to rescale
# the pixel intensities. The test split must come from Fashion-MNIST itself,
# not from the MNIST arrays loaded earlier.
xfr = xfr.reshape(-1, 784) / 255.
xfe = xfe.reshape(-1, 784) / 255.

no_of_features = xfr.shape[1]

# One-hot encode the labels over the 10 clothing classes.
yfr = tf.one_hot(yfr, 10)
yfe = tf.one_hot(yfe, 10)

print(xfr.shape)

dataset = tf.data.Dataset.from_tensor_slices((xfr, yfr))
# Shuffle individual examples, group them into fixed-size batches, and prefetch
# last so the next batch is prepared while the current one is being trained on.
dataset = dataset.shuffle(1024).batch(batch_size, drop_remainder=True).prefetch(1)

# Start from a freshly initialised model for this dataset.
model = LogisticRegressionModel()
model.initialize_params(no_of_features, 10)

for epoch in range(epochs):
  for batch_features, batch_labels in dataset:
    train_loss = model.train_step(batch_features, batch_labels)

In [0]:
# NOTE(review): load_breast_cancer is already imported in the setup cell; the
# repeated import only keeps this cell runnable on its own.
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset bundled with scikit-learn; later cells read
# its `data` (features) and `target` (labels) attributes.
data = load_breast_cancer()


In [0]:
# Split by position: rows 0-499 are used for training, rows 500-568 for testing.
xbr, xbe = data.data[:500], data.data[500:569]
ybr, ybe = data.target[:500], data.target[500:569]

# Width of the feature matrix; the model's weight shape is derived from it.
no_of_features = xbr.shape[1]
print(no_of_features)

# print(len(xbe))
# print(len(ybe))

In [0]:
# One-hot encode the binary labels into two columns.
ybr = tf.one_hot(ybr, 2)
ybe = tf.one_hot(ybe, 2)

dataset = tf.data.Dataset.from_tensor_slices((xbr, ybr))
# Shuffle individual examples, group them into fixed-size batches, and prefetch
# last so the next batch is prepared while the current one is being trained on.
# drop_remainder=True discards the final partial batch of every epoch.
dataset = dataset.shuffle(1024).batch(batch_size, drop_remainder=True).prefetch(1)

# Start from a freshly initialised model for this dataset.
model = LogisticRegressionModel()
model.initialize_params(no_of_features, 2)

for epoch in range(epochs):
  for batch_features, batch_labels in dataset:
    train_loss = model.train_step(batch_features, batch_labels)
    print(train_loss)