# Linear Classifiers

*Note:*
* *A **linear transform** is of the form* `W•x`. 
* *An **affine transform** is a linear transform plus a constant (translation)* `W•x + b`. 

A **linear classifier** is an affine transformation trained to minimize the square of the difference between its predictions and the targets:

`prediction = W•input + b`

Training a linear classifier is about finding `W` and `b` such that the resulting line separates two classes of data.

## Implementing a linear classifier from scratch

*Note: The covariance of multivariate_normal() describes the shape of the point cloud.*

In [None]:
# Creating nicely linearly separable synthetic data to use in our example
import numpy as np
import matplotlib.pyplot as plt

num_samples_per_class = 1000

# Both classes are drawn with the same covariance matrix, so the two point
# clouds have the same shape and differ only in where they are centered.
shared_cov = [[1, 0.5], [0.5, 1]]

# Class 0 cloud, centered at (3, 0) -> shape (num_samples_per_class, 2)
negative_samples = np.random.multivariate_normal(
    mean=[3, 0], cov=shared_cov, size=num_samples_per_class
)
# Class 1 cloud, centered at (0, 3) -> shape (num_samples_per_class, 2)
positive_samples = np.random.multivariate_normal(
    mean=[0, 3], cov=shared_cov, size=num_samples_per_class
)

# Rows are stacked negatives first, then positives, and cast to float32.
inputs = np.concatenate(
    [negative_samples, positive_samples], axis=0
).astype(np.float32)

# Labels follow the same row order: a column of 0s followed by a column of 1s.
targets = np.concatenate(
    [
        np.zeros((num_samples_per_class, 1), dtype=np.float32),
        np.ones((num_samples_per_class, 1), dtype=np.float32),
    ],
    axis=0,
)
# Plot the first input column against the second, colored by class label.
plt.scatter(x=inputs[:, 0], y=inputs[:, 1], c=targets[:, 0])
plt.show()

In [None]:
import tensorflow as tf

# Trainable parameters of the affine map `inputs @ W + b`
input_dim = 2   # number of features per sample
output_dim = 1  # one score per sample

# W starts from uniform random values; b starts at zero.
initial_W = tf.random.uniform((input_dim, output_dim))
initial_b = tf.zeros((output_dim,))
W = tf.Variable(initial_W)
b = tf.Variable(initial_b)

In [None]:
# Define the model
def model(inputs):
    """Return the affine transform `inputs • W + b` using the global W and b."""
    weighted_inputs = tf.matmul(inputs, W)
    return weighted_inputs + b

In [None]:
# Define the loss
def calc_loss(targets, predictions):
    """Return the mean of the squared differences between targets and predictions."""
    residuals = targets - predictions
    return tf.reduce_mean(tf.square(residuals))

In [None]:
# Gradient descent - one training step moves W and b in the direction opposite
# to the gradient of the loss with respect to each of them.
learning_rate = 0.1
def training_step(inputs, targets):
    """Run one gradient-descent update of W and b and return the loss.

    The returned loss is the one computed in the forward pass, i.e. before
    the weights are updated in this step.
    """
    # Record the forward pass so the tape can differentiate the loss.
    with tf.GradientTape() as tape:
        loss = calc_loss(targets, model(inputs))
    trainable_variables = [W, b]
    gradients = tape.gradient(loss, trainable_variables)
    # Update each variable in place: variable -= gradient * learning_rate
    for variable, gradient in zip(trainable_variables, gradients):
        variable.assign_sub(gradient * learning_rate)
    return loss

In [None]:
# Train the model: every step uses the full `inputs`/`targets` arrays and
# prints the loss value returned by that step.
num_training_steps = 40
for step in range(num_training_steps):
    loss = training_step(inputs, targets)
    print(f"Loss at step {step} : {loss:.4f}")