In [52]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [53]:
# Feature matrix: 8 samples (rows) x 4 features (columns), stored as float32.
x_data = np.asarray([
    [1, 2, 1, 1],
    [2, 1, 3, 2],
    [3, 1, 3, 4],
    [4, 1, 5, 5],
    [1, 7, 5, 5],
    [1, 2, 5, 6],
    [1, 6, 6, 6],
    [1, 7, 7, 7],
], dtype=np.float32)

# One-hot targets for 3 classes, one row per sample. Indexing the 3x3
# identity matrix with the class index of each sample yields the rows
# [0, 0, 1] (class 2), [0, 1, 0] (class 1) and [1, 0, 0] (class 0).
y_data = np.eye(3, dtype=np.float32)[[2, 2, 2, 1, 1, 1, 0, 0]]

$\theta_{11} x_1 + \theta_{12} x_2 + \theta_{13} x_3 + \theta_{14} x_4 + \theta_{10}$ 

$\theta_{21} x_1 + \theta_{22} x_2 + \theta_{23} x_3 + \theta_{24} x_4 + \theta_{20}$ 

$\theta_{31} x_1 + \theta_{32} x_2 + \theta_{33} x_3 + \theta_{34} x_4 + \theta_{30}$ 

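In matrix form, the three class scores above are:

$$
\begin{bmatrix}
\theta_{11} & \theta_{12} & \theta_{13} & \theta_{14}\\
\theta_{21} & \theta_{22} & \theta_{23} & \theta_{24}\\
\theta_{31} & \theta_{32} & \theta_{33} & \theta_{34}
\end{bmatrix}
\begin{bmatrix}
x_1\\x_2\\x_3\\x_4
\end{bmatrix} +
\begin{bmatrix}
\theta_{10}\\\theta_{20}\\\theta_{30}
\end{bmatrix}
$$

The code computes the same scores for a whole batch of row vectors as `tf.matmul(X, self.W) + self.b`, where `W` has shape (num_features, num_classes), i.e. the transpose of the weight matrix written here.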

In [54]:
class SoftmaxRegression(tf.Module):
  """Linear layer followed by a softmax, i.e. multinomial logistic regression.

  Attributes:
    W: trainable weight variable of shape (num_features, num_classes),
      initialised from a standard normal distribution.
    b: trainable bias variable of shape (num_classes,), initialised from a
      standard normal distribution.
  """

  def __init__(self, num_features, num_classes, name=None):
    """Create the randomly initialised weight and bias variables.

    Args:
      num_features: number of input features (rows of `W`).
      num_classes: number of output classes (columns of `W`, length of `b`).
      name: optional module name forwarded to `tf.Module`.
    """
    super().__init__(name=name)
    initial_weight = tf.random.normal(shape=[num_features, num_classes])
    self.W = tf.Variable(initial_weight, name='weight')
    initial_bias = tf.random.normal(shape=[num_classes])
    self.b = tf.Variable(initial_bias, name='bias')

  def __call__(self,X):
    """Return the softmax of `X @ W + b`.

    Args:
      X: input whose last dimension must match `num_features` so that it can
        be matrix-multiplied with `W`.

    Returns:
      Tensor of class probabilities produced by `tf.nn.softmax`.
    """
    logits = tf.linalg.matmul(X, self.W) + self.b
    return tf.nn.softmax(logits)

In [55]:
def loss(y_true, y_predicted, epsilon=1e-7):
  """Mean categorical cross-entropy between targets and predicted probabilities.

  The predicted probabilities are clipped to [epsilon, 1] before taking the
  logarithm. Without the clip, a probability that underflows to exactly 0
  gives log(0) = -inf, and multiplying that by a 0 target yields NaN, which
  then propagates into the loss and every gradient.

  Args:
    y_true: target distribution per sample (e.g. one-hot rows); summed
      against the log-probabilities along axis 1.
    y_predicted: predicted class probabilities, same shape as `y_true`.
    epsilon: lower bound applied to `y_predicted` before the log.

  Returns:
    Scalar tensor: the per-sample cross-entropy averaged over all samples.
  """
  safe_probabilities = tf.clip_by_value(y_predicted, epsilon, 1.0)
  cost = -tf.reduce_sum(y_true * tf.math.log(safe_probabilities), axis=1)
  return tf.reduce_mean(cost)

In [56]:
def train(model, x, y, learning_rate = 0.1):
  """Run one full-batch gradient-descent step on `model`.

  Args:
    model: object exposing trainable variables `W` and `b` and callable on `x`.
    x: input batch passed to `model`.
    y: targets passed to `loss` together with the model output.
    learning_rate: step size multiplied with each gradient.

  Returns:
    The loss evaluated before the parameter update, so callers can monitor
    convergence. Callers that ignore the return value are unaffected.
  """
  # Record the forward pass so the tape can differentiate the loss.
  with tf.GradientTape() as tape:
    current_loss = loss(y, model(x))

  d_W, d_b = tape.gradient(current_loss, [model.W, model.b])

  # In-place update: variable <- variable - learning_rate * gradient.
  model.W.assign_sub(learning_rate * d_W)
  model.b.assign_sub(learning_rate * d_b)

  return current_loss

In [57]:
def training_loop(model, x, y, epochs, alpha):
  """Apply `train` to `model` `epochs` times using the full batch each time.

  Args:
    model: model updated in place by `train`.
    x: inputs forwarded to `train`.
    y: targets forwarded to `train`.
    epochs: number of gradient steps to perform.
    alpha: learning rate forwarded to `train`.
  """
  # The epoch index itself is never used, only the number of repetitions.
  for _ in range(epochs):
    train(model, x, y, learning_rate=alpha)

In [58]:
# Seed TensorFlow's global RNG so the random weight initialisation, and hence
# the trained model, is the same on every Restart & Run All.
tf.random.set_seed(42)

# Size the model from the data (4 features, 3 classes) instead of repeating
# the numbers as literals.
model = SoftmaxRegression(num_features=x_data.shape[1], num_classes=y_data.shape[1])

# 2000 full-batch gradient steps with learning rate 0.01.
training_loop(model, x_data, y_data, epochs=2000, alpha=0.01)

In [62]:
# Class probabilities of the trained model for every training sample.
y_predicted = model(x_data)

In [63]:
# Predicted class index per sample (left) next to the true class index (right).
tf.argmax(y_predicted, axis=1), tf.argmax(y_data, axis=1)

(<tf.Tensor: shape=(8,), dtype=int64, numpy=array([2, 2, 2, 1, 0, 1, 0, 0])>,
 <tf.Tensor: shape=(8,), dtype=int64, numpy=array([2, 2, 2, 1, 1, 1, 0, 0])>)