## Softmax function

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from tensorflow.keras.layers import Dense # type: ignore
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.losses import SparseCategoricalCrossentropy # type: ignore
from tensorflow.keras.optimizers import Adam # type: ignore
from my_samples_generator import make_blobs
from lab_utils_softmax import plt_softmax

# Display NumPy floating-point values with two digits of precision
np.set_printoptions(precision=2)

# Standard-library logging, used here to control TensorFlow's log messages
import logging
# Restrict the "tensorflow" logger to ERROR-level messages and above
logging.getLogger("tensorflow").setLevel(logging.ERROR)
# Set AutoGraph (Python code -> TensorFlow graph code conversion) verbosity to 0
tf.autograph.set_verbosity(0)
# NOTE(review): a comment here read "Disables internal logging or output messages" but no statement accompanies it

In [None]:
def my_softmax(z):
  """Compute the softmax of ``z`` in a numerically stable way.

  The softmax is taken over *all* elements of ``z`` (the sum in the
  denominator is not restricted to an axis), so the returned values sum to 1.

  Args:
    z: array-like of real numbers (NumPy array, list, or scalar).

  Returns:
    NumPy array with the same shape as ``z`` holding
    ``exp(z_i) / sum_j exp(z_j)``. An empty input yields an empty array.
  """
  z = np.asarray(z)
  if z.size == 0:
    # np.max() raises on empty input; return an empty float array instead.
    return np.exp(z)
  # Softmax is invariant to adding a constant to every element. Shifting by
  # the maximum keeps the largest exponent at 0, so np.exp() cannot overflow
  # to inf (which previously produced inf/inf = nan for large inputs).
  e_z = np.exp(z - np.max(z))
  return e_z / np.sum(e_z)

In [None]:
# Hand the softmax implementation to the plotting helper imported from lab_utils_softmax.
# NOTE(review): the helper's behavior is not visible in this notebook; presumably it
# visualizes the function's output for a range of inputs (confirm against lab_utils_softmax).
plt_softmax(my_softmax)

In [None]:
# Four 2-D points, passed as the `centers` argument when generating the data set
centers = [
  [-5, 2],
  [-2, -2],
  [1, 2],
  [5, -2],
]

In [None]:
# Build the training set with the project's make_blobs helper; random_state is
# fixed so the generated samples are repeatable.
x_train, y_train = make_blobs(
  n_samples=2000,
  centers=centers,
  cluster_std=1.0,
  random_state=30,
)

### Traditional model training

In [None]:
# "Traditional" variant: the output layer applies softmax itself, so the
# network emits one probability per class (4 output units).
model = Sequential(
  layers=[
    Dense(25, activation='relu'),
    Dense(15, activation='relu'),
    Dense(4, activation='softmax'),
  ]
)

In [None]:
# The model already outputs probabilities, so the loss is configured to
# consume probabilities (from_logits=False is the Keras default, spelled out here).
model.compile(
  optimizer=Adam(learning_rate=1e-3),
  loss=SparseCategoricalCrossentropy(from_logits=False),
)

In [None]:
# Train for 10 passes over the training set
model.fit(x=x_train, y=y_train, epochs=10)

In [None]:
# Outputs of the softmax layer for every training example
softmax_traditional = model.predict(x_train)

# Show the first two rows, then the largest and smallest value overall
print(softmax_traditional[:2])
print(softmax_traditional.max(), softmax_traditional.min())

### Preferred model training

In [None]:
# "Preferred" variant: the output layer is linear, so the network emits raw
# scores (logits) and the softmax is folded into the loss instead.
model = Sequential(
  layers=[
    Dense(25, activation='relu'),
    Dense(15, activation='relu'),
    Dense(4, activation='linear'),
  ]
)

In [None]:
# from_logits=True tells the loss that the model outputs are logits, so the
# softmax is applied inside the loss computation.
model.compile(
  optimizer=Adam(learning_rate=1e-3),
  loss=SparseCategoricalCrossentropy(from_logits=True),
)

In [None]:
# Train for 10 passes over the training set
model.fit(x=x_train, y=y_train, epochs=10)

In [None]:
# The linear output layer produces logits; these values are not probabilities
softmax_preferred = model.predict(x_train)

# Apply softmax along the last axis to turn the logits into probabilities
softmax_preferred_prob = tf.nn.softmax(softmax_preferred, axis=-1).numpy()

# Show the first two rows, then the largest and smallest probability overall
print(softmax_preferred_prob[:2])
print(softmax_preferred_prob.max(), softmax_preferred_prob.min())

In [None]:
# For the first five examples, show the raw outputs and the index of the
# largest one (the predicted category).
for i in range(5):
  logits = softmax_preferred[i]
  category = np.argmax(logits)
  print(f"{logits}, category: {category}")

TensorFlow supports two formats for target values, and the choice of loss function determines which one is expected.
- SparseCategoricalCrossentropy: expects the target to be an integer corresponding to the class index. For example, if there are 10 potential target values, y would be between 0 and 9.
- CategoricalCrossentropy: expects the target value of an example to be one-hot encoded, where the value at the target index is 1 and the other N-1 entries are 0. For example, with 10 potential target values and a target of 2, y would be [0,0,1,0,0,0,0,0,0,0].