In [1]:
import tensorflow as tf
# Record the TensorFlow release this notebook ran against, for reproducibility.
print(tf.version.VERSION)

2024-03-05 21:58:45.446229: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-05 21:58:45.448018: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-05 21:58:45.475421: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-05 21:58:45.475464: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-05 21:58:45.476261: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

2.15.0


In [2]:
# MNIST: 70,000 images of handwritten digits, each 28x28 pixels and each
# labelled with the digit it represents. load_data() returns the data already
# split into 60,000 training images and 10,000 test images.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize: pixel values lie between 0 and 255, and we want them between 0 and 1.
x_train = x_train / 255.0
x_test = x_test / 255.0


In [3]:
# Build the machine learning model.
# The basic building block of a neural network is the layer. Layers extract
# representations from the data fed into them, ideally representations that are
# more meaningful for the problem at hand. Most of deep learning consists of
# chaining simple layers together, and most layers (such as
# tf.keras.layers.Dense) have parameters that are learned during training.
#
# The model is a Keras Sequential model: its layers are stacked and applied one
# after another. (The functional API is a more flexible alternative.)

# Seed the Python, NumPy and TensorFlow random generators before any weights are
# created, so that initialisation, dropout and the shuffling done during
# training start from the same state on every Restart & Run All.
tf.keras.utils.set_random_seed(42)

model = tf.keras.models.Sequential([
  # Flatten: turn each image from a 2-D array (28 by 28 pixels) into a 1-D
  # array of 28 * 28 = 784 values.
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  # Dense(128): hidden layer with 128 nodes, fully connected to the previous
  # layer. ReLU returns 0 for any negative input and returns any positive
  # input unchanged.
  tf.keras.layers.Dense(128, activation='relu'),
  # Dropout(0.2): helps prevent overfitting by randomly dropping 20% of the
  # nodes' outputs during training, which pushes the network to learn features
  # in a distributed way and generalize better to new data.
  tf.keras.layers.Dropout(0.2),
  # Dense(10): output layer. Returns a logits array of length 10; each entry is
  # a raw score for one of the 10 digit classes (no softmax is applied here).
  tf.keras.layers.Dense(10)
])


In [4]:
# Run the (still untrained) model on the first training image, kept as a batch
# of one. The result is an array of 10 raw scores (logits), one per digit class;
# tf.argmax would give the index of the highest-scoring class.
first_image = x_train[:1]
predictions = model(first_image).numpy()
predictions

array([[ 0.24166276,  0.1667478 ,  0.20261912,  0.4292291 , -0.91619045,
         0.13665693,  0.10130789, -0.63811415, -0.15631399,  0.20399971]],
      dtype=float32)

In [5]:
# Softmax converts the logits into "probabilities" for each class; they add up
# to 1. The most likely label is the index of the largest probability, which
# tf.argmax would return.
class_probabilities = tf.nn.softmax(predictions)
class_probabilities.numpy()


array([[0.12152202, 0.11275086, 0.11686879, 0.14659327, 0.03817735,
        0.10940863, 0.1056087 , 0.05041651, 0.08162362, 0.11703026]],
      dtype=float32)

In [6]:
# Loss function: measures how far the model's output is from the true labels
# during training. Training minimizes it to "steer" the model in the right
# direction. SparseCategoricalCrossentropy takes a vector of logits and the
# index of the true class and returns a scalar loss for each example.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# The loss equals the negative log probability of the true class, so it is zero
# when the model is sure of the correct class. An untrained model gives
# probabilities close to random (1/10 per class), so the initial loss should be
# close to -tf.math.log(1/10) ~= 2.3.
initial_loss = loss_fn(y_train[:1], predictions)
initial_loss.numpy()

2.2126656

In [7]:
# Configure training: the Adam optimizer, the cross-entropy loss held in
# loss_fn, and accuracy as the metric reported alongside the loss.
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

In [8]:
# Train the model: Model.fit adjusts the model parameters to minimize the loss,
# here by making 5 passes (epochs) over the training data.
model.fit(x=x_train, y=y_train, epochs=5)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7f3630310e50>

In [9]:
# Model.evaluate checks the model's performance, usually on a validation set or
# a test set; here it is run on the held-out test images.
# After training, the image classifier reaches ~98% accuracy on this dataset.
# To learn more, read the TensorFlow tutorials.
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 0s - loss: 0.0715 - accuracy: 0.9771 - 316ms/epoch - 1ms/step


[0.0714721828699112, 0.9771000146865845]

In [10]:
# To get probabilities instead of logits, wrap the trained model in a new
# Sequential model and attach a Softmax layer after it.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())


In [11]:
# Class probabilities for the first five test images: one row per image, one
# column per digit class.
sample_images = x_test[:5]
probability_model(sample_images)


<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[5.5606710e-09, 4.3668880e-08, 5.6750337e-06, 4.6325236e-05,
        5.7071352e-14, 7.0635139e-08, 3.1664534e-13, 9.9994683e-01,
        1.6604515e-08, 9.7783891e-07],
       [1.4749983e-07, 2.3057107e-04, 9.9975675e-01, 1.0528538e-05,
        5.1272875e-13, 1.3566871e-06, 4.8074452e-07, 1.1656961e-13,
        2.0351305e-07, 2.0309006e-12],
       [1.0752734e-06, 9.9972886e-01, 4.9349146e-05, 1.3450093e-05,
        3.1764572e-05, 1.2572569e-05, 1.1049490e-05, 6.4631662e-05,
        8.6677297e-05, 5.9389379e-07],
       [9.9993229e-01, 4.1204342e-09, 1.1793086e-06, 1.1091148e-08,
        1.5035042e-06, 1.0082309e-05, 3.4847910e-06, 1.1350709e-05,
        2.1858348e-09, 4.0005623e-05],
       [3.6894767e-07, 2.2746416e-08, 1.4881365e-05, 4.8372140e-08,
        9.9551207e-01, 1.2073768e-06, 2.6374707e-06, 3.7511421e-05,
        4.5961139e-07, 4.4306708e-03]], dtype=float32)>

In [12]:
# Shape of the five-image batch taken from the test set: (images, height, width).
first_five_images = x_test[:5]
first_five_images.shape

(5, 28, 28)