In [28]:
# One-time setup, left commented out: uncomment to install TensorFlow.
#! pip install tensorflow

In [2]:
import tensorflow as tf
import time
# Show the installed TensorFlow version so the outputs below can be tied to it.
print(tf.__version__)

2.18.0


In [None]:
# Smoke test: draw a 1000x1000 tensor of normally distributed values and
# print the sum of all its elements (a scalar tensor).
print(tf.reduce_sum(tf.random.normal([1000, 1000])))

tf.Tensor(-1127.5006, shape=(), dtype=float32)


In [14]:
# List the GPUs TensorFlow can see; an empty list means none are visible to it.
print(tf.config.list_physical_devices('GPU'))

[]


# Test based on the TensorFlow [beginner quickstart tutorial](https://www.tensorflow.org/tutorials/quickstart/beginner)

In [5]:
# Load the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Rescale both image arrays by 1/255 (converts them to floating point).
x_train = x_train / 255.0
x_test = x_test / 255.0


In [22]:
# Small fully connected classifier for 28x28 inputs:
# Flatten -> Dense(128, relu) -> Dropout(0.2) -> Dense(10).
# The last layer has no activation, so the model outputs raw logits.
# The input shape is declared with an explicit Input object rather than an
# `input_shape=` argument on the first layer, which Keras 3 (the Keras bundled
# with TensorFlow 2.16+) warns against for Sequential models.
model = tf.keras.models.Sequential([
  tf.keras.Input(shape=(28, 28)),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

In [7]:
# Forward the first training image through the model and convert the result
# to a NumPy array. The 10 outputs are logits, because the final Dense layer
# has no activation.
predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.80671585, -0.31528577, -0.01968774,  0.21175817, -0.5280927 ,
         0.17245866,  0.50177115,  0.48069972,  0.14259987,  0.135178  ]],
      dtype=float32)

In [8]:
# Turn the logits into per-class probabilities with softmax.
tf.nn.softmax(predictions).numpy()


array([[0.17879556, 0.05822073, 0.07824457, 0.09862111, 0.04706052,
        0.09482052, 0.13180168, 0.12905349, 0.09203115, 0.09135063]],
      dtype=float32)

In [9]:
# from_logits=True: the loss applies softmax itself, so it must be fed the
# model's raw logits. The "sparse" variant takes integer class labels rather
# than one-hot vectors.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)


In [10]:
# Loss of the single prediction computed above against the first training label.
loss_fn(y_train[:1], predictions).numpy()


np.float32(2.3557694)

In [24]:
# Configure training: Adam optimizer, the logits-based loss defined earlier,
# and accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)


In [26]:
# Time five training epochs with ops pinned to the CPU. Without the explicit
# device scope TensorFlow uses a GPU whenever one is visible, so on a
# GPU-enabled kernel the "on CPU" label below would be wrong.
# Note: this continues training `model` from whatever state it is already in;
# it does not start from fresh weights.
start = time.perf_counter()
with tf.device('/CPU:0'):
    model.fit(x_train, y_train, epochs=5)
print(f"Time the code was running on CPU: {round(time.perf_counter() - start,2)} seconds")


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 648us/step - accuracy: 0.9961 - loss: 0.0139
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635us/step - accuracy: 0.9962 - loss: 0.0115
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 640us/step - accuracy: 0.9972 - loss: 0.0089
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 636us/step - accuracy: 0.9965 - loss: 0.0102
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 639us/step - accuracy: 0.9969 - loss: 0.0109
Time the code was running on CPU: 6.14 seconds


In [13]:
# Evaluate on the test split. With the compiled 'accuracy' metric the return
# value is [loss, accuracy]; verbose=2 prints a single summary line.
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - 679us/step - accuracy: 0.9758 - loss: 0.0751


[0.07514670491218567, 0.9757999777793884]

In [15]:
# List every physical device (any type) that TensorFlow can see.
tf.config.list_physical_devices()


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

### Enabling GPU support

In [21]:
# Shell out to the NVIDIA driver tool to see the GPU, driver and CUDA versions.
!nvidia-smi

Thu Mar  6 17:57:53 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 ...    Off |   00000000:01:00.0  On |                  N/A |
|  0%   54C    P3             36W /  220W |    2184MiB /  12282MiB |     34%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [30]:
# One-time setup, left commented out: uncomment to install TensorFlow with
# its `and-cuda` extra.
#!python3 -m pip install 'tensorflow[and-cuda]'

In [32]:
# Re-check which physical devices (of any type) TensorFlow can see.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

### I had to download the CUDA Toolkit and the cuDNN SDK

I think it would be much easier to use Docker.

In [3]:
# Confirm which physical devices (of any type) TensorFlow can see now.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [30]:
# Time five training epochs with ops placed on the first GPU.
# Note: this calls fit() on the existing `model` object, so training continues
# from its current weights rather than from a fresh initialization.
start = time.perf_counter()
with tf.device('/gpu:0'):
    model.fit(x_train, y_train, epochs=5)
# Wall-clock time for the whole fit() call, rounded to 2 decimals.
print(f"Time the code was running on GPU: {round(time.perf_counter() - start,2)} seconds")


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 702us/step - accuracy: 0.9950 - loss: 0.0133
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 661us/step - accuracy: 0.9959 - loss: 0.0124
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 663us/step - accuracy: 0.9942 - loss: 0.0164
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 660us/step - accuracy: 0.9953 - loss: 0.0144
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 657us/step - accuracy: 0.9955 - loss: 0.0128
Time the code was running on GPU: 6.49 seconds
