In [28]:
#! pip install tensorflow

In [2]:
import tensorflow as tf
import time
print(tf.__version__)

2.18.0


In [None]:
print(tf.reduce_sum(tf.random.normal([1000, 1000])))

tf.Tensor(-1127.5006, shape=(), dtype=float32)


In [14]:
print(tf.config.list_physical_devices('GPU'))

[]


# Test on [tutorial](https://www.tensorflow.org/tutorials/quickstart/beginner) to tensorflow

In [5]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0


In [22]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

In [7]:
predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.80671585, -0.31528577, -0.01968774,  0.21175817, -0.5280927 ,
         0.17245866,  0.50177115,  0.48069972,  0.14259987,  0.135178  ]],
      dtype=float32)

In [8]:
tf.nn.softmax(predictions).numpy()


array([[0.17879556, 0.05822073, 0.07824457, 0.09862111, 0.04706052,
        0.09482052, 0.13180168, 0.12905349, 0.09203115, 0.09135063]],
      dtype=float32)

In [9]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)


In [10]:
loss_fn(y_train[:1], predictions).numpy()


np.float32(2.3557694)

In [24]:
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])


In [26]:
start = time.perf_counter()
model.fit(x_train, y_train, epochs=5)
print(f"Time the code was running on CPU: {round(time.perf_counter() - start,2)} seconds")


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 648us/step - accuracy: 0.9961 - loss: 0.0139
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 635us/step - accuracy: 0.9962 - loss: 0.0115
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 640us/step - accuracy: 0.9972 - loss: 0.0089
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 636us/step - accuracy: 0.9965 - loss: 0.0102
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 639us/step - accuracy: 0.9969 - loss: 0.0109
Time the code was running on CPU: 6.14 seconds


In [13]:
model.evaluate(x_test,  y_test, verbose=2)

313/313 - 0s - 679us/step - accuracy: 0.9758 - loss: 0.0751


[0.07514670491218567, 0.9757999777793884]

In [15]:
tf.config.list_physical_devices(
    device_type=None
)


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

### enabling gpu support

In [21]:
!nvidia-smi

Thu Mar  6 17:57:53 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 ...    Off |   00000000:01:00.0  On |                  N/A |
|  0%   54C    P3             36W /  220W |    2184MiB /  12282MiB |     34%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [30]:
#!python3 -m pip install 'tensorflow[and-cuda]'

In [32]:
tf.config.list_physical_devices(
    device_type=None
)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

### I had to download cuda toolkit and cuDNN SKD

I think it would be much easier to use docker 

In [3]:
tf.config.list_physical_devices(
    device_type=None
)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
start = time.perf_counter()
with tf.device('/gpu:0'):
    model.fit(x_train, y_train, epochs=20)
print(f"Time the code was running on GPU: {round(time.perf_counter() - start,2)} seconds")


Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 705us/step - accuracy: 0.9943 - loss: 0.0168
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 661us/step - accuracy: 0.9938 - loss: 0.0178
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 663us/step - accuracy: 0.9936 - loss: 0.0178
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 679us/step - accuracy: 0.9948 - loss: 0.0146
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 675us/step - accuracy: 0.9935 - loss: 0.0188
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 673us/step - accuracy: 0.9936 - loss: 0.0172
Epoch 7/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 672us/step - accuracy: 0.9935 - loss: 0.0178
Epoch 8/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 669us/step - accuracy: 0.9945 - loss: 0.0174
Epoch 9/

In [31]:
start = time.perf_counter()
with tf.device('/gpu:0'):
    model.fit(x_train, y_train, epochs=20)
print(f"Time the code was running on GPU: {round(time.perf_counter() - start,2)} seconds")

Epoch 1/20


2025-03-06 18:55:12.173358: W tensorflow/core/framework/op_kernel.cc:1841] OP_REQUIRES failed at xla_ops.cc:515 : INVALID_ARGUMENT: Trying to access resource adam/sequential_1_dense_2_kernel_momentum/132 (defined @ /home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/core.py:39) located in device /job:localhost/replica:0/task:0/device:CPU:0 from device /job:localhost/replica:0/task:0/device:GPU:0
 Cf. https://www.tensorflow.org/xla/known_issues#tfvariable_on_a_different_device


InvalidArgumentError: Graph execution error:

Detected at node StatefulPartitionedCall defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 205, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 534, in process_one

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 362, in execute_request

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 778, in execute_request

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 449, in do_execute

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3044, in run_cell

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3099, in _run_cell

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 128, in _pseudo_sync_runner

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3303, in run_cell_async

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3486, in run_ast_nodes

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3546, in run_code

  File "/tmp/ipykernel_37825/1974660710.py", line 3, in <module>

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

Trying to access resource adam/sequential_1_dense_2_kernel_momentum/132 (defined @ /home/aaa/Desktop/deep_learning/ConvolutionalNeuralNeutworks/.venv/lib/python3.11/site-packages/keras/src/backend/tensorflow/core.py:39) located in device /job:localhost/replica:0/task:0/device:CPU:0 from device /job:localhost/replica:0/task:0/device:GPU:0
 Cf. https://www.tensorflow.org/xla/known_issues#tfvariable_on_a_different_device
	 [[{{node StatefulPartitionedCall}}]] [Op:__inference_multi_step_on_iterator_394009]