# TensorFlow and PyTorch Setup
- CUDA: 11.2
- cuDNN: 8.6
- TF: tensorflow==2.10.1
- Torch: `conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch`
- Python: 3.7.9

### CUDA Setup
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\lib
- C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include
- `nvcc --version`
- `nvidia-smi`

### Ref
[TensorFlow CUDA 버전 확인](https://www.tensorflow.org/install/source_windows)  
[PyTorch 버전 확인](https://pytorch.org/get-started/previous-versions/)

In [1]:
# Import TensorFlow and show the installed version as the cell output.
# NOTE(review): the saved output of this cell reports 2.6.0, while the setup
# notes at the top of the notebook list tensorflow==2.10.1 — confirm which
# environment this notebook is meant to document.
import tensorflow as tf
tf.__version__

'2.6.0'

In [2]:
# TF GPU: list the devices TensorFlow can see; the saved output shows one CPU
# entry and one GPU entry.
# NOTE(review): `device_lib` is imported from TensorFlow's internal
# `tensorflow.python` namespace; `tf.config.list_physical_devices()` is the
# public API for the same check — consider switching.
from tensorflow.python.client import device_lib

device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 3571914208852834554,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 14053015552
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 16429436950123834166
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4080, pci bus id: 0000:01:00.0, compute capability: 8.9"]

In [5]:
# Import PyTorch and show the installed version as the cell output.
import torch
torch.__version__

'1.12.1'

In [6]:
# Torch GPU: evaluates to "CUDA" when torch.cuda.is_available() is true,
# otherwise to "CPU"; the string is shown as the cell output.
"CUDA" if torch.cuda.is_available() else "CPU"

'CUDA'

In [4]:
# TF GPU use: train a small dense classifier on MNIST with the training step
# pinned to the first GPU, then evaluate it on the test split.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale both image arrays by 1/255 before feeding them to the network.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Build the network one layer at a time: flatten 28x28 inputs, one hidden
# ReLU layer with dropout, and a 10-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Only the fit call runs inside the explicit GPU device scope.
with tf.device("/device:GPU:0"):
    model.fit(x_train, y_train, epochs=5)

# Last expression of the cell: its [loss, accuracy] result is the cell output.
model.evaluate(x_test, y_test, verbose=2)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 0s - loss: 0.0777 - accuracy: 0.9759


[0.07766376435756683, 0.9758999943733215]

In [7]:
# Torch GPU use: fit y = a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi] with
# hand-written gradient descent (no autograd), on the GPU when one is available.
import math

print(torch.__version__)  # print the torch version

# Seed the global RNG so the random initial weights are the same on every run
# (for a given device and torch build).
torch.manual_seed(0)

dtype = torch.float
# Prefer the GPU, but fall back to the CPU so the cell still runs on machines
# without CUDA; print the choice so it is obvious which device was exercised.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

# Input grid and targets. These are deterministic (evenly spaced x, y = sin(x)),
# not random data.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change inside the loop, so compute them once.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize the four scalar weights
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y and its error against the target
    y_pred = a + b * x + c * x_sq + d * x_cu
    residual = y_pred - y

    # Report the sum-of-squares loss every 100 steps. .item() copies the value
    # back to the host, so it is only called on the steps that actually print.
    if t % 100 == 99:
        loss = residual.pow(2).sum().item()
        print(t, loss)

    # Backprop by hand: gradients of the loss with respect to a, b, c, d
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent (in place)
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

1.12.1
99 665.2333984375
199 473.0482177734375
299 337.1463317871094
399 241.0374298095703
499 173.067626953125
599 124.9959487915039
699 90.99603271484375
799 66.94786071777344
899 49.93804168701172
999 37.9062385559082
1099 29.395362854003906
1199 23.374919891357422
1299 19.116058349609375
1399 16.103254318237305
1499 13.971904754638672
1599 12.46407699584961
1699 11.397354125976562
1799 10.64267349243164
1899 10.108749389648438
1999 9.730995178222656
Result: y = -0.03195663541555405 + 0.858229398727417 x + 0.005513053387403488 x^2 + -0.09354212880134583 x^3
