# TensorFlow and PyTorch Setup
- CUDA: 11.2
- cuDNN: 8.6
- TF: tensorflow==2.10.1
- Torch: `conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch`
- Python: 3.7.9

### CUDA Setup
- `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin`
- `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\lib`
- `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include`
- `nvcc --version`
- `nvidia-smi`

### Ref
[TensorFlow CUDA 버전 확인](https://www.tensorflow.org/install/source_windows)  
[PyTorch 버전 확인](https://pytorch.org/get-started/previous-versions/)

In [1]:
# Import TensorFlow under its conventional alias and show the installed version
# (the bare last expression is rendered as the cell output).
import tensorflow as tf
tf.__version__

'2.10.1'

In [2]:
# TF GPU
# List every local device TensorFlow can see; the cell output is the returned
# list, so a "/device:GPU:0" entry there shows that the GPU was detected.
# NOTE(review): `tensorflow.python.*` looks like an internal module path rather
# than documented public API; `tf.config.list_physical_devices("GPU")` appears
# to be the public alternative — confirm before switching.
from tensorflow.python.client import device_lib

device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 7785058835015709668
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 6254755840
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 13205256602889981293
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 2070 SUPER, pci bus id: 0000:09:00.0, compute capability: 7.5"
 xla_global_id: 416903419]

In [3]:
# Import PyTorch and show the installed version
# (the bare last expression is rendered as the cell output).
import torch
torch.__version__

'1.12.1'

In [4]:
# Torch GPU
# Ask PyTorch whether a CUDA device is usable, then display a short label.
has_cuda = torch.cuda.is_available()
"CUDA" if has_cuda else "CPU"

'CUDA'

In [6]:
# TF GPU Use
# Train a small dense classifier on MNIST, with the fit step placed on the
# first GPU, then evaluate it on the held-out test split.
mnist = tf.keras.datasets.mnist

# Load the train/test splits and divide the pixel values by 255.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# Flatten 28x28 inputs -> 128-unit ReLU layer -> dropout -> 10-way softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Run the training loop inside an explicit GPU device scope.
with tf.device("/device:GPU:0"):
    model.fit(x_train, y_train, epochs=5)

# Last expression: the value returned by evaluate() is shown as the cell output.
model.evaluate(x_test, y_test, verbose=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
313/313 - 1s - loss: 0.0763 - accuracy: 0.9779 - 893ms/epoch - 3ms/step


[0.07627550512552261, 0.9779000282287598]

In [7]:
# Torch GPU Use
# Fit the cubic polynomial a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi] using
# hand-written gradient descent, running the tensor math on the GPU when one is
# available.
import math

print(torch.__version__)  # print the installed torch version

dtype = torch.float
# Prefer CUDA, but fall back to the CPU so the cell still runs on a machine
# where no GPU is visible instead of raising.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deterministic inputs: 2000 evenly spaced points and their sine values.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change inside the loop, so compute them once.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize the four scalar weights from a dedicated, seeded
# generator so reruns are reproducible without touching the global RNG state.
SEED = 0
rng = torch.Generator(device=device).manual_seed(SEED)
a = torch.randn((), generator=rng, device=device, dtype=dtype)
b = torch.randn((), generator=rng, device=device, dtype=dtype)
c = torch.randn((), generator=rng, device=device, dtype=dtype)
d = torch.randn((), generator=rng, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y and the residual against the target
    y_pred = a + b * x + c * x_sq + d * x_cu
    residual = y_pred - y

    # Report the sum-of-squares loss every 100 steps. `.item()` copies the
    # value to the host, so it is only called when the loss is actually printed.
    if t % 100 == 99:
        loss = residual.pow(2).sum().item()
        print(t, loss)

    # Backprop by hand: gradients of the loss with respect to a, b, c, d
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

1.12.1
99 2150.86962890625
199 1434.52880859375
299 958.1357421875
399 641.2009887695312
499 430.2693786621094
599 289.8294982910156
699 196.28317260742188
799 133.9442596435547
899 92.38213348388672
999 64.65807342529297
1099 46.15502166748047
1199 33.79905700683594
1299 25.54328155517578
1399 20.023681640625
1499 16.33104705810547
1599 13.858994483947754
1699 12.202949523925781
1799 11.092704772949219
1899 10.347822189331055
1999 9.84769058227539
Result: y = -0.017719391733407974 + 0.8301140666007996 x + 0.003056889632716775 x^2 + -0.08954295516014099 x^3
