
# Step 1.2 Import the neccessary packages.
 Please refer to the section, Importing the packages in the book for further details. 

In [1]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchmetrics.functional import accuracy

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint

**Note**: The `Variable` API is deprecated.

Initialized tensors with `requires_grad=True`.
However, only Tensors of floating point and complex dtype can require gradients!

original code
```
xor_input = [Variable(torch.Tensor([0, 0])),
           Variable(torch.Tensor([0, 1])),
           Variable(torch.Tensor([1, 0])),
           Variable(torch.Tensor([1, 1]))]
xor_target = [Variable(torch.Tensor([0])),
           Variable(torch.Tensor([1])),
           Variable(torch.Tensor([1])),
           Variable(torch.Tensor([0]))]
```

In [13]:
xor_input = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]])

In [14]:
xor_target = torch.tensor([[0], [1], [1], [0]])

In [15]:
xor_data = list(zip(xor_input, xor_target))
train_loader = DataLoader(xor_data, batch_size=1000)

# Model definition

In [11]:
class XORModel(pl.LightningModule):
  def __init__(self):

    super(XORModel,self).__init__()
    
    self.input_layer = nn.Linear(2, 4)
    self.output_layer = nn.Linear(4,1)

    self.sigmoid = nn.Sigmoid()

    self.loss = nn.MSELoss()

  def forward(self, input):
    #print("INPUT:", input.shape)
    x = self.input_layer(input)
    #print("FIRST:", x.shape)
    x = self.sigmoid(x)
    #print("SECOND:", x.shape)
    output = self.output_layer(x)
    #print("THIRD:", output.shape)
    return output

  def configure_optimizers(self):
    params = self.parameters()
    optimizer = optim.Adam(params=params, lr = 0.01)
    return optimizer

  def training_step(self, batch, batch_idx):
    xor_input, xor_target = batch
    #print("XOR INPUT:", xor_input.shape)
    #print("XOR TARGET:", xor_target.shape)
    outputs = self(xor_input) 
    #print("XOR OUTPUT:", outputs.shape)
    loss = self.loss(outputs, xor_target)
    return loss 

In [12]:
from pytorch_lightning.utilities.types import TRAIN_DATALOADERS
checkpoint_callback = ModelCheckpoint()
model = XORModel()

trainer = pl.Trainer(max_epochs=500, callbacks=[checkpoint_callback])

trainer.fit(model, train_dataloaders=train_loader)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA A100-PCIE-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name         | Type    | Params
-----------------------------------------
0 | input_layer  | Linear  | 12    
1 | output_layer | Linear  | 5     
2 | sigmoid

Epoch 0:   0%|                                                                                                                                  | 0/1 [00:00<?, ?it/s]

RuntimeError: mat1 and mat2 must have the same dtype, but got Long and Float

In [10]:
ls lightning_logs/

[0m[01;34mversion_0[0m/  [01;34mversion_1[0m/


In [11]:
ls lightning_logs/*/

lightning_logs/version_0/:
[0m[01;34mcheckpoints[0m/
events.out.tfevents.1651698347.3efc55c883d6.60.0
events.out.tfevents.1651698361.3efc55c883d6.60.1
hparams.yaml

lightning_logs/version_1/:
[01;34mcheckpoints[0m/  events.out.tfevents.1651700398.3efc55c883d6.609.0  hparams.yaml


In [12]:
print(checkpoint_callback.best_model_path)

/content/lightning_logs/version_1/checkpoints/epoch=499-step=499.ckpt


In [13]:
print(checkpoint_callback.best_model_path)
train_model = model.load_from_checkpoint(checkpoint_callback.best_model_path)
test = torch.utils.data.DataLoader(xor_input, batch_size=1)
for val in xor_input:
  _ = train_model(val)
  print([int(val[0]),int(val[1])], int(_.round()))

/content/lightning_logs/version_1/checkpoints/epoch=499-step=499.ckpt
[0, 0] 0
[0, 1] 1
[1, 0] 1
[1, 1] 0


In [14]:
print(checkpoint_callback.best_model_path)
train_model = model.load_from_checkpoint(checkpoint_callback.best_model_path)
total_accuracy = []
for xor_input, xor_target in train_loader:
  for i in range(100):
    output_tensor = train_model(xor_input)
    test_accuracy = accuracy(output_tensor, xor_target.int())
    total_accuracy.append(test_accuracy)
total_accuracy = torch.mean(torch.stack(total_accuracy))
print("TOTAL ACCURACY FOR 100 ITERATIONS: ", total_accuracy.item())

/content/lightning_logs/version_1/checkpoints/epoch=499-step=499.ckpt
TOTAL ACCURACY FOR 100 ITERATIONS:  1.0
