#### Author: Prakash C. Sukhwal
#### July 2021
#### Associated Lecturer & Consultant
#### Institute of Systems Science, NUS

---



## This notebook is designed to share some of the most important Colab and PyTorch functionality used in this course

---



## Shortcuts for Colab

<img src="https://drive.google.com/uc?id=128WKhxbyfKGM4HhhV7I9M1hOrFOXtFNo" alt="image" 
    width="600" 
    height="280" class="center">

---



## Turn code auto-complete on for colab

---



In [None]:
## turn on the autocomplete if off by default
%config use_jedi = False

## Link to Google Drive to access data 
---

In [None]:
# Mount Google Drive at /content/gdrive so that files stored on Drive
# can be read through ordinary paths under that folder
from google.colab import drive
drive.mount('/content/gdrive')


# Import the necessary libraries and understand the need

---




In [None]:
# import necessary libraries-1 
import numpy as np # numpy arrays, matrix operations etc.
import pandas as pd # read the datafiles
import io # for any input/output operations
from sklearn.preprocessing import StandardScaler # normalize your features
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# import necessary libraries-2
import torch # main torch library
import torch.nn as nn # all neural network classes and functions
import torch.nn.functional as F # all neural network classes and functions (stateless compared to torch.nn)
from torch.utils.data import Dataset # use it to create custom pytorch dataset
from torch.utils.data import DataLoader # use it for shuffling rows, batch processing, passing dataset etc.


<img src="https://drive.google.com/uc?id=1ejibPAdSdKPwLqlCZjgI87-PXbbu1Dy7" alt="image" 
    width="600" 
    height="400" class="center">

img source: https://www.mygreatlearning.com/blog/computer-vision-using-pytorch/

## Data Upload (2 Ways) and access

---



In [None]:
## Method-1
# let us say you have a file cars.csv in a folder named data on your Google Drive
# (the path starts with /content/gdrive, i.e. the Drive mount point used earlier in this notebook)
# you can import it as below; adjust the path to match your own folder layout
cars = pd.read_csv('/content/gdrive/MyDrive/Deep Learning Apr 2021/ISS_AI_Course1/Pre-Read_Torch/data/cars.csv')
cars.head(4)  # preview the first 4 rows

In [None]:
## Method-2
# upload one or more files interactively from your local machine to the Colab session and access the data
# `uploaded` is a mapping keyed by file name (it is iterated and indexed by name in the next cells)
from google.colab import files
uploaded = files.upload()

In [None]:
# Walk over the uploaded mapping and report each key, i.e. each uploaded file name
for uploaded_name in uploaded:
  print('Found file(s) "{name}"'.format(name=uploaded_name))

In [None]:
# access the cars file: decode the uploaded bytes as UTF-8 text,
# wrap the text in an in-memory file object and let pandas parse it
cars_text = uploaded['cars.csv'].decode('utf-8')
df = pd.read_csv(io.StringIO(cars_text))
df.head(4)

## Installing libraries to colab
---

In [None]:
# uncomment the example if needed to install
# note the '!' symbol
#!pip install sentencepiece

# PyTorch

<img src="https://drive.google.com/uc?id=1yZ7BUgpBtfvqNFkQIHh1AQAMPCMpwfOV" alt="image" 
    width="600" 
    height="350" class="center">

img source: 

https://www.kaggle.com/uvxy1234/cifar-10-implementation-with-pytorch

## Check the version of pytorch

---

In [None]:
# check your PyTorch version: type the command below in your notebook
torch.__version__

## Setup GPU

    CUDA: CUDA is a parallel computing platform by NVIDIA. 
    It enables large increases in computing performance by running computations on the graphics processing unit (GPU).
---

In [None]:
# in colab you can set GPU easily. Go to Runtime ==> Change runtime type ==> select GPU
# `device` is 'cuda' when a GPU is visible to PyTorch, otherwise 'cpu'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(device)
# The torch.cuda.* device queries raise on a CPU-only runtime,
# so only run them when a GPU was actually detected
if device == 'cuda':
    print(torch.cuda.current_device())    # index of the currently selected GPU
    print(torch.cuda.get_device_name(0))  # name of GPU 0
else:
    print('No GPU detected: select a GPU runtime to run the CUDA examples')

cuda
0
Tesla T4


In [None]:
# Returns the current GPU memory usage by 
# tensors in bytes for a given device
# (no tensor has been moved to the GPU yet at this point in the notebook)
torch.cuda.memory_allocated()

0

### Create and manipulate tensors
    Similar to numpy arrays, tensors are a generalisation of matrices.
    Tensors are used in linear algebra and are represented as n-dimensional arrays.

<img src="https://drive.google.com/uc?id=18V4YMTTWA_1Ux8HshA3LeKbIpobICFfb" alt="image" 
    width="600" 
    height="450" class="center">

img source: https://learnopencv.com/pytorch-for-beginners-basics/

## Creating various tensors from scratch 

## 0-D

---



In [None]:
# 0-D tensor (a scalar): it holds a single value and has an empty shape
t0 = torch.tensor(3.14159)  
print(t0)
print(t0.size()) # also try print(t0.shape)
print(t0.dtype)  # data type of the stored value

In [None]:
# rank = number of dimensions of the tensor
t0.dim()

## 1-D

---


In [None]:
# 1-D tensor: 3 random integers drawn from low=1 up to (but not including) high=4
t1 = torch.randint( low=1, high=4, size=(3,))
print(t1)
print(t1.size()) # also try print(t1.shape)

In [None]:
# rank = number of dimensions of the tensor
t1.dim()

In [None]:
# how many elements the tensor holds in total
t1.numel()

## 2-D

---


In [None]:
# 2-D tensor: 3 x 5 matrix of uninitialized values
# torch.empty only allocates the memory, so the printed numbers are arbitrary (not zeros)
t1 = torch.empty(3, 5)
print(t1)

In [None]:
# Full shape first, then the size along dimension 0 (rows) and dimension 1 (columns)
print(t1.shape, t1.size(0), t1.size(1), sep='\n')

In [None]:
# tensor of ones with the data type set explicitly to 64-bit float

t1 = torch.ones(3, 5, dtype=torch.float64)

print(t1)
print(t1.shape)
print(t1.dtype)
print(t1.data)  # the values stored in the tensor

#### pytorch dtypes: https://pytorch.org/docs/stable/tensor_attributes.html

In [None]:
# tensor of zeros: 3 x 5, stored as 64-bit floats
t2 = torch.zeros(3, 5, dtype=torch.float64)
print(t2)
print(t2.data)  # the values stored in the tensor

In [None]:
# how many elements in total (3 rows x 5 columns)
t2.numel()

In [None]:
# tensor similar to t2: random values laid out like t2
# (NOTE(review): per the PyTorch docs rand_like keeps the input's size and dtype - check the printed result)
t3 = torch.rand_like(t2)
print(t3)

In [None]:
# A few attributes describing how the tensor is laid out
print(t3.ndim)            # number of dimensions (rank)
print(t3.stride())        # step in memory (in elements) needed to move by one index along each dimension
print(t3.element_size())  # size of one element in bytes
print(t3.type())          # tensor type as a string

## 3-D

---


In [None]:
# 3-D tensor: (3, 2, 5)  (3 channels, 2 rows, 5 columns)
t3d = torch.randn(3, 2, 5)
print(t3d)
print(t3d.shape)
print(t3d.dtype)
print(t3d.data)  # the values stored in the tensor

## Creating various tensors from numpy arrays and back

In [None]:
# Start from a 2 x 4 numpy array of integers
n1 = np.array([[1, 2,3,4],[11, 12,13,14]])
print(n1)
print(n1.shape)
print(n1.dtype)

In [None]:
# numpy to tensor: compare the printed shape and dtype with those of n1 above
tn1 = torch.from_numpy(n1)
print(tn1)
print(tn1.shape)
print(tn1.dtype)
print(tn1.data)

In [None]:
# tensor to numpy: convert the tensor back to a numpy array
tn1.numpy()

In [None]:
# tensor to a dataframe
# Convert to numpy first: handing the tensor straight to pandas makes it iterate
# element by element and store tensor objects in the cells
pd.DataFrame(tn1.numpy()).astype("float")

## Using GPU

In [None]:
# GPU
# Move the tensor to the GPU when one is available; stay on the CPU otherwise
# so that the cell does not crash on a CPU-only runtime
t = torch.randint(3, 5, (2,2)).to('cuda' if torch.cuda.is_available() else 'cpu')
print(t)

tensor([[3, 3],
        [4, 3]], device='cuda:0')


In [None]:
# Where the tensor lives, and how many bytes tensors currently occupy on the GPU
print(t.device)
print(torch.cuda.memory_allocated())

cuda:0
512


## Access data

---

In [None]:
# 3 x 6 tensor of random integers from 3 up to (but not including) 10
a = torch.randint(3, 10, (3,6))
print(a)
print(a.shape)

In [None]:
# Access the element at row index 1, column index 1
# (indices are zero-based, so this is the second row and second column)
print(a[1,1])         # still a tensor, holding a single value
print(a[1,1].item())  # .item() extracts that value as a plain Python number

In [None]:
a[:, 1] # all rows, column index 1 (i.e. the second column); similar to python slicing

In [None]:
print(a[1:,])# row index 1 (the second row) onwards, all cols

## Some arithmetic

In [None]:
# Two 3 x 3 integer tensors to use as operands (values are 3 or 4: the upper bound 5 is excluded)
a = torch.randint(3, 5, (3,3))
b = torch.randint(3, 5, (3,3))
print(a)
print(b)

tensor([[4, 4, 3],
        [3, 4, 4],
        [3, 3, 3]])
tensor([[3, 4, 4],
        [3, 3, 3],
        [4, 3, 4]])


## Add

---


In [None]:
## Addition
# element-wise: each entry of a is added to the entry of b at the same position
a + b

tensor([[7, 8, 7],
        [6, 7, 7],
        [7, 6, 7]])

In [None]:
# function form of the element-wise addition a + b
torch.add(a,b)

In [None]:
# add b to a and modify a
# known as an in-place operation (note the trailing underscore in add_)
a.add_(b)
a

tensor([[7, 8, 7],
        [6, 7, 7],
        [7, 6, 7]])

In [None]:
# reducing to a sum over all elements
print(a.sum())
# std: a holds integers, so it is converted to float before taking the standard deviation
# (older equivalent spelling kept for reference)
#print(a.type(torch.FloatTensor).std().item())
print(a.float().std())         # result as a 0-D tensor
print(a.float().std().item())  # result as a plain Python float

tensor(62)
tensor(0.6009)
0.6009252071380615


## Subtract

---


In [None]:
# Fresh 3 x 3 integer operands (values from 3 up to, but not including, 9)
a = torch.randint(3, 9, (3,3))
b = torch.randint(3, 9, (3,3))
print(a)
print(b)

In [None]:
# element-wise subtraction; a - b gives the same result
torch.subtract(a,b)

## Multiplication

---

In [None]:
# multiply every element by a scalar
a*2

In [None]:
# element-wise multiplication: entries at the same position are multiplied (not a matrix product)
a * b 

In [None]:
# function form of the element-wise product a * b
torch.mul(a,b) # or torch.multiply(a, b)

In [None]:
 # matrix multiplication (rows of a times columns of b), unlike the element-wise a * b
 torch.mm(a, b) # or torch.matmul(a, b)

## Division

---

In [None]:
# divide every element by a scalar
a/4

In [None]:
# element-wise division of a by b
a/b

In [None]:
# function form of the element-wise division a / b
torch.div(a, b) # or torch.divide(a, b)

## Reshaping Tensors (Reshape (view), Flatten, and (Un)squeeze)

## Reshape

---

In [None]:
# 3 x 5 integer tensor (15 elements) used in the reshape examples below
a = torch.randint(3, 9, (3,5))
print(a)
print(a.shape)

tensor([[5, 7, 4, 7, 8],
        [4, 6, 5, 4, 6],
        [8, 3, 6, 4, 5]])
torch.Size([3, 5])


In [None]:
# Show the tensor as a table; convert to numpy first so pandas receives
# a plain array instead of iterating over tensor objects
pd.DataFrame(a.numpy()).astype("float")

Unnamed: 0,0,1,2,3,4
0,5.0,7.0,4.0,7.0,8.0
1,4.0,6.0,5.0,4.0,6.0
2,8.0,3.0,6.0,4.0,5.0


In [None]:
# Rearrange the 15 elements into a single row of 15 columns
print(a.reshape([1, 15])) # or a.view(1, 15)
print(a.reshape([1, 15]).shape)

In [None]:
# Same 1 x 15 reshape shown as a table; convert to numpy first so pandas
# receives a plain array instead of iterating over tensor objects
pd.DataFrame(a.reshape([1, 15]).numpy()).astype("float")

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,5.0,7.0,4.0,7.0,8.0,4.0,6.0,5.0,4.0,6.0,8.0,3.0,6.0,4.0,5.0


In [None]:
# Rearrange the 15 elements into 5 rows x 3 columns; elements are taken in row-major order
print(a.reshape([5,3]))
print(a.reshape([5, 3]).shape)
# Convert to numpy first so pandas receives a plain array instead of iterating over tensor objects
pd.DataFrame(a.reshape([5,3]).numpy()).astype("float")

tensor([[5, 7, 4],
        [7, 8, 4],
        [6, 5, 4],
        [6, 8, 3],
        [6, 4, 5]])
torch.Size([5, 3])


Unnamed: 0,0,1,2
0,5.0,7.0,4.0
1,7.0,8.0,4.0
2,6.0,5.0,4.0
3,6.0,8.0,3.0
4,6.0,4.0,5.0


In [None]:
# The target shape can also be passed as separate arguments instead of a list
print(a.reshape(5, 3))
print(a.reshape(5, 3).shape)

tensor([[5, 7, 4],
        [7, 8, 4],
        [6, 5, 4],
        [6, 8, 3],
        [6, 4, 5]])
torch.Size([5, 3])


In [None]:
# Reshape to 3 dimensions: 3 blocks, each holding 1 row of 5 columns
print(a.reshape(3, 1, 5))
print(a.reshape(3, 1, 5).shape)

tensor([[[5, 7, 4, 7, 8]],

        [[4, 6, 5, 4, 6]],

        [[8, 3, 6, 4, 5]]])
torch.Size([3, 1, 5])


In [None]:
# inferring dimension with a -1: a has 15 elements, so 5 rows leaves 3 columns
a.view(5, -1)

## Flatten
    usually used to create a 1D-array of elements

---

In [None]:
# 3 x 4 integer tensor (12 elements) used in the flatten example
a = torch.randint(3, 9, (3,4))
print(a)
print(a.shape)
# Convert to numpy first so pandas receives a plain array instead of iterating over tensor objects
pd.DataFrame(a.numpy()).astype("float")

tensor([[3, 3, 8, 6],
        [3, 7, 7, 5],
        [7, 5, 4, 5]])
torch.Size([3, 4])


Unnamed: 0,0,1,2,3
0,3.0,3.0,8.0,6.0
1,3.0,7.0,7.0,5.0
2,7.0,5.0,4.0,5.0


In [None]:
# Flatten the 3 x 4 tensor into a single dimension of 12 elements (row after row)
print(torch.flatten(a))
# Convert to numpy first so pandas receives a plain array instead of iterating over tensor objects
pd.DataFrame(torch.flatten(a).numpy()).astype("float")

tensor([3, 3, 8, 6, 3, 7, 7, 5, 7, 5, 4, 5])


Unnamed: 0,0
0,3.0
1,3.0
2,8.0
3,6.0
4,3.0
5,7.0
6,7.0
7,5.0
8,7.0
9,5.0


In [None]:
# https://discuss.pytorch.org/t/what-is-the-difference-of-flatten-and-view-1-in-pytorch/51790/5
# Start from a zero-filled tensor of shape (2, 3, 4).
a = torch.zeros(2, 3, 4)
print('a is \n')
print(a)
# Merge the 2nd and 3rd dimensions in two ways:
# once with `view`, once with `flatten`.
b = a.view(2, -1)
print('b is \n')
print(b)
c = a.flatten(start_dim=1)
print('c is \n')
print(c)
# Write a different value through each of the flattened tensors.
b[0, 2] = 1
c[0, 4] = 2

# Check pairwise whether the three tensors still hold the same data
# after the writes above.
print("Tensors A and B data match?", torch.equal(a.view(-1), b.view(-1)))
print("Tensors A and C data match?", torch.equal(a.view(-1), c.view(-1)))
print("Tensors B and C data match?", torch.equal(b.view(-1), c.view(-1)))

a is 

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])
b is 

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
c is 

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
Tensors A and B data match? True
Tensors A and C data match? True
Tensors B and C data match? True


## (Un)squeeze
    Squeeze: removes the dimensions or axes that have a length of one.
    Unsqueeze: adds a dimension with a length of one.

---

In [None]:
# Tensor of shape (3, 1, 1): the last two dimensions have length one
c = torch.randint( low=1, high=4, size=(3,1,1))
print(c)
print(c.shape)

In [None]:
# squeeze drops the length-one dimensions of c (see the definition above); compare the printed shape with (3, 1, 1)
print(torch.squeeze(c))
print(torch.squeeze(c).shape)

In [None]:
# Shape (3, 1, 2): only the middle dimension has length one
d = torch.randint( low=1, high=4, size=(3,1,2))
print(d)
print(d.shape)
# compare the squeezed shape with the original (3, 1, 2) to see which dimension is removed
print(torch.squeeze(d))
print(torch.squeeze(d).shape)

In [None]:
# 1-D tensor with 3 elements, used in the unsqueeze examples below
e = torch.randint( low=1, high=4, size=(3,))
print(e)
print(e.shape)

In [None]:
# Insert a new length-one dimension at position 0
print('unsqueeze along dimension 0')
print(e.unsqueeze(0))
print(e.unsqueeze(0).shape)
print('\n')
# Insert a new length-one dimension at position 1
print('unsqueeze along dimension 1')
print(e.unsqueeze(1))
print(e.unsqueeze(1).shape)

## Concatenate tensors

In [None]:
# Two 3 x 3 integer tensors to concatenate
a = torch.randint(3, 5, (3,3))
b = torch.randint(3, 5, (3,3))
print(a)
print(b)

In [None]:
torch.cat([a, b]) 
# notice the square brackets: the tensors are passed together as a list
# default dim = 0, i.e. b's rows are stacked below a's rows

tensor([[4, 3, 3],
        [4, 3, 4],
        [4, 4, 3],
        [3, 4, 3],
        [3, 4, 4],
        [3, 4, 3]])

In [None]:
# Row-wise concatenation shown as a table; convert to numpy first so pandas
# receives a plain array instead of iterating over tensor objects
pd.DataFrame(torch.cat([a, b]).numpy()).astype('float')

Unnamed: 0,0,1,2
0,4.0,3.0,3.0
1,4.0,3.0,4.0
2,4.0,4.0,3.0
3,3.0,4.0,3.0
4,3.0,4.0,4.0
5,3.0,4.0,3.0


In [None]:
# concatenate along dim = 1: b's columns are placed to the right of a's columns
torch.cat([a, b], dim=1)

tensor([[4, 3, 4, 3, 3, 3],
        [3, 3, 4, 4, 4, 4],
        [4, 4, 3, 4, 4, 4]])

In [None]:
# Column-wise concatenation shown as a table; convert to numpy first so pandas
# receives a plain array instead of iterating over tensor objects
pd.DataFrame(torch.cat([a, b], dim=1).numpy()).astype('float')

Unnamed: 0,0,1,2,3,4,5
0,4.0,3.0,3.0,3.0,4.0,3.0
1,4.0,3.0,4.0,3.0,4.0,4.0
2,4.0,4.0,3.0,3.0,4.0,3.0


## Speed comparison numpy vs pytorch

source: https://jdhao.github.io/2017/11/15/pytorch-datatype-note/

In [None]:
# torch Tensor on CPU
x = torch.rand(1, 64)
y = torch.rand(5000, 64)
%timeit z=(x*y).sum(dim=1)

# torch Tensor on GPU

x, y = x.cuda(), y.cuda()
%timeit z = (x*y).sum(dim=1)

# numpy ndarray on CPU

x = np.random.random((1, 64))
y = np.random.random((5000, 64))
%timeit z = (x*y).sum(axis=1)