# Introduction to PyTorch
[Jian Tao](https://coehpc.engr.tamu.edu/people/jian-tao/), Texas A&M University

Feb 12, 2020

In [328]:
%matplotlib inline
import matplotlib.pyplot as plt
import torch

## 1. PyTorch as a fancy version of NumPy

* PyTorch supports Tensor computation (like NumPy, a fundamental package for scientific computing with Python) with strong GPU acceleration. You can find corresponding NumPy functions in PyTorch.
* It supports deep neural networks built on a tape-based autograd system

### NumPy examples

In [None]:
import numpy as np

In [None]:
# Build a 2x3 float64 array from nested lists, then show its dimensions.
x = np.array(
    [
        [1, 2, 3],
        [3, 4, 5],
    ],
    dtype=np.float64,
)
x.shape

In [None]:
# A 5x5 array filled with zeros.
x = np.zeros((5, 5))
x

In [None]:
# A 5x5 array of samples drawn uniformly from [0, 1).
x = np.random.rand(5, 5)
x

### PyTorch examples

In [None]:
# Allocate a 5x5 PyTorch tensor filled with zeros.
x = torch.zeros((5, 5))
x

### Check CUDA support

In [329]:
# True when PyTorch can use a CUDA-capable GPU on this machine.
torch.cuda.is_available()

False

In [330]:
# Number of CUDA devices visible to PyTorch.
torch.cuda.device_count()

0

In [331]:
# Prefer the first CUDA device when one is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    dev = 'cuda:0'
else:
    dev = 'cpu'
dev

'cpu'

In [332]:
# Create a 2x2 random tensor directly on the selected device.
x = torch.rand(2, 2, device=dev)

In [333]:
# Show which device holds x.
x.device

device(type='cpu')

In [334]:
# Sample on the default device first, then move the result to wherever x lives.
y = torch.rand(2, 2)
y = y.to(x.device)
y

tensor([[0.12794971, 0.74971551],
        [0.57380384, 0.52133507]])

In [335]:
# Show which device holds y.
y.device

device(type='cpu')

In [336]:
# Draw a 5x5 matrix of uniform random values in [0, 1).
x = torch.rand((5, 5))
x

tensor([[0.92211878, 0.91773129, 0.41387683, 0.63738418, 0.75707543],
        [0.36489564, 0.19527304, 0.04656327, 0.92665040, 0.05896747],
        [0.83714986, 0.06538057, 0.13024211, 0.86240131, 0.06626105],
        [0.35698026, 0.55956924, 0.93162143, 0.98273206, 0.79119402],
        [0.93068588, 0.39777625, 0.12690693, 0.81158972, 0.40261716]])

In [337]:
# Number of dimensions of x.
x.ndim

2

In [338]:
# Size of x along each dimension.
x.shape

torch.Size([5, 5])

In [339]:
# A second 5x5 uniform random matrix, used as the other operand below.
y = torch.rand((5, 5))
y

tensor([[0.04954314, 0.84857178, 0.61152208, 0.91766316, 0.31968564],
        [0.59144276, 0.08199400, 0.74340647, 0.16820741, 0.56325442],
        [0.47539759, 0.39103407, 0.31311828, 0.91527659, 0.88702780],
        [0.72521186, 0.13370645, 0.08966839, 0.07931513, 0.86982375],
        [0.28131342, 0.73242480, 0.66534120, 0.54232001, 0.83390570]])

In [340]:
# Algebraically (x+y)(x-y) - x**2 + y**2 = 0. The `*` operator multiplies
# element by element here, and the result is only approximately zero because
# of floating-point round-off.
(x+y)*(x-y) - x**2 + y**2

tensor([[ 3.72529030e-09,  0.00000000e+00, -2.98023224e-08,  5.96046448e-08,
         -2.23517418e-08],
        [ 0.00000000e+00, -9.31322575e-10,  0.00000000e+00,  3.72529030e-09,
          0.00000000e+00],
        [-2.98023224e-08,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00],
        [ 0.00000000e+00, -1.49011612e-08, -7.45058060e-09,  3.63215804e-08,
         -5.96046448e-08],
        [ 3.72529030e-08,  0.00000000e+00, -2.98023224e-08, -2.98023224e-08,
          0.00000000e+00]])

In [348]:
# Pre-allocate the result tensor and let torch.add write x + y into it.
z = torch.empty((5, 5))
torch.add(x, y, out=z)
z

tensor([[0.97166193, 1.76630306, 1.02539897, 1.55504727, 1.07676101],
        [0.95633841, 0.27726704, 0.78996974, 1.09485781, 0.62222189],
        [1.31254745, 0.45641464, 0.44336039, 1.77767789, 0.95328885],
        [1.08219218, 0.69327569, 1.02128983, 1.06204724, 1.66101778],
        [1.21199930, 1.13020110, 0.79224813, 1.35390973, 1.23652291]])

In [366]:
# Tensor.add (no trailing underscore) returns a new tensor. The result is not
# assigned here, so the y displayed on the next line is unchanged by this call.
y.add(x)
y

tensor([[7.42649269, 8.19042206, 3.92253637, 6.01673698, 6.37628841],
        [3.51060748, 1.64417827, 1.11591268, 7.58141136, 1.03499413],
        [7.17259598, 0.91407865, 1.35505509, 7.81448793, 1.41711617],
        [3.58105445, 4.61026096, 7.54264021, 7.94117069, 7.19937563],
        [7.72680092, 3.91463423, 1.68059635, 7.03503752, 4.05484343]])

In [378]:
# In-place addition: any operation that mutates a tensor in place is suffixed
# with an underscore. For example, x.copy_(y) and x.t_() change x itself
# (better performance, but problematic for autograd).
y.add_(x)
y

tensor([[0.92211878, 0.91773129, 0.41387683, 0.63738418, 0.75707543],
        [0.36489564, 0.19527304, 0.04656327, 0.92665040, 0.05896747],
        [0.83714986, 0.06538057, 0.13024211, 0.86240131, 0.06626105],
        [0.35698026, 0.55956924, 0.93162143, 0.98273206, 0.79119402],
        [0.93068588, 0.39777625, 0.12690693, 0.81158972, 0.40261716]])

In [387]:
# Slice out the first two rows and first two columns of x.
x[0:2, 0:2]

tensor([[0.92211878, 0.91773129],
        [0.36489564, 0.19527304]])

## 2. Linear Regression

### The data points to be fitted

In [None]:
# Seven sample points for the regression examples.
x = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0], dtype=float)

# Targets follow y = 3x + 2 plus Gaussian noise scaled by 0.5.
# A seeded generator keeps the data set (and every fit and plot derived from
# it) identical across kernel restarts.
rng = np.random.default_rng(42)
y = 3.0 * x + 2.0 + 0.5 * rng.standard_normal(len(x))

In [None]:
# Noisy samples as points, with the noise-free line y = 3x + 2 drawn through them.
plt.scatter(x, y);
plt.plot(x, 3 * x + 2);

### Review Linear Regression with scikit-learn 

In [None]:
from sklearn.linear_model import LinearRegression
sk_model = LinearRegression()

# use the data points defined above; x is reshaped into a single-column
# 2-D array because scikit-learn expects a (samples, features) matrix
sk_x = x[:, np.newaxis]
sk_y = y

# fit the model with sklearn
sk_model.fit(sk_x, sk_y);

# make predictions
sk_yfit = sk_model.predict(sk_x)

# plot the fitted line with the equation
plt.scatter(sk_x, sk_y);
plt.plot(sk_x, sk_yfit);
# coef_ is a one-element array (one feature). Index it so that %f receives a
# plain scalar: formatting the array itself relies on an array-to-scalar
# conversion that NumPy has deprecated.
plt.text(-1.0, 15, r"Y = %f *x + %f"%(sk_model.coef_[0], sk_model.intercept_), fontsize=15);

In [None]:
import torch
import torchvision
from torchvision import transforms, datasets
import matplotlib.pyplot as plt

In [None]:
# MNIST training split, downloaded on first use; every image is converted to
# a tensor as it is loaded.
train = datasets.MNIST(
    root="",
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [None]:
# MNIST test split, loaded with the same conversion as the training split.
test = datasets.MNIST(
    root="",
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [None]:
# Wrap both splits in loaders that yield shuffled mini-batches of 10 samples.
trainset = torch.utils.data.DataLoader(
    dataset=train,
    batch_size=10,
    shuffle=True,
)
testset = torch.utils.data.DataLoader(
    dataset=test,
    batch_size=10,
    shuffle=True,
)

In [None]:
# Running sample count; the counting cell further down sets it to 0 again
# before it is used.
total = 0

In [None]:
# One zero-initialised tally per digit class 0-9.
counter_dict = {digit: 0 for digit in range(10)}

In [None]:
# Tally how many training samples carry each digit label.
# Both tallies are reset here so that re-running this cell does not stack a
# second pass on top of the first (which would push the percentages computed
# from counter_dict and total above 100).
counter_dict = {digit: 0 for digit in range(10)}
total = 0

# Loop variables are named images/labels so the regression arrays x and y
# defined earlier are not overwritten.
for images, labels in trainset:
    for label in labels.tolist():
        counter_dict[label] += 1
        total += 1
print(counter_dict)

In [None]:
# Share of the counted samples held by each digit, in percent.
for digit, count in counter_dict.items():
    print(f"{digit}: {count/total*100}")

In [None]:
import seaborn as sns

# Bar chart of the per-digit counts. The data vectors are passed by keyword:
# seaborn 0.12+ no longer accepts them positionally, and older releases
# accept the keyword form as well.
sns.barplot(x=list(counter_dict.keys()), y=list(counter_dict.values()))

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
class Net(nn.Module):
    """Fully connected classifier: 28*28 inputs, three 64-unit hidden layers, 10 outputs."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=64)
        self.fc4 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Run x through the four linear layers and return log-softmax over dim 1."""
        hidden = x
        # ReLU follows each of the first three layers; the last layer feeds
        # log_softmax directly.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        logits = self.fc4(hidden)
        return F.log_softmax(logits, dim=1)


net = Net()
print(net)

In [None]:
# Adam over every parameter of the network, learning rate 0.001.
optimizer = optim.Adam(params=net.parameters(), lr=1e-3)

In [None]:
epochs =30  # number of passes made over the training loader by the loop below
from tqdm import tqdm  # progress bar wrapped around the epoch loop below

In [None]:
# One optimisation step per mini-batch. After each epoch the loss of that
# epoch's final mini-batch is printed.
for epoch in tqdm(range(epochs)):
    for X, y in trainset:
        # Clear gradients accumulated by the previous step.
        net.zero_grad()
        # Flatten every image in the batch to a 784-element row.
        output = net(X.view(-1, 784))
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    print(loss)

In [None]:
# Measure accuracy on the held-out test split. Scoring the training loader,
# whose samples the network was fitted on, would overstate how well the
# model generalises.
correct = 0
total = 0
with torch.no_grad():  # inference only, so gradients are not tracked
    for data in testset:
        X, y = data
        output = net(X.view(-1, 784))
        # Highest-scoring class for every row of the batch, compared with the
        # labels in one vectorised step instead of a per-sample Python loop.
        predicted = torch.argmax(output, dim=1)
        correct += int((predicted == y).sum())
        total += len(y)
print("Accuracy: ", round(correct/total,3))
    

In [None]:
# Display the mini-batch of images most recently assigned to X by a loop above.
X

In [None]:
# Show the image at index 5 of the batch and print the class the network
# scores highest for it.
sample = X[5]
plt.imshow(sample.view(28, 28))
print(torch.argmax(net(sample.view(-1, 784))[0]))