<a href="https://colab.research.google.com/github/josepeon/python_dad_class/blob/main/Intro_to_ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Artificial Neural Networks: Introduction to Neural Nets for Regression and Classification


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
tips = sns.load_dataset('tips')

In [None]:
tips.head()

In [None]:
tips.plot.scatter(x = 'total_bill', y = 'tip')

## Building the network with pytorch

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
neuron = nn.Linear(in_features=1, out_features=1)

In [None]:
list(neuron.parameters())

In [None]:
X = torch.tensor(tips[['total_bill']].values, dtype=torch.float32)

In [None]:
# standardize the bill amounts: subtract the mean, divide by the standard deviation
bill_mean, bill_std = X.mean(), X.std()
X = (X - bill_mean) / bill_std

In [None]:
X.shape

In [None]:
neuron(X)

In [None]:
activation = nn.ReLU()

In [None]:
loss_function = nn.MSELoss()

In [None]:
y = torch.tensor(tips[['tip']].values, dtype=torch.float32)

In [None]:
model = nn.Sequential(neuron, activation)

In [None]:
list(model.parameters())

In [None]:
optimizer = optim.SGD(model.parameters(), lr = 0.01)

In [None]:
#step 1: pass data through the model
yhat = model(X)

In [None]:
#relu(weight x bill + bias)
yhat

In [None]:
loss = loss_function(y, yhat)

In [None]:
loss

In [None]:
optimizer.zero_grad()

In [None]:
loss.backward()

In [None]:
optimizer.step()

In [None]:
# history of the training error, one entry per iteration
losses = []
# train the network 1000 times (full-batch gradient descent)
for epoch in range(1000):
  # forward pass: run the data through the network
  yhat = model(X)
  # evaluate the error; PyTorch losses take (prediction, target) in that order
  loss = loss_function(yhat, y)
  # clear out stale gradients, then backpropagate the loss
  optimizer.zero_grad()
  loss.backward()
  # track the error as a plain Python float
  losses.append(loss.item())
  # weights and biases get adjusted/updated
  optimizer.step()

In [None]:
plt.plot(losses)
plt.title('Mean Squared Error through training');

In [None]:
model(X[:10])

In [None]:
y[:10]

In [None]:
# Predict with the final weights: the yhat left over from the training loop
# was computed before the last optimizer step.
with torch.no_grad():
  fitted = model(X).numpy().ravel()
# Draw the fitted line left to right so its segments do not double back.
order = tips['total_bill'].values.argsort()
plt.scatter(tips['total_bill'], tips['tip'])
plt.plot(tips['total_bill'].values[order], fitted[order], '-r')

**PROBLEM**

Use the diabetes data below to build a neural net for regression.

In [None]:
from sklearn.datasets import load_diabetes

In [None]:
diabetes = load_diabetes(as_frame = True).frame
diabetes.head()

In [None]:
X = torch.tensor(diabetes.iloc[:, :-1].values, dtype = torch.float32)
y = torch.tensor(diabetes[['target']].values, dtype = torch.float32)

In [None]:
X.shape

In [None]:
# Regression head: a single linear layer with no activation on the output.
# A trailing ReLU would clamp every prediction at zero, and if the linear
# layer starts out negative for every sample the ReLU output and its gradient
# are both zero, so the network could never learn.
model = nn.Sequential(nn.Linear(in_features=10, out_features=1))
# mean squared error, minimized with plain stochastic gradient descent
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [None]:
# history of the training error, one entry per iteration
losses = []
# train the network 1000 times (full-batch gradient descent)
for epoch in range(1000):
  # forward pass: run the data through the network
  yhat = model(X)
  # evaluate the error; PyTorch losses take (prediction, target) in that order
  loss = loss_function(yhat, y)
  # clear out stale gradients, then backpropagate the loss
  optimizer.zero_grad()
  loss.backward()
  # track the error as a plain Python float
  losses.append(loss.item())
  # weights and biases get adjusted/updated
  optimizer.step()

In [None]:
plt.plot(losses)

## Binary Classification

In this situation, we are predicting a binary category.  In the example below, we predict whether a tumor is malignant (class 0) or benign (class 1).  We use all 30 features to predict the `target` column.  The primary differences from the regression examples are:

- Activation function at end of network is `nn.Sigmoid`
- Loss function is `nn.BCELoss()`

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
cancer = load_breast_cancer(as_frame=True).frame

In [None]:
cancer.head()

In [None]:
# one linear layer over the 30 features, squashed to a probability by a sigmoid
model = nn.Sequential(
    nn.Linear(30, 1),
    nn.Sigmoid(),
)

In [None]:
X = cancer.drop('target', axis = 1).values
y = cancer[['target']].values

In [None]:
X = torch.tensor(X, dtype = torch.float32)
y = torch.tensor(y, dtype = torch.float32)

In [None]:
loss_function = nn.BCELoss() #binary classification loss

In [None]:
optimizer = optim.SGD(model.parameters(), lr = 0.1)

In [None]:
# Standardize each feature column on its own (dim=0 reduces over the rows).
# A single global mean/std would leave the columns on very different scales,
# letting the largest-valued features dominate training.
X = (X - X.mean(dim=0)) / X.std(dim=0)

In [None]:
# history of the training error, one entry per iteration
losses = []
for epoch in range(1000):
  # forward pass over the full data set
  yhat = model(X)
  # error between the model output and the labels
  loss = loss_function(yhat, y)
  # clear old gradients before computing new ones
  optimizer.zero_grad()
  loss.backward()
  # record the error as a plain Python float
  losses.append(loss.item())
  # apply the gradient update to the parameters
  optimizer.step()

In [None]:
plt.plot(losses);

In [None]:
model(X)[:10] #output of model is probability something is a 1

In [None]:
# turn the model output into hard 0/1 labels with a 0.5 cutoff
predictions = (model(X) > 0.5).long()

In [None]:
predictions[:5]

In [None]:
y[:5]

In [None]:
(y == predictions).sum()/len(y)

**PROBLEM**

Use the orange vs. grapefruit dataset below to build a neural network for classification.

In [None]:
from sklearn.datasets import fetch_openml

In [None]:
fruit = fetch_openml(data_id=43387).frame

In [None]:
fruit.head()

In [None]:
fruit.info()

In [None]:
import numpy as np
# features: every column after the first, as a float32 tensor
X = torch.tensor(fruit.iloc[:, 1:].values, dtype = torch.float32)
# labels: 1 where the name column equals 'orange', 0 otherwise
y_binary = np.where(fruit[['name']].values == 'orange', 1, 0)

In [None]:
y_binary

In [None]:
y = torch.tensor(y_binary, dtype = torch.float32)

In [None]:
# binary classifier: one linear layer over 5 input features followed by a sigmoid
model = nn.Sequential(nn.Linear(in_features = 5, out_features=1), nn.Sigmoid())
# binary cross-entropy loss, optimized with plain SGD
loss_function = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr = 0.1)

In [None]:
# Standardize each feature column on its own (dim=0 reduces over the rows).
# A single global mean/std would leave the columns on different scales,
# letting the largest-valued features dominate training.
X = (X - X.mean(dim=0)) / X.std(dim=0)

In [None]:
# history of the training error, one entry per iteration
losses = []
for epoch in range(1000):
  # forward pass over the full data set
  yhat = model(X)
  # error between the model output and the labels
  loss = loss_function(yhat, y)
  # clear old gradients before computing new ones
  optimizer.zero_grad()
  loss.backward()
  # record the error as a plain Python float
  losses.append(loss.item())
  # apply the gradient update to the parameters
  optimizer.step()

In [None]:
plt.plot(losses)

In [None]:
yhat = torch.where(model(X) > .5, 1, 0)

In [None]:
(yhat == y).sum()/len(y)

In [None]:
torch.cuda.is_available()

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
device

In [None]:
X = X.to(device)

## Multi-class classification

In [None]:
from torchvision.datasets import MNIST

In [None]:
train = MNIST('.', download = True)

In [None]:
plt.imshow(train[0][0])

In [None]:
from torchvision.transforms import ToTensor, Normalize, Compose
from torch.utils.data import DataLoader

In [None]:
#turn image into pytorch tensor
#then scale the tensor
#mean and std are one-element tuples (one entry per image channel);
#note that (0.5) without the trailing comma is just the float 0.5
transform = Compose([ToTensor(), Normalize(mean=(0.5,), std=(0.5,))])

In [None]:
train = MNIST('.', download = True, transform=transform)

In [None]:
train

In [None]:
#create smaller batches of data for training
#reshuffle every epoch so the mini-batches are not replayed in the same fixed order
trainloader = DataLoader(train, batch_size=24, shuffle=True)

In [None]:
# flatten each 28x28 image to 784 values, then 784 -> 100 -> 10 class scores
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 100),
    nn.ReLU(),
    nn.Linear(in_features=100, out_features=10),
)

In [None]:
loss_function = nn.CrossEntropyLoss()

In [None]:
optimizer = optim.SGD(model.parameters(), lr = 0.01)

In [None]:
# mean training loss of each epoch (averaged over all of its mini-batches,
# rather than just whatever the final mini-batch happened to score)
losses = []
for epoch in range(10):
  epoch_loss = 0.0
  for X, y in trainloader:
    # forward pass and loss for this mini-batch
    yhat = model(X)
    loss = loss_function(yhat, y)
    # clear old gradients, backpropagate, update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # accumulate this batch's loss as a plain float
    epoch_loss += loss.item()
  losses.append(epoch_loss / len(trainloader))

In [None]:
plt.plot(losses)

In [None]:
# count how many training images the model labels correctly
correct = 0
total = 0
# evaluation only: no gradients are needed, so skip autograd bookkeeping
with torch.no_grad():
  for X, y in trainloader:
    # predicted class = index of the largest score in each row
    yhat = torch.argmax(model(X), dim=1)
    # .item() keeps the running count a plain Python int
    correct += (y == yhat).sum().item()
    total += len(y)

In [None]:
correct/total

In [None]:
y

### Train it on the GPU

In [None]:
# use the GPU when one is present, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
#move the model to the target device; each batch is moved inside the loop
model = model.to(device)
# mean training loss of each epoch (averaged over all of its mini-batches,
# rather than just whatever the final mini-batch happened to score)
losses = []
for epoch in range(20):
  # keep the running sum on the device so only one value per epoch is copied back
  running_loss = torch.zeros((), device=device)
  for X, y in trainloader:
    X = X.to(device)
    y = y.to(device)
    # forward pass and loss for this mini-batch
    yhat = model(X)
    loss = loss_function(yhat, y)
    # clear old gradients, backpropagate, update the parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    running_loss += loss.detach()
  losses.append(running_loss.item() / len(trainloader))

In [None]:
plt.plot(losses)

In [None]:
model

In [None]:
# count how many training images the model labels correctly
correct = 0
total = 0
# evaluation only: no gradients are needed, so skip autograd bookkeeping
with torch.no_grad():
  for X, y in trainloader:
    X = X.to(device)
    y = y.to(device)
    # predicted class = index of the largest score in each row
    yhat = torch.argmax(model(X), dim=1)
    # .item() copies the count back as a plain Python int
    correct += (y == yhat).sum().item()
    total += len(y)

In [None]:
correct/total