# solution 1

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split

import tensorflow as tf

import keras
from keras.models import Sequential
from keras.layers import Dense, Embedding, Dropout, BatchNormalization, Activation, GlobalAveragePooling1D
from keras.callbacks import EarlyStopping
from keras.preprocessing import sequence

# Sharing and executing the official tutorial model

In [13]:
# Movie review dataset loading
imdb = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

AttributeError: ignored

In [None]:
train_data[:10]

In [None]:
# The first part of the index is reserved
word_index = {k:(v+3) for k,v in word_index.items()} 
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3

# Create a reverse dictionary
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

# Create a function for reverse lookup
def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])

In [None]:
decode_review(train_data[0])

In [None]:
print(len(train_data[0]))
print(len(train_data[1]))

In [None]:
from keras.preprocessing import sequence
train_data = sequence.pad_sequences(train_data,
                                    value=word_index["<PAD>"],
                                    padding="post",
                                    maxlen=256)

test_data = sequence.pad_sequences(test_data,
                                    value=word_index["<PAD>"],
                                    padding="post",
                                    maxlen=256)

In [None]:
print(len(train_data[0]))
print(len(train_data[1]))

In [None]:
embedding_dim=16
vocab_size = 10000

model = Sequential()
model.add(Embedding(vocab_size, embedding_dim))
model.add(GlobalAveragePooling1D())
model.add(Dense(16, activation="relu"))
model.add(Dense(1, activation="sigmoid"))

model.summary()

In [None]:
train_data.shape

In [None]:
model.compile(optimizer="adam",
              loss="binary_crossentropy",
              metrics=["accuracy"])

callbacks = EarlyStopping(patience=3)
history = model.fit(train_data, train_labels, batch_size=512 ,epochs=100, callbacks=callbacks ,validation_split=0.2)
model.evaluate(test_data, test_labels)

In [None]:
acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]

epochs = range(1, len(acc) + 1)

fig, axes = plt.subplots(1,2, figsize=(12,4))

axes[0].plot(epochs, loss, 'bo', label='Training loss')
axes[0].plot(epochs, val_loss, 'r', label='Validation loss')
axes[0].set_title('Training and validation loss')
axes[0].set_xlabel('Epochs')
axes[0].set_ylabel('Loss')
axes[0].legend()

axes[1].plot(epochs, acc, 'bo', label='Training acc')
axes[1].plot(epochs, val_acc, 'r', label='Validation acc')
axes[1].set_title('Training and validation accuracy')
axes[1].set_xlabel('Epochs')
axes[1].set_ylabel('Accuracy')
axes[1].legend(loc='lower right')
axes[1].set_ylim((0.5,1))
plt.show()

In [None]:
# Get the weight of the embedding layer
e = model.layers[0]
weights = e.get_weights()[0]
print(weights.shape) ## shape: (vocab_size, embedding_dim)

#[Problem 2] (Advance assignment) Execute various methods

In [None]:
import numpy as np
from scipy.stats import special_ortho_group
from scipy.spatial.transform import Rotation
from scipy.linalg import svd
import matplotlib.pyplot as plt

plt.style.use('seaborn-whitegrid')
FIGURE_SCALE = 1.0
FONT_SIZE = 20
plt.rcParams.update({
    'figure.figsize': np.array((8, 6)) * FIGURE_SCALE,
    'axes.labelsize': FONT_SIZE,
    'axes.titlesize': FONT_SIZE,
    'xtick.labelsize': FONT_SIZE,
    'ytick.labelsize': FONT_SIZE,
    'legend.fontsize': FONT_SIZE,
    'lines.linewidth': 3,
    'lines.markersize': 10,
})

In [None]:
def SO3_via_svd(A):
  """Map 3x3 matrix onto SO(3) via SVD."""
  u, s, vt = np.linalg.svd(A)
  s_SO3 = [1, 1, np.sign(np.linalg.det(np.matmul(u, vt)))]
  return np.matmul(np.matmul(u, np.diag(s_SO3)), vt)

In [None]:
def SO3_via_gramschmidt(A):
  """Map 3x3 matrix on SO(3) via GS, ignores last column."""
  x_normalized = A[:, 0] / np.linalg.norm(A[:, 0])
  z = np.cross(x_normalized, A[:, 1])
  z_normalized = z / np.linalg.norm(z)
  y_normalized = np.cross(z_normalized, x_normalized)
  return np.stack([x_normalized, y_normalized, z_normalized], axis=1)

In [None]:
def rotate_from_z(v):
  """Construct a rotation matrix R such that R * [0,0,||v||]^T = v.

  Input v is shape (3,), output shape is 3x3 """
  vn = v / np.linalg.norm(v)
  theta = np.arccos(vn[2])
  phi = np.arctan2(vn[1], vn[0])
  r = Rotation.from_euler('zyz', [0, theta, phi])
  R = np.squeeze(r.as_dcm()) # Maps Z to vn
  return R

def perturb_rotation_matrix(R, kappa):
  """Perturb a random rotation matrix with noise.

  Noise is random small rotation applied to each of the three
  column vectors of R. Angle of rotation is sampled from the
  von-Mises distribution on the circle (with uniform random azimuth).

  The von-Mises distribution is analagous to Gaussian distribution on the circle.
  Note, the concentration parameter kappa is inversely related to variance,
  so higher kappa means less variance, less noise applied. Good ranges for
  kappa are 64 (high noise) up to 512 (low noise).
  """
  R_perturb = []
  theta = np.random.vonmises(mu=0.0, kappa=kappa, size=(3,))
  phi = np.random.uniform(low=0.0, high=np.pi*2.0, size=(3,))
  for i in range(3):
    v = R[:, i]
    R_z_to_v = rotate_from_z(v)
    r_noise_z = np.squeeze(Rotation.from_euler('zyz', [0, theta[i], phi[i]]).as_dcm())

    v_perturb = np.matmul(R_z_to_v, np.matmul(r_noise_z, np.array([0,0,1])))
    R_perturb.append(v_perturb)

  R_perturb = np.stack(R_perturb, axis=-1)
  return R_perturb


def sigma_to_kappa(sigma):
  return ((0.5 - sigma) * 1024) + 64

In [None]:
# We create a ground truth special orthogonal matrix and perturb it with
# additive noise. We then see which orthogonalization process (SVD or GS) is
# better at recovering the ground truth matrix.


def run_expt(sigmas, num_trials, noise_type='gaussian'):
  # Always use identity as ground truth, or pick random matrix.
  # Nothing should change if we pick random (can verify by setting to True) since
  # SVD and Gram-Schmidt are both Equivariant to rotations.
  pick_random_ground_truth=False

  all_errs_svd = []
  all_errs_gs = []
  all_geo_errs_svd = []
  all_geo_errs_gs = []
  all_noise_norms = []
  all_noise_sq_norms = []

  for sig in sigmas:
    svd_errors = np.zeros(num_trials)
    gs_errors = np.zeros(num_trials)
    svd_geo_errors = np.zeros(num_trials)
    gs_geo_errors = np.zeros(num_trials)
    noise_norms = np.zeros(num_trials)
    noise_sq_norms = np.zeros(num_trials)

    for t in range(num_trials):
      if pick_random_ground_truth:
        A = special_ortho_group.rvs(3)  # Pick a random ground truth matrix
      else:
        A = np.eye(3)  # Our ground truth matrix in SO(3)

      N = None
      if noise_type == 'gaussian':
        N = np.random.standard_normal(size=(3,3)) * sig
      if noise_type == 'uniform':
        N = np.random.uniform(-1, 1, (3, 3)) * sig
      if noise_type == 'rademacher':
        N = np.sign(np.random.uniform(-1, 1, (3, 3))) * sig
      if noise_type == 'rotation':
        A_perturb = perturb_rotation_matrix(A, kappa=sigma_to_kappa(sig))
        N = A_perturb - A
      if N is None:
        print ('Error: unknown noise_type: %s', noise_type)
        return

      AplusN = A + N  # Ground-truth plus noise
      noise_norm = np.linalg.norm(N)
      noise_norm_sq = noise_norm**2

      # Compute SVD result and error.
      res_svd = SO3_via_svd(AplusN)
      error_svd = np.linalg.norm(res_svd - A, ord='fro')**2
      error_geodesic_svd = np.arccos(
          (np.trace(np.matmul(np.transpose(res_svd), A))-1.0)/2.0);

      # Compute GS result and error.
      res_gs = SO3_via_gramschmidt(AplusN)
      error_gs = np.linalg.norm(res_gs - A, ord='fro')**2
      error_geodesic_gs = np.arccos(
          (np.trace(np.matmul(np.transpose(res_gs), A))-1.0)/2.0);

      svd_errors[t] = error_svd
      gs_errors[t] = error_gs
      svd_geo_errors[t] = error_geodesic_svd
      gs_geo_errors[t] = error_geodesic_gs
      noise_norms[t] = noise_norm
      noise_sq_norms[t] = noise_norm_sq

    all_errs_svd.append(svd_errors)
    all_errs_gs.append(gs_errors)
    all_geo_errs_svd.append(svd_geo_errors)
    all_geo_errs_gs.append(gs_geo_errors)
    all_noise_norms.append(noise_norms)
    all_noise_sq_norms.append(noise_sq_norms)
    print('finished sigma = %f / kappa = %f' % (sig, sigma_to_kappa(sig)))

  return [np.array(x) for x in (
      all_errs_svd, all_errs_gs,
      all_geo_errs_svd, all_geo_errs_gs,
      all_noise_norms, all_noise_sq_norms)]

In [None]:
boxprops = dict(linewidth=2)
medianprops = dict(linewidth=2)
whiskerprops = dict(linewidth=2)
capprops = dict(linewidth=2)

def make_diff_plot(svd_errs, gs_errs, xvalues, title='', ytitle='', xtitle=''):
  plt.figure(figsize=(8,6))
  plt.title(title, fontsize=16)
  diff = gs_errs - svd_errs
  step_size = np.abs(xvalues[1] - xvalues[0])
  plt.boxplot(diff.T, positions=xvalues, widths=step_size/2, whis=[5, 95],
              boxprops=boxprops, medianprops=medianprops, whiskerprops=whiskerprops, capprops=capprops,
              showmeans=False, meanline=True, showfliers=False)
  plt.plot(xvalues, np.max(diff, axis=1), 'kx', markeredgewidth=2)
  plt.plot(xvalues, np.min(diff, axis=1), 'kx', markeredgewidth=2)
  xlim = [np.min(xvalues) - (step_size / 3), np.max(xvalues) + (step_size / 3)]
  plt.xlim(xlim)
  plt.plot(xlim, [0, 0], 'k--', linewidth=1)
  plt.xlabel(xtitle, fontsize=16)
  plt.ylabel(ytitle, fontsize=16)
  plt.tight_layout()

###Global Params

In [None]:
num_trials = 100000  # Num trials at each sigma
sigmas = np.linspace(0.125, 0.5, 4)

###Gaussian Noise

In [None]:
(all_errs_svd, all_errs_gs,
 all_geo_errs_svd, all_geo_errs_gs,
 all_noise_norms, all_noise_sq_norms
 ) = run_expt(sigmas, num_trials, noise_type='gaussian')

In [None]:
plt.plot(sigmas,
         3*sigmas**2,
         '--b',
         label='3 $\\sigma^2$')
plt.errorbar(sigmas,
             all_errs_svd.mean(axis=1),
             color='b',
             label='E[$\\|\\|\\mathrm{SVD}^+(M) - R\\|\\|_F^2]$')

plt.plot(sigmas, 6*sigmas**2,
         '--r',
         label='6 $\\sigma^2$')
plt.errorbar(sigmas,
             all_errs_gs.mean(axis=1),
             color='r',
             label='E[$\\|\\|\\mathrm{GS}^+(M) - R\\|\\|_F^2$]')

plt.xlabel('$\\sigma$')
plt.legend(loc='upper left')

In [None]:
make_diff_plot(all_errs_svd, all_errs_gs, sigmas, title='Gaussian Noise', ytitle='Frobenius Error Diff', xtitle='$\\sigma$')
make_diff_plot(all_geo_errs_svd, all_geo_errs_gs, sigmas, title='Gaussian Noise', ytitle='Geodesic Error Diff', xtitle='$\\sigma$')

In [None]:
(all_errs_svd, all_errs_gs,
 all_geo_errs_svd, all_geo_errs_gs,
 all_noise_norms, all_noise_sq_norms
 ) = run_expt(sigmas, num_trials, noise_type='uniform')

In [None]:
make_diff_plot(all_errs_svd, all_errs_gs, sigmas, title='Uniform Noise', ytitle='Frobenius Error Diff', xtitle='$\\phi$')
make_diff_plot(all_geo_errs_svd, all_geo_errs_gs, sigmas, title='Uniform Noise', ytitle='Geodesic Error Diff', xtitle='$\\phi$')

# solution 3 Learning Iris (binary classification) with Keras

In [None]:
from sklearn.datasets import load_iris
import numpy as np

iris = load_iris()
X = iris.data[50:]
y = iris.target[50:]

print(X.shape)
print(y.shape)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelBinarizer

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

le = LabelBinarizer()
le.fit(y_train)
y_train = le.transform(y_train)
y_test = le.transform(y_test)

In [None]:
from keras.models import Sequential
from keras.layers import Dense

def iris_nn_classifier(n_features):
  model = Sequential()
  model.add(Dense(64, activation="relu", input_shape=(n_features,)))
  model.add(Dense(1, activation="sigmoid"))

  model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
  return model

n_features = 4
model = iris_nn_classifier(n_features)
history = model.fit(X_train, y_train, batch_size=20, epochs=10, validation_split=0.1)

In [None]:
model.evaluate(X_test, y_test)

In [None]:
from keras.utils import to_categorical

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [None]:
def iris_nn_multi_classifier(n_features):
  model = Sequential()
  model.add(Dense(256, activation="relu", input_shape=(n_features,)))
  model.add(Dense(128, activation="relu"))
  model.add(Dense(64, activation="relu"))
  model.add(Dense(3, activation="softmax"))

  model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
  return model

n_features = 4
model = iris_nn_multi_classifier(n_features)
history = model.fit(X_train, y_train, batch_size=20, epochs=10, validation_split=0.1)

In [None]:
model.evaluate(X_test, y_test)

# Learning House Prices with Keras

In [None]:
df = pd.read_csv("train.csv")
X_train = df.loc[:, ["TotalBsmtSF", "YearBuilt", "GarageArea"]]
y_train = df.loc[:, "SalePrice"]

X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, random_state=0)

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
def nn_regression(n_features, n_output):
  model = Sequential()
  model.add(Dense(256, activation="relu", input_shape=(n_features,)))
  model.add(Dropout(0.2))
  model.add(Dense(128, activation="relu"))
  model.add(Dropout(0.2))
  model.add(Dense(64, activation="relu"))
  model.add(Dropout(0.2))
  model.add(Dense(n_output, activation="linear"))

  model.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])
  return model

callbacks = EarlyStopping(patience=3)

n_features = X_train.shape[1]
n_output = 1
model = nn_regression(n_features, n_output)
model.fit(X_train, y_train, batch_size=128, epochs=10, callbacks=callbacks, validation_split=0.2)

In [None]:
model.evaluate(X_test, y_test)

# Learning MNIST with Keras

In [None]:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
img_height = 28
img_width = 28
num_features = int(img_height * img_width)

X_train = X_train.reshape(-1, num_features).astype("float")
X_test = X_test.reshape(-1, num_features).astype("float")

X_train /= 255
X_test /= 255

y_train = to_categorical(y_train.reshape(-1,1))
y_test = to_categorical(y_test.reshape(-1,1))

print(X_train.shape)
print(y_train.shape)

In [None]:
from keras.callbacks import EarlyStopping

def mnist_classifier(n_features, n_output):
  model = Sequential()
  model.add(Dense(256, activation="relu", input_shape=(n_features,)))
  model.add(Dense(128, activation="relu"))
  model.add(Dense(64, activation="relu"))
  model.add(Dense(n_output, activation="softmax"))

  model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
  return model

callbacks = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=2)

model = mnist_classifier(784, 10)
history = model.fit(X_train, y_train, batch_size=20, epochs=10, callbacks=callbacks, validation_split=0.2)

In [None]:
model.evaluate(X_test, y_test)

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt

In [None]:
#Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

#Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [None]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

In [None]:
#Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

#Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [None]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

In [None]:
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

###PyTorch Convolutional Neural Network With MNIST Dataset

In [None]:
# Import libraries
import torch

In [None]:
#Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
#Download MNIST dataset in local system
from torchvision import datasets
from torchvision.transforms import ToTensor
train_data = datasets.MNIST(
    root = 'data',
    train = True,                         
    transform = ToTensor(), 
    download = True,            
)
test_data = datasets.MNIST(
    root = 'data', 
    train = False, 
    transform = ToTensor()
)

In [None]:
#Print train_data and test_data size
print(train_data)
print(test_data)

In [None]:
print(train_data.data.size())

In [None]:
#Visualization of MNIST dataset
#Plot one train_data
import matplotlib.pyplot as plt
plt.imshow(train_data.data[0], cmap='gray')
plt.title('%i' % train_data.targets[0])
plt.show()

In [None]:
#Plot multiple train_data
figure = plt.figure(figsize=(10, 8))
cols, rows = 5, 5
for i in range(1, cols * rows + 1):
    sample_idx = torch.randint(len(train_data), size=(1,)).item()
    img, label = train_data[sample_idx]
    figure.add_subplot(rows, cols, i)
    plt.title(label)
    plt.axis("off")
    plt.imshow(img.squeeze(), cmap="gray")
plt.show()

In [None]:
#Preparing data for training with DataLoaders
from torch.utils.data import DataLoader
loaders = {
    'train' : torch.utils.data.DataLoader(train_data, 
                                          batch_size=100, 
                                          shuffle=True, 
                                          num_workers=1),
    
    'test'  : torch.utils.data.DataLoader(test_data, 
                                          batch_size=100, 
                                          shuffle=True, 
                                          num_workers=1),
}
loaders

In [None]:
import torch.nn as nn
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(         
            nn.Conv2d(
                in_channels=1,              
                out_channels=16,            
                kernel_size=5,              
                stride=1,                   
                padding=2,                  
            ),                              
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),    
        )
        self.conv2 = nn.Sequential(         
            nn.Conv2d(16, 32, 5, 1, 2),     
            nn.ReLU(),                      
            nn.MaxPool2d(2),                
        )
        # fully connected layer, output 10 classes
        self.out = nn.Linear(32 * 7 * 7, 10)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        x = x.view(x.size(0), -1)       
        output = self.out(x)
        return output, x    # return x for visualization

In [None]:
cnn = CNN()
print(cnn)

In [None]:
#Define loss function
loss_func = nn.CrossEntropyLoss()   
loss_func

In [None]:
#Define a Optimization Function
from torch import optim
optimizer = optim.Adam(cnn.parameters(), lr = 0.01)   
optimizer

In [None]:
#Train the model
from torch.autograd import Variable
num_epochs = 10
def train(num_epochs, cnn, loaders):
    
    cnn.train()
        
    #Train the model
    total_step = len(loaders['train'])
        
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(loaders['train']):
            
            #gives batch data, normalize x when iterate train_loader
            b_x = Variable(images)   # batch x
            b_y = Variable(labels)   # batch y
output = cnn(b_x)[0]               
            loss = loss_func(output, b_y)
            
            #clear gradients for this training step   
            optimizer.zero_grad()           
            
            #backpropagation, compute gradients 
            loss.backward()    
            #apply gradients             
            optimizer.step()                
            
            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                       .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))
               pass
        
        pass
    
    
    pass
train(num_epochs, cnn, loaders)

In [None]:
#Evaluate the model on test data
def test():
    # Test the model
    cnn.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loaders['test']:
            test_output, last_layer = cnn(images)
            pred_y = torch.max(test_output, 1)[1].data.squeeze()
            accuracy = (pred_y == labels).sum().item() / float(labels.size(0))
            pass
print('Test Accuracy of the model on the 10000 test images: %.2f' % accuracy)
    
    pass
test()

In [None]:
sample = next(iter(loaders['test']))
imgs, lbls = sample

In [None]:
actual_number = lbls[:10].numpy()
actual_number

In [None]:
test_output, last_layer = cnn(imgs[:10])
pred_y = torch.max(test_output, 1)[1].data.numpy().squeeze()
print(f'Prediction number: {pred_y}')
print(f'Actual number: {actual_number}')

# Predict house prices.

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.utils.data.dataloader as dataloader
import torch.nn.functional as F
import pandas as pd

In [None]:
pd.read_csv('california_housing_train.csv')

In [None]:
#getting std and mean of training data, as it needs for normalization and denormalization of data

train_csv = pd.read_csv('california_housing_train.csv') 
train_mean =  train_csv.mean()
train_std = train_csv.std()

In [None]:
#creating custom dataset class

class MyDataset(torch.utils.data.Dataset):
    def __init__(self, csv_file):
        self.data_frame = pd.read_csv(csv_file)
        self.norm_data = (self.data_frame - train_mean)/train_std  

    def __len__(self):
        return len(self.norm_data)

    def __getitem__(self, idx):
        data = self.norm_data.iloc[idx, 2:8].values # keep all except median_house_value  
        label = self.norm_data.iloc[idx, 8:9].values # keep only median_house_value  
        
        data = torch.tensor(data, dtype=torch.float32)
        label = torch.tensor(label, dtype=torch.float32)

        return {'data': data, 'label':label}

In [None]:
#Loading our dataset object in DataLoader class
train_data = MyDataset('california_housing_train.csv')
dataset_len = len(train_data)
train_data = torch.utils.data.DataLoader(dataset=train_data, shuffle=True, batch_size=10)

In [None]:
#Create artificial neural network model class
#our model

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(in_features=6, out_features=18, bias=True)
        self.fc2 = nn.Linear(in_features=18, out_features=18, bias=True)
        self.fc3 = nn.Linear(in_features=18, out_features=12, bias=True)
        self.fc4 = nn.Linear(in_features=12, out_features=1, bias=True)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

net = Network()

In [None]:
#Define optimization and loss functions
#loss and optimizer
optimizer = optim.Adam(net.parameters(), lr=0.001)
criterion = nn.MSELoss()

In [None]:
#Create training loop
#training the model

for epoch in range(15):
    running_loss = 0.0
    for i, value in enumerate(train_data):
        inputs = value['data']
        labels = value['label']
        prediction = net(inputs) # passing inputs to our model to get prediction
        loss = criterion(prediction, labels)
        running_loss += loss.item() * inputs.size(0) # multiplying with batch size
        optimizer.zero_grad() # reset all gradient calculation
        loss.backward() # this is backpropagation to calculate gradients
        optimizer.step() # applying gradient descent to update weights and bias values

    print('epoch: ', epoch, ' loss: ', running_loss/dataset_len)

In [None]:
#test the model

test_data = MyDataset('california_housing_test.csv')
test_data_len = len(test_data)
test_dataset = torch.utils.data.DataLoader(dataset=test_data, shuffle=False, batch_size=10)

running_loss = 0.0
accuracy = 0.0
for i, value in enumerate(test_dataset):
    inputs = value['data']
    labels = value['label']

    prediction = net(inputs)
    loss = criterion(prediction, labels)

        
    running_loss += loss.item() * inputs.size(0) # multiplying by batch size

print('Test loss: ', running_loss/test_data_len)

In [None]:
#let's see first 10 predictions

test_dataset_sample = next(iter(test_dataset)) # return first batch of data
outputs = net(test_dataset_sample['data']) 
outputs = (outputs * train_std.values[-1]) + train_mean.values[-1] # denormalizing data to see real prices
print(outputs)

In [None]:
pd.read_csv('california_housing_test.csv').head(10)

# Classifying the Iris Data Set with PyTorch

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('ggplot')

In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']

# Scale data to have mean 0 and variance 1 
# which is importance for convergence of the neural network
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data set into training and testing
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=2)

In [None]:
## Visualize the Data
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
for target, target_name in enumerate(names):
    X_plot = X[y == target]
    ax1.plot(X_plot[:, 0], X_plot[:, 1], 
             linestyle='none', 
             marker='o', 
             label=target_name)
ax1.set_xlabel(feature_names[0])
ax1.set_ylabel(feature_names[1])
ax1.axis('equal')
ax1.legend();

for target, target_name in enumerate(names):
    X_plot = X[y == target]
    ax2.plot(X_plot[:, 2], X_plot[:, 3], 
             linestyle='none', 
             marker='o', 
             label=target_name)
ax2.set_xlabel(feature_names[2])
ax2.set_ylabel(feature_names[3])
ax2.axis('equal')
ax2.legend();

In [None]:
## Configure Neural Network Models
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.autograd import Variable

In [None]:
class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 50)
        self.layer2 = nn.Linear(50, 50)
        self.layer3 = nn.Linear(50, 3)
        
    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.softmax(self.layer3(x), dim=1)
        return x

In [None]:
model     = Model(X_train.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn   = nn.CrossEntropyLoss()
model

In [None]:
## Train the Model
import tqdm

EPOCHS  = 100
X_train = Variable(torch.from_numpy(X_train)).float()
y_train = Variable(torch.from_numpy(y_train)).long()
X_test  = Variable(torch.from_numpy(X_test)).float()
y_test  = Variable(torch.from_numpy(y_test)).long()

loss_list     = np.zeros((EPOCHS,))
accuracy_list = np.zeros((EPOCHS,))

for epoch in tqdm.trange(EPOCHS):
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    loss_list[epoch] = loss.item()
    
    # Zero gradients
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    with torch.no_grad():
        y_pred = model(X_test)
        correct = (torch.argmax(y_pred, dim=1) == y_test).type(torch.FloatTensor)
        accuracy_list[epoch] = correct.mean()

In [None]:
## Plot Accuracy and Loss from Training
fig, (ax1, ax2) = plt.subplots(2, figsize=(12, 6), sharex=True)

ax1.plot(accuracy_list)
ax1.set_ylabel("validation accuracy")
ax2.plot(loss_list)
ax2.set_ylabel("validation loss")
ax2.set_xlabel("epochs");

In [None]:
## Show ROC Curve
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import OneHotEncoder

plt.figure(figsize=(10, 10))
plt.plot([0, 1], [0, 1], 'k--')

# One hot encoding
enc = OneHotEncoder()
Y_onehot = enc.fit_transform(y_test[:, np.newaxis]).toarray()

with torch.no_grad():
    y_pred = model(X_test).numpy()
    fpr, tpr, threshold = roc_curve(Y_onehot.ravel(), y_pred.ravel())
    
plt.plot(fpr, tpr, label='AUC = {:.3f}'.format(auc(fpr, tpr)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend();