In [None]:
# Install pinned torch / torchvision / torchaudio wheels from the PyTorch wheel index.
# The active line requests the `+cu101` builds; the commented line below is the
# `+cpu` alternative for machines without a matching GPU runtime.
!pip install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
# !pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
# Optional sanity check of the installed version:
# import torch
# print (torch.__version__)

In [None]:
# Import libraries necessary for this project
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random

from keras.datasets import mnist #load our MNIST data
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
# from sklearn.metrics import confusion_matrix
# plotting tool
import matplotlib.pyplot as plt

# Column headers for the Boston housing table, listed in file order.
column_names = 'CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT MEDV'.split()

# The file carries no header row and its fields are separated by runs of
# whitespace, so the names are supplied explicitly.
boston_data = pd.read_csv(
    '../input/boston-house-prices/housing.csv',
    names=column_names,
    header=None,
    delimiter=r"\s+",
)

# Linear Model

First, let's build a linear model. Because the number of parameters is large relative to the number of training examples, I will use Ridge regression.

In [None]:
import sklearn
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

# Separate the MEDV target from the remaining feature columns and convert
# both to NumPy arrays.
y = boston_data['MEDV'].values
x = np.array(boston_data.drop(columns=['MEDV']))

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, random_state=42
)

# Ridge regression with regularisation strength 0.5, fitted on the training split.
lr = Ridge(alpha=0.5)
lr.fit(x_train, y_train)

In [None]:
from sklearn.metrics import r2_score

# R^2 of the Ridge model on the held-out split. `r2_score` expects the ground
# truth first and the predictions second; the metric is not symmetric, so the
# order matters.
r2_score(y_test, lr.predict(x_test))

# Building Our Neural Networks

Now, let's build our neural networks. 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd

# Training hyper-parameters.
batch_size = 50
num_epochs = 250
learning_rate = 0.001
hidden_size = 64
# Number of full batches of `batch_size` rows that fit in the training split.
batch_no = len(x_train) // batch_size
# One input unit per feature column.
input_dim = x.shape[1]

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# Baseline with no hidden layers at all: a single affine map from the features
# to the scalar target. (The earlier definition contained a hidden ReLU layer,
# which contradicted both this description and the model's name.)
model_linear = nn.Sequential(
    nn.Linear(input_dim, 1)
)

# Network with exactly one hidden layer of `hidden_size` ReLU units.
# (The earlier definition stacked two hidden layers.)
model = nn.Sequential(
    nn.Linear(input_dim, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, 1)
)

# Mean squared error, averaged over the batch. `reduction` is the supported
# keyword; `reduce` is a deprecated boolean flag and does not accept 'mean'.
loss = nn.MSELoss(reduction='mean')

# Adam optimizer for `model` only; `model_linear` is not covered by it.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
model_linear = model_linear.to(device)
model = model.to(device)

# The shared `optimizer` was built from `model.parameters()` only, so stepping
# it never updates `model_linear`. Give the linear model its own Adam optimizer.
optimizer_linear = optim.Adam(model_linear.parameters(), lr=learning_rate)


def train_and_report(net, opt, train_x, train_y, val_x, val_y, epochs, samples_per_step=1):
    """Train `net` with `opt` and print per-epoch loss and validation R2.

    Args:
        net: module mapping a float feature batch to one prediction per row.
        opt: optimizer that owns the parameters of `net`.
        train_x, train_y: training features and targets (array-likes that
            support slicing and `len`).
        val_x, val_y: held-out features and targets used for the R2 score.
        epochs: number of passes over the training data.
        samples_per_step: rows consumed per optimizer step. The default of 1
            keeps the one-sample-per-update behaviour of the original loops.

    The printed loss is the sum of the per-step losses within the epoch.
    """
    val_inputs = torch.Tensor(val_x).to(device)

    for epoch in range(epochs):
        epoch_loss = 0.0

        # Cover the whole training split rather than a hard-coded row count.
        for start in range(0, len(train_x), samples_per_step):
            end = start + samples_per_step
            x_batch = torch.FloatTensor(train_x[start:end]).to(device)
            y_batch = torch.FloatTensor(train_y[start:end]).to(device)

            y_pred = net(x_batch)
            # Targets are 1-D; add a trailing axis so they line up with the
            # (rows, 1) predictions instead of broadcasting.
            loss_step = loss(y_pred, torch.unsqueeze(y_batch, dim=1))

            opt.zero_grad()
            loss_step.backward()
            opt.step()
            epoch_loss += loss_step.item()

        val_pred = net(val_inputs).detach().cpu().numpy().ravel()
        # r2_score takes the ground truth first, then the predictions.
        print("Epoch {}, Loss: {}. Validation R2: {}".format(
            epoch + 1, epoch_loss, r2_score(val_y, val_pred)))


print ("Linear model first")
train_and_report(model_linear, optimizer_linear, x_train, y_train, x_test, y_test, num_epochs)

print ("+++++++++++++++++++++++++++++++++++++")
print ("Now for a single hidden layer network")
train_and_report(model, optimizer, x_train, y_train, x_test, y_test, num_epochs)

# Naive Bayes

In [None]:
# Fetch MNIST as two (images, labels) pairs: one for training, one for testing.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Keep only the samples whose label is 1 or 2, in both splits.
train_filter = np.where(np.isin(y_train, (1, 2)))
test_filter = np.where(np.isin(y_test, (1, 2)))
X_train = X_train[train_filter]
y_train = y_train[train_filter]
X_test = X_test[test_filter]
y_test = y_test[test_filter]

In [None]:
# Draw a 4x5 grid of training images picked at random (repeats are possible).
n_rows, n_cols = 4, 5
n_images = X_train.shape[0]

for slot in range(n_rows * n_cols):
    im_idx = random.randint(0, n_images - 1)
    # Subplot positions are 1-based.
    plt.subplot(n_rows, n_cols, slot + 1)
    plt.imshow(X_train[im_idx], cmap='gray')
    plt.axis('off')
plt.show()

In [None]:
# Collapse each image's two spatial axes into one feature vector and cast to
# float32; the same treatment is applied to both splits.
num_pixels = X_train.shape[1] * X_train.shape[2]

X_train, X_test = (
    split.reshape(split.shape[0], num_pixels).astype('float32')
    for split in (X_train, X_test)
)



In [None]:
# Report the shapes of the training features, training labels and test features.
for split in (X_train, y_train, X_test):
    print(split.shape)
# Dump the second training example in full.
print(X_train[1])



# Training the Naive Bayes Classifier

In [None]:
# Multinomial naive Bayes with scikit-learn's default settings,
# fitted on the flattened training images.
clf_nb = MultinomialNB()
clf_nb.fit(X=X_train, y=y_train)

# Evaluating the Naive Bayes classifier

In [None]:
# Predict the held-out images and report the fraction classified correctly.
y_test_pred_nb = clf_nb.predict(X_test)
a_nb = accuracy_score(y_true=y_test, y_pred=y_test_pred_nb)
print(a_nb)

# Naive Bayes Classifier in PyTorch

In [None]:
%matplotlib inline
import math
import torch
import torchvision
from IPython import display


# Request SVG output for inline matplotlib figures.
# NOTE(review): newer IPython releases flag this helper as deprecated in favour of
# the one in `matplotlib_inline` — confirm against the IPython version in use.
display.set_matplotlib_formats('svg')

In [None]:
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a list of images on a `num_rows` x `num_cols` grid.

    Args:
        imgs: iterable of images. Torch tensors are detached, moved to the CPU
            and converted to NumPy; anything else is handed to `imshow` as is.
        num_rows, num_cols: grid dimensions. Images beyond
            `num_rows * num_cols` are silently not drawn.
        titles: optional sequence of per-image titles, indexed like `imgs`.
        scale: size of each grid cell, used to derive the figure size.

    Returns:
        The flattened array of axes (length `num_rows * num_cols`).
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False always yields a 2-D array of axes, so flattening also
    # works for a 1x1 grid (where the default returns a bare Axes object).
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # `.cpu()` is a no-op for tensors already on the CPU, so no device
            # check is needed; `.detach()` allows tensors that require grad.
            ax.imshow(img.detach().cpu().numpy())
        else:
            # PIL image or array-like.
            ax.imshow(img)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes

In [None]:
# Convert every sample to a tensor when it is read from the dataset.
data_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

# Download (if needed) and open the MNIST train and test splits under ./temp.
mnist_train = torchvision.datasets.MNIST(
    root='./temp', train=True, download=True, transform=data_transform)
mnist_test = torchvision.datasets.MNIST(
    root='./temp', train=False, download=True, transform=data_transform)


# Keep only the digits 1 and 2 by masking the raw data and target tensors of
# each split in place.
for part in (mnist_train, mnist_test):
    keep = (part.targets == 1) | (part.targets == 2)
    part.data, part.targets = part.data[keep], part.targets[keep]

In [None]:
# Pull one (image, label) pair out of the filtered training set and inspect it.
# `image` and `label` are reused by later cells as a single test sample.
image, label = mnist_train[2]
print(image.shape, label)
# Tensor shape and element dtype of the image.
print(image.shape, image.dtype)
# Label value and the Python type it is returned as.
print(label, type(label))

In [None]:
# Gather the 28 training samples at indices 10..37 once, then split them into
# an image stack and a label vector.
samples = [mnist_train[i] for i in range(10, 38)]
# Stacking along dim=1 and dropping the leading axis leaves one image per row.
images = torch.stack([picture for picture, _target in samples], dim=1).squeeze(0)
labels = torch.tensor([target for _picture, target in samples])
images.shape, labels.shape

In [None]:
# `images` holds 28 samples but a 2x9 grid has only 18 axes, so just the first
# 18 images are drawn.
show_images(images, 2, 9);

# Training

In [None]:
# Materialise the whole filtered training set as one image tensor and one
# label tensor on the compute device.
n_train = len(mnist_train)
X = torch.stack([mnist_train[i][0] for i in range(n_train)],
                dim=1).squeeze(0).to(device)
Y = torch.tensor([mnist_train[i][1] for i in range(n_train)]).to(device)

# Count the samples of each class: slot 0 holds digit 1, slot 1 holds digit 2.
n_y = torch.zeros(2)
for slot, digit in enumerate((1, 2)):
    n_y[slot] = (Y == digit).sum()

# Class priors are the relative class frequencies.
P_y = n_y / n_y.sum()
P_y

In [None]:
# Per-class pixel totals: row 0 accumulates the images labelled 1, row 1 the
# images labelled 2. The sum is taken directly on the tensors; only the result
# is moved to the CPU, where `n_x` and `n_y` live.
n_x = torch.zeros((2, 28, 28))
for slot, digit in enumerate((1, 2)):
    n_x[slot] = X[Y == digit].sum(dim=0).cpu()

# Laplace (add-one) smoothing for a two-outcome (on/off) pixel model: one
# pseudo-count goes to each outcome, so the denominator grows by 2, not 1.
# Assuming pixel values lie in [0, 1], this keeps every probability strictly
# inside (0, 1), so both log(P) and log(1 - P) stay finite.
P_xy = (n_x + 1) / (n_y + 2).reshape(2, 1, 1)

# One picture per class showing the estimated pixel probabilities.
show_images(P_xy, 1, 2);

In [None]:
# Pre-compute, on the compute device, the logs of the pixel probabilities,
# of their complements and of the class priors.
log_P_xy, log_P_xy_neg, log_P_y = (
    torch.log(table).to(device) for table in (P_xy, 1 - P_xy, P_y)
)


def bayes_pred_stable(x):
    """Return the unnormalised log-posterior of each of the two classes for `x`.

    `x` is given a leading axis and moved to `device`, then combined with the
    module-level log tables `log_P_xy` / `log_P_xy_neg` by broadcasting. The
    per-pixel terms are summed per class and the log prior `log_P_y` is added.
    The result has one entry per class.
    """
    pixels = x.unsqueeze(0).to(device)
    # Weight log(P) by the pixel value and log(1 - P) by its complement.
    on_term = log_P_xy * pixels
    off_term = log_P_xy_neg * (1 - pixels)
    # Collapse everything except the class axis into one log-likelihood per class.
    log_likelihood = (on_term + off_term).reshape(2, -1).sum(axis=1)
    return (log_likelihood + log_P_y).to(device)

# Score the single sample `image` under both classes and display the two
# resulting log-scores.
py = bayes_pred_stable(image)
py

In [None]:
# Row 0 of `py` scores digit 1 and row 1 scores digit 2, so the winning row
# index is compared against `label - 1` rather than `label` itself.
py.argmax(dim=0) == label-1

In [None]:
def predict(X):
    """Classify every image in `X` and return the predicted digits as a list of ints.

    Each image is scored with `bayes_pred_stable`; the index of the best-scoring
    class is shifted by one because class slot 0 stands for digit 1 and slot 1
    for digit 2.
    """
    digits = []
    for x in X:
        best_slot = bayes_pred_stable(x).argmax(dim=0)
        digits.append(int(best_slot) + 1)
    return digits

# Classify the 28 training samples at indices 10..37 and show them with the
# predicted digit as each title.
sample_range = range(10, 38)
X = torch.stack([mnist_train[i][0] for i in sample_range], dim=1).squeeze(0).to(device)
y = torch.tensor([mnist_train[i][1] for i in sample_range]).to(device)
preds = predict(X)
show_images(X, 2, 9, titles=list(map(str, preds)));

In [None]:
# Evaluate on the held-out test split. The images and labels must both come
# from `mnist_test`; reading them from `mnist_train` would score the model on
# data it was fitted on.
n_test = len(mnist_test)
X = torch.stack([mnist_test[i][0] for i in range(n_test)],
                dim=1).squeeze(0).to(device)
y = torch.tensor([mnist_test[i][1] for i in range(n_test)]).to(device)
preds = torch.tensor(predict(X), dtype=torch.int32).to(device)
float((preds == y).sum()) / len(y)  # Validation accuracy