# Default project: Model validation


### Load Packages

In [1]:
import torch
import torch.nn as nn
import numpy as np
import sklearn.model_selection as model_selection
import torch.optim as optim
from sklearn import linear_model, preprocessing, svm
from sklearn.metrics import  accuracy_score
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import matplotlib
import matplotlib.pyplot as plt

### Load data

Let's first establish where our dataset is located.

In [2]:
# Directory (relative to this notebook) holding the per-class CSV files.
# The trailing slash matters: file names are appended by plain string concatenation.
training_folder = "training_data/"

We are told that the dataset has one file for each class *k*=0,1,2,... labeled 'Class*k*.csv'.  When that file is loaded, it produces a matrix where the rows contain the samples, the last column contains the label, and the other columns contain the features. 

Let's load the class files one-by-one until there are none left.

In [3]:
# Collect each class's features/labels in lists and join them once at the end,
# rather than re-copying the ever-growing arrays on every iteration.
feature_blocks = []
label_blocks = []
k = 0 # index of the next class file to look for

# Class files are numbered consecutively from 0; the first file that cannot be
# opened marks the end of the dataset.
while True:
    class_file = training_folder + 'Class{:}.csv'.format(k)
    try:
        # ndmin=2 keeps a file with a single sample as a (1, n_columns) matrix
        class_k = np.loadtxt(class_file, ndmin=2)
    except OSError:
        # Only a missing/unreadable file ends the loop. Any other failure
        # (e.g. a malformed row) propagates instead of being silently
        # reported as "no more classes".
        print('loaded %i classes of training data' %k)
        break
    # every column but the last holds features
    feature_blocks.append(class_k[:,:-1])
    # last column holds the label; use the builtin int (np.int was removed in NumPy 1.24)
    label_blocks.append(class_k[:,-1].astype(int))
    # move on to the next class file
    k += 1

# Fail with an explicit message instead of an IndexError further down
if not feature_blocks:
    raise FileNotFoundError(
        'no class files found in {!r} (expected Class0.csv, Class1.csv, ...)'.format(training_folder))

Xtr = np.vstack(feature_blocks)   # (num_samples, num_features)
ytr = np.hstack(label_blocks)     # (num_samples,)

# examine shape
num_classes = k
num_features = Xtr.shape[1]
num_samples = Xtr.shape[0]

print('unique labels: ', np.unique(ytr))
print('number of features: ', num_features)
print('number of samples: ', num_samples)

loaded 20 classes of training data
unique labels:  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
number of features:  20
number of samples:  100000


## Standardize the Data
First, we standardize the data. We compute the mean and standard deviation of each feature over the training set, then subtract the mean from each feature and divide by its standard deviation. This prevents the units of the different features from affecting the model: after standardization, each feature has mean 0 and variance 1.

In [4]:
# Column-wise (per-feature) statistics of the training set
Xtr_mean = np.mean(Xtr, axis=0)
Xtr_std = np.std(Xtr, axis=0)

# Center each feature on its mean, then rescale it by its standard deviation
Xtr_scale = np.divide(np.subtract(Xtr, Xtr_mean), Xtr_std)

### Load and validate a trained sk-learn model

In [5]:
import pickle

sklearn_model_file = 'mlr_sklearn.sav'

# load a trained instance of sklearn.linear_model.LogisticRegression
# NOTE: unpickling can execute arbitrary code -- only load model files from a trusted source.
# The context manager closes the file handle as soon as the model has been read.
with open(sklearn_model_file,'rb') as model_fh:
    model = pickle.load(model_fh)

# predict (this cell feeds the raw, unstandardized features)
ytr_hat = model.predict(Xtr)

# evaluate: fraction of training samples whose predicted label matches the true label
acc = np.mean(ytr_hat == ytr)
print('accuracy of model: ',acc)



accuracy of model:  0.79254


### Load and validate a trained PyTorch model

In [6]:
# load a trained PyTorch model (see 'pytorch_saving_demo.ipynb')
mlr_torch = torch.jit.load("./mlr_torch.pth")
# switch to inference mode so layers such as dropout / batch norm (if any) behave deterministically
mlr_torch.eval()

# predict (this cell feeds the raw, unstandardized features, cast to float32)
# no_grad() disables autograd tracking, so the output converts straight to NumPy
with torch.no_grad():
    scores = mlr_torch(torch.as_tensor(Xtr, dtype=torch.float32)).numpy()
# predicted class = index of the highest score in each row
ytr_hat = np.argmax(scores,axis=1)

# evaluate: fraction of training samples whose predicted label matches the true label
acc = np.mean(ytr_hat == ytr)
print('accuracy of model: ',acc)

accuracy of model:  0.79254


### Running our Logistic Regression

In [7]:
sklearn_model_file = 'Logistic_model.sav'

# load our trained scikit-learn model (presumably a LogisticRegression, per the file name)
# NOTE: unpickling can execute arbitrary code -- only load model files from a trusted source.
# The context manager closes the file handle as soon as the model has been read.
with open(sklearn_model_file,'rb') as model_fh:
    model = pickle.load(model_fh)

# predict on the standardized features
ytr_hat = model.predict(Xtr_scale)

# evaluate: fraction of training samples whose predicted label matches the true label
acc = np.mean(ytr_hat == ytr)
print('accuracy of model: ',acc)

accuracy of model:  0.86993


### Running our SVC

In [8]:
sklearn_model_file = 'SVC_model.sav'

# load our trained scikit-learn classifier (presumably a support vector classifier, per the file name)
# NOTE: unpickling can execute arbitrary code -- only load model files from a trusted source.
# The context manager closes the file handle as soon as the model has been read.
with open(sklearn_model_file,'rb') as model_fh:
    model = pickle.load(model_fh)

# predict on the standardized features
ytr_hat = model.predict(Xtr_scale)

# evaluate: fraction of training samples whose predicted label matches the true label
acc = np.mean(ytr_hat == ytr)
print('accuracy of model: ',acc)

accuracy of model:  0.87188


### Running our SVM 

In [9]:
sklearn_model_file = 'SVM_model.sav'

# load our trained scikit-learn classifier (presumably a support vector machine, per the file name)
# NOTE: unpickling can execute arbitrary code -- only load model files from a trusted source.
# The context manager closes the file handle as soon as the model has been read.
with open(sklearn_model_file,'rb') as model_fh:
    model = pickle.load(model_fh)

# predict on the standardized features
ytr_hat = model.predict(Xtr_scale)

# evaluate: fraction of training samples whose predicted label matches the true label
acc = np.mean(ytr_hat == ytr)
print('accuracy of model: ',acc)

accuracy of model:  0.92225


### Loading our trained 2-layer NN model

In [10]:
# Load our trained 2-layer network (saved as TorchScript)
NN2layer = torch.jit.load('./saved_model2NN.pth')
# switch to inference mode so layers such as dropout / batch norm (if any) behave deterministically
NN2layer.eval()

# Score the standardized training features (cast to float32).
# no_grad() disables autograd tracking, so the output converts straight to NumPy.
with torch.no_grad():
    scores = NN2layer(torch.as_tensor(Xtr_scale, dtype=torch.float32)).numpy()
# predicted class = index of the highest score in each row
ytr_hat = np.argmax(scores,axis=1)

# evaluate: fraction of training samples whose predicted label matches the true label
acc = np.mean(ytr_hat == ytr)
print('Accuracy of 2-Layer NN Model: ',acc)

Accuracy of 2-Layer NN Model:  0.91718


### Loading our trained CNN model

In [11]:
# Load our trained CNN (saved as TorchScript)
CNN = torch.jit.load('./saved_model_CNN.pth')
# switch to inference mode so layers such as dropout / batch norm (if any) behave deterministically
CNN.eval()

# Score the standardized training features (cast to float32).
# unsqueeze(1) inserts a size-1 axis after the sample axis -- presumably the
# channel dimension the convolutional layers expect (TODO confirm against the model).
# no_grad() disables autograd tracking, so the output converts straight to NumPy.
with torch.no_grad():
    cnn_input = torch.as_tensor(Xtr_scale, dtype=torch.float32).unsqueeze(1)
    scores = CNN(cnn_input).numpy()
# predicted class = index of the highest score in each row
ytr_hat = np.argmax(scores,axis=1)

# evaluate: fraction of training samples whose predicted label matches the true label
acc = np.mean(ytr_hat == ytr)
print('Accuracy of CNN Model: ',acc)

Accuracy of CNN Model:  0.90524
