# Classification of Iris dataset

In [None]:
from sklearn.datasets import load_iris

In [None]:
iris_data = load_iris()

In [None]:
print(iris_data.keys())

In [None]:
# The feature matrix is 2-D: axis 0 indexes samples, axis 1 indexes features.
N = iris_data.data.shape[0]
D = iris_data.data.shape[1]
print("Number of samples: ", N)
print("Number of features: ", D)

In [None]:
print(iris_data.DESCR)

In [None]:
import pandas as pd
iris = pd.DataFrame(iris_data.data)

In [None]:
iris.head()

In [None]:
iris.columns = iris_data.feature_names

In [None]:
iris.head()

In [None]:
iris['class'] = iris_data.target

In [None]:
iris.head()

Classification is a supervised learning task in which the response is categorical. Here the three iris species are encoded as integers:
- "0": setosa
- "1": versicolor
- "2": virginica

In [None]:
class_names = ["setosa", "versicolor", 'virginica']

# Data preprocessing

In [None]:
# Load packages
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [None]:
import numpy as np
# Copy the raw features into an array and standardize each feature column
# with the scaler created above.
X = scaler.fit_transform(np.array(iris_data.data))

In [None]:
Y = np.array(iris_data.target)

##### Split the dataset into train and test

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Split the raw features (not the already-standardized X) so the scaler can be
# fitted on the training portion only; fitting it on all samples leaks
# test-set statistics into training. stratify=Y keeps the class proportions
# the same in both splits, which matters for a dataset this small.
X_train, X_test, Y_train, Y_test = train_test_split(
    np.array(iris_data.data), Y, test_size=0.3, stratify=Y
)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

## Data Loading

## Three steps
1. Define the initialization method (\_\_init\_\_)
2. Define the length method (\_\_len\_\_)
3. Define a method that returns the item at a given index (\_\_getitem\_\_)

In [None]:
from torch.utils.data import Dataset

class ClassificationDataset(Dataset):
    """Map-style dataset that pairs each sample with its target.

    ``data`` and ``output`` are stored as given (no copy, no conversion) and
    are indexed in parallel.
    """

    def __init__(self, data, output):
        """Keep references to the samples (``data``) and targets (``output``)."""
        self.data, self.output = data, output

    def __len__(self):
        """Return the number of samples, taken from ``data``."""
        n_samples = len(self.data)
        return n_samples

    def __getitem__(self, ind):
        """Return the ``(sample, target)`` pair stored at position ``ind``."""
        sample = self.data[ind]
        target = self.output[ind]
        return sample, target

In [None]:
train_dataset = ClassificationDataset(X_train, Y_train)
test_dataset = ClassificationDataset(X_test, Y_test)

In [None]:
from torch.utils.data import DataLoader

In [None]:
# Training batches are reshuffled every epoch; test batches keep dataset order
# so that concatenated predictions stay aligned with Y_test.
train_loader = DataLoader(dataset=train_dataset, batch_size=50, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=50, shuffle=False)

# Define Model

In [None]:
import torch
from torch import nn
from torch.nn import functional as F

In [None]:
class ClassificationModel(nn.Module):
    """Feed-forward classifier with two hidden layers and dropout.

    Args:
        feature_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output units (one score per class).
        dropout_p: Probability of zeroing a hidden activation while the
            module is in training mode. Defaults to 0.4.
    """

    def __init__(self, feature_dim, hidden_dim, output_dim, dropout_p=0.4):
        super().__init__()

        self.hidden1 = nn.Linear(feature_dim, hidden_dim)
        self.hidden2 = nn.Linear(hidden_dim, hidden_dim)
        self.predict = nn.Linear(hidden_dim, output_dim)
        # One Dropout module is shared by both hidden layers; it has no
        # parameters, so sharing it is safe.
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, x):
        """Return unnormalized class scores (logits) for the batch ``x``.

        No softmax is applied here.
        """
        x = self.dropout(F.relu(self.hidden1(x)))
        x = self.dropout(F.relu(self.hidden2(x)))
        return self.predict(x)

In [None]:
# 100 units per hidden layer, 3 output scores (one per iris class).
hidden_dim, output_dim = 100, 3
model = ClassificationModel(
    feature_dim=D, hidden_dim=hidden_dim, output_dim=output_dim
)

In [None]:
print(model)

In [None]:
from draw_neural_net import draw_neural_net
from matplotlib import pyplot as plt
# %matplotlib inline

In [None]:
# fig = plt.figure(figsize=(12, 12))
# ax = fig.gca()
# ax.axis('off')
# draw_neural_net(ax, .1, .9, .1, .9, np.array([D, hidden_dim, hidden_dim, output_dim]).astype(int))
# plt.show()

# Training Strategy

- We need a loss function to measure the prediction error
- and an optimizer to update the model parameters

##### Let's define the values needed to set up the training process

In [None]:
num_epochs = 100 
lr = 0.01

In [None]:
# CrossEntropyLoss combines log_softmax with NLL loss, so the model can emit
# raw scores.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [None]:
from tqdm import trange

In [None]:
# Training loop
# Make sure dropout is active even when this cell is re-run after the
# evaluation cell, which switches the model to eval mode.
model.train()

losses = []  # one entry per epoch: the sum of that epoch's batch losses
epochs = trange(num_epochs, desc="Training Loss")
for epoch in epochs:
    running_loss = 0
    for inputs, outputs in train_loader:
        # Cast the batch: float32 features for the linear layers, int64 class
        # indices for CrossEntropyLoss.
        inputs = inputs.float()
        outputs = outputs.long()

        # Zero the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass: compute the class scores for this batch
        y_pred = model(inputs)
        # Compute the loss
        loss = criterion(y_pred, outputs)
        running_loss += loss.item()
        # Backward pass, then update the parameters
        loss.backward()
        optimizer.step()

    epochs.set_description("Training_loss: %g" % running_loss)
    losses.append(running_loss)

In [None]:
plt.plot(losses)
plt.show()

#### Evaluate the model

In [None]:
# Switch to inference mode so dropout is disabled while predicting.
model.eval()

preds = []
with torch.no_grad():  # no gradients are needed for evaluation
    for inputs, _targets in test_loader:
        # Targets are not needed here; predictions are compared with Y_test
        # in the cells below. Cast the inputs the same way as in training.
        batch_scores = model(inputs.float())
        preds.append(batch_scores.numpy())

# Stack the per-batch scores: one row per test sample, one column per class.
prediction = np.concatenate(preds)

In [None]:
Y_pred = np.argmax(prediction, 1)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
# scikit-learn metrics take (y_true, y_pred) in that order.
print(accuracy_score(Y_test, Y_pred))

In [None]:
# confusion_matrix takes (y_true, y_pred): rows are the true classes and
# columns the predicted ones. Passing them the other way round transposes the
# matrix. `labels` pins the matrix to one row/column per entry of class_names
# even if a class is absent from the test split.
# NOTE(review): presumably print_confusion_matrix expects true classes on the
# rows -- confirm against the cm module.
conf_mat = confusion_matrix(
    Y_test, Y_pred, labels=list(range(len(class_names)))
)

In [None]:
from cm import print_confusion_matrix

In [None]:
fig = print_confusion_matrix(conf_mat, class_names)
plt.show()