## PyTorch vs Manual Model for Classification
In this section I would compare the performance of a classification task using PyTorch instances and an instance of the used NNetwork class with the IRIS dataset (lately used to train MNIST dataset).

Import the data

In [21]:
import model_class as mm
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import torch
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

In [22]:
np.random.seed(42)
torch.manual_seed(42)

df = pd.read_csv('data/iris.csv', na_values=["NA", "?"])
df.info()
print(f"Labels: {df['species'].unique()}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   sepal_l  150 non-null    float64
 1   sepal_w  150 non-null    float64
 2   petal_l  150 non-null    float64
 3   petal_w  150 non-null    float64
 4   species  150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
Labels: ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


### Training with manual model

In [23]:
def load_mm_data():
    X = df[['sepal_l', 'sepal_w', 'petal_l', 'petal_w']].values.astype(np.float32)
    y = df['species'].values
    
    # encoding the labels in y
    encoder = OneHotEncoder(sparse_output=False)
    y_encoded = encoder.fit_transform(y.reshape(-1,1)).astype(np.int32)
    
    # split training and validation data
    x_train, x_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
    
    # normalize x set
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    
    return x_train, x_test, y_train, y_test

def iris_data_wrapper(X, y):
    inputs = [np.reshape(x, (4, 1)) for x in X ]
    results = [np.reshape(y_i, (3, 1)) for y_i in y]
    data = list(zip(inputs, results))
    return data

# load normalized and encoded data
x_train, x_test, y_train, y_test = load_mm_data()

# prepare data for NNetwork
train_data = iris_data_wrapper(x_train, y_train)
test_data = iris_data_wrapper(x_test, y_test)

# initialize and train the network
net = mm.NNetwork([4, 30, 3]) 

In [24]:
# train with stochastic gradient descend
net.sgd(train_data, epochs=30, mini_batch_size=10, eta_learning_rate=0.5, test_data=test_data)   

bias sample b[0][5] [-0.12534931]


TypeError: only length-1 arrays can be converted to Python scalars