In [1]:
import numpy as np
import pandas as pd
from layer import *
from network import *

In [2]:
# dataset and preprocessing imports, no ML models imported
from sklearn.datasets import load_wine
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load and prepare data

In [3]:
def load_dataset():
    """Load the sklearn wine dataset as a single DataFrame.

    Returns:
        A DataFrame with one column per feature (named after
        ``feature_names``) plus a ``target`` column holding the class
        name of each sample rather than its integer index.
    """
    wine = load_wine()
    features = pd.DataFrame(wine["data"], columns=wine["feature_names"])
    # Map every integer class index to its human-readable class name.
    class_names = wine["target_names"]
    labels = [class_names[class_idx] for class_idx in wine["target"]]
    return features.assign(target=labels)

In [4]:
# Load the wine data: feature columns plus a string "target" column.
df = load_dataset()
# NOTE(review): a bare `df` renders the whole frame; `df.head()` would keep the output compact.
df

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,class_0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0,class_0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0,class_0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0,class_0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0,class_0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0,class_2
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0,class_2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0,class_2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0,class_2


In [5]:
def prepare_data(df):
    """Shorten the long OD column name and one-hot encode the target.

    The function is idempotent: when ``target`` has already been encoded
    (i.e. the column is no longer present) the encoding step is skipped
    instead of raising ``KeyError``. This matters because the notebook
    rebinds ``df`` to the result, so re-running the calling cell passes
    an already-prepared frame back in.

    Args:
        df: DataFrame produced by ``load_dataset`` (or a previous call to
            this function). It is not modified.

    Returns:
        A new DataFrame where ``od280/od315_of_diluted_wines`` is renamed
        to ``od280/od315`` and ``target`` is replaced by integer 0/1
        ``target_<class>`` indicator columns.
    """
    # rename() returns a new frame, so the caller's object is left untouched.
    prepared = df.rename(columns={"od280/od315_of_diluted_wines": "od280/od315"})
    if "target" in prepared.columns:
        prepared = pd.get_dummies(prepared, columns=["target"], dtype=int)
    return prepared
    
# Rebinds `df` to the prepared frame, so from here on `df` holds the one-hot
# encoded targets and re-running this cell feeds that encoded frame back in.
df = prepare_data(df)
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315,proline,target_class_0,target_class_1,target_class_2
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,1,0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,1,0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,1,0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,1,0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,1,0,0


#### Check null values and data types

In [6]:
# Print per-column non-null counts and dtypes to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   alcohol               178 non-null    float64
 1   malic_acid            178 non-null    float64
 2   ash                   178 non-null    float64
 3   alcalinity_of_ash     178 non-null    float64
 4   magnesium             178 non-null    float64
 5   total_phenols         178 non-null    float64
 6   flavanoids            178 non-null    float64
 7   nonflavanoid_phenols  178 non-null    float64
 8   proanthocyanins       178 non-null    float64
 9   color_intensity       178 non-null    float64
 10  hue                   178 non-null    float64
 11  od280/od315           178 non-null    float64
 12  proline               178 non-null    float64
 13  target_class_0        178 non-null    int64  
 14  target_class_1        178 non-null    int64  
 15  target_class_2        1

# Train-test split and scaling X between 0 and 1

In [7]:
def Xy_split(df, y_cols=None):
    """Split a prepared frame into features ``X`` and one-hot targets ``y``.

    Args:
        df: DataFrame containing both feature and target columns. It is
            not modified.
        y_cols: Names of the target columns. Defaults to the three
            one-hot wine class columns ``target_class_0`` ..
            ``target_class_2``.

    Returns:
        Tuple ``(X, y)`` where ``y`` holds only ``y_cols`` (in the given
        order) and ``X`` holds every remaining column.

    Raises:
        KeyError: if any of ``y_cols`` is missing from ``df``.
    """
    if y_cols is None:
        y_cols = ["target_class_0", "target_class_1", "target_class_2"]
    y_cols = list(y_cols)
    # List-based selection and drop() both build new frames, so no
    # up-front copy of the whole input is needed.
    y = df[y_cols]
    X = df.drop(y_cols, axis=1)
    return X, y

In [8]:
# Separate the feature columns (X) from the one-hot target columns (y).
X,y = Xy_split(df)

In [9]:
# Hold out 30% of the samples for testing; random_state is fixed so the split is repeatable.
# NOTE(review): the split is not stratified, so class proportions may differ between train and test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# ensure that every split is a plain np.array rather than a DataFrame
X_train, X_test, y_train, y_test = [np.array(a) for a in [X_train, X_test, y_train, y_test]]

In [10]:
def scale_data(X_train, X_test):
    """Min-max scale both feature sets using statistics from the train set.

    The scaler is fitted on ``X_train`` only, so no information from the
    test set leaks into the scaling parameters. Test values outside the
    training range can therefore fall outside [0, 1].

    Args:
        X_train: Training features, one row per sample.
        X_test: Test features, one row per sample.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled)``; the inputs are copied
        first and are not modified.
    """
    train_copy = X_train.copy()
    test_copy = X_test.copy()
    # Learn per-feature min/max from the training data only.
    scaler = MinMaxScaler().fit(train_copy)
    return scaler.transform(train_copy), scaler.transform(test_copy)

In [11]:
# Rebind the feature arrays to their min-max scaled versions.
X_train, X_test = scale_data(X_train, X_test)

In [12]:
def transpose_data(X_train, X_test, y_train):
    """Return transposed copies of the three arrays.

    The network used in this notebook expects one sample per column, so
    the row-per-sample arrays are flipped. ``y_test`` is intentionally
    not handled here because it is compared row-wise later on.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.

    Returns:
        Tuple ``(X_train.T, X_test.T, y_train.T)`` built from copies, so
        the caller's arrays are never aliased.
    """
    return tuple(array.copy().T for array in (X_train, X_test, y_train))

In [13]:
# Switch to the column-per-sample layout; rebinding means this cell must run exactly once per split.
X_train, X_test, y_train = transpose_data(X_train, X_test, y_train)

In [14]:
def get_Ninputs_Noutputs(X_train, y_train):
    """Read the network's input and output sizes from the training arrays.

    Both arrays are expected to be already transposed (one sample per
    column), so the first axis is the feature / class dimension.

    Returns:
        Tuple ``(n_inputs, n_outputs)``: the length of the first axis of
        ``X_train`` and of ``y_train`` respectively.
    """
    return X_train.shape[0], y_train.shape[0]

In [15]:
# Layer sizes for the first and last layer are derived from the data.
n_inputs, n_outputs = get_Ninputs_Noutputs(X_train, y_train)

# Train Model

In [16]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Negative entries become 0; positive entries pass through unchanged.
    """
    return np.maximum(0, x)

def relu_derivative(x):
    """Derivative of ReLU: 1 where ``x`` is strictly positive, else 0.

    The value at exactly ``x == 0`` is taken to be 0.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

In [17]:
def sigmoid(x):
    """Logistic function ``1 / (1 + e^(-x))``, applied element-wise.

    Maps any real input into the open interval (0, 1).
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Derivative of the logistic function evaluated at ``x``.

    Uses the identity ``s'(x) = s(x) * (1 - s(x))`` so the sigmoid only
    has to be evaluated once.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

In [18]:
# Two ReLU hidden layers followed by a sigmoid output layer.
# NOTE(review): the two-element list is presumably [inputs, units] for each layer,
# so consecutive layers must agree (n_inputs -> 20 -> 10 -> n_outputs) — confirm in layer.py.
hidden1 = HiddenLayer([n_inputs, 20], relu, relu_derivative)
hidden2 = HiddenLayer([20, 10], relu, relu_derivative)
out = LastLayer([10,n_outputs], sigmoid, sigmoid_derivative)

# Layers are passed in forward order, input side first.
net = Network([hidden1, hidden2, out])

In [19]:
# Train on the transposed (column-per-sample) training data.
# NOTE(review): no random seed is set in the visible cells; if the layers initialise
# their weights randomly, the reported error will change between runs — confirm.
net.fit(X_train, y_train, learning_rate=0.1, epochs=150, batch_size=10)

Epoch: 149	Error:0.070300720779149668

In [20]:
# Keep only the last element returned by forward_propagate
# (presumably the output layer's activations — confirm in network.py).
y_pred = net.forward_propagate(X_test)[-1]
# Transpose so the predictions can be iterated sample by sample, like y_test.
y_pred = y_pred.T

In [21]:
def get_category(y_sample):
    """Return the index of the largest value in ``y_sample``.

    Used to turn a vector of per-class scores (or a one-hot row) into a
    single class index. When several entries share the maximum, the first
    one wins.

    Args:
        y_sample: Non-empty 1-D sequence of comparable values.

    Returns:
        The position of the maximum as a plain Python ``int``.

    Raises:
        ValueError: if ``y_sample`` is empty.
    """
    scores = list(y_sample)
    # max() keeps the first of several equal candidates, giving first-max semantics.
    return max(range(len(scores)), key=scores.__getitem__)

In [22]:
# Collapse each score vector / one-hot row to a single class index.
# NOTE(review): both names are rebound from 2-D arrays to flat lists of ints, so this
# cell cannot be re-run on its own output without re-running the cells above first.
y_pred = [get_category(y_sample) for y_sample in y_pred]
y_test = [get_category(y_sample) for y_sample in y_test]

# Results

In [23]:
# Show true and predicted class indices side by side, then the test accuracy in percent.
print("Real:     ",y_test)
print("Predicted:",y_pred)
print("Accuracy =",round(accuracy_score(y_test, y_pred)*100, 2), "%")

Real:      [2, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 1, 2, 0, 1, 0, 0, 1, 2, 1, 0, 2, 0, 0, 0, 2, 1, 2, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 0]
Predicted: [2, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 1, 2, 0, 1, 0, 0, 1, 2, 1, 0, 2, 0, 0, 0, 2, 1, 2, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 0]
Accuracy = 100.0 %
