In [7]:
import os

import numpy as np
import pandas as pd
import mlrose_hiive as ml_h
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import f1_score

In [3]:
SEED = 42

In [4]:
df = pd.read_csv('datasets/wine-quality/wine-quality-all.csv', delimiter=';', encoding='utf-8')
X = df.drop('quality', axis=1)
y = df['quality']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

X_train_color = X_train['color'].values.reshape(-1, 1)
X_test_color = X_test['color'].values.reshape(-1, 1)

In [5]:
encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

X_train_encoded_color = encoder.fit_transform(X_train_color)
X_test_encoded_color = encoder.transform(X_test_color)

# Step 4: Concatenate the encoded 'color' column with the original data
X_train_reset = X_train.drop(columns=['color']).reset_index(drop=True)
X_train_encoded_color_reset = pd.DataFrame(X_train_encoded_color,
                                           columns=encoder.get_feature_names_out(['color'])).reset_index(drop=True)
X_train_e = pd.concat([X_train_reset, X_train_encoded_color_reset], axis=1)

X_test_reset = X_test.drop(columns=['color']).reset_index(drop=True)
X_test_encoded_color_reset = pd.DataFrame(X_test_encoded_color,
                                          columns=encoder.get_feature_names_out(['color'])).reset_index(drop=True)
X_test_e = pd.concat([X_test_reset, X_test_encoded_color_reset], axis=1)

X_all_e = pd.concat((X_train_e, X_test_e), axis=0)

#
scaler = MinMaxScaler()

X_train_n = scaler.fit_transform(X_train_e)
X_test_n = scaler.transform(X_test_e)

X_all_n = np.concatenate((X_train_n, X_test_n), axis=0)




In [10]:
X_val, X_test, y_val, y_test = train_test_split(X_test_n, y_test, test_size=0.5, random_state=SEED)

# Encode labels
one_hot = OneHotEncoder()

y_train_hot = one_hot.fit_transform(y_train.values.reshape(-1, 1)).todense()
y_val_hot = one_hot.transform(y_val.values.reshape(-1, 1)).todense()
y_test_hot = one_hot.transform(y_test.values.reshape(-1, 1)).todense()

In [53]:
nn_model1 = ml_h.NeuralNetwork(hidden_nodes=[50, 50], activation='relu',
                                 algorithm='gradient_descent', max_iters=2000,
                                 bias=True, is_classifier=True, learning_rate=0.0001,
                                 early_stopping=False, clip_max=20, max_attempts=200,
                                 random_state=SEED)
y_train_pred = nn_model1.predict(X_train_n)
y_test_pred = nn_model1.predict(X_test_n)
nn_model1.fit(X_train_n, y_train_hot)

In [54]:


y_train_micro_f1 = f1_score(np.asarray(y_train_hot), np.asarray(y_train_pred), average='micro')
y_test_micro_f1 = f1_score(np.asarray(y_test_hot), np.asarray(y_test_pred), average='micro')
print(y_train_micro_f1)
print(y_test_micro_f1)

0.5716635338345865
0.5573308270676691


In [25]:
y_train

2629    8
511     7
62      5
3128    5
4233    6
       ..
3092    5
3772    5
5191    6
5226    5
860     6
Name: quality, Length: 4256, dtype: int64

In [26]:
for i in range(10):
    print(y_train_hot[i])


[[0. 0. 0. 0. 0. 1. 0.]]
[[0. 0. 0. 0. 1. 0. 0.]]
[[0. 0. 1. 0. 0. 0. 0.]]
[[0. 0. 1. 0. 0. 0. 0.]]
[[0. 0. 0. 1. 0. 0. 0.]]
[[0. 0. 0. 1. 0. 0. 0.]]
[[0. 0. 1. 0. 0. 0. 0.]]
[[0. 0. 1. 0. 0. 0. 0.]]
[[0. 0. 0. 1. 0. 0. 0.]]
[[0. 0. 0. 0. 1. 0. 0.]]


In [27]:
for i in range(100):
    print(y_train_pred[i])

[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 0 1 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 0 1 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 0 1 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 0 1 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 1 0 0 0 0 0]
[0 0 0 0 1 0 0]
[0 0 0 0 1 0 0]
[0 0 1 0 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 0 1 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 0 1 0 0]
[0 0 0 1 0 0 0]
[0 0 0 1 0 0 0]
[0 0 1 0 0 0 0]
[0 0 0 1 0 0 0]
[0 0 0 0

In [28]:
y_train_hot

matrix([[0., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])