In [15]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential # model
from tensorflow.keras.layers import Activation, Dense, LeakyReLU # Type of layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from functions import *
from sklearn.metrics import confusion_matrix
from imblearn.over_sampling import SMOTE
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

In [None]:
# Turn on on-demand GPU memory allocation for every visible GPU.
# The device list is empty on CPU-only machines, so iterate instead of
# indexing element 0 (which would raise IndexError). TensorFlow also requires
# the memory-growth setting to be the same on all GPUs.
physical_devices = tf.config.experimental.list_physical_devices("GPU")
print("NUM GPUs Available: ", len(physical_devices))
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth can only be configured before the GPUs are initialised;
    # this happens when the cell is re-run in an already-used kernel.
    print("Could not set GPU memory growth:", err)

In [None]:
# Read the wine table, using the CSV's "index" column as the row index, and
# report its columns and its shape before and after dropping duplicate rows.
wine_raw = pd.read_csv("assignment-files/wine.csv", index_col="index")
print(wine_raw.columns)
print(wine_raw.shape)

df = wine_raw.drop_duplicates()
print(df.shape)

In [None]:
# Build the label vector and feature matrix, then make a stratified 80/20
# train/test split.
# "quality" is removed from the features together with the label column
# (NOTE(review): presumably "target" is derived from "quality" - confirm).
# A fixed seed keeps the shuffled split, and therefore every metric reported
# further down, reproducible across Restart & Run All.
SPLIT_SEED = 42
y = df["target"].values
X = df.drop(["quality", "target"], axis=1).values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, shuffle=True, random_state=SPLIT_SEED
)

In [None]:
# Standardise the features. The scaler is fitted on the training partition
# only and then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = (scaler.transform(split) for split in (X_train, X_test))

In [None]:
# Fully connected classifier: ReLU hidden layers of width 10 -> 20 -> 40,
# followed by a 2-unit softmax output (one unit per class).
# The input width is taken from the training matrix instead of being
# hard-coded, so the model keeps matching the data if the feature set changes.
n_features = X_train.shape[1]
layer = Dense(units=20, activation="relu")
model = Sequential([
    Dense(units=10, activation="relu", input_shape=(n_features,)),
    layer,
    Dense(units=40, activation="relu"),
    Dense(units=2, activation="softmax"),
])

In [None]:
# Training callbacks:
#   es - stops training once val_loss has gone 50 epochs without improving.
#   mc - saves the model to disk each time val_accuracy reaches a new best.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=50,
    verbose=1,
)
mc = ModelCheckpoint(
    'best_model_unbalanced.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Compile the network with Adam and a sparse categorical cross-entropy loss.
# The sparse loss takes class ids as labels rather than one-hot vectors
# (assumes y_train holds integer class ids - TODO confirm).
# `metrics` is passed as a list: newer Keras releases reject a bare string.
learning_rate = 0.0005
loss = "sparse_categorical_crossentropy"
metrics = ["accuracy"]
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss=loss,
    metrics=metrics,
)

In [None]:
# Train without per-epoch logging. Keras holds out 40% of the training data
# for validation, and the callbacks handle early stopping and checkpointing.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=5,
    epochs=500,
    validation_split=0.4,
    callbacks=[es, mc],
    verbose=0,
)

In [None]:
# Reload the checkpoint file written by the ModelCheckpoint callback.
best_val_model = load_model(filepath="best_model_unbalanced.h5")

In [None]:
# Score the held-out test set with the reloaded model, convert the softmax
# outputs to class ids, and tabulate them against the true labels.
# NOTE(review): the label order assumes class 0 is "bad" and class 1 is
# "good" - confirm.
plot_labels = ["bad", "good"]
predictions = best_val_model.predict(x=X_test, batch_size=100)
rounded_predictions = predictions.argmax(axis=-1)
cm = confusion_matrix(y_true=y_test, y_pred=rounded_predictions)

In [None]:
# Report loss and accuracy on the test set, print the label array's shape,
# then draw the confusion matrix through the plotting helper, passing it an
# output path for the figure.
results = best_val_model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=500,
    verbose=0,
)
print("test loss, test acc:", results)
print(y_test.shape)
plot_confusion_matrix(
    cm=cm,
    classes=plot_labels,
    title="CM using unbalanced, categorical data",
    savefig_location="visuals/cm_best_unbalanced_NN_categories.png",
)

In [None]:
# Pass the History object returned by model.fit to the loss helper.
# NOTE(review): plot_and_print_loss is not defined in this notebook; it
# presumably comes from the `from functions import *` star import - confirm.
plot_and_print_loss(history)