In [1]:
import pandas            as pd
import numpy             as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# import dataset 
white_wine = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep =";")
red_wine = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep=";")

In [3]:
red_wine["type"] = 0
white_wine["type"] = 1
wines = red_wine.append(white_wine, ignore_index= True)
wines

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,type
0,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,0
1,7.8,0.88,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5,0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5,0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6,0
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6492,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6,1
6493,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5,1
6494,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6,1
6495,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7,1


In [4]:
# create features and label 
x = wines.drop(columns= ["type"])
y = np.ravel(wines.type)

# split into train and test dataset 
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size= 0.33, random_state= 42)
print(x_train.shape)
print(x_test.shape)

(4352, 12)
(2145, 12)


In [5]:
# preprocessing 
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [6]:
# create the ANN model 
model = Sequential()

# add layers 
model.add(Dense(12, activation= "relu", input_shape = (12,))) # input layers 
model.add(Dense(8, activation= "relu")) # hidden layers 
model.add(Dense(1, activation= "sigmoid")) # Output layer

print(model.get_weights())
print(model.output_shape)

[array([[-0.08188176,  0.38857186,  0.03640628, -0.32731318, -0.16255713,
        -0.2633363 , -0.32453942, -0.26175916,  0.3360244 , -0.29483247,
         0.36785305,  0.44811738],
       [-0.07595193,  0.49125195,  0.15490627, -0.25395477, -0.22461152,
        -0.10399616,  0.0207783 , -0.3779087 ,  0.2800659 , -0.19678605,
         0.02203178, -0.36757314],
       [-0.05561948,  0.43246925, -0.05111063,  0.09080756, -0.21758282,
         0.03882682,  0.42539704,  0.38295114, -0.03951001, -0.09436524,
        -0.36330616, -0.44570696],
       [-0.06545615, -0.4521445 ,  0.20029616, -0.22680664,  0.32686877,
         0.27462435,  0.47919858,  0.14340436,  0.08417928, -0.23150098,
        -0.29538608, -0.04053319],
       [ 0.08372235, -0.3595475 ,  0.23462868, -0.29938912,  0.34270108,
        -0.42103744,  0.40204597, -0.41772115, -0.48956692, -0.40273297,
         0.42819297,  0.32852995],
       [-0.13264942, -0.42083   , -0.42424715,  0.24050784,  0.36347592,
         0.20949948, 

In [7]:
# Complile the model 
model.compile(loss= "binary_crossentropy", optimizer = "adam", metrics= ["accuracy"])
# fit the model 
model.fit(x_train,y_train, batch_size= 1, epochs= 20, verbose= 1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1d8fd5fdd90>

In [8]:
# make predictions on the test dataset 
y_pred = model.predict(x_test)
print(y_pred)
predictions = [1 if p > 0.5 else 0 for p in y_pred]
print(predictions[:20])

[[0.99977744]
 [0.00293019]
 [0.9997349 ]
 ...
 [0.99917996]
 [0.99442124]
 [0.99995863]]
[1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1]


In [9]:
# create dataframes of actual vs predicted values 
df = pd.DataFrame({
    "Actual_values": y_test,
    "Predicted_values": predictions
})
df[:20]

Unnamed: 0,Actual_values,Predicted_values
0,1,1
1,0,0
2,1,1
3,1,1
4,1,1
5,0,0
6,1,1
7,1,1
8,0,0
9,1,1


In [10]:
#Evaluate the model 
score = model.evaluate(x_test, y_test, verbose = 1)
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, classification_report
cmat= confusion_matrix(y_test, y_pred.round())
accuracy = accuracy_score(y_test, y_pred.round())
precision = precision_score(y_test, y_pred.round())
recall = recall_score(y_test, y_pred.round())
print("Precision:", precision)
print(f"accuracy: {accuracy}")
print(f"recall: {recall}")
print(f"confusion matrix:\n {cmat}")
print("\nclassification report:", "\n",classification_report(y_test, y_pred.round()))

Precision: 0.9943573667711598
accuracy: 0.9948717948717949
recall: 0.9987405541561712
confusion matrix:
 [[ 548    9]
 [   2 1586]]

classification report: 
               precision    recall  f1-score   support

           0       1.00      0.98      0.99       557
           1       0.99      1.00      1.00      1588

    accuracy                           0.99      2145
   macro avg       1.00      0.99      0.99      2145
weighted avg       0.99      0.99      0.99      2145

