# Wine Data Set

The features contained in this dataset are: 
    1. Fixed acidity 
    2. Volatile acidity
    3. Citric acid
    4. Residual sugar
    5. Chlorides
    6. Free sulfur dioxide
    7. Total sulfur dioxide
    8. Density
    9. PH
    10. Sulphates
    11. Alcohol
    12. Quality 
And the label column (called style) identifies if a certain wine is white (1) or red (0). <br>
This dataset is available to download [here](https://www.kaggle.com/sgus1318/winedata). 

## Preprocessing

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

Using TensorFlow backend.


In [2]:
df = pd.read_csv('wine_dataset.csv')
df = df.fillna(0)
df.head()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,style
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red


In [3]:
scaler = preprocessing.StandardScaler()
columns = df.columns[0:12]
df[columns] = scaler.fit_transform(df[columns])
df.head()

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,style
0,0.142473,2.188833,-2.192833,-0.744778,0.569958,-1.10014,-1.446359,1.034993,1.81309,0.193097,-0.915464,-0.93723,red
1,0.451036,3.282235,-2.192833,-0.59764,1.197975,-0.31132,-0.862469,0.701486,-0.115073,0.999579,-0.580068,-0.93723,red
2,0.451036,2.5533,-1.917553,-0.660699,1.026697,-0.874763,-1.092486,0.768188,0.25812,0.797958,-0.580068,-0.93723,red
3,3.073817,-0.362438,1.661085,-0.744778,0.541412,-0.762074,-0.986324,1.101694,-0.363868,0.32751,-0.580068,0.207999,red
4,0.142473,2.188833,-2.192833,-0.744778,0.569958,-1.10014,-1.446359,1.034993,1.81309,0.193097,-0.915464,-0.93723,red


In [4]:
label_encoder = preprocessing.LabelEncoder()
df['style'] = label_encoder.fit_transform(df['style'])
print(df['style'])

0       0
1       0
2       0
3       0
4       0
5       0
6       0
7       0
8       0
9       0
10      0
11      0
12      0
13      0
14      0
15      0
16      0
17      0
18      0
19      0
20      0
21      0
22      0
23      0
24      0
25      0
26      0
27      0
28      0
29      0
       ..
6467    1
6468    1
6469    1
6470    1
6471    1
6472    1
6473    1
6474    1
6475    1
6476    1
6477    1
6478    1
6479    1
6480    1
6481    1
6482    1
6483    1
6484    1
6485    1
6486    1
6487    1
6488    1
6489    1
6490    1
6491    1
6492    1
6493    1
6494    1
6495    1
6496    1
Name: style, Length: 6497, dtype: int32


In [5]:
df.head()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,style
0,0.142473,2.188833,-2.192833,-0.744778,0.569958,-1.10014,-1.446359,1.034993,1.81309,0.193097,-0.915464,-0.93723,0
1,0.451036,3.282235,-2.192833,-0.59764,1.197975,-0.31132,-0.862469,0.701486,-0.115073,0.999579,-0.580068,-0.93723,0
2,0.451036,2.5533,-1.917553,-0.660699,1.026697,-0.874763,-1.092486,0.768188,0.25812,0.797958,-0.580068,-0.93723,0
3,3.073817,-0.362438,1.661085,-0.744778,0.541412,-0.762074,-0.986324,1.101694,-0.363868,0.32751,-0.580068,0.207999,0
4,0.142473,2.188833,-2.192833,-0.744778,0.569958,-1.10014,-1.446359,1.034993,1.81309,0.193097,-0.915464,-0.93723,0


In [6]:
df.to_csv("wine_data.csv", index=False)

In [7]:
df = pd.read_csv('wine_data.csv')
df = df.fillna(0)
df.head()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,style
0,0.142473,2.188833,-2.192833,-0.744778,0.569958,-1.10014,-1.446359,1.034993,1.81309,0.193097,-0.915464,-0.93723,0
1,0.451036,3.282235,-2.192833,-0.59764,1.197975,-0.31132,-0.862469,0.701486,-0.115073,0.999579,-0.580068,-0.93723,0
2,0.451036,2.5533,-1.917553,-0.660699,1.026697,-0.874763,-1.092486,0.768188,0.25812,0.797958,-0.580068,-0.93723,0
3,3.073817,-0.362438,1.661085,-0.744778,0.541412,-0.762074,-0.986324,1.101694,-0.363868,0.32751,-0.580068,0.207999,0
4,0.142473,2.188833,-2.192833,-0.744778,0.569958,-1.10014,-1.446359,1.034993,1.81309,0.193097,-0.915464,-0.93723,0


## Machine Learning Classification

In [17]:
X_train, X_test, y_train, y_test=train_test_split(df.iloc[:,0:12], df['style'], test_size=0.33, random_state=8)

In [18]:
# Initialize the constructor
model=Sequential()
# Add an input layer 
model.add(Dense(13, activation='relu', input_shape=(12,)))

# Add one hidden layer 
model.add(Dense(60, activation='relu'))
model.add(Dense(30, activation='relu'))

# Add an output layer 
model.add(Dense(1, activation='relu'))

model.output_shape
# Model summary
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 13)                169       
_________________________________________________________________
dense_6 (Dense)              (None, 60)                840       
_________________________________________________________________
dense_7 (Dense)              (None, 30)                1830      
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 31        
Total params: 2,870
Trainable params: 2,870
Non-trainable params: 0
_________________________________________________________________


In [9]:
model.compile(loss='binary_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.fit(X_train, y_train,epochs=400, batch_size=100, verbose=0, callbacks=[EarlyStopping(monitor='acc', patience=200)])

W0804 11:15:22.875810 19396 deprecation_wrapper.py:119] From C:\Users\hp\Anaconda3\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0804 11:15:22.906729 19396 deprecation_wrapper.py:119] From C:\Users\hp\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:3376: The name tf.log is deprecated. Please use tf.math.log instead.

W0804 11:15:22.915704 19396 deprecation.py:323] From C:\Users\hp\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0804 11:15:23.145091 19396 deprecation_wrapper.py:119] From C:\Users\hp\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add in

<keras.callbacks.History at 0x1f5253ccdd8>

## Evaluation

In [10]:
# over the training set
y_pred=model.predict_classes(X_train)
print(confusion_matrix(y_train, y_pred))
print(classification_report(y_train, y_pred))

[[1060   10]
 [   1 3281]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      1070
           1       1.00      1.00      1.00      3282

   micro avg       1.00      1.00      1.00      4352
   macro avg       1.00      1.00      1.00      4352
weighted avg       1.00      1.00      1.00      4352



In [11]:
# over the test set
y_pred=model.predict_classes(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

[[ 521    8]
 [   3 1613]]
              precision    recall  f1-score   support

           0       0.99      0.98      0.99       529
           1       1.00      1.00      1.00      1616

   micro avg       0.99      0.99      0.99      2145
   macro avg       0.99      0.99      0.99      2145
weighted avg       0.99      0.99      0.99      2145



In [9]:
import tensorflowjs as tfjs

tfjs.converters.save_keras_model(model, 'trainedmodel')

ModuleNotFoundError: No module named 'tensorflowjs'