<a href="https://colab.research.google.com/github/powerSeries/NN-projects/blob/main/project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Abbreviated data preparation for the Wine Quality Problem (Project 1)

In [2]:
# Import Required Libraries  
import matplotlib.pyplot as plt 
import numpy as np 
# This time we need to also import pandas
import pandas as pd 

# Read in white wine data
# pd.read_csv returns a pandas DataFrame; the file is ';'-separated.
white = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", sep =';') 

# Read in red wine data (same ';'-separated layout)
red = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv", sep =';') 

# Add a `type` column to `red` with value one (1 = red wine)
red['type'] = 1

# Add a `type` column to `white` with value zero (0 = white wine)
white['type'] = 0

# Stack `red` first and `white` second into a SINGLE DataFrame with a fresh
# 0..n-1 index. DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0, so pd.concat is used; it yields the same rows in the same order.
wines = pd.concat([red, white], ignore_index=True)

# Import SKLEARN
import sklearn
# Import `train_test_split` from `sklearn.model_selection` 
from sklearn.model_selection import train_test_split 

# Feature matrix: the first 11 columns plus the column at position 12
# (presumably `type`, the column added last); position 11 is skipped
# (presumably `quality`, the target -- confirm against wines.columns).
X1 = wines.iloc[:, :11]
X2 = wines.iloc[:, 12]
X = pd.concat((X1, X2), axis=1)

# Target: the QUALITY scores as a flat 1-D NumPy array
y = np.asarray(wines.quality).ravel()

# Hold out 25% of the rows for validation; random_state fixes the shuffle
# so the split is reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=45,
)


In [3]:
# CONVERTING the X_train & X_valid DataFrames to NumPy arrays
# Will USE NumPy, TF & Keras after this
# import tensorflow as tf 

# np.asarray accepts a DataFrame as well as an ndarray, so this cell can be
# re-run safely. The earlier `X_valid = X_valid.to_numpy()` raised
# AttributeError on a second run because X_valid was already an ndarray.
Xtrain = np.asarray(X_train)
X_valid = np.asarray(X_valid)

In [4]:
# In practice:
# [1] ALL of the Xtrain patterns (with their y_train targets)
#     will be used for TRAINING ([TR]), as Xtrain & y_train.
# [2] MOST of the X_valid patterns (and their y_valid targets)
#     will be used for VALIDATION ([TT]), as Xval & y_val,
#     BUT WE WILL SET ASIDE THE LAST 10 for "testing" ([TS]),
#     as Xtst & y_tst.

In [5]:
# To separate the last 10 rows of X_valid, first inspect its shape (rows, features)
X_valid.shape

(1625, 12)

In [6]:
# Also verify the shape of y_valid (one target per X_valid row)
y_valid.shape

(1625,)

In [7]:
# Retain everything except the last 10 rows for validation ([TT]).
# A negative slice replaces the hard-coded row count (1615 of 1625), which
# would silently go stale if the dataset or test_size changed.
Xval = X_valid[:-10]
Xval.shape

(1615, 12)

In [8]:
# and now set aside the last 10 rows for "test" ([TS]).
# A negative slice keeps this at exactly 10 rows regardless of how many
# rows X_valid holds (the original hard-coded the start index 1615).
Xtst = X_valid[-10:]
Xtst.shape

(10, 12)

In [9]:
# SAME FOR THE CORRESPONDING TARGETS
# Retain the first 1615 for validation ([TT])
y_val = y_valid[:1615]
y_val.shape

(1615,)

In [10]:
# Set aside the last 10 targets for "test" ([TS]); the negative slice keeps
# this at exactly 10 targets without hard-coding the start index (1615).
y_tst = y_valid[-10:]
y_tst.shape

(10,)

In [11]:
# Display the 10 held-out raw quality scores
y_tst

array([5, 5, 7, 6, 5, 5, 6, 6, 7, 6])

In [12]:
 # NOW, IN ADDITION, CREATE THE TARGETS AS ONE-HOT-ENCODED 4 quality LEVELS

In [13]:
# We will track these 11 training targets (positions 272-282) through the conversion process
y_train[272:283]

array([5, 4, 6, 5, 5, 6, 7, 6, 5, 8, 5])

In [14]:
# Function that creates a rank-1 array in which quality scores (3,4,5,6,7,8,9) are mapped to levels 1, 2, 3 or 4
def to_4cs(x):
  """Map raw wine-quality scores to four ordinal quality levels.

  Mapping (upper bounds are inclusive):
      score <= 3       -> 1  (BAD wine)
      3 < score <= 6   -> 2  (MEDIUM wine)
      6 < score <= 8   -> 3  (GOOD wine)
      score > 8        -> 4  (EXCELLENT wine)

  Args:
      x: 1-D array-like of numeric scores (NumPy array, list or pandas
         Series). Elements are read by position, so a Series with a
         non-default index is handled correctly.

  Returns:
      Integer NumPy array of the same length as `x` with values in {1, 2, 3, 4}.
  """
  scores = np.asarray(x)
  # With right=True, digitize returns the index i such that
  # bins[i-1] < score <= bins[i], i.e. 0..3 for the four levels; +1 shifts
  # that to the 1..4 labels used throughout the notebook.
  levels = np.digitize(scores, bins=[3, 6, 8], right=True) + 1
  return levels.astype(int)

In [15]:
# Collapse the raw quality scores of each split (train / validation / test)
# into the four quality levels produced by to_4cs.
train_labels, val_labels, tst_labels = (
    to_4cs(scores) for scores in (y_train, y_val, y_tst)
)

In [16]:

# Let's verify that the training targets that we are tracking
# were converted to levels (1 = BAD; 2 = MEDIUM; 3 = GOOD; 4 = EXCELLENT) correctly:
train_labels[272:283]

array([2, 2, 2, 2, 2, 2, 3, 2, 2, 3, 2])

In [17]:
# NOW, ONE-HOT ENCODING OF ALL 3  TARGET ARRAYS
# define a function to do the one-hot-encoding of output labels

def to_one_hot(labels, dimension=4):
    """One-hot encode 1-based class labels.

    Args:
        labels: 1-D array-like of integer labels in the range 1..dimension
            (NumPy array or plain list).
        dimension: number of classes, i.e. number of output columns.

    Returns:
        Float array of shape (len(labels), dimension); row i holds 1.0 in
        column labels[i] - 1 and 0.0 elsewhere.

    Raises:
        IndexError: if any label lies outside 1..dimension. Previously a
            label of 0 silently wrapped around to the LAST column through
            negative indexing.
    """
    labels = np.asarray(labels)
    results = np.zeros((len(labels), dimension))
    if labels.size == 0:
        return results
    # Labels are 1-based; column indices are 0-based.
    columns = labels - 1
    if columns.min() < 0 or columns.max() >= dimension:
        raise IndexError(
            "labels must lie in 1..%d, got values in %s..%s"
            % (dimension, labels.min(), labels.max())
        )
    # Set one cell per row in a single vectorised assignment.
    results[np.arange(len(labels)), columns] = 1.
    return results

# One-hot encode the quality levels of each split (train / validation / test)
# with the default of 4 classes.
one_hot_train_labels, one_hot_val_labels, one_hot_tst_labels = map(
    to_one_hot, (train_labels, val_labels, tst_labels)
)

In [18]:
# Let's verify that the training targets we have tracked were
# one-hot encoded correctly (level k sets column k-1 to 1)
one_hot_train_labels[272:283,]

array([[0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.]])

In [19]:
# SO, AFTER EXECUTING THIS CELL, YOU WILL HAVE:
# FOR TRAINING:
#  Xtrain (4872, 12)...y_train (4872,)...train_labels(4872,)....one_hot_train_labels (4872,4)
# FOR VALIDATING:
#  Xval (1615, 12)...y_val (1615,)...val_labels(1615,)...one_hot_val_labels (1615,4)
# FOR TESTING:
#  Xtst (10, 12)...y_tst (10,)...tst_labels(10,)... one_hot_tst_labels (10,4)
# PLEASE DO NOT CHANGE THE NAMES OF THESE VARIABLES (So that instructor can use them)

 ++++  END OF THE DATA PREPARATION PART ++++ 


# Part 2: Regression Model

In [20]:
# import libraries
from tensorflow import keras
from tensorflow.keras import layers

def build_regmodl1():
  """Build and compile the first regression network.

  Architecture: Dense(512, relu) -> Dense(8, relu) -> Dense(1) with no
  activation on the output unit. Compiled with the RMSprop optimizer,
  mean-squared-error loss and mean-absolute-error as a metric.

  Returns:
      A compiled, untrained keras.Sequential model.
  """
  network = keras.Sequential()
  for units in (512, 8):
    network.add(layers.Dense(units, activation="relu"))
  # Single output unit without an activation, for regression
  network.add(layers.Dense(1))
  network.compile(loss="mse", optimizer="rmsprop", metrics=["mae"])
  return network

# Build a fresh, untrained instance of the first regression model
regmod1 = build_regmodl1()

# Train on the 4-level quality labels for 100 epochs; verbose=2 logs one line per epoch
regmod1.fit(x=Xtrain, y=train_labels, epochs=100, verbose=2)

# Print the model's layer summary
regmod1.summary()

Epoch 1/100
153/153 - 2s - loss: 0.8890 - mae: 0.6400 - 2s/epoch - 14ms/step
Epoch 2/100
153/153 - 0s - loss: 0.4055 - mae: 0.4809 - 227ms/epoch - 1ms/step
Epoch 3/100
153/153 - 0s - loss: 0.3060 - mae: 0.4176 - 214ms/epoch - 1ms/step
Epoch 4/100
153/153 - 0s - loss: 0.2831 - mae: 0.4214 - 206ms/epoch - 1ms/step
Epoch 5/100
153/153 - 0s - loss: 0.2391 - mae: 0.3809 - 221ms/epoch - 1ms/step
Epoch 6/100
153/153 - 0s - loss: 0.2212 - mae: 0.3643 - 243ms/epoch - 2ms/step
Epoch 7/100
153/153 - 0s - loss: 0.2054 - mae: 0.3479 - 208ms/epoch - 1ms/step
Epoch 8/100
153/153 - 0s - loss: 0.1889 - mae: 0.3360 - 220ms/epoch - 1ms/step
Epoch 9/100
153/153 - 0s - loss: 0.1829 - mae: 0.3283 - 220ms/epoch - 1ms/step
Epoch 10/100
153/153 - 0s - loss: 0.1757 - mae: 0.3232 - 218ms/epoch - 1ms/step
Epoch 11/100
153/153 - 0s - loss: 0.1685 - mae: 0.3123 - 228ms/epoch - 1ms/step
Epoch 12/100
153/153 - 0s - loss: 0.1677 - mae: 0.3107 - 212ms/epoch - 1ms/step
Epoch 13/100
153/153 - 0s - loss: 0.1634 - mae: 0.3

In [None]:
# reg mod 2
def build_regmodl2():
  """Build and compile the second regression network.

  Architecture: five Dense layers of 128, 64, 30, 5 and 1 units, every one
  of them (including the single output unit) using ReLU. Compiled with the
  RMSprop optimizer, mean-squared-error loss and mean-absolute-error as a
  metric.

  Returns:
      A compiled, untrained keras.Sequential model.
  """
  # NOTE(review): the output unit also uses ReLU, unlike build_regmodl1 whose
  # output layer has no activation -- confirm this is intentional.
  layer_widths = (128, 64, 30, 5, 1)
  stack = [layers.Dense(width, activation="relu") for width in layer_widths]
  regressor = keras.Sequential(stack)
  regressor.compile(loss="mse", optimizer="rmsprop", metrics=["mae"])
  return regressor

# Build a fresh, untrained instance of the second regression model
regmodl2 = build_regmodl2()

# Train on the 4-level quality labels for 100 epochs, one log line per epoch
regmodl2.fit(Xtrain, train_labels, epochs=100, verbose=2)

# Score on the validation split; unpacks into the MSE loss and the MAE metric
regmodl2_loss, regmodl2_mae = regmodl2.evaluate(x=Xval, y=val_labels)

# Print the model's layer summary
regmodl2.summary()

In [None]:
# reg mod 3
def build_regmodl3():
  model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(125, activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(30, activation="relu"),
    layers.Dense(5, activation="relu"),
    layers.Dense(1)                    
  ])
  model.compile