In [1]:
# Download the wine .csv files from data archive
!rm -f winequality-red.csv winequality-white.csv
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv

# These are the packages required for this assignment
import pandas as pd
import numpy as np


--2024-05-17 03:39:25--  https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘winequality-red.csv’

winequality-red.csv     [  <=>               ]  82.23K   213KB/s    in 0.4s    

2024-05-17 03:39:26 (213 KB/s) - ‘winequality-red.csv’ saved [84199]

--2024-05-17 03:39:26--  https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘winequality-white.csv’

winequality-white.c     [   <=>              ] 258.23K   500KB/s    in 0.5s    

2024-05-17

In [2]:
# Use Pandas to read the csv file into a dataframe.
# Note that the delimiter in this csv is the semicolon ";" instead of a ,
df_red = pd.read_csv('winequality-red.csv',delimiter=";")

# Because we are performing a classification task, we will assign all red wine a label of 1
df_red["color"] = 1

# The method .head() is super useful for seeing a preview of our data!
df_red.head()

df_white = pd.read_csv('winequality-white.csv',delimiter=";")
df_white["color"] = 0  #assign white wine the label 0
df_white.head()

# Now we combine our two dataframes
df = pd.concat([df_red, df_white])

# And shuffle them in place to mix the red and white wine data together
df = df.sample(frac=1).reset_index(drop=True)
df.head()

# We choose three attributes of the wine to perform our prediction on
input_columns = ["citric acid", "residual sugar", "total sulfur dioxide"]
output_columns = ["color"]

# We extract the relevant features into our X and Y numpy arrays
X = df[input_columns].to_numpy()
Y = df[output_columns].to_numpy()
print("Shape of X:", X.shape)
print("Shape of Y:", Y.shape)
in_features = X.shape[1]

Shape of X: (6497, 3)
Shape of Y: (6497, 1)


In [None]:
class WineClassifierModel():
  def __init__(self, in_features):
    self.w = 0.01 * np.random.randn(in_features)
    self.b = 0.01 * np.random.randn()
    self.non_zero_tolerance = 1e-8

  def forward(self,x):
    self.z = x @ self.w.T + self.b
    self.a = self.activation(self.z)
    return self.a

  def activation(self,z):
    return 1 / (1 + np.exp(-z) + self.non_zero_tolerance)

  def gradient(self,x):
    self.dw = self.a * (1- self.a) * x
    self.db = self.a * (1- self.a)

  def update(self, grad_loss, lr):
    self.w -= grad_loss * self.dw * lr
    self.b -= grad_loss * self.db * lr


In [None]:
def train_model_NLL_loss(model, input_data, output_data, learning_rate, num_epochs):
    non_zero_tolerance = 1e-8
    num_samples = len(input_data)
    for epoch in range(1, num_epochs+1):
        total_loss = 0
        for i in range(num_samples):
            x = input_data[i,...]
            y = output_data[i]
            y_predicted = model.forward(x)
            loss = -(y * np.log(y_predicted + non_zero_tolerance) + (1-y) * np.log(1-y_predicted + non_zero_tolerance))
            total_loss += loss

            model.gradient(x)
            grad_loss = (y_predicted - y)/(y_predicted * (1-y_predicted))
            model.update(grad_loss, learning_rate)

        report_every = max(1, num_epochs // 10)
        if epoch == 1 or epoch % report_every == 0: #every few epochs, report
            print("epoch", epoch, "has total loss", total_loss/ num_samples)

In [None]:
# We will use this function to evaluate how well our trained classifier perfom
# Hint: the model you define above must have a .forward function in order to be compatible
# Hint: this evaluation function is identical to those in previous notebooks
def evaluate_classification_accuracy(model, input_data, labels):
    # Count the number of correctly classified samples given a set of weights
    correct = 0
    num_samples = len(input_data)
    for i in range(num_samples):
        x = input_data[i,...]
        y = labels[i]
        y_predicted = model.forward(x)
        label_predicted = 1 if y_predicted > 0.5 else 0
        if label_predicted == y:
            correct += 1
    accuracy = correct / num_samples
    print("Our model predicted", correct, "out of", num_samples,
          "correctly for", accuracy*100, "% accuracy")
    return accuracy

In [None]:
# train the model...
lr = 0.001
epochs = 250

model = WineClassifierModel(in_features)
# train_model_NLL_loss(model, input_data, output_data, learning_rate=0.01, num_epochs=100, batch_size=32)

train_model_NLL_loss(model, X, Y, lr, epochs)
print("\nFinal weights:")
print(model.w, model.b)

epoch 1 has total loss [0.99485867]
epoch 25 has total loss [0.53205897]
epoch 50 has total loss [0.51737028]
epoch 75 has total loss [0.51783065]
epoch 100 has total loss [0.51874003]
epoch 125 has total loss [0.51879059]
epoch 150 has total loss [0.51833416]
epoch 175 has total loss [0.51773802]
epoch 200 has total loss [0.51700691]
epoch 225 has total loss [0.51606454]
epoch 250 has total loss [0.51510341]

Final weights:
[-1.36638812 -0.4019177  -0.21720915] [11.81553132]


In [None]:
print(
    "Training Accuracy",
    evaluate_classification_accuracy(model, X, Y) * 100,
    "%",
)
# print("Parameters", model.weights, model.bias)

Our model predicted 5862 out of 6497 correctly for 90.2262582730491 % accuracy
Training Accuracy 90.2262582730491 %
