In [2]:
# Download the wine .csv files from data archive
!rm -f winequality-red.csv winequality-white.csv
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv

# These are the packages required for this assignment
import pandas as pd
import numpy as np

--2024-10-08 20:42:25--  https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘winequality-red.csv’

winequality-red.csv     [ <=>                ]  82.23K   510KB/s    in 0.2s    

2024-10-08 20:42:26 (510 KB/s) - ‘winequality-red.csv’ saved [84199]

--2024-10-08 20:42:26--  https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘winequality-white.csv’

winequality-white.c     [  <=>               ] 258.23K  1.02MB/s    in 0.2s    

2024-10-08

In [3]:
df_red = pd.read_csv('winequality-red.csv',delimiter=";")

# Because we are performing a classification task, we will assign all red wine a label of 1
df_red["color"] = 1

# The method .head() is super useful for seeing a preview of our data!
df_red.head()

df_white = pd.read_csv('winequality-white.csv',delimiter=";")
df_white["color"] = 0  #assign white wine the label 0
df_white.head()

# Now we combine our two dataframes
df = pd.concat([df_red, df_white])

# And shuffle them in place to mix the red and white wine data together
df = df.sample(frac=1).reset_index(drop=True)
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,color
0,10.6,0.44,0.68,4.1,0.114,6.0,24.0,0.997,3.06,0.66,13.4,6,1
1,6.3,0.13,0.42,1.1,0.043,63.0,146.0,0.99066,3.13,0.72,11.2,7,0
2,8.1,0.25,0.38,3.8,0.051,18.0,129.0,0.9928,3.21,0.38,11.5,6,0
3,6.2,0.3,0.32,1.2,0.052,32.0,185.0,0.99266,3.28,0.44,10.1,5,0
4,8.9,0.61,0.49,2.0,0.27,23.0,110.0,0.9972,3.12,1.02,9.3,5,1


In [4]:
# Separate features and labels
input_columns = df.drop(columns=["color"]).to_numpy()  # Features
labeled_column = df["color"].to_numpy()               # Labels

In [5]:
# Define sigmoid activation function
def sigmoid(z):
  return 1/(1+ np.exp(-z))

In [10]:
# Define sigmoid for a classification model
def single_neuron_classifier(w,w_0,X):
  z = np.dot(X, w) + w_0  # Perform dot product and add bias
  z = np.clip(z, -500, 500)
  return sigmoid(z)

In [14]:
def training_model_NLL_loss(model_function, w, w_0, X, y, lr, epochs):
  non_zero_tolerance = 1e-8
  for epoch in range(epochs):
      # Count loss
      total_loss = 0
      for x_i, y_i in zip(X,y):
        y_hat = model_function(w,w_0,x_i)
        error = y_hat - y_i
        total_loss += (error**2)/2

        # Update bais
        w_0 -= lr * error

        # Update weights
        for j, x_j in enumerate(x_i):
          w[j] -= lr * error * x_j

      # Every progress
      report_every = max(1,epochs // 10)
      if epoch % report_every == 0:
        print("epoch", epoch, "loss", total_loss)

  return w, w_0

In [15]:
# Define function to estimate the accuracy of the trained model
def evaluate_model_accuracy(model_function, w, w_0, X, y):
  total_loss = 0
  n = len(X)
  for x_i, y_i in zip(X,y):
    y_hat = model_function(w,w_0,x_i)
    error = y_hat - y_i
    total_loss += (error**2)/2
  accuracy = total_loss / n
  print("This model has a mean squared error of, ", accuracy)

In [16]:
w = np.zeros(input_columns.shape[1])
w_0 = 0
lr = 0.01
epochs = 200

w, w_0 = training_model_NLL_loss(single_neuron_classifier, w, w_0, input_columns, labeled_column, lr, epochs)

print("trained weights", w)
print("trained bais", w_0)

# Evaluate the accuracy of final model
evaluate_model_accuracy(single_neuron_classifier, w, w_0, input_columns, labeled_column)

epoch 0 loss 387.51875095036877
epoch 20 loss 247.58055203939253
epoch 40 loss 224.74808058903292
epoch 60 loss 217.91215812163603
epoch 80 loss 219.46894024872032
epoch 100 loss 208.36563877892544
epoch 120 loss 190.096819485171
epoch 140 loss 183.92027997548104
epoch 160 loss 188.74637884015303
epoch 180 loss 189.20356874381616
trained weights [ 17.522781   102.06179746 -24.27608884  -7.30557348  15.07359485
   1.00519259  -1.51562675  -4.20103926  50.61790568  68.97908831
 -20.02630034  -6.7496296 ]
trained bais -5.537374485660549
This model has a mean squared error of,  0.014895903660565086
