In [29]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Read the red-wine quality table into a DataFrame.
# NOTE(review): the absolute /content/ path looks environment-specific —
# confirm it before running the notebook anywhere else.
DATA_PATH = "/content/winequality-red.csv"
df = pd.read_csv(DATA_PATH)

In [4]:
# Preview the first five rows (the DataFrame.head default) to check the load.
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [5]:
# Binarise the target: wines rated 7 or higher become the positive class (1),
# everything else 0, and the raw `quality` score is then removed so it cannot
# be used as a feature.
# The guard keeps the cell idempotent: once `quality` has been dropped, running
# the cell a second time used to raise KeyError; now it is a no-op.
if 'good_quality' not in df.columns:
    df['good_quality'] = (df['quality'] >= 7).astype(int)

    df = df.drop('quality', axis=1)

In [6]:
# Re-check the frame: `good_quality` should now appear in place of `quality`.
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,good_quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,0
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,0


Data pre-processing

In [8]:
# Separate the binary target from the predictor columns.
y = df['good_quality']
X = df.drop(columns=['good_quality'])

In [43]:
# Show ten rows instead of five: in the recorded output the first positive
# examples (good_quality == 1) only appear at rows 7 and 8.
df.head(10)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,good_quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,0
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,0
5,7.4,0.66,0.0,1.8,0.075,13.0,40.0,0.9978,3.51,0.56,9.4,0
6,7.9,0.6,0.06,1.6,0.069,15.0,59.0,0.9964,3.3,0.46,9.4,0
7,7.3,0.65,0.0,1.2,0.065,15.0,21.0,0.9946,3.39,0.47,10.0,1
8,7.8,0.58,0.02,2.0,0.073,9.0,18.0,0.9968,3.36,0.57,9.5,1
9,7.5,0.5,0.36,6.1,0.071,17.0,102.0,0.9978,3.35,0.8,10.5,0


In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [10]:
# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted transform is then applied to both splits, so no statistics
# from the test rows enter the fit.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

Logistic Regression

In [12]:
# Baseline model: scikit-learn logistic regression with its default settings,
# fitted on the standardised training split.
logreg_model = LogisticRegression()
logreg_model.fit(X_train, y_train)

In [13]:
# Class predictions for the held-out test rows.
logreg_pred = logreg_model.predict(X_test)

In [14]:
# Score the logistic-regression predictions against the held-out labels.
logreg_accuracy, logreg_f1 = (
    accuracy_score(y_true=y_test, y_pred=logreg_pred),
    f1_score(y_true=y_test, y_pred=logreg_pred),
)

In [15]:
# Report both metrics side by side. In the recorded run accuracy is about 0.87
# while F1 is only about 0.38, so the two should be read together.
print("Logistic Regression Accuracy:", logreg_accuracy)
print("Logistic Regression F1 Score:", logreg_f1)

Logistic Regression Accuracy: 0.865625
Logistic Regression F1 Score: 0.37681159420289856


K-nearest neighbours

In [17]:
# k-nearest-neighbours classifier with default settings: fit on the training
# split (fit returns the estimator itself), then label the test rows.
knn_model = KNeighborsClassifier().fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)

In [18]:
# Score the k-NN predictions against the held-out labels.
knn_accuracy, knn_f1 = (
    accuracy_score(y_true=y_test, y_pred=knn_pred),
    f1_score(y_true=y_test, y_pred=knn_pred),
)

In [19]:
# Report accuracy and F1 for k-NN (recorded run: about 0.88 and 0.51).
print("K-Nearest Neighbors Accuracy:", knn_accuracy)
print("K-Nearest Neighbors F1 Score:", knn_f1)

K-Nearest Neighbors Accuracy: 0.88125
K-Nearest Neighbors F1 Score: 0.5128205128205128


Decision Tree Classifier

In [21]:
# Decision tree with default hyper-parameters.
# random_state is pinned because tree construction is randomised (feature
# order is permuted at each split), so an unseeded tree gives different scores
# on every run. 42 matches the seed already used for the train/test split.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

In [22]:
# Score the decision-tree predictions against the held-out labels.
dt_accuracy, dt_f1 = (
    accuracy_score(y_true=y_test, y_pred=dt_pred),
    f1_score(y_true=y_test, y_pred=dt_pred),
)

In [23]:
# Report accuracy and F1 for the decision tree (recorded run: about 0.89 and 0.63).
print("Decision Trees Classifier Accuracy:", dt_accuracy)
print("Decision Trees Classifier F1 Score:", dt_f1)

Decision Trees Classifier Accuracy: 0.890625
Decision Trees Classifier F1 Score: 0.631578947368421


Random Forest Classifier

In [25]:
# Random forest with default hyper-parameters.
# random_state is pinned because both the bootstrap samples and the per-split
# feature subsets are drawn at random; without a seed the reported scores
# change on every run. 42 matches the seed used for the train/test split.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

In [26]:
# Score the random-forest predictions against the held-out labels.
rf_accuracy, rf_f1 = (
    accuracy_score(y_true=y_test, y_pred=rf_pred),
    f1_score(y_true=y_test, y_pred=rf_pred),
)

In [27]:
# Report accuracy and F1 for the random forest (recorded run: about 0.90 and 0.61).
print("Random Forest Classifier Accuracy:", rf_accuracy)
print("Random Forest Classifier F1 Score:", rf_f1)

Random Forest Classifier Accuracy: 0.903125
Random Forest Classifier F1 Score: 0.6075949367088608


Logistic regression from scratch

In [30]:
# Prepend a column of ones to each design matrix so that the first weight acts
# as the intercept term of the hand-written model.
X_train_bias = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_test_bias = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

In [32]:
def initialize_weights(features):
    """Create the starting weight vector for gradient descent.

    Parameters
    ----------
    features : int
        Number of weights to create (one per design-matrix column).

    Returns
    -------
    numpy.ndarray
        A ``(features, 1)`` column vector filled with zeros.
    """
    column_shape = (features, 1)
    return np.zeros(column_shape)

In [33]:
# Training hyper-parameters for the hand-written gradient descent.
learning_rate, iterations = 0.01, 1000

# One zero-initialised weight per column of the design matrix, which already
# contains the leading column of ones.
initial_weights = initialize_weights(features=X_train_bias.shape[1])

In [34]:
def predict(X, weights, threshold=0.5):
    """Classify rows with a fitted logistic-regression weight vector.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix; it must have one column per entry of ``weights``.
    weights : array-like of shape (n, 1) or (n,)
        Model weights.
    threshold : float, default 0.5
        Probability at or above which a row is labelled 1. The default
        reproduces the previous hard-coded cut-off; lowering it trades
        precision for recall, which is useful when positives are rare.

    Returns
    -------
    numpy.ndarray of int
        0/1 labels with the same shape as ``X @ weights``.
    """
    probabilities = sigmoid(np.dot(X, weights))
    return (probabilities >= threshold).astype(int)

In [36]:
def gradient_descent(X, y, weights, learning_rate, iterations):
    """Fit logistic-regression weights with batch gradient descent.

    Every iteration computes ``sigmoid(X @ weights)``, forms the gradient
    ``X.T @ (y_pred - y) / m`` and moves the weights against it, scaled by
    ``learning_rate``.

    Parameters
    ----------
    X : array-like of shape (m, n)
        Design matrix.
    y : array-like with m entries
        Binary targets. They are reshaped to line up with ``X @ weights``, so
        a flat ``(m,)`` vector works with ``(n, 1)`` weights instead of
        broadcasting the error term to ``(m, m)``.
    weights : array-like of shape (n, 1) or (n,)
        Starting weights. The array is copied; the caller's object is never
        modified, so the same starting point can be reused across runs.
    learning_rate : float
        Step size applied to the gradient.
    iterations : int
        Number of full-batch updates to perform.

    Returns
    -------
    numpy.ndarray
        The updated weights as a new float array with the shape of ``weights``.

    Raises
    ------
    ValueError
        If ``y`` cannot be reshaped to match ``X @ weights``.
    """
    X = np.asarray(X, dtype=float)
    # Work on a private float copy: the in-place update below must not leak
    # into the caller's array, and integer weights could not absorb float steps.
    weights = np.array(weights, dtype=float)
    m = X.shape[0]
    # Give y the shape of X @ weights: (m,) + trailing dims of weights.
    y = np.asarray(y, dtype=float).reshape((m,) + weights.shape[1:])

    for _ in range(iterations):
        y_pred = sigmoid(np.dot(X, weights))

        gradient = np.dot(X.T, y_pred - y) / m

        weights -= learning_rate * gradient

    return weights

In [38]:
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The direct formula evaluates ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. Here the exponential is only
    ever taken of a non-positive number, so it stays within (0, 1].

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy.ndarray or numpy scalar
        Element-wise sigmoid of ``z``; a NumPy scalar for scalar input.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # exp of a value <= 0: cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) — the same function.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
    return result[()]

In [39]:
# Train the hand-written model. The labels are passed as an (m, 1) column so
# they line up with the column-vector predictions inside gradient_descent.
weights = gradient_descent(
    X=X_train_bias,
    y=y_train.to_numpy().reshape(-1, 1),
    weights=initial_weights,
    learning_rate=learning_rate,
    iterations=iterations,
)

In [40]:
# Label the test rows with the trained weights (uses the bias-augmented matrix).
logreg_scratch_pred = predict(X_test_bias, weights)

In [41]:
# Score the from-scratch predictions against the held-out labels.
logreg_scratch_accuracy, logreg_scratch_f1 = (
    accuracy_score(y_true=y_test, y_pred=logreg_scratch_pred),
    f1_score(y_true=y_test, y_pred=logreg_scratch_pred),
)

In [42]:
# Report accuracy and F1 for the hand-written model (recorded run: about 0.86
# and 0.31, slightly below the scikit-learn logistic regression above).
print("Logistic Regression from Scratch Accuracy:", logreg_scratch_accuracy)
print("Logistic Regression from Scratch F1 Score:", logreg_scratch_f1)

Logistic Regression from Scratch Accuracy: 0.8625
Logistic Regression from Scratch F1 Score: 0.3125
