# Import Libraries

In [None]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

In [None]:
# Relative paths to the red- and white-wine quality CSV files.
red_csv, white_csv = (
    "Data/winequality-red.csv",
    "Data/winequality-white.csv",
)

In [None]:
# Load both datasets into DataFrames (red first, then white) using
# pandas' default CSV parsing options.
red, white = (pd.read_csv(csv_path) for csv_path in (red_csv, white_csv))

# Preview the first rows of the red-wine table.
red.head()

In [None]:
# Features: the eleven physicochemical measurement columns.
red_X = red[
    [
        "fixed acidity",
        "volatile acidity",
        "citric acid",
        "residual sugar",
        "chlorides",
        "free sulfur dioxide",
        "total sulfur dioxide",
        "density",
        "pH",
        "sulphates",
        "alcohol",
    ]
]
# Target: the "quality" column, kept as an (n, 1) column vector.
red_y = np.reshape(red["quality"].values, (-1, 1))
print(red_X.shape, red_y.shape)

In [None]:
# Split the red-wine data into train and test partitions using
# train_test_split's default sizes. Stratifying on the quality labels keeps
# the class proportions similar in both partitions; random_state fixes the
# shuffle so the split is reproducible.
from sklearn.model_selection import train_test_split

redX_train, redX_test, redy_train, redy_test = train_test_split(
    red_X,
    red_y,
    random_state=42,
    stratify=red_y,
)

In [None]:
# Feature scaling for the red-wine data.
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler and fit it on the training features only, so the
# scaling statistics are not influenced by the test partition.
redX_scaler = StandardScaler()
redX_scaler = redX_scaler.fit(redX_train)

In [None]:
# Apply the scaler fitted on the training features to both partitions
# (only X is scaled; the quality labels are left untouched).
redX_train_scaled, redX_test_scaled = (
    redX_scaler.transform(partition) for partition in (redX_train, redX_test)
)

In [None]:
# Class balance of the red-wine training labels: one row per distinct
# quality value, laid out as (value, count).
number_list = np.array(redy_train)

unique, counts = np.unique(number_list, return_counts=True)
frequencies = np.column_stack((unique, counts))

frequencies

In [None]:
# Class balance of the red-wine test labels: one row per distinct
# quality value, laid out as (value, count).
number_list = np.array(redy_test)

unique, counts = np.unique(number_list, return_counts=True)
frequencies = np.column_stack((unique, counts))

frequencies

# Red Logistic Regression

In [None]:
# Logistic-regression classifier for red-wine quality, with a generous
# iteration cap so the solver has room to converge.
from sklearn.linear_model import LogisticRegression

red_classifier = LogisticRegression(max_iter=10_000)
red_classifier

In [None]:
# Train on the scaled training features; the (n, 1) label column is
# flattened to 1-D before fitting.
red_classifier = red_classifier.fit(redX_train_scaled, np.ravel(redy_train))

In [None]:
# Report the classifier's mean accuracy on the training partition and on
# the held-out test partition.
print(
    f"Training Data Score: {red_classifier.score(redX_train_scaled, redy_train)}"
)
print(
    f"Testing Data Score: {red_classifier.score(redX_test_scaled, redy_test)}"
)

In [None]:
# Predict quality for the held-out red-wine test features and print the
# first ten predictions next to the first ten true labels.
predictions = red_classifier.predict(redX_test_scaled)
print(f"First 10 Predictions:   {predictions[:10]}")
# redy_test is an (n, 1) column vector, so a plain .tolist() would print
# nested single-element lists ([[5], [6], ...]). Flatten it first so the
# labels print as a flat list that lines up with the predictions above.
print(f"First 10 Actual labels: {redy_test[:10].ravel().tolist()}")

In [None]:
# Tabulate every red-wine test prediction next to its true label
# (labels flattened from the (n, 1) column to 1-D).
red_df = pd.DataFrame(
    {"Prediction": predictions, "Actual": np.ravel(redy_test)}
).reset_index(drop=True)
red_df

In [None]:
# Confusion matrix for the red-wine logistic regression on the test split.
# NOTE: make_classification and SVC are imported here but not used in
# this cell.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.svm import SVC

# Re-run the predictions so this cell does not depend on which earlier
# cell last assigned `predictions`.
predictions = red_classifier.predict(redX_test_scaled)

# Rows and columns follow the classifier's own class ordering.
cm = confusion_matrix(
    redy_test,
    predictions,
    labels=red_classifier.classes_,
)
disp = ConfusionMatrixDisplay(
    display_labels=red_classifier.classes_,
    confusion_matrix=cm,
)
disp.plot()

# Write the figure to disk first, then display it.
plt.savefig("Red Wine Logistic Regression Matrix"+'.jpg')
plt.show()

# Scale White Wine Data

In [None]:
# Features: the eleven physicochemical measurement columns.
whiteX = white[
    [
        "fixed acidity",
        "volatile acidity",
        "citric acid",
        "residual sugar",
        "chlorides",
        "free sulfur dioxide",
        "total sulfur dioxide",
        "density",
        "pH",
        "sulphates",
        "alcohol",
    ]
]
# Target: the "quality" column, kept as an (n, 1) column vector.
whitey = np.reshape(white["quality"].values, (-1, 1))
print(whiteX.shape, whitey.shape)

In [None]:
# Split the white-wine data into train and test partitions using
# train_test_split's default sizes. Stratifying on the quality labels keeps
# the class proportions similar in both partitions; random_state fixes the
# shuffle so the split is reproducible.
from sklearn.model_selection import train_test_split

whiteX_train, whiteX_test, whitey_train, whitey_test = train_test_split(
    whiteX,
    whitey,
    random_state=42,
    stratify=whitey,
)

In [None]:
# Feature scaling for the white-wine data.
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler and fit it on the training features only, so the
# scaling statistics are not influenced by the test partition.
whiteX_scaler = StandardScaler()
whiteX_scaler = whiteX_scaler.fit(whiteX_train)

In [None]:
# Apply the scaler fitted on the training features to both partitions
# (only X is scaled; the quality labels are left untouched).
whiteX_train_scaled, whiteX_test_scaled = (
    whiteX_scaler.transform(partition)
    for partition in (whiteX_train, whiteX_test)
)

In [None]:
# Class balance of the white-wine training labels: one row per distinct
# quality value, laid out as (value, count).
number_list = np.array(whitey_train)

unique, counts = np.unique(number_list, return_counts=True)
frequencies = np.column_stack((unique, counts))

frequencies

In [None]:
# Class balance of the white-wine test labels: one row per distinct
# quality value, laid out as (value, count).
number_list = np.array(whitey_test)

unique, counts = np.unique(number_list, return_counts=True)
frequencies = np.column_stack((unique, counts))

frequencies

# White Logistic Regression

In [None]:
# Logistic-regression classifier for white-wine quality, with a generous
# iteration cap so the solver has room to converge.
from sklearn.linear_model import LogisticRegression

white_classifier = LogisticRegression(max_iter=20_000)
white_classifier

In [None]:
# Train on the scaled training features; the (n, 1) label column is
# flattened to 1-D before fitting.
white_classifier = white_classifier.fit(whiteX_train_scaled, np.ravel(whitey_train))

In [None]:
# Report the classifier's mean accuracy on the training partition and on
# the held-out test partition.
print(
    f"Training Data Score: {white_classifier.score(whiteX_train_scaled, whitey_train)}"
)
print(
    f"Testing Data Score: {white_classifier.score(whiteX_test_scaled, whitey_test)}"
)

In [None]:
# Predict quality for the held-out white-wine test features and print the
# first ten predictions next to the first ten true labels.
predictions = white_classifier.predict(whiteX_test_scaled)
print(f"First 10 Predictions:   {predictions[:10]}")
# whitey_test is an (n, 1) column vector, so a plain .tolist() would print
# nested single-element lists ([[5], [6], ...]). Flatten it first so the
# labels print as a flat list that lines up with the predictions above.
print(f"First 10 Actual labels: {whitey_test[:10].ravel().tolist()}")

In [None]:
# Tabulate every white-wine test prediction next to its true label
# (labels flattened from the (n, 1) column to 1-D); shown as cell output.
pd.DataFrame(
    {"Prediction": predictions, "Actual": np.ravel(whitey_test)}
).reset_index(drop=True)

In [None]:
# Confusion matrix for the white-wine logistic regression on the test split.
# NOTE: make_classification and SVC are imported here but not used in
# this cell.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.svm import SVC

# Re-run the predictions so this cell does not depend on which earlier
# cell last assigned `predictions`.
predictions = white_classifier.predict(whiteX_test_scaled)

# Rows and columns follow the classifier's own class ordering.
cm = confusion_matrix(
    whitey_test,
    predictions,
    labels=white_classifier.classes_,
)
disp = ConfusionMatrixDisplay(
    display_labels=white_classifier.classes_,
    confusion_matrix=cm,
)
disp.plot()

# Write the figure to disk first, then display it.
plt.savefig("White Wine Logistic Regression Matrix"+'.jpg')
plt.show()