In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Red wine: fit a linear-kernel SVM that predicts the integer `quality`
# score from the remaining columns, then report held-out metrics.

# Load the red wine dataset (the file is semicolon-delimited)
red_wine = pd.read_csv('winequality-red.csv', delimiter=';')

# Prepare the data: every column except `quality` is a feature
X_red = red_wine.drop(columns=['quality'])
y_red = red_wine['quality']

# Split the data into training and test sets (80/20, fixed seed so the
# split is the same on every run)
X_train_red, X_test_red, y_train_red, y_test_red = train_test_split(
    X_red, y_red, test_size=0.2, random_state=42
)

# Standardize the data. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no test-set statistics
# reach the model.
scaler_red = StandardScaler()
X_train_red = scaler_red.fit_transform(X_train_red)
X_test_red = scaler_red.transform(X_test_red)

# Train SVM for red wine
svm_red = SVC(kernel='linear', random_state=42)
svm_red.fit(X_train_red, y_train_red)

# Predict and evaluate for red wine
y_pred_red = svm_red.predict(X_test_red)
accuracy_red = accuracy_score(y_test_red, y_pred_red)
print(f"Red Wine SVM Model Accuracy: {accuracy_red}")
# Some quality classes may never be predicted, which leaves their precision
# undefined. zero_division=0 reports 0.0 for those entries (the same value
# the default shows) without emitting an UndefinedMetricWarning per metric.
print(classification_report(y_test_red, y_pred_red, zero_division=0))
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test_red, y_pred_red))


Red Wine SVM Model Accuracy: 0.559375
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        10
           5       0.60      0.77      0.68       130
           6       0.51      0.60      0.55       132
           7       0.00      0.00      0.00        42
           8       0.00      0.00      0.00         5

    accuracy                           0.56       320
   macro avg       0.19      0.23      0.20       320
weighted avg       0.46      0.56      0.50       320

[[  0   0   1   0   0   0]
 [  0   0   9   1   0   0]
 [  0   0 100  30   0   0]
 [  0   0  53  79   0   0]
 [  0   0   3  39   0   0]
 [  0   0   0   5   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
# White wine: fit a linear-kernel SVM that predicts the integer `quality`
# score from the remaining columns, then report held-out metrics.

# Load the white wine dataset (the file is semicolon-delimited)
white_wine = pd.read_csv('winequality-white.csv', delimiter=';')

# Prepare the data: every column except `quality` is a feature
X_white = white_wine.drop(columns=['quality'])
y_white = white_wine['quality']

# Split the data into training and test sets (80/20, fixed seed so the
# split is the same on every run)
X_train_white, X_test_white, y_train_white, y_test_white = train_test_split(
    X_white, y_white, test_size=0.2, random_state=42
)

# Standardize the data. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no test-set statistics
# reach the model.
scaler_white = StandardScaler()
X_train_white = scaler_white.fit_transform(X_train_white)
X_test_white = scaler_white.transform(X_test_white)

# Train SVM for white wine
svm_white = SVC(kernel='linear', random_state=42)
svm_white.fit(X_train_white, y_train_white)

# Predict and evaluate for white wine
y_pred_white = svm_white.predict(X_test_white)
accuracy_white = accuracy_score(y_test_white, y_pred_white)
print(f"White Wine SVM Model Accuracy: {accuracy_white}")
# Some quality classes may never be predicted, which leaves their precision
# undefined. zero_division=0 reports 0.0 for those entries (the same value
# the default shows) without emitting an UndefinedMetricWarning per metric.
print(classification_report(y_test_white, y_pred_white, zero_division=0))
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test_white, y_pred_white))


White Wine SVM Model Accuracy: 0.5091836734693878
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         5
           4       0.00      0.00      0.00        25
           5       0.56      0.53      0.55       291
           6       0.49      0.80      0.61       432
           7       0.00      0.00      0.00       192
           8       0.00      0.00      0.00        35

    accuracy                           0.51       980
   macro avg       0.18      0.22      0.19       980
weighted avg       0.38      0.51      0.43       980

[[  0   0   2   3   0   0]
 [  0   0  14  11   0   0]
 [  0   0 155 136   0   0]
 [  0   0  88 344   0   0]
 [  0   0  15 177   0   0]
 [  0   0   1  34   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
# Combined model: pool equal numbers of red and white samples, then fit a
# linear-kernel SVM that predicts `quality` and report held-out metrics.

# Load the combined dataset
combined_wine = pd.read_csv('combined_winequality.csv')

# Separate the datasets by the `type` flag, then drop the flag so it is not
# used as a feature (the combined model therefore never sees wine colour).
# NOTE(review): assumes `type` encodes red as 1 and white as 0 -- confirm
# against how combined_winequality.csv was built.
# NOTE(review): this rebinds `red_wine` / `white_wine`, which the earlier
# cells bound to the frames read from the per-colour CSV files.
red_wine = combined_wine[combined_wine['type'] == 1].drop(columns=['type'])
white_wine = combined_wine[combined_wine['type'] == 0].drop(columns=['type'])

# Balance the combined dataset: sample both groups (without replacement,
# fixed seed) down to the size of the smaller one.
min_count = min(len(red_wine), len(white_wine))
balanced_red = red_wine.sample(n=min_count, random_state=42)
balanced_white = white_wine.sample(n=min_count, random_state=42)
balanced_combined = pd.concat([balanced_red, balanced_white])

# Prepare the data for the combined model
X_combined = balanced_combined.drop(columns=['quality'])
y_combined = balanced_combined['quality']

# Split the data into training and test sets (80/20, fixed seed so the
# split is the same on every run)
X_train_combined, X_test_combined, y_train_combined, y_test_combined = train_test_split(
    X_combined, y_combined, test_size=0.2, random_state=42
)

# Standardize the data. The scaler is fitted on the training split only
# and then applied unchanged to the test split, so no test-set statistics
# reach the model.
scaler_combined = StandardScaler()
X_train_combined = scaler_combined.fit_transform(X_train_combined)
X_test_combined = scaler_combined.transform(X_test_combined)

# Train SVM for combined data
svm_combined = SVC(kernel='linear', random_state=42)
svm_combined.fit(X_train_combined, y_train_combined)

# Predict and evaluate for combined data
y_pred_combined = svm_combined.predict(X_test_combined)
accuracy_combined = accuracy_score(y_test_combined, y_pred_combined)
print(f"Combined Wine SVM Model Accuracy: {accuracy_combined}")
# Some quality classes may never be predicted, which leaves their precision
# undefined. zero_division=0 reports 0.0 for those entries (the same value
# the default shows) without emitting an UndefinedMetricWarning per metric.
print(classification_report(y_test_combined, y_pred_combined, zero_division=0))
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test_combined, y_pred_combined))


Combined Wine SVM Model Accuracy: 0.55
              precision    recall  f1-score   support

           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00        15
           5       0.62      0.62      0.62       250
           6       0.50      0.72      0.59       273
           7       0.00      0.00      0.00        84
           8       0.00      0.00      0.00        17

    accuracy                           0.55       640
   macro avg       0.19      0.22      0.20       640
weighted avg       0.46      0.55      0.50       640

[[  0   0   1   0   0   0]
 [  0   0  10   5   0   0]
 [  0   0 155  95   0   0]
 [  0   0  76 197   0   0]
 [  0   0   6  78   0   0]
 [  0   0   1  16   0   0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
