In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, mean_squared_error

In [4]:
# Load the datasets
red_wine = pd.read_csv("C:/Users/KIIT/Documents/jupyter notebook/winequality-red.csv", sep=';')
white_wine = pd.read_csv("C:/Users/KIIT/Documents/jupyter notebook/winequality-white.csv", sep=';')

# Combine datasets and add labels
red_wine["wine_type"] = "red"
white_wine["wine_type"] = "white"
wine_data = pd.concat([red_wine, white_wine])

# Separate features and target
X = wine_data.drop(["quality", "wine_type"], axis=1)
y = wine_data["quality"]

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Helper function to discretize wine quality scores
def discretize_quality(y):
    return (y > 5).astype(int)  # High quality (1) if >5, else Low quality (0)

# Discretize the true target values
y_train_class = discretize_quality(y_train)
y_test_class = discretize_quality(y_test)

In [6]:
# Model training and accuracy calculation
results = {}

# Support Vector Regression (SVR)
svr = SVR()
svr.fit(X_train, y_train)
y_pred_svr = svr.predict(X_test)
svr_acc = accuracy_score(y_test_class, discretize_quality(y_pred_svr))
results["SVR"] = svr_acc
print(f"SVR Accuracy: {svr_acc:.2f}")


SVR Accuracy: 0.71


In [7]:
# Linear Regression (LR)
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
lr_acc = accuracy_score(y_test_class, discretize_quality(y_pred_lr))
results["LR"] = lr_acc
print(f"Linear Regression Accuracy: {lr_acc:.2f}")


Linear Regression Accuracy: 0.68


In [8]:
# K-Nearest Neighbors (KNN)
knn = KNeighborsRegressor()
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
knn_acc = accuracy_score(y_test_class, discretize_quality(y_pred_knn))
results["KNN"] = knn_acc
print(f"KNN Accuracy: {knn_acc:.2f}")

KNN Accuracy: 0.72


In [9]:
# Random Forest (RF)
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
rf_acc = accuracy_score(y_test_class, discretize_quality(y_pred_rf))
results["RF"] = rf_acc
print(f"Random Forest Accuracy: {rf_acc:.2f}")

Random Forest Accuracy: 0.68


In [10]:
# Decision Tree (DT)
dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
dt_acc = accuracy_score(y_test_class, discretize_quality(y_pred_dt))
results["DT"] = dt_acc
print(f"Decision Tree Accuracy: {dt_acc:.2f}")

Decision Tree Accuracy: 0.77


In [11]:
# Naive Bayes (NB)
nb = GaussianNB()
nb.fit(X_train, y_train_class)
y_pred_nb = nb.predict(X_test)
nb_acc = accuracy_score(y_test_class, y_pred_nb)
results["NB"] = nb_acc
print(f"Naive Bayes Accuracy: {nb_acc:.2f}")

Naive Bayes Accuracy: 0.69


In [12]:
gb = GradientBoostingRegressor(random_state=42)
gb.fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)
gb_acc = accuracy_score(y_test_class, discretize_quality(y_pred_gb))
results["GB"] = gb_acc
print(f"Gradient Boosting Accuracy: {gb_acc:.2f}")


Gradient Boosting Accuracy: 0.67


In [13]:
# Display final accuracy results
print("\nFinal Accuracy Results:")
for model, acc in results.items():
    print(f"{model}: {acc:.2f}")


Final Accuracy Results:
SVR: 0.71
LR: 0.68
KNN: 0.72
RF: 0.68
DT: 0.77
NB: 0.69
GB: 0.67


In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load wine datasets
red_wine = pd.read_csv('C:/Users/KIIT/Documents/jupyter notebook/winequality-red.csv', sep=';')
white_wine = pd.read_csv('C:/Users/KIIT/Documents/jupyter notebook/winequality-white.csv', sep=';')

# Combine datasets
wine_data = pd.concat([red_wine, white_wine])
X = wine_data.drop("quality", axis=1)
y = wine_data["quality"]  # Target variable as continuous for regression

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Function to evaluate models
from sklearn.model_selection import KFold


def evaluate_model(model, model_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"{model_name} Results:")
    print(f"MSE: {mse:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"R^2 Score: {r2:.4f}\n")

    # Cross-validation for different folds
    for folds in [5, 10, 15, 20]:
        kfold = KFold(n_splits=folds, shuffle=True, random_state=42)
        scores = cross_val_score(model, X, y, scoring='r2', cv=kfold)
        print(f"Cross-validation ({folds}-fold) R^2 Score: {np.mean(scores):.4f}")
    print("-" * 50)

# Instantiate models
models = {
    "Support Vector Regression (SVR)": SVR(C=10, kernel='rbf'),
    "Linear Regression": LinearRegression(),
    "K-Nearest Neighbors (KNN)": KNeighborsRegressor(n_neighbors=5),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42),
    "Decision Tree Regressor": DecisionTreeRegressor(max_depth=8, random_state=42),
    "Gradient Boosting Regressor": GradientBoostingRegressor(n_estimators=150, learning_rate=0.1, max_depth=8, random_state=42)
}

# Evaluate all models
for model_name, model in models.items():
    evaluate_model(model, model_name)


Support Vector Regression (SVR) Results:
MSE: 0.4547
MAE: 0.5045
R^2 Score: 0.3844

Cross-validation (5-fold) R^2 Score: 0.2661
Cross-validation (10-fold) R^2 Score: 0.2678
Cross-validation (15-fold) R^2 Score: 0.2695
Cross-validation (20-fold) R^2 Score: 0.2698
--------------------------------------------------
Linear Regression Results:
MSE: 0.5467
MAE: 0.5659
R^2 Score: 0.2598

Cross-validation (5-fold) R^2 Score: 0.2882
Cross-validation (10-fold) R^2 Score: 0.2894
Cross-validation (15-fold) R^2 Score: 0.2894
Cross-validation (20-fold) R^2 Score: 0.2885
--------------------------------------------------
K-Nearest Neighbors (KNN) Results:
MSE: 0.4925
MAE: 0.5300
R^2 Score: 0.3331

Cross-validation (5-fold) R^2 Score: 0.1578
Cross-validation (10-fold) R^2 Score: 0.1608
Cross-validation (15-fold) R^2 Score: 0.1712
Cross-validation (20-fold) R^2 Score: 0.1717
--------------------------------------------------
Random Forest Regressor Results:
MSE: 0.4199
MAE: 0.4951
R^2 Score: 0.4314

Cr