Importing Libraries

In [38]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

Arranging dataset

In [39]:
dataset = pd.read_csv('/content/winequality-red.csv')

# Now you can work with the data in your Jupyter Notebook
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [40]:
print(f"NaNs in original X: {np.isnan(X).sum()}")

NaNs in original X: 3


Handelling NaN values

In [41]:
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X = imputer.fit_transform(X)

In [42]:
print(f"NaNs in X after imputation: {np.isnan(X).sum()}")

NaNs in X after imputation: 0


In [43]:
print(X)

[[ 7.4    0.7    0.    ...  3.51   0.56   9.4  ]
 [ 7.8    0.88   0.    ...  3.2    0.68   9.8  ]
 [ 7.8    0.76   0.04  ...  3.26   0.65   9.8  ]
 ...
 [ 6.3    0.51   0.13  ...  3.42   0.75  11.   ]
 [ 5.9    0.645  0.12  ...  3.57   0.71  10.2  ]
 [ 6.     0.31   0.47  ...  3.39   0.66  11.   ]]


Training Data

In [44]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [45]:
print(f"NaNs in X_train after splitting: {np.isnan(X_train).sum()}")
print(f"NaNs in X_test after splitting: {np.isnan(X_test).sum()}")

NaNs in X_train after splitting: 0
NaNs in X_test after splitting: 0


In [46]:
nan_indices = np.argwhere(np.isnan(X_train))
print(f"Indices of NaNs in X_train: {nan_indices}")


Indices of NaNs in X_train: []


In [47]:
if np.isnan(X_train).sum() > 0:
    X_train = imputer.fit_transform(X_train)

In [48]:
print(f"NaNs in X_train after reapplying imputer: {np.isnan(X_train).sum()}")

NaNs in X_train after reapplying imputer: 0


In [49]:

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [50]:
print(f"NaNs in X_train: {np.isnan(X_train).sum()}")
print(f"NaNs in X_test: {np.isnan(X_test).sum()}")


NaNs in X_train: 0
NaNs in X_test: 0


In [51]:
print(X_train)
print(X_test)

[[ 0.21833164  0.88971201  0.19115195 ...  1.09349989  0.45822284
   1.12317723]
 [-1.29016623 -1.78878251  0.65216404 ... -0.40043872 -0.40119696
   1.40827174]
 [ 1.49475291 -0.78434707  1.010729   ... -0.07566946  0.51551749
  -0.58738978]
 ...
 [-0.65195559  0.49909822 -1.08943719 ...  1.28836145 -0.68767023
  -0.87248428]
 [-0.24582155 -1.84458448  0.39604621 ...  0.05423824  0.80199076
   1.40827174]
 [-1.46422367 -1.34236676 -0.06496588 ...  0.50891521 -0.68767023
   2.92877575]]
[[-3.61859850e-01  1.64286407e-01 -9.86990061e-01 ... -4.65392578e-01
  -1.34389336e-04 -7.77452782e-01]
 [-3.03840702e-01 -1.70525408e-01 -5.25977970e-01 ...  5.08915214e-01
  -1.03143815e+00 -8.72484283e-01]
 [ 1.37871461e+00  7.78108067e-01 -2.69860142e-01 ... -2.05577167e-01
   1.83329452e+00 -4.92358280e-01]
 ...
 [-1.37449586e-02  3.87494284e-01 -1.16189445e-01 ... -1.04997725e+00
  -7.44964886e-01 -5.87389780e-01]
 [ 2.76350785e-01 -1.45397070e+00  6.00940475e-01 ... -1.04997725e+00
   1.71749571

Support Vector Regression

In [61]:
from sklearn.svm import SVR

# Instantiate the model
model = SVR()

# Train the model on the training data
model.fit(X_train, y_train)

# Make predictions on the test data
svr_predictions = model.predict(X_test)
print(svr_predictions)


[5.43705847 5.09148742 5.59937275 5.32894194 5.7111594  5.15272993
 4.93848231 4.93836407 5.80724797 5.6637205  6.49443236 5.1035639
 5.66129372 5.11417861 5.38743009 6.53168545 5.2107995  5.46959857
 6.9920408  4.95569581 4.90011315 5.21717149 5.58281908 6.22562883
 5.40091092 5.72698533 6.08390283 5.21529947 4.95538502 6.08258326
 5.16141866 5.11152938 5.88867122 4.94250818 5.71965431 5.00520348
 6.3859316  5.80016687 5.65106855 6.1003401  5.72664412 5.21729328
 6.26635425 5.08342857 6.08929805 5.94617248 6.59472646 5.76041143
 5.10039627 5.57077941 4.88326651 5.047233   5.80296272 6.98417955
 4.95348197 5.11885306 6.16002544 5.49345018 5.87323623 5.09984194
 5.66125572 6.00159426 5.13910465 5.10356606 6.446385   5.13775323
 6.57026653 5.44096341 6.59166738 5.29517461 6.10962606 5.08851941
 5.83154041 5.75761212 6.18711978 5.10039167 6.53652853 5.37985851
 5.98831474 6.63726077 5.10004449 6.68848594 5.26265515 5.28943779
 6.1104571  6.29232957 5.09977253 6.05604122 6.57497721 4.99100

In [62]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
mae = mean_absolute_error(y_test, svr_predictions)
mse = mean_squared_error(y_test, svr_predictions)
r2 = r2_score(y_test, svr_predictions)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R²): {r2}")

Mean Absolute Error (MAE): 0.4535910872530087
Mean Squared Error (MSE): 0.35172233375700596
R-squared (R²): 0.46179161408990865


Gradient Boosting Regressor

In [63]:
from xgboost import XGBRegressor

# Instantiate the model
model = XGBRegressor(random_state=42)

# Train the model on the training data
model.fit(X_train, y_train)

# Make predictions on the test data
xgb_predictions = model.predict(X_test)
print(xgb_predictions)


[5.295484  5.0994434 5.5624557 5.3151455 5.9985027 5.0127673 5.0162635
 4.995122  6.2172465 5.658531  6.954452  5.3691854 5.842692  5.1505237
 5.563595  6.538722  5.3507857 5.670526  6.8481426 4.998055  4.9689507
 6.00458   5.6407185 5.752792  5.6120696 5.9968295 6.034592  5.7824993
 5.023897  5.996535  5.2753577 5.236585  5.977275  5.5492954 5.9290347
 5.001721  6.4305506 6.0882745 5.6476603 6.020171  5.3746676 5.217167
 6.486559  5.327756  5.1806526 5.80129   5.9301696 5.5454106 4.9923506
 5.7804704 4.9920382 4.9521074 5.223649  7.0042048 5.001881  5.9725537
 6.060155  5.967267  5.192905  5.0178103 6.1649046 6.916137  5.4710116
 5.2959466 6.576915  5.269301  6.712698  5.593771  6.2935457 5.554867
 6.204447  5.1530533 5.678393  5.559818  5.9797525 5.0127606 6.502949
 4.972099  5.8699703 6.1077256 5.0040216 7.059235  5.016856  5.9608865
 6.3687296 6.1414933 5.018312  5.9533477 6.72358   5.594555  6.7443476
 5.5689483 4.991212  5.024968  5.3010545 5.627131  5.0498104 6.268465
 3.9123409

In [64]:
mae = mean_absolute_error(y_test, xgb_predictions)
mse = mean_squared_error(y_test, xgb_predictions)
r2 = r2_score(y_test, xgb_predictions)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R²): {r2}")

Mean Absolute Error (MAE): 0.4191360227763653
Mean Squared Error (MSE): 0.3603729762415421
R-squared (R²): 0.4485543303526067


Random Forest Regressor

In [65]:
from sklearn.ensemble import RandomForestRegressor

# Instantiate the model
model = RandomForestRegressor(random_state=42)

# Train the model on the training data
model.fit(X_train, y_train)

# Make predictions on the test data
rfr_predictions = model.predict(X_test)
print(rfr_predictions)


[5.3  5.23 5.45 5.17 6.   5.06 5.13 4.93 6.16 5.89 6.75 5.29 5.72 5.32
 5.46 6.36 5.43 5.79 6.85 5.06 4.94 5.86 5.38 5.96 5.59 5.97 6.43 5.33
 5.32 5.98 5.31 5.46 5.9  5.57 5.86 5.14 6.26 5.98 5.38 6.05 5.2  5.13
 6.25 5.09 5.56 5.68 6.32 5.62 5.19 5.75 5.06 5.25 5.65 7.06 5.2  5.62
 5.95 5.85 5.62 5.02 5.66 6.11 5.5  5.33 6.69 5.36 6.69 5.62 6.72 5.49
 6.06 5.26 5.85 5.56 6.06 5.04 6.62 5.25 5.95 6.59 5.2  6.78 5.18 5.63
 5.81 6.46 5.12 5.97 6.5  5.41 6.34 5.58 5.02 5.26 5.25 5.42 5.11 5.93
 4.63 5.51 5.07 5.02 5.81 6.44 5.56 6.69 5.86 5.2  5.42 5.21 6.47 5.03
 6.4  5.01 5.22 6.18 5.32 5.26 5.11 5.86 6.14 5.75 5.77 5.32 5.73 5.28
 6.37 5.52 5.19 5.58 5.85 5.33 5.03 6.34 5.75 5.06 4.85 5.36 5.15 5.9
 6.61 6.24 6.51 5.41 5.52 5.14 5.56 5.6  5.63 5.11 5.85 6.23 5.38 5.16
 5.85 5.4  5.53 6.53 5.19 5.91 5.96 5.59 6.38 5.09 5.39 5.7  5.48 4.98
 4.77 5.18 5.07 4.94 6.5  5.36 6.53 5.9  6.17 5.08 5.4  5.01 4.5  5.88
 5.47 6.23 5.14 6.57 5.68 5.89 6.69 5.36 5.61 6.02 5.13 6.32 5.72 5.03
 4.57 5

In [66]:
mae = mean_absolute_error(y_test, rfr_predictions)
mse = mean_squared_error(y_test, rfr_predictions)
r2 = r2_score(y_test, rfr_predictions)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R-squared (R²): {r2}")

Mean Absolute Error (MAE): 0.42384374999999996
Mean Squared Error (MSE): 0.30341593749999995
R-squared (R²): 0.5357104559243264
