In [61]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_wine
from sklearn.preprocessing import MinMaxScaler,PolynomialFeatures
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
#install catboost
!pip install catboost
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression , Ridge, Lasso, ElasticNet
from sklearn.metrics import r2_score, mean_squared_error
import warnings
warnings.filterwarnings('ignore')



In [2]:
data = load_wine().data
df = pd.DataFrame(data, columns=load_wine().feature_names)

In [3]:
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [4]:
#Divide to X,y
X = df.drop('alcohol' , axis = 1)
y = df['alcohol']

In [5]:
print(X.shape)
print(y.shape)

(178, 12)
(178,)


In [7]:
# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    shuffle=True,
                                                    test_size=0.2,
                                                    random_state=42)

In [8]:
#Scale  Features
mms = MinMaxScaler()
X_train = mms.fit_transform(X_train)
X_test = mms.transform(X_test)

1. Linear regression

In [13]:
regressor = LinearRegression()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))
print(np.sqrt(mean_squared_error(y_test, y_pred)))

0.6889364317843825
0.18571608706852405
0.43094789368150305


2. Ridge Regression

In [14]:
RR = Ridge(alpha=0.01)
RR.fit(X_train, y_train)
y_pred = RR.predict(X_test)
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))

0.6894425170284211
0.1854139360587317


In [52]:
# alpha Tunnug via RandomizedSearchCV
from scipy.stats import randint,uniform
# define the parameter space
param_dist = {'alpha': randint(0, 10)}

# create the model
ridge = Ridge()

# create the randomized search object
random_search = RandomizedSearchCV(ridge, param_distributions=param_dist, cv=5, n_iter=10)

# fit the randomized search object to the data
random_search.fit(X_train, y_train)

# print the best hyperparameters
print(random_search.best_params_)

# get the best model
best_model = random_search.best_estimator_

# predict on the test set
y_pred = best_model.predict(X_test)

# evaluate the model
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))


{'alpha': 1}
0.7043371117831323
0.17652132972690004


3. Elastic Net

In [16]:
#ElasticNet
EN = ElasticNet(alpha=0.01)
EN.fit(X_train, y_train)
y_pred = EN.predict(X_test)
print(r2_score(y_test, y_pred))

0.6995392097553832


In [51]:
# alpha Tunnug via RandomizedSearchCV
elasticnet = ElasticNet()
param_dist = {'alpha': randint(0, 10)}
random_search = RandomizedSearchCV(elasticnet, param_distributions=param_dist, cv=5, n_iter=10)
random_search.fit(X_train, y_train)
print(random_search.best_params_)
best_model = random_search.best_estimator_

# predict on the test set
y_pred = best_model.predict(X_test)

# evaluate the model
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))


{'alpha': 0}
0.6889364317843822
0.18571608706852422


4. Polynomial Regression

In [65]:
#Polynomial Regression
poly=PolynomialFeatures(degree=2)
X_train_poly=poly.fit_transform(X_train)
X_test_poly=poly.transform(X_test)
LR=LinearRegression()
LR.fit(X_train_poly,y_train)
y_pred = LR.predict(X_test_poly)
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))

-0.5174015328326627
0.9059430418226457


5. KNN Regressor

In [66]:
#KNN
n_neighbors = np.arange(1,10)
for n in n_neighbors:
  knnr = KNeighborsRegressor(n_neighbors=n)
  knnr.fit(X_train, y_train)
  y_pred = knnr.predict(X_test)
  print(f"n_neighbors {n} : {r2_score(y_test, y_pred)}")

n_neighbors 1 : 0.38123476268561374
n_neighbors 2 : 0.6603777096313268
n_neighbors 3 : 0.7447590875911653
n_neighbors 4 : 0.7554396501543632
n_neighbors 5 : 0.7535277161449876
n_neighbors 6 : 0.7470692402176181
n_neighbors 7 : 0.7673021154185298
n_neighbors 8 : 0.7560851277995818
n_neighbors 9 : 0.7187769320357478


6.Gradient Boosting Regression

In [67]:
# Gradient Boosting Regression
from sklearn.ensemble import GradientBoostingRegressor
gbr = GradientBoostingRegressor()
gbr.fit(X_train, y_train)
y_pred = gbr.predict(X_test)
print(r2_score(y_test, y_pred))

0.6733721876180208


7. Catboost

In [69]:
ct = CatBoostRegressor( verbose=0)
ct.fit(X_train, y_train)
y_pred = ct.predict(X_test)
print(r2_score(y_test, y_pred))

0.7140891256004653


The best Model that I have found is KNN with** N=7**

**r2 score = 0.7673021154185298**