In [None]:
import numpy as np


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from xgboost import plot_importance

In [None]:
from sklearn.preprocessing import StandardScaler as ss

In [None]:
from sklearn.decomposition import PCA

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [None]:
from xgboost.sklearn import XGBClassifier

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import auc, roc_curve
from sklearn.metrics import confusion_matrix

In [None]:
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization

In [None]:
import eli5
from eli5.sklearn import PermutationImportance

In [None]:
import time
import os
import gc
import random
from scipy.stats import uniform

In [None]:
# Switch the kernel's working directory to the dataset folder so the bare
# file name used below resolves.
# NOTE(review): the path is relative, so the outcome depends on the directory
# the kernel is in when this cell runs — confirm it is executed once from the
# expected starting directory.
os.chdir("../input/")

In [None]:
# Read the CSV from the (now current) dataset folder into a DataFrame.
data = pd.read_csv("winequalityN.csv")

In [None]:
data.head()


In [None]:
data.info()


In [None]:
data.shape

In [None]:
# Summary statistics for the numeric columns. The call parentheses matter:
# without them the cell only displays the bound method's repr.
data.describe()

In [None]:
data.isnull().values.any()


In [None]:
data.isnull().sum()


In [None]:
data.dropna(axis=0,inplace=True)

In [None]:
data.dropna(axis=0,inplace=True)

In [None]:
data.shape

In [None]:
# Pairwise relationships between the columns, coloured by the 'type' column.
# diag_kind accepts only 'auto', 'hist', 'kde' or None; 'scatter' is not a
# documented option, so let seaborn choose the marginal plot for the diagonal.
# The trailing semicolon hides the PairGrid repr in the cell output.
sns.pairplot(data, diag_kind='auto', hue='type');


In [None]:
X = data.iloc[ :, 1:14]

In [None]:
y=data.iloc[:,0]

In [None]:
y = y.map({'white':1, 'red' : 0})


In [None]:
# Hold out 35% of the rows for testing. The shuffle is seeded so the split,
# and every score computed from it, is the same after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.35,
    shuffle=True,
    random_state=42,
)

In [None]:
pipe_list=[('scale',ss()),
           ('pca',PCA()),
		   ('xgb',XGBClassifier(n_jobs=2,
							    silent=False))
							 ]

In [None]:
pipe=Pipeline(pipe_list)

In [None]:
# Exhaustive grid: two values for each of four settings (2*2*2*2 = 16
# candidates). Keys follow the '<step>__<parameter>' pattern.
grid_parameters = dict(
    xgb__learning_rate=[0.05, 0.07],
    xgb__n_estimators=[50, 100],
    xgb__max_depth=[3, 5],
    pca__n_components=[5, 7],
)

In [None]:
# 3-fold grid search over the pipeline. Both accuracy and ROC AUC are
# recorded; the final model is refit on the candidate with the best ROC AUC.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=grid_parameters,
    scoring=['accuracy', 'roc_auc'],
    refit='roc_auc',
    cv=3,
    n_jobs=3,
    verbose=1,
)

In [None]:
# Run the grid search on the training split and time it.
start = time.time()
grid_search.fit(X_train, y_train)
stop = time.time()

# Cell output: elapsed wall-clock time in minutes.
(stop - start) / 60

In [None]:
# Best cross-validated score found by the grid search (the search was
# configured with refit='roc_auc').
f"Best score: {grid_search.best_score_} "

In [None]:
# Hyperparameter combination that achieved that score.
f"Best parameter set {grid_search.best_params_}"

In [None]:
# Bar chart of the winning grid-search hyperparameter values. The dict views
# are materialised as lists so Matplotlib receives plain sequences, and the
# figure gets a title so it can be read on its own.
plt.bar(list(grid_search.best_params_.keys()),
        list(grid_search.best_params_.values()),
        color='r')
plt.title("Grid search: best parameters")
# Trailing semicolon hides the tick-object repr in the cell output.
plt.xticks(rotation=45);

In [None]:
y_pred=grid_search.predict(X_test)
y_pred

In [None]:
accuracy=accuracy_score(y_test,y_pred)

In [None]:
f"Accuracy is{accuracy*100}%"

In [None]:
# Search space for the randomized search. Keys follow the
# '<step>__<parameter>' pattern.
# - learning rate: drawn from a uniform distribution with loc=0, scale=1
# - the range() entries exclude their stop value, so n_estimators is 50..99,
#   max_depth is 3 or 4, and n_components is 5 or 6
parameter_random = dict(
    xgb__learning_rate=uniform(0, 1),
    xgb__n_estimators=range(50, 100),
    xgb__max_depth=range(3, 5),
    pca__n_components=range(5, 7),
)

In [None]:
# Randomized search: 27 sampled candidates, 3-fold CV, scored on accuracy and
# ROC AUC, refit on the best ROC AUC. The sampler is seeded so the same
# candidates are drawn on every run and the results are reproducible.
random_search = RandomizedSearchCV(
    pipe,
    param_distributions=parameter_random,
    cv=3,
    n_iter=27,
    n_jobs=3,
    verbose=1,
    scoring=['accuracy', 'roc_auc'],
    refit='roc_auc',
    random_state=42,
)

In [None]:
# Run the randomized search on the training split and time it.
start = time.time()
random_search.fit(X_train, y_train)
stop = time.time()

# Cell output: elapsed wall-clock time in minutes.
(stop - start) / 60

In [None]:
# Best cross-validated score found by the randomized search (the search was
# configured with refit='roc_auc').
f"Best score: {random_search.best_score_} "

In [None]:
# Hyperparameter combination that achieved that score.
f"Best parameter set {random_search.best_params_}"

In [None]:
y_pred=random_search.predict(X_test)
y_pred

In [None]:
accuracy=accuracy_score(y_test,y_pred)

In [None]:
f"Accuracy is{accuracy*100}%"

In [None]:
# Bar chart of the winning randomized-search hyperparameter values. The dict
# views are materialised as lists so Matplotlib receives plain sequences, and
# the figure gets a title so it can be read on its own.
plt.bar(list(random_search.best_params_.keys()),
        list(random_search.best_params_.values()),
        color='g')
plt.title("Randomized search: best parameters")
# Trailing semicolon hides the tick-object repr in the cell output.
plt.xticks(rotation=45);

In [None]:
# (lower, upper) pair for each hyperparameter.
# NOTE(review): presumably the bounds handed to the Bayesian optimiser — the
# consumer is not visible in this part of the notebook; confirm.
parameter_bo = dict(
    learning_rate=(0, 1),
    n_estimators=(50, 100),
    max_depth=(3, 5),
    n_components=(5, 7),
)