# HISTORY

* **fixed acidity**: most acids involved with wine are fixed or nonvolatile (do not evaporate readily)
* **volatile acidity**: the amount of acetic acid in wine, which at too high of levels can lead to an unpleasant, vinegar taste
* **citric acid**: found in small quantities, citric acid can add 'freshness' and flavor to wines
* **residual sugar**: the amount of sugar remaining after fermentation stops, it's rare to find wines with less than 1 gram/liter
* **chlorides**: the amount of salt in the wine
* **free sulfur dioxide**: the free form of SO2 exists in equilibrium between molecular SO2 (as a dissolved gas) and bisulfite ion; it prevents microbial growth and the oxidation of wine
* **total sulfur dioxide**: amount of free and bound forms of SO2; in low concentrations, SO2 is mostly undetectable in wine, but at free SO2 concentrations over 50 ppm, SO2 becomes evident in the nose and taste of wine
* **density**: the density of wine is close to that of water depending on the percent alcohol and sugar content
* **pH**: describes how acidic or basic a wine is on a scale from 0 (very acidic) to 14 (very basic)
* **sulphates**: a wine additive which can contribute to sulfur dioxide gas (SO2) levels, which acts as an antimicrobial
* **alcohol**: the percent alcohol content of the wine
* **quality**: output variable (based on sensory data, score between 0 and 10)

# PROCESS

* #### PACKAGES AND LIBRARIES
* #### DATA
* #### DATA PROCESS
* #### EXPLORATORY DATA ANALYSIS
* #### CORRELATION - COVARIANCE - NORMALITY - HOMOGENEITY
* #### OUTLIER
* #### STANDARDIZATION FOR QUALITY
* #### VISUALIZATION
* #### ENCODE
* #### MODELING
* #### RANDOM FOREST PROCESS
* #### DECISION TREE PROCESS
* #### GRADIENT BOOST PROCESS

# PACKAGES AND LIBRARIES

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from warnings import filterwarnings
from mpl_toolkits.mplot3d import Axes3D
import statsmodels.api as sm
import missingno as msno
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.neighbors import LocalOutlierFactor
from scipy.stats import levene
from scipy.stats import shapiro
from scipy.stats.stats import pearsonr
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.preprocessing import scale
from sklearn.model_selection import ShuffleSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LinearRegression
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
import xgboost as xgb
from xgboost import XGBRegressor, XGBClassifier
from lightgbm import LGBMRegressor, LGBMClassifier
from catboost import CatBoostRegressor, CatBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn import tree
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_auc_score, roc_curve
from yellowbrick.cluster import KElbowVisualizer
from sklearn.cluster import KMeans

#### Warnings

In [None]:
# Suppress deprecation, future and user warnings for the rest of the notebook.
for _category in (DeprecationWarning, FutureWarning, UserWarning):
    filterwarnings("ignore", category=_category)

# DATA 

In [None]:
RedWine = pd.read_csv("../input/red-wine-quality-cortez-et-al-2009/winequality-red.csv")
# main

In [None]:
data = RedWine.copy()
# to protect main data

In [None]:
df = data.select_dtypes(include=["float64","int64","int32"])
# Numeric-only selection (float64 / int64 / int32 columns) kept as a precaution
# for the mathematical analysis, even though it is not strictly necessary.

# DATA PROCESS

#### Changing Columns' Name

In [None]:
# DataFrame.rename returns a new frame and leaves the original untouched, so the
# result has to be assigned back; the snake_case names are used by every later cell.
data = data.rename(columns={
    "fixed acidity": "fixed_acidity",
    "volatile acidity": "volatile_acidity",
    "citric acid": "citric_acid",
    "residual sugar": "residual_sugar",
    "free sulfur dioxide": "free_sulfur_dioxide",
    "total sulfur dioxide": "total_sulfur_dioxide",
})

#### Max Values

In [None]:
def _rows_at_max(column):
    """Return every row of ``data`` whose ``column`` equals that column's maximum."""
    return data[data[column] == data[column].max()]


# One frame per feature holding the row(s) where that feature peaks.
# (maxFreeAcidity holds the rows for the "fixed_acidity" column.)
maxFreeAcidity = _rows_at_max("fixed_acidity")
maxVolatileAcidity = _rows_at_max("volatile_acidity")
maxCitricAcid = _rows_at_max("citric_acid")
maxResidualSugar = _rows_at_max("residual_sugar")
maxFreeSulfurD = _rows_at_max("free_sulfur_dioxide")
maxTotalSulfurD = _rows_at_max("total_sulfur_dioxide")
maxChlorides = _rows_at_max("chlorides")
maxDensity = _rows_at_max("density")
maxpH = _rows_at_max("pH")
maxSulphates = _rows_at_max("sulphates")
maxAlcohol = _rows_at_max("alcohol")
maxQuality = _rows_at_max("quality")

#### Min Values

In [None]:
def _rows_at_min(column):
    """Return every row of ``data`` whose ``column`` equals that column's minimum."""
    return data[data[column] == data[column].min()]


# One frame per feature holding the row(s) where that feature is lowest.
# (minFreeAcidity holds the rows for the "fixed_acidity" column.)
minFreeAcidity = _rows_at_min("fixed_acidity")
minVolatileAcidity = _rows_at_min("volatile_acidity")
minCitricAcid = _rows_at_min("citric_acid")
minResidualSugar = _rows_at_min("residual_sugar")
minFreeSulfurD = _rows_at_min("free_sulfur_dioxide")
minTotalSulfurD = _rows_at_min("total_sulfur_dioxide")
minChlorides = _rows_at_min("chlorides")
minDensity = _rows_at_min("density")
minpH = _rows_at_min("pH")
minSulphates = _rows_at_min("sulphates")
minAlcohol = _rows_at_min("alcohol")
minQuality = _rows_at_min("quality")

#### Mean Values

In [None]:
def _rows_nearest_mean(column):
    """Return the row(s) of ``data`` whose ``column`` value lies closest to the column mean.

    An exact ``==`` comparison against a floating-point mean almost never matches
    any row and yields an empty frame, so the nearest observation(s) are selected
    instead. All rows tied for the smallest distance are returned.
    """
    distance = (data[column] - data[column].mean()).abs()
    return data[distance == distance.min()]


# One frame per feature holding the row(s) nearest to that feature's mean.
# (meanFreeAcidity holds the rows for the "fixed_acidity" column.)
meanFreeAcidity = _rows_nearest_mean("fixed_acidity")
meanVolatileAcidity = _rows_nearest_mean("volatile_acidity")
meanCitricAcid = _rows_nearest_mean("citric_acid")
meanResidualSugar = _rows_nearest_mean("residual_sugar")
meanFreeSulfurD = _rows_nearest_mean("free_sulfur_dioxide")
meanTotalSulfurD = _rows_nearest_mean("total_sulfur_dioxide")
meanChlorides = _rows_nearest_mean("chlorides")
meanDensity = _rows_nearest_mean("density")
meanpH = _rows_nearest_mean("pH")
meanSulphates = _rows_nearest_mean("sulphates")
meanAlcohol = _rows_nearest_mean("alcohol")
meanQuality = _rows_nearest_mean("quality")

# EXPLORATORY DATA ANALYSIS

In [None]:
print(data.shape)

In [None]:
print(data.columns)

In [None]:
print(data.info())

In [None]:
print(data.describe())

In [None]:
print(data.corr())

In [None]:
print(data.cov())

In [None]:
print("fixed_acidity\n")
print(data.groupby(["quality"])["fixed_acidity"].mean())

In [None]:
print("volatile_acidity\n")
print(data.groupby(["quality"])["volatile_acidity"].mean())

In [None]:
print("citric_acid\n")
print(data.groupby(["quality"])["citric_acid"].mean())

In [None]:
print("residual_sugar\n")
print(data.groupby(["quality"])["residual_sugar"].mean())

In [None]:
print("chlorides\n")
print(data.groupby(["quality"])["chlorides"].mean())

In [None]:
print("free_sulfur_dioxide\n")
print(data.groupby(["quality"])["free_sulfur_dioxide"].mean())

In [None]:
print("total_sulfur_dioxide\n")
print(data.groupby(["quality"])["total_sulfur_dioxide"].mean())

In [None]:
print("density\n")
print(data.groupby(["quality"])["density"].mean())

In [None]:
print("pH\n")
print(data.groupby(["quality"])["pH"].mean())

In [None]:
print("sulphates\n")
print(data.groupby(["quality"])["sulphates"].mean())

In [None]:
print("alcohol\n")
print(data.groupby(["quality"])["alcohol"].mean())

In [None]:
MaxList = [maxFreeAcidity,maxVolatileAcidity,maxCitricAcid,
              maxResidualSugar,maxFreeSulfurD,maxTotalSulfurD,
              maxChlorides,maxDensity,maxpH,maxSulphates,maxAlcohol]

MinList = [minFreeAcidity,minVolatileAcidity,minCitricAcid,
              minResidualSugar,minFreeSulfurD,minTotalSulfurD,
              minChlorides,minDensity,minpH,minSulphates,minAlcohol]

In [None]:
# For each feature's maximum-row frame, show the row labels and their quality values.
for max_rows in MaxList:
    print(max_rows.index)
    print("---" * 7)
    print("MAX: ", max_rows["quality"])
    print("---\n" * 5)

In [None]:
# For each feature's minimum-row frame, show the row labels and their quality values.
# The label reads "MIN" because these frames come from MinList, not MaxList.
for min_rows in MinList:
    print(min_rows.index)
    print("---" * 7)
    print("MIN: ", min_rows["quality"])
    print("---\n" * 5)

In [None]:
# Number of rows whose alcohol is at or below the column mean. Summing the boolean
# mask counts them directly; masking the whole frame and summing value_counts()
# would additionally drop any row containing a NaN in another column.
print((data["alcohol"] <= data["alcohol"].mean()).sum())

In [None]:
# Number of rows whose pH is at or below the column mean. Summing the boolean
# mask counts them directly; masking the whole frame and summing value_counts()
# would additionally drop any row containing a NaN in another column.
print((data["pH"] <= data["pH"].mean()).sum())

In [None]:
# Number of rows whose total sulfur dioxide is at or above the column mean. Summing
# the boolean mask counts them directly; masking the whole frame and summing
# value_counts() would additionally drop any row containing a NaN in another column.
print((data["total_sulfur_dioxide"] >= data["total_sulfur_dioxide"].mean()).sum())

In [None]:
# Number of rows whose residual sugar is at or above the column mean. Summing the
# boolean mask counts them directly; masking the whole frame and summing
# value_counts() would additionally drop any row containing a NaN in another column.
print((data["residual_sugar"] >= data["residual_sugar"].mean()).sum())

In [None]:
print(data["quality"].value_counts())

In [None]:
print(data.isnull().all())

In [None]:
print(data.isnull().sum())

In [None]:
print(data.duplicated().sum())

In [None]:
data = data.drop_duplicates(keep="first")

In [None]:
print(data.duplicated().sum())

# CORRELATION - COVARIANCE - NORMALITY - HOMOGENEITY

#### Correlation

In [None]:
corrPearson = data.corr(method="pearson")
corrSpearman = data.corr(method="spearman")

In [None]:
figure = plt.figure(figsize=(15,8))
sns.heatmap(corrPearson,annot=True,vmin=-1,center=0,vmax=1,cmap="PiYG",linewidths=2, linecolor='green')
plt.title("PEARSON")
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.heatmap(corrSpearman,annot=True,vmin=-1,center=0,vmax=1,cmap="PiYG",linewidths=2, linecolor='green')
plt.title("SPEARMAN")
plt.show()

#### Covariance

In [None]:
covariance = data.cov()

In [None]:
figure = plt.figure(figsize=(15,8))
# Heatmap of the covariance matrix computed with data.cov().
# NOTE(review): covariances are not bounded to [-1, 1]; vmin/vmax clip the colour
# scale, so large covariances all saturate to the same colour — confirm this is intended.
sns.heatmap(covariance,annot=True,vmin=-1,center=0,vmax=1,linewidths=2)
plt.title("COVARIANCE")
plt.show()

#### Normality

In [None]:
# Shapiro-Wilk test per column; prints the two values returned by shapiro()
# (test statistic, then p-value) with three decimals.
for column in data.columns:
    print(column)
    print("---" * 3)
    statistic, p_value = shapiro(data[column])
    print(f"{statistic:.3f} - {p_value:.3f}")
    print("---\n" * 3)

#### Homogeneity

In [None]:
# Levene test across all twelve columns; prints the two values returned by
# levene() (test statistic, then p-value) with four decimals.
levene_columns = [
    "total_sulfur_dioxide", "density",
    "sulphates", "pH",
    "alcohol", "quality", "free_sulfur_dioxide", "chlorides",
    "residual_sugar", "citric_acid", "volatile_acidity", "fixed_acidity",
]
levene_stat, levene_p = levene(*(data[name] for name in levene_columns))
print(f"{levene_stat:.4f} - {levene_p:.4f}")

# OUTLIER

In [None]:
DataForA = data.copy()
# to protect data

In [None]:
clf = LocalOutlierFactor()
clf.fit_predict(DataForA)

In [None]:
score = clf.negative_outlier_factor_

In [None]:
scoresorted = np.sort(score)

In [None]:
print(scoresorted[0:50])
# Inspect the 50 lowest outlier scores and look for the largest jump between
# consecutive values; the score at position 2 is then used as the threshold.

In [None]:
point = scoresorted[2]
print(point)
print("---"*10)
print(DataForA[score == point])

In [None]:
# Rows scoring strictly below the threshold are treated as outliers.
outlier = score < point
# Every remaining row is normal, including the row sitting exactly at the
# threshold; with a strict ">" that row belonged to neither mask and the
# normal-value count printed later came out one short.
normalvalues = score >= point

In [None]:
print(data[outlier])

In [None]:
print(data.loc[1079])

In [None]:
print(data.loc[1081])

In [None]:
# Drop every row flagged by the outlier mask instead of hard-coding index labels.
# The mask was computed on DataForA, an unmodified copy of data, so it lines up
# positionally with data's current index.
data = data.drop(index=data.index[outlier])

In [None]:
print(DataForA[normalvalues].value_counts().sum())

# STANDARDIZATION FOR QUALITY

In [None]:
def qualityfunc(x):
    """Label a quality score: "GOOD" when it is above 6.5, otherwise "BAD"."""
    return "GOOD" if x > 6.5 else "BAD"

In [None]:
# Replace each numeric quality score with its "GOOD"/"BAD" label.
data["quality"] = data["quality"].apply(qualityfunc)

In [None]:
print(data["quality"].value_counts())

# VISUALIZATION

In [None]:
dataVis = data.copy() # to protect data
dataVis["quality"] = pd.Categorical(dataVis["quality"])

In [None]:
data.hist(figsize=(20,20))
plt.show()

In [None]:
# pairplot is a figure-level seaborn function that creates its own figure, so no
# plt.figure() call precedes it (that would only leave an empty extra figure).
sns.pairplot(dataVis,hue="quality")
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.scatterplot(x="total_sulfur_dioxide",y="free_sulfur_dioxide",hue="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.scatterplot(x="pH",y="alcohol",hue="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.scatterplot(x="volatile_acidity",y="fixed_acidity",hue="quality",data=dataVis)
plt.show()


In [None]:
figure = plt.figure(figsize=(15,8))
sns.scatterplot(x="volatile_acidity",y="pH",data=dataVis, hue="quality",size="quality",legend=False,sizes=(10, 700))
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.barplot(x="pH",y="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.barplot(x="total_sulfur_dioxide",y="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.barplot(x="alcohol",y="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.barplot(x="fixed_acidity",y="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.barplot(x="chlorides",y="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.barplot(x="residual_sugar",y="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.lineplot(x="volatile_acidity",y="fixed_acidity",hue="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.lineplot(x="alcohol",y="residual_sugar",hue="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.lineplot(x="citric_acid",y="sulphates",hue="quality",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.boxplot(x="quality",y="sulphates",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.boxplot(x="quality",y="citric_acid",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.boxplot(x="quality",y="density",data=dataVis)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.histplot(
    dataVis,
    x="residual_sugar", hue="quality",
    multiple="stack",
    edgecolor=".3",
    linewidth=.5,
    log_scale=True,
)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.histplot(
    dataVis,
    x="density", hue="quality",
    multiple="stack",
    edgecolor=".3",
    linewidth=.5,
    log_scale=True,
)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.histplot(
    dataVis,
    x="sulphates", hue="quality",
    multiple="stack",
    edgecolor=".3",
    linewidth=.5,
    log_scale=True,
)
plt.show()

In [None]:
figure = plt.figure(figsize=(15,8))
sns.histplot(
    dataVis,
    x="pH", hue="quality",
    multiple="stack",
    edgecolor=".3",
    linewidth=.5,
    log_scale=True,
)
plt.show()

In [None]:
figure = plt.figure(figsize=(20,8))
# distplot is deprecated in seaborn; histplot with a KDE overlay and density
# normalisation draws the same histogram-plus-curve view for each class.
bad_citric = dataVis.loc[dataVis["quality"] == "BAD", "citric_acid"]
good_citric = dataVis.loc[dataVis["quality"] == "GOOD", "citric_acid"]
sns.histplot(x=bad_citric, kde=True, stat="density", color="black", label="BAD")
sns.histplot(x=good_citric, kde=True, stat="density", color="red", label="GOOD")

plt.title('quality', fontsize=10)
plt.legend()
plt.show()

In [None]:
figure = plt.figure(figsize=(20,8))
# distplot is deprecated in seaborn; histplot with a KDE overlay and density
# normalisation draws the same histogram-plus-curve view for each class.
bad_total_so2 = dataVis.loc[dataVis["quality"] == "BAD", "total_sulfur_dioxide"]
good_total_so2 = dataVis.loc[dataVis["quality"] == "GOOD", "total_sulfur_dioxide"]
sns.histplot(x=bad_total_so2, kde=True, stat="density", color="black", label="BAD")
sns.histplot(x=good_total_so2, kde=True, stat="density", color="red", label="GOOD")
plt.title('quality', fontsize=10)
plt.legend()
plt.show()

In [None]:
g = sns.JointGrid(data=data, x="sulphates", y="pH", hue="quality",space=0)
g.plot_joint(sns.kdeplot,cmap="rocket")
plt.show()

In [None]:
g = sns.JointGrid(data=data, x="pH", y="total_sulfur_dioxide", hue="quality", space=0)
g.plot_joint(sns.kdeplot,cmap="rocket")
plt.show()

In [None]:
data['quality'].value_counts().plot.pie(autopct='%1.1f%%',shadow=True,figsize=(15,8))
plt.show()

In [None]:
fig = plt.figure(figsize=(20,10))
# Create the 3D axes through the figure so they are actually attached to it;
# constructing Axes3D(fig) directly no longer adds the axes in current Matplotlib,
# which leaves the figure blank.
ax = fig.add_subplot(projection="3d")
ax.scatter(data["sulphates"],
           data["citric_acid"], data["alcohol"], c="red", s=80, alpha=0.3)
ax.set_xlabel("sulphates")
ax.set_ylabel("citric_acid")
ax.set_zlabel("alcohol")
plt.show()

# ENCODE

In [None]:
encode = LabelEncoder()

In [None]:
print(data["quality"].value_counts())
print("---"*20)
data["quality"] = encode.fit_transform(data["quality"])
print(data["quality"].value_counts())

# MODELING

#### X & Y 

In [None]:
x = data.drop("quality",axis=1)
y = data["quality"]

xTrain,xTest,yTrain,yTest = train_test_split(x,y,test_size=0.2,random_state=42)

#### Classifiers

In [None]:
lj = LogisticRegression(solver="liblinear").fit(xTrain,yTrain)
gnb = GaussianNB().fit(xTrain,yTrain)
knnc = KNeighborsClassifier().fit(xTrain,yTrain)
cartc = DecisionTreeClassifier(random_state=42).fit(xTrain,yTrain)
rfc = RandomForestClassifier(random_state=42,verbose=False).fit(xTrain,yTrain)
gbmc = GradientBoostingClassifier(verbose=False).fit(xTrain,yTrain)
xgbc = XGBClassifier().fit(xTrain,yTrain)
lgbmc = LGBMClassifier().fit(xTrain,yTrain)
catbc = CatBoostClassifier(verbose=False).fit(xTrain,yTrain)

In [None]:
modelsc = [lj,gnb,knnc,cartc,rfc,gbmc,xgbc,lgbmc,catbc]

#### Comparison

In [None]:
# Compare every fitted classifier on hold-out accuracy and on 10-fold CV metrics.
# cross_val_score clones and refits the estimator on the data it is given, so it is
# run on the training split; the test split stays a pure hold-out for "ACC".
# The default scorer for a classifier is accuracy (not R2), and the value printed
# last is the square root of the cross-validated MSE, hence the labels below.
for model in modelsc:
    name = model.__class__.__name__
    predict = model.predict(xTest)
    cv_accuracy = cross_val_score(model,xTrain,yTrain,cv=10,verbose=False).mean()
    cv_mse = -cross_val_score(model,xTrain,yTrain,cv=10,scoring="neg_mean_squared_error",verbose=False).mean()
    print(name + ": ")
    print("-" * 10)
    print("ACC-->",accuracy_score(yTest,predict))
    print("CV ACCURACY-->",cv_accuracy)
    print("ROOT MEAN SQUARED ERROR-->",np.sqrt(cv_mse))
    print("-" * 30)

#### ACCURACY Visualization

In [None]:
# Collect one row per model and build the frame once: DataFrame.append was removed
# in pandas 2.0. Cross-validation runs on the training split (cross_val_score refits
# clones of the model on whatever data it receives), and the default classifier
# scorer is accuracy, so the column and axis are labelled accordingly.
accuracy_rows = []
for model in modelsc:
    name = model.__class__.__name__
    cv_accuracy = cross_val_score(model,xTrain,yTrain,cv=10,verbose=False).mean()
    accuracy_rows.append({"MODELS": name, "CV_ACCURACY": cv_accuracy * 100})
r = pd.DataFrame(accuracy_rows, columns=["MODELS","CV_ACCURACY"])

figure = plt.figure(figsize=(20,8))
sns.barplot(x="CV_ACCURACY",y="MODELS",data=r,color="k")
plt.xlabel("CV ACCURACY (%)")
plt.ylabel("MODELS")
plt.xlim(0,100)
plt.title("MODEL ACCURACY COMPARISON")
plt.show()

#### ERROR Visualization

In [None]:
# Collect one row per model and build the frame once: DataFrame.append was removed
# in pandas 2.0. Cross-validation runs on the training split (cross_val_score refits
# clones of the model on whatever data it receives). The plotted value is the square
# root of the cross-validated mean squared error.
error_rows = []
for model in modelsc:
    name = model.__class__.__name__
    cv_mse = -cross_val_score(model,xTrain,yTrain,cv=10,scoring="neg_mean_squared_error").mean()
    error_rows.append({"MODELS": name, "error": np.sqrt(cv_mse)})
r = pd.DataFrame(error_rows, columns=["MODELS","error"])

figure = plt.figure(figsize=(20,8))
sns.barplot(x="error",y="MODELS",data=r,color="r")
plt.xlabel("ERROR")
plt.ylabel("MODELS")
plt.xlim(0,2)
plt.title("MODEL ERROR COMPARISON")
plt.show()

#### NEURAL NETWORK (MLP) - ADDITIONAL

In [None]:
scaler = StandardScaler().fit(xTrain,yTrain)
xRTrain = scaler.transform(xTrain)
xRTest = scaler.transform(xTest)

In [None]:
# Fit a multi-layer perceptron on the standardised training features.
mlpc = MLPClassifier().fit(xRTrain,yTrain)
predict = mlpc.predict(xRTest)

# 10-fold CV on the training split: cross_val_score refits clones of the model on the
# data it receives, so the test split is kept out of it. The default scorer for a
# classifier is accuracy; the second value is the root of the cross-validated MSE.
cv_accuracy = cross_val_score(mlpc,xRTrain,yTrain,cv=10).mean()
print(cv_accuracy)
cv_mse = -cross_val_score(mlpc,xRTrain,yTrain,cv=10,scoring="neg_mean_squared_error").mean()
print(np.sqrt(cv_mse))

# RANDOM FOREST PROCESS

#### Tuning

In [None]:
params = {"max_depth": [2, 5, 8, 10],
          "max_features": [2, 5, 8],
          "n_estimators": [10, 500, 1000],
          "min_samples_split": [2, 5, 10]}

In [None]:
cv = GridSearchCV(rfc,params,cv=10,verbose=False,n_jobs=-1).fit(xTrain,yTrain)
print(cv.best_params_)
print(cv.best_score_)

#### Importance

In [None]:
Importance = pd.DataFrame({"Importance": rfc.feature_importances_ * 100},
                          index=xTrain.columns)
Importance.sort_values(by="Importance",
                       axis=0,
                       ascending=True).plot(kind="barh", color="r")
plt.show()

# DECISION TREE PROCESS

#### Tuning

In [None]:
cartparams = {"min_samples_split": range(2, 100),
              "max_leaf_nodes": range(2, 10)}

In [None]:
# Exhaustive 10-fold grid search over cartparams. n_jobs=-1 runs the candidate fits
# in parallel, matching the random-forest search; the results are unchanged.
cartmodelcv = GridSearchCV(cartc, cartparams, cv=10, n_jobs=-1).fit(xTrain, yTrain)
print(cartmodelcv.best_params_)
print(cartmodelcv.best_score_)

#### Confusion Matrix Visualization

In [None]:
predictcart = cartc.predict(xTest)

In [None]:
figure = plt.figure(figsize=(15,8))
sns.heatmap(confusion_matrix(yTest,predictcart), annot=True,cmap="PiYG",linewidths=2, linecolor='black')
plt.show()

#### Tree Visualization

In [None]:
plt.figure(figsize=(20,40))
tree.plot_tree(cartc)

# GRADIENT BOOST PROCESS

#### Tuning

In [None]:
paramgbs = {"learning_rate": [0.001, 0.01, 0.1, 0.2],
          "max_depth": [3, 5, 8, 50, 100],"n_estimators": [200, 500, 1000, 2000],"subsample": [1, 0.5, 0.75]}

In [None]:
# Exhaustive 10-fold grid search over paramgbs (4 * 5 * 4 * 3 = 240 candidates,
# 2400 fits). n_jobs=-1 runs the fits in parallel, matching the random-forest
# search; the results are unchanged.
gbmcvmodel = GridSearchCV(gbmc, paramgbs, cv=10, verbose=False, n_jobs=-1).fit(xTrain, yTrain)
print(gbmcvmodel.best_params_)
print(gbmcvmodel.best_score_)

# Result recorded from an earlier run:
# process report --> "learning_rate": 0.001, "max_depth": 8,"n_estimators": 500,"subsample": 0.5
# best score --> 0.8990

#### Predict Probability

In [None]:
predictprob = gbmc.predict_proba(xTest)
print(predictprob)