In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB 
import scikitplot as skplt
from sklearn.preprocessing import StandardScaler

# <font color = 'green'> Importing data </font>

In [None]:
# Read the raw patient records from the Kaggle input folder and preview them.
raw_csv_path = '../input/covid19-patient-precondition-dataset/covid.csv'
covid = pd.read_csv(raw_csv_path)
covid.head()

In [None]:
covid.info()

In [None]:
covid.isna().sum().sum()

In [None]:
covid.describe().T

# <font color = 'green'> Treating missing values </font>

In [None]:
# This notebook treats 97, 98 and 99 as "missing / not applicable" codes and
# turns them into NaN.  `age` is excluded on purpose: 97-99 are legitimate
# ages, and blanking them would silently drop the oldest patients at the
# later dropna().
# NOTE(review): assumes `age` is the only column where 97-99 are real values
# -- confirm against the dataset's data dictionary.
missing_codes = [97, 98, 99]
coded_columns = [column for column in covid.columns if column != 'age']
covid[coded_columns] = covid[coded_columns].replace(missing_codes, np.nan)

In [None]:
covid.describe().T

In [None]:
covid.isnull().sum()

In [None]:
# One cell per value: the two-colour map separates missing from present entries.
missing_mask = covid.isna()
heatmap_ax = sns.heatmap(missing_mask, cbar=False, cmap='viridis', yticklabels=False)
heatmap_ax.text(11, 0, "missing data with yellow", ha="center", va="bottom")

In [None]:
round((covid['intubed'].value_counts() / covid.shape[0] )*100, 2)

## <font color = 'red'> About 80 % of the values in this column are missing, so we will drop it </font>

In [None]:
round((covid['pregnancy'].value_counts() / covid.shape[0] )*100, 2)

## <font color = 'red'> About 51 % of the values in this column are missing, so we will drop it </font>

In [None]:
round((covid['contact_other_covid'].value_counts() / covid.shape[0] )*100, 2)

## <font color = 'blue'> About 32 % of the values in this column are missing, but it is a very important feature, so we will fill the gaps instead of dropping it </font>

In [None]:
# Forward-fill the gaps with the previous row's value.  The result is assigned
# back explicitly: calling fillna(..., inplace=True) on a column selection is
# deprecated and has no effect on `covid` under pandas copy-on-write, and the
# `method=` argument of fillna is deprecated in favour of ffill().
# Leading NaNs (before the first valid value) are left as they are.
covid['contact_other_covid'] = covid['contact_other_covid'].ffill()

In [None]:
round((covid['icu'].value_counts() / covid.shape[0] )*100, 2)

## <font color = 'red'> About 80 % of the values in this column are missing, so we will drop it </font>

In [None]:
# Remove the identifier and the three mostly-empty columns.
columns_to_discard = ['icu', 'id', 'intubed', 'pregnancy']
covid = covid.drop(columns=columns_to_discard)

In [None]:
covid

In [None]:
covid.isnull().sum()

In [None]:
round((covid['pneumonia'].value_counts() / covid.shape[0] )*100, 2)

In [None]:
round((covid['diabetes'].value_counts() / covid.shape[0] )*100, 2)

In [None]:
round((covid['copd'].value_counts() / covid.shape[0] )*100, 2)

## <font color = 'blue'> The proportion of remaining missing data is very small compared to the number of rows, so we will drop those rows </font>

In [None]:
# Keep only the rows that have a value in every remaining column.
covid = covid.dropna()

In [None]:
covid.isna().sum()

In [None]:
covid.info()

In [None]:
covid

In [None]:
# Same missing-value map as before, drawn again after the cleaning steps.
remaining_missing = covid.isna()
check_ax = sns.heatmap(remaining_missing, cbar=False, cmap='viridis', yticklabels=False)
check_ax.text(8, 0, "NO such missing data", ha="center", va="bottom")

# <font color = 'green'>Treating outliers </font>

In [None]:
covid.describe().T

## <font color = 'blue'> We can see that there are no outliers </font>

# <font color = 'green'>Label encoding</font>

In [None]:
covid.columns

In [None]:
# Keep only patients with a definitive result (covid_res 1 or 2).
# The copy is taken AFTER filtering so that `covid_coded` is an independent
# frame: later column assignments and in-place drops then neither raise
# SettingWithCopyWarning nor touch `covid`, and the rows that are discarded
# are never copied.
has_final_result = covid['covid_res'].isin([1, 2])
covid_coded = covid.loc[has_final_result].copy()

In [None]:
# One LabelEncoder per date column, kept in `les` so codes can be mapped back.
# NOTE(review): LabelEncoder numbers the values in sorted string order, which
# is chronological only if the date strings sort chronologically -- confirm
# the date format.
date_columns = ['entry_date', 'date_symptoms', 'date_died']
les = {column: LabelEncoder() for column in date_columns}
for column in date_columns:
    covid_coded[column] = les[column].fit_transform(covid_coded[column])


In [None]:
covid_coded

In [None]:
# The death date is not used when modelling the test result.
covid_coded = covid_coded.drop(columns='date_died')

In [None]:
covid_coded.head()

In [None]:
# Annotated correlation matrix of every column in the test-result dataset.
corr_fig, corr_ax = plt.subplots(figsize=(20, 15))
sns.heatmap(covid_coded.corr(), annot=True, cbar=True, cmap='viridis', ax=corr_ax)
corr_ax.set_title("coorelation between our features")

In [None]:
# List every pair of columns whose absolute correlation exceeds 0.3.
# The correlation matrix is computed ONCE; the original recomputed
# covid_coded.corr() several times per loop iteration, which is very slow on
# a large frame.
corr_matrix = covid_coded.corr()
corr_names = corr_matrix.columns

print("                    Good relations")
for row in range(len(corr_names)):
    # Only the lower triangle (col < row): each pair once, diagonal skipped.
    for col in range(row):
        strength = corr_matrix.iloc[row, col]
        if abs(strength) > 0.3 and strength != 1:
            print("(" + corr_names[row] + ") and (" + corr_names[col] + ")", end = "")
            print("         with coorelation : ", strength)
        

# <font color = 'green'> covid death only and its Label encoding </font>

In [None]:
# Independent copy of the cleaned data, used as the base of the death analysis.
covid_death_not_coded = covid.copy(deep = True)

In [None]:
# Restrict the death analysis to patients who tested positive (covid_res == 1).
# .copy() makes the filtered frame independent, so the label-based assignments
# in the following cells do not raise SettingWithCopyWarning.
tested_positive = covid_death_not_coded['covid_res'] == 1
covid_death_not_coded = covid_death_not_coded.loc[tested_positive].copy()

covid_death_not_coded

In [None]:
# '9999-99-99' is the placeholder for "no death date": turn the column into a
# two-valued flag in a single pass.
no_death_recorded = covid_death_not_coded['date_died'] == '9999-99-99'
covid_death_not_coded['date_died'] = np.where(no_death_recorded, 'no die', 'yes die')


In [None]:
covid_death_not_coded

In [None]:
# Move the flag to a new last column called 'Died or not'
# (pop removes 'date_died' and returns its values).
covid_death_not_coded['Died or not'] = covid_death_not_coded.pop('date_died')

covid_death_not_coded

In [None]:
# Separate copy that will be label-encoded; the un-encoded frame is kept for plots.
covid_death = covid_death_not_coded.copy(deep = True)

In [None]:
# Encode the two date columns and the death flag as integers.
# LabelEncoder numbers classes in sorted order, so 'no die' -> 0, 'yes die' -> 1.
encoded_columns = ['entry_date', 'date_symptoms', 'Died or not']
les = {column: LabelEncoder() for column in encoded_columns}
for column in encoded_columns:
    covid_death[column] = les[column].fit_transform(covid_death[column])


In [None]:
covid_death

In [None]:
# Annotated correlation matrix of every column in the death dataset.
death_corr_fig, death_corr_ax = plt.subplots(figsize=(20, 15))
sns.heatmap(covid_death.corr(), annot=True, cbar=True, cmap='viridis', ax=death_corr_ax)
death_corr_ax.set_title("coorelation between our features")

In [None]:
# List every pair of columns in the death dataset whose absolute correlation
# exceeds 0.3.  The matrix is computed ONCE instead of being recomputed
# several times on every loop iteration.
death_corr = covid_death.corr()
death_corr_names = death_corr.columns

print("                    Good relations")
for row in range(len(death_corr_names)):
    # Lower triangle only: each pair is reported once, the diagonal is skipped.
    for col in range(row):
        strength = death_corr.iloc[row, col]
        if abs(strength) > 0.3 and strength != 1:
            print("(" + death_corr_names[row] + ") and (" + death_corr_names[col] + ")", end = "")
            print("         with coorelation : ", strength)
        

# <font color = 'green'> Standardization (min-max scaling to [0, 1]) </font>

In [None]:
covid_coded.columns

In [None]:
covid_death.columns

In [None]:
# Min-max scale the three wide-range columns to [0, 1] in both datasets.
columns_to_scale = ['entry_date', 'date_symptoms', 'age']
for frame in (covid_coded, covid_death):
    for column in columns_to_scale:
        lowest = frame[column].min()
        highest = frame[column].max()
        frame[column] = (frame[column] - lowest) / (highest - lowest)

In [None]:
#scale = StandardScaler()
#scaled = StandardScaler()

#covid_coded = pd.DataFrame(scale.fit_transform(covid_coded[['entry_date', 'date_symptoms', 'age']]), columns = ['sex', 'patient_type', 'entry_date', 'date_symptoms', 'pneumonia','age', 'diabetes', 'copd', 'asthma', 'inmsupr', 'hypertension','other_disease', 'cardiovascular', 'obesity', 'renal_chronic','tobacco', 'contact_other_covid', 'covid_res'])
#covid_death = pd.DataFrame(scaled.fit_transform(covid_death[['entry_date', 'date_symptoms', 'age']]), columns = ['sex', 'patient_type', 'entry_date', 'date_symptoms', 'date_died','pneumonia', 'age', 'diabetes', 'copd', 'asthma', 'inmsupr','hypertension', 'other_disease', 'cardiovascular', 'obesity','renal_chronic', 'tobacco', 'contact_other_covid', 'covid_res'])

In [None]:
covid_coded

In [None]:
covid_death

In [None]:
covid_coded.describe().T

In [None]:
covid_death.describe().T

# <font color = 'green'> Data visualization </font>

## Covid test result 

In [None]:
# Share of each test result among all cleaned records.
# value_counts() orders categories by frequency, so the hard-coded labels,
# colours and explode offsets could land on the wrong class.  sort_index()
# fixes the order to the result codes 1, 2, 3 so they line up with the labels
# (1 = positive and 2 = negative, as used by the other plots in this notebook;
# 3 is assumed to be "awaiting result" -- TODO confirm).
explode = (0.1, 0.0, 0.2)
result_labels = ['positive test', 'negative test', 'waiting test result']
result_share = round(covid['covid_res'].value_counts().sort_index() / covid.shape[0] * 100, 2)

fig, ax = plt.subplots(figsize=(12, 8))
result_share.plot.pie(
    ax=ax,  # draw on the axes that receives the title below
    colors=['green', 'orange', 'blue'],
    autopct="%1.2f%%",
    labels=result_labels,
    explode=explode,
    shadow=True,
    startangle=90,
    wedgeprops={'linewidth': 1, 'edgecolor': "brown"},
    textprops=dict(color="black"),
    legend=True,
)
ax.set_title("covid test results")

In [None]:
# Horizontal box plot of (scaled) age for each test result.
box_fig, box_ax = plt.subplots(figsize=(12, 4))
sns.boxplot(data=covid_coded, x='age', y='covid_res', orient='h', ax=box_ax)

### So we conclude that older people are more likely to test positive for the coronavirus

In [None]:
# Count of records per contact status, split by test result.
contact_fig, contact_ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=covid, x='contact_other_covid', hue='covid_res', palette='icefire', ax=contact_ax)
contact_ax.get_legend().set_title('covid result')
plt.show()

### We can conclude that contact with people who have COVID-19 increases the chance of testing positive, so anyone who tests positive should stay in quarantine

In [None]:
# Pre-existing condition columns plotted in the grids that follow.
features = [
    'pneumonia',
    'diabetes',
    'copd',
    'asthma',
    'inmsupr',
    'hypertension',
    'other_disease',
    'cardiovascular',
    'obesity',
    'renal_chronic',
    'tobacco',
]
# Show each feature with its position in the list.
list(zip(range(len(features)), features))


In [None]:
# One count plot per pre-existing condition, split by test result.
# * `x=` is passed by keyword: current seaborn releases treat the first
#   positional argument as `data`, so the positional form raises a TypeError.
# * The grid is sized from the number of features instead of a fixed 6x3 that
#   left a third of the figure empty.
# * tight_layout() runs once after all panels exist, not on every iteration.
grid_columns = 3
grid_rows = -(-len(features) // grid_columns)  # ceiling division

plt.figure(figsize=(15,15))
for position, feature_name in enumerate(features, start=1):
    plt.subplot(grid_rows, grid_columns, position)
    sns.countplot(x=feature_name, hue='covid_res', data=covid_coded, palette='viridis')
    plt.xticks(rotation=45, size=8)
plt.tight_layout()

### We can notice a few things from the previous graphs:
### - People who suffer from pneumonia usually test positive for COVID-19
### - Diabetic people are at high risk of testing positive for COVID-19
### - A good percentage of people with hypertension test positive for COVID-19
### - Some tobacco users test positive for COVID-19
### - People who suffer from obesity are at risk of testing positive for COVID-19

In [None]:
# Split by sex code (1 -> `female`, 2 -> `male`), then by test result
# (1 -> positive, 2 -> negative).
female = covid_coded.loc[covid_coded['sex'].eq(1)]
male = covid_coded.loc[covid_coded['sex'].eq(2)]

female_positive = female.loc[female['covid_res'].eq(1)]
female_negative = female.loc[female['covid_res'].eq(2)]

male_positive = male.loc[male['covid_res'].eq(1)]
male_negative = male.loc[male['covid_res'].eq(2)]

In [None]:
# Age density curves in a 3x2 grid: by sex, by test result, and every
# sex/result combination.
# sns.distplot is deprecated; with hist=False it only drew a KDE curve, so
# sns.kdeplot (already used for the other panels) replaces it.  The six
# copy-pasted panels are described as data and drawn by one loop.
positive_age = covid_coded[covid_coded['covid_res'] == 1]['age']
negative_age = covid_coded[covid_coded['covid_res'] == 2]['age']

# Each panel is a list of (series, legend label, colour) curves.
density_panels = [
    [(female['age'], 'female', 'red'),
     (male['age'], '  male', 'yellow')],
    [(positive_age, 'positive covid', 'blue'),
     (negative_age, 'negative covid', 'orange')],
    [(female_positive['age'], 'female: positive covid', 'red'),
     (female_negative['age'], 'female: negative covid', 'green')],
    [(male_positive['age'], 'male: positive covid', 'red'),
     (male_negative['age'], 'male: negative covid', 'green')],
    [(female_positive['age'], 'positive covid: female', 'orange'),
     (male_positive['age'], 'positive covid: male', 'blue')],
    [(female_negative['age'], 'negative covid: female', 'orange'),
     (male_negative['age'], 'negative covid: male', 'blue')],
]

density_fig, density_axes = plt.subplots(3, 2, figsize=(20, 20))
for panel_ax, curves in zip(density_axes.flat, density_panels):
    for ages, curve_label, curve_color in curves:
        sns.kdeplot(ages, label=curve_label, color=curve_color, ax=panel_ax)
    panel_ax.legend()

density_fig.tight_layout()

### We can notice that gender doesn't have a great effect on the test result 

## Covid death

In [None]:
# Count of COVID-positive patients per age, split by outcome.
# `x` is passed by keyword: current seaborn releases do not accept the
# plotted variable as a positional argument.
sns.catplot(x='age', data=covid_death_not_coded, hue='Died or not', kind='count', palette='bright', aspect=3, height=9)


## We can see that deaths increase as age increases

In [None]:
# Outcome counts per sex code, using the label-encoded death frame.
sns.countplot(data=covid_death, x='sex', hue='Died or not', palette='bright')

In [None]:
# Share of COVID-positive patients who died vs. survived.
# * The percentages are taken over the positive-patient frame itself; the
#   original divided by covid.shape[0] (all patients), which is a different
#   population.
# * reindex() pins the category order to ('no die', 'yes die') so the labels
#   and colours cannot swap if the frequency ordering of value_counts() changes.
explode = (0.1, 0.0)
outcome_counts = covid_death_not_coded['Died or not'].value_counts().reindex(['no die', 'yes die'], fill_value=0)
outcome_share = round(outcome_counts / covid_death_not_coded.shape[0] * 100, 2)

fig, ax = plt.subplots(figsize=(12, 8))
outcome_share.plot.pie(
    ax=ax,  # draw on the axes that receives the title below
    colors=['blue', 'red'],
    autopct="%1.2f%%",
    labels=['Didnot died', 'Died'],
    explode=explode,
    shadow=True,
    startangle=90,
    wedgeprops={'linewidth': 1, 'edgecolor': "brown"},
    textprops=dict(color="black"),
    legend=True,
)
ax.set_title("Death percentage")

## We can see that more males have died from COVID-19 than females

In [None]:
# Scaled symptom-onset date (x) against scaled age (y) for positive patients.
plt.scatter(x=covid_death['date_symptoms'], y=covid_death['age'])

In [None]:
# Mean patient_type for each encoded outcome.
# `x` and `y` are passed by keyword: current seaborn releases do not accept
# the plotted variables as positional arguments.
sns.lineplot(x='Died or not', y='patient_type', data=covid_death)

In [None]:
# One count plot per pre-existing condition, split by outcome.
# * `x=` is passed by keyword: current seaborn releases treat the first
#   positional argument as `data`, so the positional form raises a TypeError.
# * The grid is sized from the number of features instead of a fixed 6x3 that
#   left a third of the figure empty.
# * tight_layout() runs once after all panels exist, not on every iteration.
grid_columns = 3
grid_rows = -(-len(features) // grid_columns)  # ceiling division

plt.figure(figsize=(15,15))
for position, feature_name in enumerate(features, start=1):
    plt.subplot(grid_rows, grid_columns, position)
    sns.countplot(x=feature_name, hue='Died or not', data=covid_death, palette='icefire')
    plt.xticks(rotation=45, size=8)
plt.tight_layout()

In [None]:
# Share of positive patients per encoded outcome.
# sort_index() fixes the order to the codes 0, 1 so it matches the hard-coded
# labels; value_counts() alone orders by frequency and could swap them.
# (The LabelEncoder applied to this column sorts its classes, so 0 is
# 'no die' and 1 is 'yes die'.)
death_share = round(covid_death['Died or not'].value_counts().sort_index() / covid_death.shape[0] * 100 , 2)
death_share.plot.pie(colors = ['blue', 'red'] , autopct = "%1.2f%%", figsize = (8,6), labels = ['Alive', 'Dead'])

# <font color = 'green' >Dropping gender from the data sets</font>

In [None]:
# Remove the sex column from both modelling frames.
covid_coded = covid_coded.drop(columns='sex')
covid_death = covid_death.drop(columns='sex')

# <font color = 'green'> Choosing important features for modeling with the PCA method (Principal Component Analysis) </font>

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Set the target column aside (it keeps covid_coded's row index) before PCA.
covid_res = covid_coded['covid_res']

In [None]:
# Leave only the predictor columns in the frame that is fed to PCA.
covid_coded = covid_coded.drop(columns='covid_res')
covid_coded

In [None]:
# Project the predictors onto their first 10 principal components.
# fit_transform returns a bare NumPy array, so the original row index is
# passed back explicitly.  Without it the new frame gets a fresh 0..n-1 index
# while the saved target series keeps the (gappy) original one; re-attaching
# the target then aligns on the index, pairs rows with the wrong labels and
# fills the rest with NaN.
pca = PCA(n_components = 10)
covid_coded = pd.DataFrame(pca.fit_transform(covid_coded), index = covid_coded.index)

In [None]:
# Put the target back next to the components (pandas aligns it on the row index).
covid_coded = covid_coded.assign(covid_res=covid_res)
covid_coded

In [None]:
print(pca.explained_variance_ratio_)

In [None]:
print(pca.components_)

In [None]:
# Set the encoded outcome column aside (it keeps covid_death's row index) before PCA.
date_died = covid_death['Died or not']

In [None]:
# Leave only the predictor columns in the frame that is fed to PCA.
covid_death = covid_death.drop(columns="Died or not")
covid_death

In [None]:
# Project the death-analysis predictors onto their first 10 principal components.
# The original row index is passed back explicitly: fit_transform returns a
# bare NumPy array, and a fresh 0..n-1 index would make the saved outcome
# series align to the wrong rows (or to NaN) when it is re-attached.
pcad = PCA(n_components = 10)
covid_death = pd.DataFrame(pcad.fit_transform(covid_death), index = covid_death.index)

In [None]:
# Put the outcome back next to the components (pandas aligns it on the row index).
covid_death = covid_death.assign(**{'Died or not': date_died})

In [None]:
covid_death

In [None]:
print(pcad.explained_variance_ratio_)

In [None]:
print(pcad.components_)

In [None]:
covid_coded

In [None]:
covid_coded['covid_res'].value_counts()

In [None]:
covid_death['Died or not'].value_counts()

In [None]:
# Discard any row that contains a NaN in either modelling frame.
covid_coded = covid_coded.dropna()
covid_death = covid_death.dropna()

In [None]:
covid_death

In [None]:
covid_death['Died or not'].value_counts() / covid_death.shape[0] *100

# <font color = 'green'>Applying models</font>

In [None]:
# 75 / 25 train-test split for both tasks, with the same fixed seed.
covid_inputs = covid_coded.drop(columns='covid_res').values
covid_labels = covid_coded['covid_res'].values
x_train, x_test, y_train, y_test = train_test_split(
    covid_inputs, covid_labels, test_size=0.25, random_state=12
)

death_inputs = covid_death.drop(columns='Died or not').values
death_labels = covid_death['Died or not'].values
x_traind, x_testd, y_traind, y_testd = train_test_split(
    death_inputs, death_labels, test_size=0.25, random_state=12
)

# <font color = 'green'>Logistic Regression</font>

In [None]:
# Test accuracies collected per model: `accs` for the covid test-result task,
# `accsd` for the death task.
accs, accsd = list(), list()

In [None]:
# One logistic-regression model per task, both with default settings.
log, logd = LogisticRegression(), LogisticRegression()

log.fit(x_train, y_train)
logd.fit(x_traind, y_traind)

In [None]:
# Predict both test sets, then show the confusion matrix for the covid task.
test_res, test_resd = log.predict(x_test), logd.predict(x_testd)

print("     our confusion matrix for covid")
metrics.confusion_matrix(y_true=y_test, y_pred=test_res)

In [None]:
# Confusion matrix of the logistic-regression predictions for the death task.
print("     our confusion matrix for covid_death")
metrics.confusion_matrix(y_true=y_testd, y_pred=test_resd)

In [None]:
# Plot both logistic-regression confusion matrices, one colour map per task.
logistic_results = (
    (y_test, test_res, "Greens"),
    (y_testd, test_resd, "OrRd"),
)
for actual, predicted, colour_map in logistic_results:
    skplt.metrics.plot_confusion_matrix(actual, predicted, cmap=colour_map)

In [None]:
# Accuracy, recall and precision of logistic regression: first the covid
# task, then (after a separator line) the death task.  Each accuracy is also
# appended to that task's list.
logistic_runs = (
    (y_test, test_res, accs),
    (y_testd, test_resd, accsd),
)
for run_number, (actual, predicted, accuracy_log) in enumerate(logistic_runs):
    if run_number > 0:
        print("____________________________________________________________________")
    accuracy = metrics.accuracy_score(actual, predicted)
    print("Accuracy score for logistic regression is : ", accuracy)
    accuracy_log.append(accuracy)
    print("Recall score for logistic regression is : ", metrics.recall_score(actual, predicted))
    print("Precision score for logistic regression is : ", metrics.precision_score(actual, predicted))

In [None]:
# Train score, test score and the absolute gap between them, per task.
covid_train_score = log.score(x_train, y_train)
covid_test_score = log.score(x_test, y_test)
print(covid_train_score)
print(covid_test_score)
print("Error for logistic regression equals : "+ str(round(covid_test_score, 4)) + " - " + str(round(covid_train_score, 4)) + " = " + str(abs(round(covid_test_score - covid_train_score, 4))))

print("____________________________________________________________________\n")

death_train_score = logd.score(x_traind, y_traind)
death_test_score = logd.score(x_testd, y_testd)
print(death_train_score)
print(death_test_score)
print("Error for logistic regression equals : "+ str(round(death_test_score, 4)) + " - " + str(round(death_train_score, 4)) + " = " + str(abs(round(death_test_score - death_train_score, 4))))

In [None]:
# Logistic-regression coefficients for the covid task, printed and plotted.
# The model was trained on PCA output, so each index is a principal
# component, not an original column.
importance = log.coef_[0]

for position, weight in enumerate(importance):
    print('Feature: %0d, Score: %.5f' % (position, weight))

plt.bar(np.arange(len(importance)), importance)
plt.show()

In [None]:
# Logistic-regression coefficients for the death task, printed and plotted.
# The model was trained on PCA output, so each index is a principal
# component, not an original column.
importance = logd.coef_[0]

for position, weight in enumerate(importance):
    print('Feature: %0d, Score: %.5f' % (position, weight))

plt.bar(np.arange(len(importance)), importance)
plt.show()

# <font color = 'green'> Applying Decision Tree, Random Forest, XGBoost, AdaBoost and Naive Bayes on covid and covid_death </font>

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# Candidate models compared on both tasks.
# random_state is now set on every estimator that uses randomness (only
# XGBoost had one), so repeated runs give the same accuracies and the
# comparison between models is reproducible.
# NOTE(review): recent xgboost releases expect class labels encoded as
# 0..n_classes-1, while the covid target here is 1/2 -- confirm the installed
# version accepts it.
classifiers = [
    DecisionTreeClassifier(max_depth = 7, max_features = 4, random_state = 42),
    RandomForestClassifier(n_estimators = 10, max_depth = 6, max_features = 4, random_state = 42),
    XGBClassifier(objective = 'binary:logistic', n_estimators = 10, random_state = 42, learning_rate = 0.5, max_depth = 5),
    AdaBoostClassifier(n_estimators = 50, learning_rate = 0.4, random_state = 42),
    GaussianNB(),
]

In [None]:
# Fit every candidate model on the covid test-result task and report it.
# Changes from the original loop:
# * the inner loop no longer reuses the outer loop variable `i`;
# * models without feature importances are detected with hasattr() instead of
#   comparing str(classifier) to 'GaussianNB()';
# * the train and test scores are labelled (both used to print under the same
#   "score is" text);
# * the test accuracy is computed once and reused (a classifier's .score() is
#   accuracy on the given data).
for classifier in classifiers :
    classifier.fit(x_train, y_train)
    res = classifier.predict(x_test)
    model_name = str(classifier)

    test_accuracy = metrics.accuracy_score(y_test, res)
    print("Accuracy score for " + model_name + " is : ", test_accuracy)
    accs.append(test_accuracy)
    print(model_name + " score for train is : ", classifier.score(x_train, y_train))
    print(model_name + " score for test is : ", test_accuracy)

    skplt.metrics.plot_confusion_matrix(y_test, res, cmap = "Greens")
    plt.show()

    # Tree-based and boosted models expose feature_importances_; GaussianNB does not.
    if hasattr(classifier, 'feature_importances_') :
        importance = classifier.feature_importances_

        for feature_index, score in enumerate(importance):
            print('Feature: %0d, Score: %.5f' % (feature_index, score))

        plt.bar(range(len(importance)), importance)
        plt.show()
    print("\n____________________________________________________________\n")

In [None]:
# results for covid death
# Fit every candidate model on the death task and report it.
# Changes from the original loop:
# * the inner loop no longer reuses the outer loop variable `i`;
# * models without feature importances are detected with hasattr() instead of
#   comparing str(classifier) to 'GaussianNB()';
# * the test accuracy is computed once and reused (a classifier's .score() is
#   accuracy on the given data).
for classifier in classifiers :
    classifier.fit(x_traind, y_traind)
    resd = classifier.predict(x_testd)
    model_name = str(classifier)

    test_accuracy = metrics.accuracy_score(y_testd, resd)
    print("Accuracy score for " + model_name + " is : ", test_accuracy)
    accsd.append(test_accuracy)
    print(model_name + "score for train is : ", classifier.score(x_traind, y_traind))
    print(model_name + "score for test is : ", test_accuracy)

    skplt.metrics.plot_confusion_matrix(y_testd, resd, cmap = "OrRd")
    plt.show()

    # Tree-based and boosted models expose feature_importances_; GaussianNB does not.
    if hasattr(classifier, 'feature_importances_') :
        importance = classifier.feature_importances_

        for feature_index, score in enumerate(importance):
            print('Feature: %0d, Score: %.5f' % (feature_index, score))

        plt.bar(range(len(importance)), importance)
        plt.show()
    print("\n____________________________________________________________\n")

# <font color= 'green'> Applying Voting classifier on covid and covid death </font>

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
# Members of the hard-voting ensemble (majority vote of predicted classes).
# AdaBoost and the random forest now get a fixed random_state, like XGBoost
# already had, so the ensemble's scores are reproducible between runs.
ada = AdaBoostClassifier(n_estimators = 10, learning_rate = 0.4, random_state = 42)
rnf = RandomForestClassifier(n_estimators = 10, max_depth = 6, max_features = 10, random_state = 42)
XGb = XGBClassifier(objective = 'binary:logistic', n_estimators = 10, random_state = 42, learning_rate = 0.5, max_depth = 5)

vot = VotingClassifier(estimators = [('ada', ada), ('XGb', XGb), ('rnf', rnf)], voting = 'hard')

In [None]:
# Train the ensemble on the covid task, predict the test set, plot the matrix.
pred_test6 = vot.fit(x_train, y_train).predict(x_test)
skplt.metrics.plot_confusion_matrix(y_true=y_test, y_pred=pred_test6, cmap="Greens")

In [None]:
# Test accuracy (also recorded in `accs`), then train and test scores.
voting_accuracy = metrics.accuracy_score(y_test, pred_test6)
print("Accuracy score for  Voting Classifier is : ", voting_accuracy)
accs.append(voting_accuracy)
for inputs, labels in ((x_train, y_train), (x_test, y_test)):
    print("Voting Classifier score is : ", vot.score(inputs, labels))

In [None]:
# Second hard-voting ensemble for the death task, built from the same three members.
death_voters = [('ada', ada), ('XGb', XGb), ('rnf', rnf)]
votd = VotingClassifier(estimators=death_voters, voting='hard')

In [None]:
# Train the ensemble on the death task, predict the test set, plot the matrix.
pred_testd6 = votd.fit(x_traind, y_traind).predict(x_testd)
skplt.metrics.plot_confusion_matrix(y_true=y_testd, y_pred=pred_testd6, cmap="OrRd")

In [None]:
# Test accuracy (also recorded in `accsd`), then train and test scores.
voting_accuracy_death = metrics.accuracy_score(y_testd, pred_testd6)
print("Accuracy score for  Voting Classifier is : ", voting_accuracy_death)
accsd.append(voting_accuracy_death)
for inputs, labels in ((x_traind, y_traind), (x_testd, y_testd)):
    print("Voting Classifier score is : ", votd.score(inputs, labels))

# <font color = 'blue'>We can see that XGboost is best for covid and covid death </font>

In [None]:
# Display names for the accuracy charts, one per entry of the accuracy lists
# and in the order those lists were filled.
algorithm_names = [
    'Logistic Regression',
    'Decision tree Classifier',
    'Random Forest Classifier',
    'XGboost',
    'Adaboost',
    'GaussianNB',
    'Voting Classifier',
]


In [None]:
algorithm_names

In [None]:
# Bar chart of test accuracy (in percent) per algorithm for the covid task.
accs = np.array(accs)
algorithm_names = np.array(algorithm_names)
accuracy_percent = accs * 100

covid_bar_fig, covid_bar_ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=algorithm_names, y=accuracy_percent, estimator=np.median, palette='Greens', ax=covid_bar_ax)
covid_bar_ax.set_xlabel('Algorithm Name')
covid_bar_ax.set_ylabel('Accuracies')
plt.xticks(rotation=45)

In [None]:
# Bar chart of test accuracy (in percent) per algorithm for the death task.
accsd = np.array(accsd)
death_accuracy_percent = accsd * 100

death_bar_fig, death_bar_ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=algorithm_names, y=death_accuracy_percent, estimator=np.median, palette='Reds', ax=death_bar_ax)
death_bar_ax.set_xlabel('Algorithm Name')
death_bar_ax.set_ylabel('Accuracies')
plt.xticks(rotation=45)

In [None]:
accs

# <font color= 'green'> Applying a pipeline to XGBoost </font>

In [None]:
from sklearn.pipeline import Pipeline

In [None]:
# Two-step pipeline: a PCA with default settings, then the same XGBoost
# configuration used elsewhere in this notebook.
xgb_step = XGBClassifier(
    objective='binary:logistic',
    n_estimators=10,
    random_state=42,
    learning_rate=0.5,
    max_depth=5,
)
pipe = Pipeline(steps=[('reducer', PCA()), ('classifier', xgb_step)])

In [None]:
# Fit the PCA + XGBoost pipeline on the covid training split.
pipe.fit(x_train, y_train)

In [None]:
# Pipeline predictions for the covid test split.
resp = pipe.predict(x_test)

In [None]:
skplt.metrics.plot_confusion_matrix(y_test, resp)

In [None]:
pipe.steps[0][1].explained_variance_ratio_.cumsum()

In [None]:
# Test accuracy of the pipeline, followed by its train and test scores.
pipe_accuracy = metrics.accuracy_score(y_true=y_test, y_pred=resp)
print("Accuracy score for  pipe is : ", pipe_accuracy)
for inputs, labels in ((x_train, y_train), (x_test, y_test)):
    print(pipe.score(inputs, labels))

# <font color= 'green'> Applying Grid search on XGboost </font>

In [None]:
#from sklearn.model_selection import GridSearchCV

In [None]:
#param_grid = {
 #   'max_depth' : [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
  #  'n_estimators' : [50, 60, 70, 80, 90, 100, 110]
   # }

In [None]:
#XGB2 = XGBClassifier(objective = 'binary:logistic', n_estimators = 10, random_state = 42, learning_rate = 0.5, max_depth = 5)


In [None]:
#grid = GridSearchCV(estimator = XGB2, param_grid = param_grid, n_jobs = -1, verbose = 1, cv =3)
#grid_result = grid.fit(x_train, y_train)

In [None]:
#print("best : " + str(grid_result.best_score_) + " using : " + str(grid_result.best_params_))

In [None]:
#model = grid_result.best_estimator_
#model

In [None]:
#model_pred = model.predict(x_test)
#print("The accuracy : ", metrics.accuracy_score(y_test, model_pred))
#print(model.score(x_train, y_train))
#print(model.score(x_test, y_test))

# <font color = 'red'> End of project </font>

# <font color = 'blue'> Thank you for your time :) </font>