In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")


In [None]:
#import data 
diabetes_df=pd.read_csv("../input/diabetes-dataset/diabetes.csv")
diabetes_df.head()



In [None]:
diabetes_df.info()

In [None]:
# Move the "Outcome" target to the last column so the features can later be sliced off by position.
l1 = [*filter(lambda name: name != "Outcome", diabetes_df.columns), "Outcome"]
diabetes_df = diabetes_df.loc[:, l1]
diabetes_df.head()

## Data Clean


In [None]:
def data_clean(df=None):
    """Return a cleaned copy of the raw diabetes frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Exercise", "Gender" and "CalorieIntake".

    Returns
    -------
    pandas.DataFrame
        A new frame in which
        * "Exercise" is encoded as No=1, Evening=2, Morning=3, Both=4 (int),
        * "Gender" is encoded as M=1, F=0 (int),
        * missing "CalorieIntake" values are replaced by the column median.

    Raises
    ------
    TypeError
        If no frame is supplied.

    The input frame is left untouched, so the cell calling this function can
    be re-run safely and the raw data stays available for inspection.
    """
    if df is None:
        raise TypeError("data_clean() requires a pandas DataFrame, got None")

    cleaned = df.copy()
    cleaned["Exercise"] = (
        cleaned["Exercise"]
        .replace({"No": 1, "Evening": 2, "Morning": 3, "Both": 4})
        .astype(int)
    )
    cleaned["Gender"] = cleaned["Gender"].replace({"M": 1, "F": 0}).astype(int)
    # Assign the result instead of fillna(inplace=True) on a column selection,
    # which is chained assignment and may not write back to the frame.
    cleaned["CalorieIntake"] = cleaned["CalorieIntake"].fillna(
        cleaned["CalorieIntake"].median()
    )
    return cleaned
    
    
diabetes_df_clean=data_clean(diabetes_df)

In [None]:
diabetes_df_clean.head()

<b> Visualization through seaborn </b>

In [None]:
sns.pairplot(diabetes_df_clean,hue="Outcome")

## Visualization of Data by LDA


In [None]:
#lda_df use for Without affecting clean dataframe
Lda_df=diabetes_df_clean.copy()

In [None]:
X=Lda_df.iloc[:,:-1]
y=Lda_df.iloc[:,-1]


In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Project the features onto a single linear-discriminant axis.
clf1 = LinearDiscriminantAnalysis(solver="eigen", n_components=1)
clf1.fit(X, y)
X_r1 = clf1.transform(X)

# Constant filler column: keeps the projected data 2-D for the train/test split and classifier below.
l2 = np.ones(len(X_r1))

lda_df = pd.DataFrame(
    data=np.column_stack([X_r1, l2, diabetes_df_clean["Outcome"]]),
    columns=["X_r1", "l2", "Outcome"],
)

In [None]:
lda_df

In [None]:
sns.displot(x=lda_df["X_r1"],hue=lda_df["Outcome"])

In [None]:
from sklearn.model_selection import train_test_split 
X_train,X_test,y_train,y_test=train_test_split(lda_df.iloc[:,:-1],lda_df["Outcome"],test_size=0.33)

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report

# Train an LDA classifier on the projected data and report hold-out metrics.
clf = LinearDiscriminantAnalysis()
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)
y_pre = clf.predict(X_test)
print(classification_report(y_test, y_pre))

<b>Conclusion:</b> Here we can see that the LDA transformation gives the best results. A recall of <strong>0.85 +/- 0.4</strong> is what matters most, because we don't want to classify a diabetes patient as a normal person.
<b>Note:</b> We have not performed <b>scaling</b> or dataset <b>balancing</b> yet; we will do that further ahead.

## <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">PCA</a>

In [None]:
from sklearn.decomposition import PCA

# Reduce the feature matrix to its first two principal components.
pca = PCA(n_components=2)
pca.fit(X)
X_embedded = pca.transform(X)

# Attach the target so the scatter plot can colour points by class.
pca_df = pd.DataFrame(data=X_embedded, columns=["x1", "x2"]).assign(Outcome=y)

In [None]:
pca_df.head()

<p> The plot below captures the overall shape of the data as seen by PCA. </p> 

In [None]:
sns.scatterplot(data=pca_df,x='x1',y="x2",hue="Outcome")

## <a href="https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html"> T-SNE </a>

Another unsupervised machine learning algorithm.

In [None]:
from sklearn.manifold import TSNE

# 2-D t-SNE embedding of the feature matrix.
# NOTE(review): newer scikit-learn releases appear to rename `n_iter` to `max_iter` -- confirm against the installed version.
tsne = TSNE(n_components=2, perplexity=30.0, n_iter=5000)
X_embedded = tsne.fit_transform(X)
X_embedded.shape

In [None]:
t_sne_df=pd.DataFrame(data=X_embedded,columns=["x1","x2"])
t_sne_df["Outcome"]=y

In [None]:
t_sne_df

In [None]:
sns.scatterplot(data=t_sne_df,x='x1',y="x2",hue='Outcome')

## Feature Selection

In [None]:
from matplotlib import pyplot
from sklearn.ensemble import RandomForestClassifier

# A fixed seed keeps the importance ranking, and the feature subset later
# chosen from it, stable from one run of the notebook to the next.
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Impurity-based importance, one value per column of X.
importance = model.feature_importances_

# feature name -> importance score
map_dict = dict(zip(X.columns, importance))

# Bar chart of the importance of every feature.
pyplot.bar(X.columns, importance)
pyplot.xticks(rotation=90)
pyplot.ylabel("Importance")
pyplot.title("Random forest feature importance")
pyplot.show()

<b>Note</b>
- Glucose :- As the glucose level increases, the chance of diabetes increases.
- CalorieIntake :- How many calories are consumed per day. If people take in more calories, their fat and sugar levels increase, so the chance of diabetes increases.
- Exercise :- Exercise plays a major role in every disease.
- sleepDuration :- Too little sleep also increases the chance of diabetes; our body needs good sleep.
- BMI :- An overweight person has a higher probability of diabetes.

Other factors also play an important role in diabetes, but we need the most important features, so we select the 5 features above.

In [None]:
map_dict_list=dict(sorted(map_dict.items(),key=lambda kv: kv[1],reverse=True))

In [None]:
map_dict_list

In [None]:
# Keep only the five highest-ranked features; the target y is left as it is.
top_features = list(map_dict_list.keys())[:5]
X = X[top_features]

In [None]:
##

## General Class For ML Algorithms

### The class below can scale the data, handle an imbalanced dataset, and perform fitting, validation and visualization.

In [None]:
class Ml_Model(object):
    '''
    Experiment harness around a scikit-learn style classifier.

    It offers optional class balancing and scaling, a hold-out split, a
    classification report, stratified k-fold validation and a 2-D
    decision-boundary plot.

    The class relies on the module-level names ``np`` (numpy) and ``plt``
    (matplotlib.pyplot).  scikit-learn, imbalanced-learn and colorama are
    imported lazily inside the methods that need them.
    '''

    # Metric names produced by validation() and accepted by score_return().
    SCORE_NAMES = ("Accuracy", "Recall", "Precision", "F1-Score")

    def __init__(self, *arg, scaling=False, balance=False, Oversampling=False):
        '''
        Store the data, optionally balance the classes, then create the
        hold-out split (standardised when ``scaling`` is true).

        Parameters
        ----------
        *arg : (X, y)
            Feature matrix and target vector, in that order.
        scaling : bool
            Standardise features with a StandardScaler fitted on the
            training part only.
        balance : bool
            Resample the data to a 1:1 class ratio before splitting.
        Oversampling : bool
            With ``balance=True``: use RandomOverSampler when true,
            RandomUnderSampler otherwise.
        '''
        from sklearn.model_selection import train_test_split

        self.X = arg[0]
        self.y = arg[1]
        self.score_dict = None   # filled by validation()
        self.scaling = scaling

        if balance:
            if Oversampling:
                from imblearn.over_sampling import RandomOverSampler as Sampler
            else:
                from imblearn.under_sampling import RandomUnderSampler as Sampler
            self.X, self.y = Sampler(sampling_strategy=1).fit_resample(self.X, self.y)

        # Only the scaled configuration uses a fixed split seed; the unscaled
        # one is split with the default (unseeded) behaviour.
        split_seed = 34 if scaling else None
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, random_state=split_seed)

        if scaling:
            from sklearn.preprocessing import StandardScaler
            self.sc = StandardScaler()
            self.X_train = self.sc.fit_transform(self.X_train)
            self.X_test = self.sc.transform(self.X_test)

    @staticmethod
    def _rows(data, positions):
        '''Select rows by position from a pandas object or an array.'''
        return data.iloc[positions] if hasattr(data, "iloc") else data[positions]

    def train_test_split_obj(self):
        '''Return the hold-out split as (X_train, X_test, y_train, y_test).'''
        return (self.X_train, self.X_test, self.y_train, self.y_test)

    def pass_classifier(self, clf=None):
        '''Register the classifier to use, print it and return it.'''
        self.clf = clf
        print(clf)
        return self.clf

    def fit_method_and_score(self):
        '''
        Fit the registered classifier on the training split and return its
        predictions for the test split.

        Uses the classifier given to pass_classifier(), not whatever global
        happens to be called ``clf``.
        '''
        self.clf.fit(self.X_train, self.y_train)
        return self.clf.predict(self.X_test)

    def classification_report(self):
        '''Print the classification report for the hold-out test split.'''
        from sklearn.metrics import classification_report
        self.y_pred = self.clf.predict(self.X_test)
        return print(classification_report(self.y_test, self.y_pred))

    def plot_boundary(self, X, y, fitted_model):
        '''
        Draw two panels for a model fitted on 2-D data: the predicted class
        regions and, when available, the predicted probability of class 1.

        X is indexed as a 2-column array, y holds the labels 0/1 and
        ``fitted_model`` must already be fitted on data of the same shape.
        '''
        plt.figure(figsize=(9.8, 5), dpi=100)
        labels = np.asarray(y)
        mesh_step_size = 0.01  # step size in the mesh

        x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
        y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size),
                             np.arange(y_min, y_max, mesh_step_size))
        grid = np.c_[xx.ravel(), yy.ravel()]

        for i, plot_type in enumerate(['Decision Boundary', 'Decision Probabilities']):
            plt.subplot(1, 2, i + 1)

            if i == 0:
                Z = fitted_model.predict(grid)
            else:
                try:
                    Z = fitted_model.predict_proba(grid)[:, 1]
                except Exception:
                    # Best effort: some estimators expose no probabilities.
                    plt.text(0.4, 0.5, 'Probabilities Unavailable', horizontalalignment='center',
                             verticalalignment='center', transform=plt.gca().transAxes, fontsize=12)
                    plt.axis('off')
                    break

            Z = Z.reshape(xx.shape)
            for label in (0, 1):
                mask = labels == label
                plt.scatter(X[mask, 0], X[mask, 1], alpha=0.4, label=str(label), s=5)
            plt.imshow(Z, interpolation='nearest', cmap='RdYlBu_r', alpha=0.15,
                       extent=(x_min, x_max, y_min, y_max), origin='lower')
            plt.title(plot_type + '\n' +
                      str(fitted_model).split('(')[0] + ' Test Accuracy: ' + str(np.round(fitted_model.score(X, y), 5)))
            plt.gca().set_aspect('equal')

        plt.tight_layout()
        plt.subplots_adjust(top=0.9, bottom=0.08, wspace=0.02)

    def validation(self, n_splits=18):
        '''
        Stratified k-fold validation of the registered classifier.

        For every fold the classifier is refitted on the training part and
        scored on that fold's own held-out part.  With ``scaling`` enabled a
        fresh StandardScaler is fitted on each training part.  The per-fold
        scores are stored in ``self.score_dict`` (keys: SCORE_NAMES) and a
        summary is printed.

        Parameters
        ----------
        n_splits : int
            Number of folds (default 18).
        '''
        from sklearn.model_selection import StratifiedKFold
        from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
        from sklearn.preprocessing import StandardScaler
        from colorama import Fore, Style

        metric_fns = {"Accuracy": accuracy_score, "Recall": recall_score,
                      "Precision": precision_score, "F1-Score": f1_score}
        fold_scores = {name: [] for name in metric_fns}

        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1)
        for train_index, test_index in skf.split(self.X, self.y):
            # split() yields positions, not index labels, so select by position.
            x_train_fold = self._rows(self.X, train_index)
            x_test_fold = self._rows(self.X, test_index)
            y_train_fold = self._rows(self.y, train_index)
            y_test_fold = self._rows(self.y, test_index)

            if self.scaling:
                sc = StandardScaler()
                x_train_fold = sc.fit_transform(x_train_fold)
                x_test_fold = sc.transform(x_test_fold)

            self.clf.fit(x_train_fold, y_train_fold)
            y_pre = self.clf.predict(x_test_fold)

            for name, metric in metric_fns.items():
                fold_scores[name].append(round(metric(y_test_fold, y_pre), 4))

        self.score_dict = fold_scores

        for i, j in fold_scores.items():
            print(Fore.BLUE+f"\n\n{i} ")
            print(Style.RESET_ALL)

            print(f'List of possible {i} Score:\n', j)

            print(f'\nMaximum {i} Score That can be obtained from this model is:', max(
                j)*100, '%')
            print(f'\nMinimum {i} Score:', min(j)*100, '%')
            print(
                f'\nAverage {i} Score That can be obtained from this model is::', np.mean(j))
            print(
                f'\nMedian {i} Score That can be obtained from this model is::', np.median(j))
            print('\nStandard Deviation is:', np.std(j))

        return None

    def score_return(self, score="F1-Score"):
        '''
        Return ``(score, per_fold_values)`` for one metric of the last
        validation() run.

        ``score`` must be one of SCORE_NAMES.  If it is not, or if
        validation() has not been run yet, a message is printed and None is
        returned.
        '''
        if self.score_dict is not None and score in self.score_dict:
            return (score, self.score_dict[score])

        from colorama import Fore, Style
        print()
        if self.score_dict is None:
            print(Fore.RED + "No scores available yet: run validation() first" + Style.RESET_ALL)
        else:
            print(Fore.RED+"KeyError : please follow  given list score format for gaining score list that created by validation function\n"+
                  ":[Accuracy,Recall,Precision,F1-Score]" + Style.RESET_ALL)
        print()
        return None

    def visualization(self):
        '''
        Show how the registered algorithm separates the classes in a 2-D
        PCA projection of the data.

        A clone of the classifier is fitted on the standardised,
        under-sampled PCA components, so the classifier fitted on the real
        features stays usable afterwards.
        '''
        from colorama import Fore, Style
        from imblearn.under_sampling import RandomUnderSampler
        from sklearn.base import clone
        from sklearn.decomposition import PCA
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler

        print()
        print(Fore.BLUE+"\n Below Test Acurracy Based On PCA" + Style.RESET_ALL)

        X_pca = PCA(n_components=2).fit_transform(self.X)
        X_pca = StandardScaler().fit_transform(X_pca)

        X_pca_balance, y_pca_balance = RandomUnderSampler(
            sampling_strategy=1).fit_resample(X_pca, self.y)

        X_pca_balance_train, X_pca_balance_test, y_pca_balance_train, y_pca_balance_test = train_test_split(
            X_pca_balance, y_pca_balance)

        model = clone(self.clf)
        model.fit(X_pca_balance_train, y_pca_balance_train)
        self.plot_boundary(X_pca_balance_test, y_pca_balance_test, model)

<b style="font-size:16px">Aim:</b><b style="font-size:16px; color:green"> Don't forget: our main aim is to identify people with diabetes, represented by class 1,
so our main focus is on recall. </b>

## LDA 

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# LDA on standardised, under-sampled data: hold-out report, PCA decision boundary, then k-fold scores.
clf = LinearDiscriminantAnalysis()
m1 = Ml_Model(X, y, scaling=True, balance=True, Oversampling=False)
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()

m1.visualization()
m1.validation()

<b> Conclusion : </b> LDA does a pretty good job: median recall = 0.9 and F1-score = 0.94.

**Nomination Start**

Nomination List: ["LDA"] 

**In The Race Now Winner is**: LDA


<b style="color:blue"> We build a box plot to compare our models </b>

In [None]:
# Start the running recall comparison with LDA's per-fold recall scores.
recall = m1.score_return(score="Recall")[1]
recall_list = [recall]
names = ["LDA"]

fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
ax.boxplot(recall_list)
ax.set_xticklabels(names)
ax.set_title("Recall distribution comparision")
ax.set_ylabel("Recall")
plt.setp(ax.get_xticklabels(), rotation=75)

plt.show()

## QDA

In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# QDA on standardised, under-sampled data: hold-out report, k-fold scores, then PCA decision boundary.
clf = QuadraticDiscriminantAnalysis()
m1 = Ml_Model(X, y, scaling=True, balance=True)
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()

<b> Conclusion : </b> If LDA performs well, then QDA will definitely perform well too, and QDA achieves a similar recall.

QDA does a pretty good job.
- Nomination List: ["QDA","LDA"] 





In [None]:
# Add QDA's per-fold recall to the running comparison.
# Re-running the cell replaces the existing entry instead of appending a duplicate.
recall = m1.score_return("Recall")[1]
if "QDA" in names:
    recall_list[names.index("QDA")] = recall
else:
    recall_list.append(recall)
    names.append("QDA")

# Draw on a fresh axes; the earlier comparison figure has already been shown.
_, ax = plt.subplots()
ax.boxplot(recall_list, labels=names)
ax.set_ylabel("Recall")
plt.show()

<b> Conclusion : </b> QDA has clearly won.
<br>
**In The Race Now Winner is**: QDA

## SVM

In [None]:
from sklearn.svm import SVC

# SVM (default settings) on standardised, under-sampled data.
clf = SVC()
m1 = Ml_Model(X, y, scaling=True, balance=True)
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()


    



<b> Nomination List</b>: ["QDA","SVM","LDA"] 





In [None]:
# Add SVM's per-fold recall to the running comparison.
# Re-running the cell replaces the existing entry instead of appending a duplicate.
recall = m1.score_return("Recall")[1]
if "SVM" in names:
    recall_list[names.index("SVM")] = recall
else:
    recall_list.append(recall)
    names.append("SVM")

# Draw on a fresh axes; the earlier comparison figure has already been shown.
_, ax = plt.subplots()
ax.boxplot(recall_list, labels=names)
ax.set_ylabel("Recall")
plt.show()

<b> Conclusion : </b> SVM does a great job on recall (median) and its standard deviation,
    so both QDA and SVM give us almost the same results.
<br>
**In The Race Now Winner is**: QDA is still winner



<h2> <a href="https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html"> Logistics Regression</a> </h2>

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on standardised, under-sampled data.
clf = LogisticRegression(C=1, max_iter=1000)
m1 = Ml_Model(X, y, scaling=True, balance=True)
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()


<b> Nomination List</b>: ["QDA","SVM","LDA","Lg"] 

In [None]:
# Add logistic regression's per-fold recall to the running comparison.
# Re-running the cell replaces the existing entry instead of appending a duplicate.
recall = m1.score_return("Recall")[1]
if "Lg" in names:
    recall_list[names.index("Lg")] = recall
else:
    recall_list.append(recall)
    names.append("Lg")

# Draw on a fresh axes; the earlier comparison figure has already been shown.
_, ax = plt.subplots()
ax.boxplot(recall_list, labels=names)
ax.set_ylabel("Recall")
plt.show()

<b> Conclusion : </b> Still QDA is  Winner
<br>
**In The Race Now Winner is**: QDA 

## MLP Classifier

In [None]:
from sklearn.neural_network import MLPClassifier

# MLP (default settings) on standardised, under-sampled data.
clf = MLPClassifier()
m1 = Ml_Model(X, y, scaling=True, balance=True)
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()


<b> Nomination List</b>: ["QDA","SVM","LDA","Lg","MLP"] 

In [None]:
# Add the MLP's per-fold recall to the running comparison.
# Re-running the cell replaces the existing entry instead of appending a duplicate.
recall = m1.score_return("Recall")[1]
if "MLP" in names:
    recall_list[names.index("MLP")] = recall
else:
    recall_list.append(recall)
    names.append("MLP")

# Draw on a fresh axes; the earlier comparison figure has already been shown.
_, ax = plt.subplots()
ax.boxplot(recall_list, labels=names)
ax.set_ylabel("Recall")
plt.show()

<b> Conclusion : </b> MLP does a great job even without tuned parameters, so we now have two algorithms in the competition
(QDA, MLP). Note: a neural network is capable of modelling non-linear functions.
<br>
**In The Race Now Winner is**: ["QDA","MLP"] 

<h2> <a href="https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html">KNN Algorithms</a> </h2>

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Distance-weighted 5-NN (Minkowski p=1.5) on standardised, under-sampled data.
clf = KNeighborsClassifier(n_neighbors=5, weights="distance", p=1.5, n_jobs=-1)
m1 = Ml_Model(X, y, scaling=True, balance=True)
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()


<b> Nomination List</b>: ["QDA","SVM","LDA","Lg","MLP","KNN"] 

In [None]:
# Add KNN's per-fold recall to the running comparison.
# Re-running the cell replaces the existing entry instead of appending a duplicate.
recall = m1.score_return("Recall")[1]
if "KNN" in names:
    recall_list[names.index("KNN")] = recall
else:
    recall_list.append(recall)
    names.append("KNN")

# Draw on a fresh axes; the earlier comparison figure has already been shown.
_, ax = plt.subplots()
ax.boxplot(recall_list, labels=names)
ax.set_ylabel("Recall")
plt.show()

<b> Conclusion : </b> The KNN algorithm wins, but KNN does not learn anything from the data, so it is highly unstable for prediction.
<br>
**In The Race Now Winner is**: ["QDA","MLP"] 

## Decision Tree 

### with prepruning

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Pre-pruned decision tree on standardised, under-sampled data.
clf = DecisionTreeClassifier(
    max_depth=4,
    max_features=2,
    min_samples_split=8,
    min_samples_leaf=20,
)
m1 = Ml_Model(X, y, scaling=True, balance=True)
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()

### Extra Experiments By Post-Prunings

In [None]:
# Reuse the hold-out split of the current model for the post-pruning experiments.
X_train_dc, X_test_dc, y_train_dc, y_test_dc = m1.train_test_split_obj()


In [None]:
Dc=DecisionTreeClassifier()
Dc.fit(X_train_dc,y_train_dc)

In [None]:
from sklearn import tree
plt.figure(figsize=(10,15))
tree.plot_tree(Dc,filled=True)
plt.show()

In [None]:
path=Dc.cost_complexity_pruning_path(X_train_dc,y_train_dc)
ccp_alphas, impurities = path.ccp_alphas, path.impurities

In [None]:
ccp_alphas

In [None]:
clfs = []
for ccp_alpha in ccp_alphas:
    clf = DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha,)
    clf.fit(X_train, y_train)
    clfs.append(clf)
print("Number of nodes in the last tree is: {} with ccp_alpha: {}".format(
      clfs[-1].tree_.node_count, ccp_alphas[-1]))

In [None]:
train_scores = [clf.score(X_train, y_train) for clf in clfs]
test_scores = [clf.score(X_test, y_test) for clf in clfs]

fig, ax = plt.subplots()
ax.set_xlabel("alpha")
ax.set_ylabel("accuracy")
ax.set_title("Accuracy vs alpha for training and testing sets")
ax.plot(ccp_alphas, train_scores, marker='o', label="train",
        drawstyle="steps-post")
ax.plot(ccp_alphas, test_scores, marker='o', label="test",
        drawstyle="steps-post")
ax.legend()
plt.show()


<b> Nomination List</b>: ["QDA","SVM","LDA","Lg","MLP","KNN","Decision Tree"] 

In [None]:
# Cost-complexity pruned decision tree on under-sampled, unscaled data.
clf = DecisionTreeClassifier(ccp_alpha=0.02)
m1 = Ml_Model(X, y, balance=True)
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()

In [None]:
from sklearn import tree

# Fit a pruned tree (ccp_alpha=0.02) on the split kept for the pruning experiments and draw it.
test = DecisionTreeClassifier(ccp_alpha=0.02).fit(X_train_dc, y_train_dc)

plt.figure(figsize=(10, 15))
tree.plot_tree(test, filled=True)
plt.show()

## Here the Decision Tree overcomes its overfitting

In [None]:
# Add the decision tree's per-fold recall to the running comparison.
# Re-running the cell replaces the existing entry instead of appending a duplicate.
recall = m1.score_return("Recall")[1]
if "Decision Tree" in names:
    recall_list[names.index("Decision Tree")] = recall
else:
    recall_list.append(recall)
    names.append("Decision Tree")

# Draw on a fresh axes; the earlier comparison figure has already been shown.
_, ax = plt.subplots()
ax.boxplot(recall_list, labels=names)
ax.set_ylabel("Recall")
plt.show()

<b> Conclusion : </b> The Decision Tree has less variance, so DT is a good competitor, but arbitrary decision boundaries still win.
<br>
**In The Race Now Winners Are**: ["QDA","MLP","DT"] 

## Random Forest

In [None]:

from sklearn.ensemble import RandomForestClassifier

# Small, shallow random forest on the unscaled, unbalanced data.
clf = RandomForestClassifier(
    n_estimators=10,
    max_depth=3,
    max_leaf_nodes=5,
    min_samples_split=20,
    min_samples_leaf=15,
)
m1 = Ml_Model(X, y)

m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()


<b> Nomination List</b>: ["QDA","SVM","LDA","Lg","MLP","KNN","Decision Tree","RF"] 

In [None]:
# Add the random forest's per-fold recall to the running comparison.
# Re-running the cell replaces the existing entry instead of appending a duplicate.
recall = m1.score_return("Recall")[1]
if "RandomF" in names:
    recall_list[names.index("RandomF")] = recall
else:
    recall_list.append(recall)
    names.append("RandomF")

# Draw on a fresh axes; the earlier comparison figure has already been shown.
_, ax = plt.subplots()
ax.boxplot(recall_list, labels=names)
ax.set_ylabel("Recall")
plt.show()

<b> Conclusion : </b> The Decision Tree has less variance, so DT is a good competitor, but arbitrary decision boundaries still win.
<br>
**In The Race Now Winners Are**: ["QDA","MLP","DT","RF"] 

<b> Final Winner </b>: **Random Forest**, because it has good diversity for future data. We could also select the Decision Tree, but Random Forest adds randomness and bootstrap sampling, which makes it the more generalized algorithm. 