# In [2]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.filterwarnings("ignore")

# Load the raw diabetes dataset from disk.
diabetes_df = pd.read_csv("./data/diabetes.csv")
diabetes_df.head()

# Rearrange the columns so that "Outcome" is the last one.
l1 = list(filter(lambda col: col != "Outcome", diabetes_df.columns))
l1 += ["Outcome"]
diabetes_df = diabetes_df[l1]
diabetes_df.head()

def data_clean(df=None):
    """Encode the categorical columns and impute missing calorie values.

    The frame is modified in place and also returned, so the caller's
    original variable and the returned value refer to the same object.

    Args:
        df: DataFrame with "Exercise", "Gender" and "CalorieIntake" columns.

    Returns:
        The same DataFrame, with "Exercise" and "Gender" converted to integer
        codes and missing "CalorieIntake" values replaced by the column median.

    Raises:
        TypeError: if ``df`` is None.
        ValueError: if an "Exercise"/"Gender" value is neither a known
            category nor convertible to ``int`` (raised by ``astype``).
    """
    if df is None:
        raise TypeError("data_clean() requires a DataFrame, got None")

    exercise_codes = {"No": 1, "Evening": 2, "Morning": 3, "Both": 4}
    gender_codes = {"M": 1, "F": 0}

    # Map known categories to their codes and pass every other value through
    # unchanged (same contract as ``replace``), without relying on the
    # deprecated silent downcasting that ``replace`` performs on object data.
    df["Exercise"] = (
        df["Exercise"].map(lambda v: exercise_codes.get(v, v)).astype(int)
    )
    df["Gender"] = df["Gender"].map(lambda v: gender_codes.get(v, v)).astype(int)

    # Assign the result back instead of ``fillna(..., inplace=True)`` on the
    # column: the chained in-place form is deprecated and does not update the
    # parent frame when pandas Copy-on-Write is active.
    df["CalorieIntake"] = df["CalorieIntake"].fillna(df["CalorieIntake"].median())
    return df
    
    
# Clean the raw frame before separating predictors from the target.
diabetes_df_clean = data_clean(diabetes_df)

# Every column except the last is a feature; the last column is the label.
X, y = diabetes_df_clean.iloc[:, :-1], diabetes_df_clean.iloc[:, -1]

class Ml_Model(object):
    """Small workflow wrapper around a scikit-learn style classifier.

    The constructor optionally balances the classes, splits the data into
    train/test partitions and optionally standardises the features. A
    classifier is attached with ``pass_classifier`` and can then be fitted,
    reported on, cross-validated and visualised.

    Third-party imports (scikit-learn, imbalanced-learn, colorama) are done
    inside the methods that need them, matching the original lazy-import style.
    """

    # Metric names reported by validation(), in reporting order.
    _METRIC_NAMES = ("Accuracy", "Recall", "Precision", "F1-Score")

    def __init__(self, *arg, scaling=False, balance=False, Oversampling=False):
        """Store the data, optionally resample it, then split (and scale) it.

        Args:
            *arg: positional data; ``arg[0]`` is the feature matrix and
                ``arg[1]`` the target vector.
            scaling: when True, fit a ``StandardScaler`` on the training
                partition and apply it to both partitions.
            balance: when True, resample the data to a 1:1 class ratio
                before splitting.
            Oversampling: with ``balance=True``, use random over-sampling
                instead of random under-sampling.
        """
        from sklearn.model_selection import train_test_split

        self.X = arg[0]
        self.y = arg[1]
        self.sc = None
        self.score_dict = None
        self.clf = None
        self.scaling = scaling

        if balance:
            if Oversampling:
                from imblearn.over_sampling import RandomOverSampler
                sampler = RandomOverSampler(sampling_strategy=1)
            else:
                from imblearn.under_sampling import RandomUnderSampler
                sampler = RandomUnderSampler(sampling_strategy=1)
            self.X, self.y = sampler.fit_resample(self.X, self.y)

        if scaling:
            from sklearn.preprocessing import StandardScaler
            # NOTE(review): only the scaling path fixes the split seed; the
            # unscaled path below is left unseeded, as in the original code.
            split = train_test_split(self.X, self.y, random_state=34)
            self.X_train, self.X_test, self.y_train, self.y_test = split

            # Fit the scaler on the training partition only.
            self.sc = StandardScaler()
            self.X_train = self.sc.fit_transform(self.X_train)
            self.X_test = self.sc.transform(self.X_test)
        else:
            split = train_test_split(self.X, self.y)
            self.X_train, self.X_test, self.y_train, self.y_test = split

    @staticmethod
    def _take_rows(data, positions):
        """Select rows by position from a pandas object or an array."""
        if hasattr(data, "iloc"):
            return data.iloc[positions]
        return data[positions]

    def _require_clf(self):
        """Return the attached classifier or fail with a clear message."""
        clf = getattr(self, "clf", None)
        if clf is None:
            raise AttributeError(
                "no classifier set; call pass_classifier() first")
        return clf

    def train_test_split_obj(self):
        """Return ``(X_train, X_test, y_train, y_test)`` as a tuple."""
        return (self.X_train, self.X_test, self.y_train, self.y_test)

    def pass_classifier(self, clf=None):
        """Attach ``clf`` to this instance, print it and return it."""
        self.clf = clf
        print(clf)

        return self.clf

    def fit_method_and_score(self):
        """Fit the attached classifier and return test-set predictions.

        Returns:
            The result of ``predict`` on the stored test partition.

        Raises:
            AttributeError: if no classifier has been attached.
        """
        # Use the classifier attached to this instance, not a module global.
        clf = self._require_clf()
        clf.fit(self.X_train, self.y_train)

        return clf.predict(self.X_test)

    def classification_report(self):
        """Print scikit-learn's classification report for the test partition.

        The predictions are also stored on ``self.y_pred``. Returns None.
        """
        from sklearn.metrics import classification_report

        self.y_pred = self._require_clf().predict(self.X_test)
        print(classification_report(self.y_test, self.y_pred))
        return None

    def plot_mushroom_boundary(self, X, y, fitted_model):
        """Plot decision regions and class-1 probabilities for 2-D data.

        Args:
            X: two-column feature array; it is indexed as ``X[:, 0]`` and
                ``X[:, 1]``, so it must support NumPy-style indexing.
            y: labels for ``X``; points labelled 0 and 1 are scattered.
            fitted_model: fitted classifier providing ``predict`` and
                ``score``; ``predict_proba`` is used when it works.
        """
        labels = np.asarray(y)

        # The evaluation grid, model name and score do not depend on the
        # panel being drawn, so they are computed once.
        mesh_step_size = 0.01  # step size in the mesh
        x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1
        y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, mesh_step_size),
                             np.arange(y_min, y_max, mesh_step_size))
        grid = np.c_[xx.ravel(), yy.ravel()]
        model_name = str(fitted_model).split('(')[0]
        accuracy = np.round(fitted_model.score(X, y), 5)

        plt.figure(figsize=(9.8, 5), dpi=100)
        for i, plot_type in enumerate(['Decision Boundary', 'Decision Probabilities']):
            plt.subplot(1, 2, i + 1)

            if i == 0:
                Z = fitted_model.predict(grid)
            else:
                try:
                    Z = fitted_model.predict_proba(grid)[:, 1]
                except Exception:
                    # Best effort: some models expose no usable probability
                    # estimates. Unlike a bare ``except``, this does not
                    # swallow KeyboardInterrupt/SystemExit.
                    plt.text(0.4, 0.5, 'Probabilities Unavailable', horizontalalignment='center',
                             verticalalignment='center', transform=plt.gca().transAxes, fontsize=12)
                    plt.axis('off')
                    break

            Z = Z.reshape(xx.shape)

            for cls in (0, 1):
                mask = labels == cls
                plt.scatter(X[mask, 0], X[mask, 1],
                            alpha=0.4, label=str(cls), s=5)
            plt.imshow(Z, interpolation='nearest', cmap='RdYlBu_r', alpha=0.15,
                       extent=(x_min, x_max, y_min, y_max), origin='lower')
            plt.title(plot_type + '\n' +
                      model_name + ' Test Accuracy: ' + str(accuracy))
            plt.gca().set_aspect('equal')

        plt.tight_layout()
        plt.subplots_adjust(top=0.9, bottom=0.08, wspace=0.02)

    def validation(self, n_splits=18):
        """Run stratified k-fold cross-validation and print summary statistics.

        For every fold a fresh copy of the attached classifier is fitted on
        the fold's training rows and scored on the fold's held-out rows, so
        the attached classifier itself is left untouched. When the instance
        was created with ``scaling=True`` a new ``StandardScaler`` is fitted
        per fold. Per-fold scores are stored in ``self.score_dict`` keyed by
        "Accuracy", "Recall", "Precision" and "F1-Score".

        Args:
            n_splits: number of folds (default 18).

        Returns:
            None; results are printed and kept in ``self.score_dict``.

        Raises:
            AttributeError: if no classifier has been attached.
        """
        from colorama import Fore, Style
        from sklearn.base import clone
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import f1_score
        from sklearn.metrics import precision_score
        from sklearn.metrics import recall_score
        from sklearn.model_selection import StratifiedKFold
        from sklearn.preprocessing import StandardScaler

        base_clf = self._require_clf()
        scorers = {
            "Accuracy": accuracy_score,
            "Recall": recall_score,
            "Precision": precision_score,
            "F1-Score": f1_score,
        }
        scores = {name: [] for name in self._METRIC_NAMES}
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1)

        for train_index, test_index in skf.split(self.X, self.y):
            # split() yields positional indices, so select rows by position
            # rather than by index label.
            x_train_fold = self._take_rows(self.X, train_index)
            x_test_fold = self._take_rows(self.X, test_index)
            y_train_fold = self._take_rows(self.y, train_index)
            y_test_fold = self._take_rows(self.y, test_index)

            if self.scaling:
                fold_scaler = StandardScaler()
                x_train_fold = fold_scaler.fit_transform(x_train_fold)
                x_test_fold = fold_scaler.transform(x_test_fold)

            # safe=False falls back to a deep copy for objects that are not
            # scikit-learn estimators.
            fold_clf = clone(base_clf, safe=False)
            fold_clf.fit(x_train_fold, y_train_fold)

            # Score against this fold's held-out rows, not the fixed test
            # partition created in the constructor.
            y_pre = fold_clf.predict(x_test_fold)
            for name in self._METRIC_NAMES:
                scores[name].append(
                    round(scorers[name](y_test_fold, y_pre), 4))

        self.score_dict = scores

        for i, j in scores.items():

            print(Fore.BLUE+f"\n\n{i} ")
            print(Style.RESET_ALL)

            print(f'List of possible {i} Score:\n', j)

            print(f'\nMaximum {i} Score That can be obtained from this model is:', max(
                j)*100, '%')
            print(f'\nMinimum {i} Score:', min(j)*100, '%')
            print(
                f'\nAverage {i} Score That can be obtained from this model is::', np.mean(j))
            print(
                f'\nMedian {i} Score That can be obtained from this model is::', np.median(j))
            print('\nStandard Deviation is:', np.std(j))

        return None

    def score_return(self, score="F1-Score"):
        """Return ``(score, per_fold_values)`` for one validation metric.

        Args:
            score: one of "Accuracy", "Recall", "Precision", "F1-Score".

        Returns:
            A ``(name, list_of_scores)`` tuple, or None (after printing a
            hint in red) when the metric name is unknown or ``validation``
            has not populated ``self.score_dict`` yet.
        """
        scores = self.score_dict
        if scores is not None and score in scores:
            return (score, scores[score])

        # Imported here because colorama is only needed on the error path.
        from colorama import Fore, Style

        print()
        if scores is None:
            print(Fore.RED + "No scores available : run validation() before calling score_return()"
                  + Style.RESET_ALL)
        else:
            print(Fore.RED+"KeyError : please follow  given list score format for gaining score list that created by validation function\n"+
                  ":[Accuracy,Recall,Precision,F1-Score]" + Style.RESET_ALL)
        print()
        return None

    def visualization(self):
        """Plot the classifier's decision boundary on a 2-component PCA view.

        The stored features are projected to two principal components,
        standardised, under-sampled to a 1:1 class ratio and split; a copy of
        the attached classifier is fitted on the training part and plotted
        against the test part. A copy is used so that the attached
        classifier is not refitted on the 2-D projection.

        Raises:
            AttributeError: if no classifier has been attached.
        """
        from colorama import Fore, Style
        from imblearn.under_sampling import RandomUnderSampler
        from sklearn.base import clone
        from sklearn.decomposition import PCA
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler

        base_clf = self._require_clf()

        print()
        # Reset the colour afterwards so later output is not tinted blue.
        print(Fore.BLUE+"\n Below Test Acurracy Based On PCA" + Style.RESET_ALL)

        X_pca = PCA(n_components=2).fit_transform(self.X)
        X_pca = StandardScaler().fit_transform(X_pca)
        y_pca = self.y

        sampler = RandomUnderSampler(sampling_strategy=1)
        X_pca_balance, y_pca_balance = sampler.fit_resample(X_pca, y_pca)

        X_pca_balance_train, X_pca_balance_test, y_pca_balance_train, y_pca_balance_test = train_test_split(
            X_pca_balance, y_pca_balance)

        model = clone(base_clf, safe=False)
        model.fit(X_pca_balance_train, y_pca_balance_train)
        self.plot_mushroom_boundary(X_pca_balance_test, y_pca_balance_test, model)


from sklearn.ensemble import RandomForestClassifier

# Build the model wrapper on the prepared features/target with default options.
m1 = Ml_Model(X, y)

# A deliberately small, shallow forest.
rf_params = dict(
    n_estimators=10,
    max_leaf_nodes=5,
    max_depth=3,
    min_samples_split=20,
    min_samples_leaf=15,
)
clf = RandomForestClassifier(**rf_params)

# Attach the classifier, then fit, report, cross-validate and plot in turn.
m1.pass_classifier(clf)
m1.fit_method_and_score()
m1.classification_report()
m1.validation()
m1.visualization()


