# <b>Notebook Basics: Data Scientists' Package Imports</b>

In [1]:
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from xgboost import XGBClassifier

## <b>List of classifiers to compare</b>

In [None]:
# Candidate models, keyed by the label that compare_class_algo() prints and
# returns for each one. Insertion order is the evaluation (and plotting) order.
# Labels are kept verbatim -- including the trailing tabs, which presumably
# align the printed score columns -- because they are runtime strings.
classifiers = {
    # Linear / discriminant models
    "Logistic Regression Algotithm": LogisticRegression(max_iter=10000),
    "Linear Discriminant Analysis": LinearDiscriminantAnalysis(),
    # Tree-based models and ensembles
    "Decision Tree \t\t": DecisionTreeClassifier(),
    "Random Forest \t\t": RandomForestClassifier(),
    "Gradient Boosting \t": GradientBoostingClassifier(),
    "XGBoost Classifier \t": XGBClassifier(),
    # Remaining estimators, all with default hyper-parameters
    "Support Vector Classification": SVC(),
    "Naive Bayes Classification": GaussianNB(),
    "K Nearest Neighbour Algorithm": KNeighborsClassifier(),
    "Stochastic Gradient Descent": SGDClassifier(),
}

## <b>List of regressors to compare</b>

### compare_class_algo()

In [2]:
def _plot_confusion_matrixes(names, confusion_matrixes, display_labels=None, n_cols=5):
    '''
    Draw one confusion-matrix panel per evaluated classifier on a shared figure.

    Parameters
    ----------
    names : list of str
        Panel titles, one per confusion matrix.
    confusion_matrixes : list
        Confusion matrices in the same order as ``names``.
    display_labels : array, optional
        Tick labels forwarded to ``ConfusionMatrixDisplay``.
    n_cols : int
        Number of panels per row.
    '''
    # Keep the historical 2-row layout for up to 2 * n_cols models and grow
    # the grid beyond that instead of indexing past the last column.
    n_rows = max(2, -(-len(names) // n_cols))
    f, axes = plt.subplots(n_rows, n_cols, figsize=(20, 2.5 * n_rows), sharey='row')
    disp = None
    for i, (name, cf_matrix) in enumerate(zip(names, confusion_matrixes)):
        row, col = divmod(i, n_cols)
        disp = ConfusionMatrixDisplay(cf_matrix, display_labels=display_labels)
        disp.plot(ax=axes[row][col], xticks_rotation=45)
        disp.ax_.set_title(name)
        # Per-panel colorbars are dropped; a single one is added for the figure below.
        disp.im_.colorbar.remove()
        disp.ax_.set_xlabel('')
        # Only the very first panel keeps its y-axis label.
        if i != 0:
            disp.ax_.set_ylabel('')
    # Hide the grid cells that received no classifier.
    for i in range(len(names), n_rows * n_cols):
        row, col = divmod(i, n_cols)
        axes[row][col].set_visible(False)
    f.text(0.43, -0.1, 'Predicted label', ha='left')
    plt.subplots_adjust(wspace=0.40, hspace=1)
    if disp is not None:
        # The shared colorbar is built from the last plotted matrix.
        f.colorbar(disp.im_, ax=axes)
    plt.show()


def compare_class_algo(classifiers, X_train, X_test, y_train, y_test, display=False, display_labels=None):
    '''
    Fit each classifier on the training split, score it on the test split,
    print the scores and optionally plot every confusion matrix.

    Parameters
    ----------
    classifiers : dictionary
        Dictionary of labels and corresponding classifiers, example:
        {'XGBoost': XGBClassifier(), 'Logistic Regression': LogisticRegression()}.
        Each classifier is fitted in place.
    X_train, X_test, y_train, y_test : pd.DataFrame
        Data used to fit (train) and evaluate (test) every classifier.
    display : bool
        When true, plot the confusion matrix of every classifier on a single
        figure. Plotting relies on ``matplotlib.pyplot`` being available as
        ``plt`` in the module namespace.
    display_labels : array
        Target labels for display, example: ['Business','Infinite','Platinium Facelia']

    Returns
    -------
    names : list
        Label of each classifier, in evaluation order
    accuracies : list
        Accuracy of each classifier, rounded to 3 decimals
    f_scores : list
        Weighted F1 score of each classifier, rounded to 3 decimals
    confusion_matrixes : list
        Confusion matrix of each classifier (filled whether or not
        ``display`` is set)
    '''
    names = []
    accuracies = []
    f_scores = []
    confusion_matrixes = []
    for key, classifier in classifiers.items():
        y_pred = classifier.fit(X_train, y_train).predict(X_test)
        accuracy = round(accuracy_score(y_test, y_pred), 3)
        f_score = round(f1_score(y_test, y_pred, average='weighted'), 3)
        print(key, "\t Accuracy:", accuracy, "\t F-score:", f_score, "\t Overall:", round((accuracy + f_score) / 2, 3))
        names.append(key)
        accuracies.append(accuracy)
        f_scores.append(f_score)
        confusion_matrixes.append(confusion_matrix(y_test, y_pred))
    # Nothing to draw for an empty classifier dictionary.
    if display and names:
        _plot_confusion_matrixes(names, confusion_matrixes, display_labels)
    return names, accuracies, f_scores, confusion_matrixes