In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
def getRows(ds):
    """Load a CSV and split it into feature and target tables.

    The file is read with ``pd.read_csv`` and passed through
    ``pd.get_dummies(..., drop_first=True, dtype=int)``, so the encoded
    frame must end up containing the columns ``Age``, ``EstimatedSalary``,
    ``Gender_Male`` and ``Purchased``; a ``KeyError`` is raised otherwise.
    Any other column in the file is simply not selected.

    Parameters
    ----------
    ds : str, path object or file-like
        Anything accepted by ``pd.read_csv``.

    Returns
    -------
    tuple
        ``(independent, dependent, shape)`` where ``independent`` is the
        three-column feature DataFrame, ``dependent`` is a one-column
        DataFrame holding ``Purchased``, and ``shape`` is
        ``independent.shape``.
    """
    encoded = pd.get_dummies(pd.read_csv(ds), drop_first=True, dtype=int)

    feature_columns = ['Age', 'EstimatedSalary', 'Gender_Male']
    target_columns = ["Purchased"]

    independent = encoded[feature_columns]
    dependent = encoded[target_columns]
    return independent, dependent, independent.shape

In [3]:
def getSVMLinearClassificaiton(ind,dep):
    """Grid-search an SVC on a standardized split and score it on held-out data.

    The data is split 2/3 train, 1/3 test (``random_state=0``), the features
    are standardized with a scaler fitted on the training part only, and a
    ``GridSearchCV`` (scoring ``f1_weighted``) is run over kernel, gamma and C.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table, one row per sample.
    dep : array-like, Series or single-column DataFrame
        Target labels. Flattened to 1-D before fitting so scikit-learn does
        not emit its column-vector ``DataConversionWarning``.

    Returns
    -------
    tuple
        ``(grid, cm, clf_report, f1_weighted, best_param)``:
        the fitted ``GridSearchCV``, the test-split confusion matrix, the
        ``classification_report`` text, the weighted F1 on the test split,
        and ``grid.best_params_`` rendered as a string.

    Notes
    -----
    * The fitted ``StandardScaler`` is not returned, so new data must be
      standardized equivalently before calling ``grid.predict``.
    * The SVC is built with ``probability=True``, so ``grid.predict_proba``
      is available to callers (e.g. for ROC AUC).
    * NOTE(review): despite "Linear" in the name, the searched kernels are
      rbf/poly/sigmoid - confirm whether 'linear' should be in the grid.
    * NOTE(review): this pipeline is repeated almost verbatim in the sibling
      get*Classification functions; a shared helper would remove the copies.
    """
    from sklearn.metrics import classification_report, confusion_matrix, f1_score
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import train_test_split as tts
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC

    ##-SPLIT TRAIN & TEST
    # Estimators expect a 1-D target; a (n, 1) frame only works with a warning.
    target = np.ravel(dep)
    X_Train, X_Test, Y_Train, Y_Test = tts(ind, target, test_size=1/3, random_state=0)

    ##STANDARDIZATION
    # Fit on the training part only so test statistics do not leak in.
    sc = StandardScaler()
    X_Train = sc.fit_transform(X_Train)
    X_Test = sc.transform(X_Test)

    ##MODEL CREATION
    param_grid = {'kernel': ['rbf', 'poly', 'sigmoid'],
                  'gamma': ['auto', 'scale'],
                  'C': [10, 100, 1000, 2000, 3000]}

    # random_state pins the internal shuffling used for probability
    # calibration, so repeated runs give the same predict_proba output.
    grid = GridSearchCV(SVC(probability=True, random_state=0), param_grid,
                        refit=True, verbose=0, n_jobs=-1, scoring='f1_weighted')
    grid.fit(X_Train, Y_Train)
    grid_predictions = grid.predict(X_Test)

    ##CLASSIFICATION
    cm = confusion_matrix(Y_Test, grid_predictions)
    clf_report = classification_report(Y_Test, grid_predictions)

    ##F1_SCORE
    f1_weighted = f1_score(Y_Test, grid_predictions, average='weighted')
    best_param = format(grid.best_params_)

    return grid, cm, clf_report, f1_weighted, best_param

In [4]:
def getDecisionTreeClassification(ind,dep):
    """Grid-search a decision tree on a standardized split and score it.

    The data is split 2/3 train, 1/3 test (``random_state=0``), the features
    are standardized with a scaler fitted on the training part only, and a
    ``GridSearchCV`` (scoring ``f1_weighted``) is run over criterion,
    max_features and splitter.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table, one row per sample.
    dep : array-like, Series or single-column DataFrame
        Target labels; flattened to 1-D before fitting.

    Returns
    -------
    tuple
        ``(grid, cm, clf_report, f1_weighted, best_param)``:
        the fitted ``GridSearchCV``, the test-split confusion matrix, the
        ``classification_report`` text, the weighted F1 on the test split,
        and ``grid.best_params_`` rendered as a string.

    Notes
    -----
    * The fitted ``StandardScaler`` is not returned, so new data must be
      standardized equivalently before calling ``grid.predict``.
    * ``grid.predict_proba`` is available on the returned object if the
      caller needs ROC AUC.
    * NOTE(review): this pipeline is repeated almost verbatim in the sibling
      get*Classification functions; a shared helper would remove the copies.
    """
    from sklearn.metrics import classification_report, confusion_matrix, f1_score
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import train_test_split as tts
    from sklearn.preprocessing import StandardScaler
    from sklearn.tree import DecisionTreeClassifier

    ##-SPLIT TRAIN & TEST
    target = np.ravel(dep)
    X_Train, X_Test, Y_Train, Y_Test = tts(ind, target, test_size=1/3, random_state=0)

    ##STANDARDIZATION
    # Fit on the training part only so test statistics do not leak in.
    sc = StandardScaler()
    X_Train = sc.fit_transform(X_Train)
    X_Test = sc.transform(X_Test)

    ##MODEL CREATION
    # 'auto' was an alias of 'sqrt' for this estimator and was removed in
    # scikit-learn 1.3, where every candidate using it fails to fit. Dropping
    # it keeps the effective search space unchanged.
    param_grid = {'criterion': ['gini', 'entropy'],
                  'max_features': ['sqrt', 'log2'],
                  'splitter': ['best', 'random']}

    # Seed the tree: max_features < n_features and splitter='random' both
    # draw random numbers, so unseeded runs can pick different winners.
    grid = GridSearchCV(DecisionTreeClassifier(random_state=0), param_grid,
                        refit=True, verbose=0, n_jobs=-1, scoring='f1_weighted')
    grid.fit(X_Train, Y_Train)
    grid_predictions = grid.predict(X_Test)

    ##CLASSIFICATION
    cm = confusion_matrix(Y_Test, grid_predictions)
    clf_report = classification_report(Y_Test, grid_predictions)

    ##F1_SCORE
    f1_weighted = f1_score(Y_Test, grid_predictions, average='weighted')
    best_param = format(grid.best_params_)

    return grid, cm, clf_report, f1_weighted, best_param

In [5]:
def getRandomForestClassification(ind,dep):
    """Grid-search a random forest on a standardized split and score it.

    The data is split 2/3 train, 1/3 test (``random_state=0``), the features
    are standardized with a scaler fitted on the training part only, and a
    ``GridSearchCV`` (scoring ``f1_weighted``) is run over criterion,
    max_features and n_estimators.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table, one row per sample.
    dep : array-like, Series or single-column DataFrame
        Target labels. Flattened to 1-D before fitting so scikit-learn does
        not emit its column-vector ``DataConversionWarning``.

    Returns
    -------
    tuple
        ``(grid, cm, clf_report, f1_weighted, best_param)``:
        the fitted ``GridSearchCV``, the test-split confusion matrix, the
        ``classification_report`` text, the weighted F1 on the test split,
        and ``grid.best_params_`` rendered as a string.

    Notes
    -----
    * The fitted ``StandardScaler`` is not returned, so new data must be
      standardized equivalently before calling ``grid.predict``.
    * ``grid.predict_proba`` is available on the returned object if the
      caller needs ROC AUC.
    * NOTE(review): this pipeline is repeated almost verbatim in the sibling
      get*Classification functions; a shared helper would remove the copies.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import classification_report, confusion_matrix, f1_score
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import train_test_split as tts
    from sklearn.preprocessing import StandardScaler

    ##-SPLIT TRAIN & TEST
    target = np.ravel(dep)
    X_Train, X_Test, Y_Train, Y_Test = tts(ind, target, test_size=1/3, random_state=0)

    ##STANDARDIZATION
    # Fit on the training part only so test statistics do not leak in.
    sc = StandardScaler()
    X_Train = sc.fit_transform(X_Train)
    X_Test = sc.transform(X_Test)

    ##MODEL CREATION
    # 'auto' was an alias of 'sqrt' for this estimator and was removed in
    # scikit-learn 1.3, where every candidate using it fails to fit. Dropping
    # it keeps the effective search space unchanged.
    param_grid = {'criterion': ['gini', 'entropy'],
                  'max_features': ['sqrt', 'log2'],
                  'n_estimators': [10, 100]}

    # Seed the forest so bootstrap sampling and feature sub-sampling are
    # repeatable from run to run.
    grid = GridSearchCV(RandomForestClassifier(random_state=0), param_grid,
                        refit=True, verbose=0, n_jobs=-1, scoring='f1_weighted')
    grid.fit(X_Train, Y_Train)
    grid_predictions = grid.predict(X_Test)

    ##CLASSIFICATION
    cm = confusion_matrix(Y_Test, grid_predictions)
    clf_report = classification_report(Y_Test, grid_predictions)

    ##F1_SCORE
    f1_weighted = f1_score(Y_Test, grid_predictions, average='weighted')
    best_param = format(grid.best_params_)

    return grid, cm, clf_report, f1_weighted, best_param

In [6]:
def getLogisticClassification(ind,dep):
    """Grid-search a logistic regression on a standardized split and score it.

    The data is split 2/3 train, 1/3 test (``random_state=0``), the features
    are standardized with a scaler fitted on the training part only, and a
    ``GridSearchCV`` (scoring ``f1_weighted``) is run over the solver with an
    L2 penalty.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table, one row per sample.
    dep : array-like, Series or single-column DataFrame
        Target labels. Flattened to 1-D before fitting so scikit-learn does
        not emit its column-vector ``DataConversionWarning``.

    Returns
    -------
    tuple
        ``(grid, cm, clf_report, f1_weighted, best_param)``:
        the fitted ``GridSearchCV``, the test-split confusion matrix, the
        ``classification_report`` text, the weighted F1 on the test split,
        and ``grid.best_params_`` rendered as a string.

    Notes
    -----
    * The fitted ``StandardScaler`` is not returned, so new data must be
      standardized equivalently before calling ``grid.predict``.
    * ``grid.predict_proba`` is available on the returned object if the
      caller needs ROC AUC.
    * NOTE(review): this pipeline is repeated almost verbatim in the sibling
      get*Classification functions; a shared helper would remove the copies.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import classification_report, confusion_matrix, f1_score
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import train_test_split as tts
    from sklearn.preprocessing import StandardScaler

    ##-SPLIT TRAIN & TEST
    target = np.ravel(dep)
    X_Train, X_Test, Y_Train, Y_Test = tts(ind, target, test_size=1/3, random_state=0)

    ##STANDARDIZATION
    # Fit on the training part only so test statistics do not leak in.
    sc = StandardScaler()
    X_Train = sc.fit_transform(X_Train)
    X_Test = sc.transform(X_Test)

    ##MODEL CREATION
    param_grid = {'solver': ['newton-cg', 'lbfgs', 'liblinear', 'saga'],
                  'penalty': ['l2']}

    # 'saga' and 'liblinear' shuffle the data internally; seeding keeps the
    # fitted coefficients repeatable.
    grid = GridSearchCV(LogisticRegression(random_state=0), param_grid,
                        refit=True, verbose=0, n_jobs=-1, scoring='f1_weighted')
    grid.fit(X_Train, Y_Train)
    grid_predictions = grid.predict(X_Test)

    ##CLASSIFICATION
    cm = confusion_matrix(Y_Test, grid_predictions)
    clf_report = classification_report(Y_Test, grid_predictions)

    ##F1_SCORE
    f1_weighted = f1_score(Y_Test, grid_predictions, average='weighted')
    best_param = format(grid.best_params_)

    return grid, cm, clf_report, f1_weighted, best_param

In [7]:
def getKNearestNeighbor(ind,dep):
    """Grid-search a k-nearest-neighbours classifier on a standardized split.

    The data is split 2/3 train, 1/3 test (``random_state=0``), the features
    are standardized with a scaler fitted on the training part only, and a
    ``GridSearchCV`` (scoring ``f1_weighted``) is run over the weighting
    scheme and the neighbour-search algorithm. ``n_neighbors`` is left at
    the estimator default.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table, one row per sample.
    dep : array-like, Series or single-column DataFrame
        Target labels. Flattened to 1-D before fitting so scikit-learn does
        not emit its column-vector ``DataConversionWarning``.

    Returns
    -------
    tuple
        ``(grid, cm, clf_report, f1_weighted, best_param)``:
        the fitted ``GridSearchCV``, the test-split confusion matrix, the
        ``classification_report`` text, the weighted F1 on the test split,
        and ``grid.best_params_`` rendered as a string.

    Notes
    -----
    * The fitted ``StandardScaler`` is not returned, so new data must be
      standardized equivalently before calling ``grid.predict``.
    * ``grid.predict_proba`` is available on the returned object if the
      caller needs ROC AUC.
    * NOTE(review): this pipeline is repeated almost verbatim in the sibling
      get*Classification functions; a shared helper would remove the copies.
    """
    from sklearn.metrics import classification_report, confusion_matrix, f1_score
    from sklearn.model_selection import GridSearchCV
    from sklearn.model_selection import train_test_split as tts
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.preprocessing import StandardScaler

    ##-SPLIT TRAIN & TEST
    target = np.ravel(dep)
    X_Train, X_Test, Y_Train, Y_Test = tts(ind, target, test_size=1/3, random_state=0)

    ##STANDARDIZATION
    # Fit on the training part only so test statistics do not leak in.
    sc = StandardScaler()
    X_Train = sc.fit_transform(X_Train)
    X_Test = sc.transform(X_Test)

    ##MODEL CREATION
    param_grid = {'weights': ['uniform', 'distance'],
                  'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']}

    grid = GridSearchCV(KNeighborsClassifier(), param_grid,
                        refit=True, verbose=0, n_jobs=-1, scoring='f1_weighted')
    grid.fit(X_Train, Y_Train)
    grid_predictions = grid.predict(X_Test)

    ##CLASSIFICATION
    cm = confusion_matrix(Y_Test, grid_predictions)
    clf_report = classification_report(Y_Test, grid_predictions)

    ##F1_SCORE
    f1_weighted = f1_score(Y_Test, grid_predictions, average='weighted')
    best_param = format(grid.best_params_)

    return grid, cm, clf_report, f1_weighted, best_param

    

In [8]:
def getNeiveBayesMultinomialNB(ind,dep):
    """Fit a MultinomialNB on a 2/3-1/3 split and evaluate it on the test part.

    The previous body returned ``clf_report``, ``cm`` and ``y_pred`` without
    ever defining them, so a call either raised ``NameError`` or silently
    returned stale notebook globals. This version computes them.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table. Values must be non-negative: MultinomialNB rejects
        negative input, which is why no standardization is applied here.
    dep : array-like, Series or single-column DataFrame
        Target labels; flattened to 1-D before fitting.

    Returns
    -------
    tuple
        ``(clf_report, cm, y_pred)``: the ``classification_report`` text, the
        confusion matrix and the predicted labels, all for the test split.

    NOTE(review): the split (test_size=1/3, random_state=0) mirrors the
    sibling get*Classification functions - confirm this matches the intent.
    """
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.model_selection import train_test_split as tts
    from sklearn.naive_bayes import MultinomialNB

    X_Train, X_Test, Y_Train, Y_Test = tts(ind, np.ravel(dep), test_size=1/3, random_state=0)

    classifier = MultinomialNB()
    classifier.fit(X_Train, Y_Train)
    y_pred = classifier.predict(X_Test)

    cm = confusion_matrix(Y_Test, y_pred)
    clf_report = classification_report(Y_Test, y_pred)
    return clf_report,cm,y_pred


In [9]:
def getNeiveBayesBernoulliNB(ind,dep):
    """Fit a BernoulliNB on a standardized 2/3-1/3 split and evaluate it.

    The previous body returned ``clf_report``, ``cm`` and ``y_pred`` without
    ever defining them, so a call either raised ``NameError`` or silently
    returned stale notebook globals. This version computes them.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table, one row per sample.
    dep : array-like, Series or single-column DataFrame
        Target labels; flattened to 1-D before fitting.

    Returns
    -------
    tuple
        ``(clf_report, cm, y_pred)``: the ``classification_report`` text, the
        confusion matrix and the predicted labels, all for the test split.

    NOTE(review): the split (test_size=1/3, random_state=0) and the
    standardization mirror the sibling get*Classification functions -
    confirm this matches the intent.
    """
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.model_selection import train_test_split as tts
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.preprocessing import StandardScaler

    X_Train, X_Test, Y_Train, Y_Test = tts(ind, np.ravel(dep), test_size=1/3, random_state=0)

    # BernoulliNB binarizes each feature at 0.0 by default. Centering on the
    # training mean first makes that threshold "above / below average"
    # instead of mapping every positive raw value to 1.
    sc = StandardScaler()
    X_Train = sc.fit_transform(X_Train)
    X_Test = sc.transform(X_Test)

    classifier = BernoulliNB()
    classifier.fit(X_Train, Y_Train)
    y_pred = classifier.predict(X_Test)

    cm = confusion_matrix(Y_Test, y_pred)
    clf_report = classification_report(Y_Test, y_pred)
    return clf_report,cm,y_pred


In [10]:
def getNeiveBayesCategoricalNB(ind,dep):
    """Fit a CategoricalNB on a 2/3-1/3 split and evaluate it on the test part.

    The previous body returned ``clf_report``, ``cm`` and ``y_pred`` without
    ever defining them, so a call either raised ``NameError`` or silently
    returned stale notebook globals. This version computes them.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table. Every value is treated as a category code and must be
        a non-negative integer, as CategoricalNB requires.
    dep : array-like, Series or single-column DataFrame
        Target labels; flattened to 1-D before fitting.

    Returns
    -------
    tuple
        ``(clf_report, cm, y_pred)``: the ``classification_report`` text, the
        confusion matrix and the predicted labels, all for the test split.

    NOTE(review): the split (test_size=1/3, random_state=0) mirrors the
    sibling get*Classification functions. Continuous columns are used as-is,
    one category per distinct value - consider binning them first.
    """
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.model_selection import train_test_split as tts
    from sklearn.naive_bayes import CategoricalNB

    features = np.asarray(ind)
    X_Train, X_Test, Y_Train, Y_Test = tts(features, np.ravel(dep), test_size=1/3, random_state=0)

    # CategoricalNB sizes its per-feature tables from the codes seen in fit();
    # a larger code appearing only in the test split would index out of
    # range. Sizing the tables from the full data avoids that.
    n_categories = features.max(axis=0).astype(int) + 1

    classifier = CategoricalNB(min_categories=n_categories)
    classifier.fit(X_Train, Y_Train)
    y_pred = classifier.predict(X_Test)

    cm = confusion_matrix(Y_Test, y_pred)
    clf_report = classification_report(Y_Test, y_pred)
    return clf_report,cm,y_pred


In [11]:
def getNeiveBayesComplementNB(ind,dep):
    """Fit a ComplementNB on a 2/3-1/3 split and evaluate it on the test part.

    The previous body returned ``clf_report``, ``cm`` and ``y_pred`` without
    ever defining them, so a call either raised ``NameError`` or silently
    returned stale notebook globals. This version computes them.

    Parameters
    ----------
    ind : array-like or DataFrame
        Feature table. Values must be non-negative: ComplementNB rejects
        negative input, which is why no standardization is applied here.
    dep : array-like, Series or single-column DataFrame
        Target labels; flattened to 1-D before fitting.

    Returns
    -------
    tuple
        ``(clf_report, cm, y_pred)``: the ``classification_report`` text, the
        confusion matrix and the predicted labels, all for the test split.

    NOTE(review): the split (test_size=1/3, random_state=0) mirrors the
    sibling get*Classification functions - confirm this matches the intent.
    """
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.model_selection import train_test_split as tts
    from sklearn.naive_bayes import ComplementNB

    X_Train, X_Test, Y_Train, Y_Test = tts(ind, np.ravel(dep), test_size=1/3, random_state=0)

    classifier = ComplementNB()
    classifier.fit(X_Train, Y_Train)
    y_pred = classifier.predict(X_Test)

    cm = confusion_matrix(Y_Test, y_pred)
    clf_report = classification_report(Y_Test, y_pred)
    return clf_report,cm,y_pred
