In [6]:
import pandas as pd
from scipy.stats import randint, uniform
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    roc_curve,
)
from xgboost import XGBClassifier


In [7]:
# Project-local cleaning helpers; they are called in the following cell.
from Data_Cleaning import outlier_replacer
from Data_Cleaning2 import Data_Cleaner

# Load the raw spreadsheet. The bare filename is resolved against the
# notebook's current working directory.
data = pd.read_excel('Dry_Bean_Dataset.xlsx')

In [8]:
# Step 1: run the project's outlier handling on the raw frame.
# NOTE(review): the names suggest x = features, y = target and x_cols = feature
# column names -- confirm against Data_Cleaning.outlier_replacer.
x, y, x_cols = outlier_replacer(data)

# Step 2: build the modelling inputs plus the fitted preprocessing objects.
# NOTE(review): presumably scaler / Encoder / pca are the transformers fitted
# inside Data_Cleaner -- confirm against Data_Cleaning2 before reusing them.
(
    x_train,
    x_test,
    y_train,
    y_test,
    scaler,
    Encoder,
    pca,
) = Data_Cleaner(x, y, x_cols)

In [9]:
# Fixed seed for every estimator whose fit involves randomness, so that the
# comparison below gives the same numbers on each Restart & Run All.
SEED = 42

# Candidate classifiers to compare, keyed by the label printed in the report.
# Library defaults are used apart from the seed; LogisticRegression, SVC,
# GaussianNB and KNeighborsClassifier are left unseeded because their default
# configurations do not draw random numbers during fit.
models_dict = {
    "LogisticRegression": LogisticRegression(),
    "SupportVector": SVC(),
    "NaiveBayes": GaussianNB(),
    "KnnClassifier": KNeighborsClassifier(),
    "DecisionTree": DecisionTreeClassifier(random_state=SEED),
    "RandomForest": RandomForestClassifier(random_state=SEED),
    "AdaBoost": AdaBoostClassifier(random_state=SEED),
    "GradientBoost": GradientBoostingClassifier(random_state=SEED),
    "XgbClassifier": XGBClassifier(random_state=SEED),
}

In [5]:
def _weighted_scores(y_true, y_pred):
    """Compute the four reported metrics for one data split.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by a fitted classifier for the same rows.

    Returns
    -------
    dict
        Maps the metric label used in the printed report to its value, in the
        order the report prints them. F1, precision and recall are
        class-weighted averages (``average='weighted'``).
    """
    return {
        "AccuracyScore": accuracy_score(y_true, y_pred),
        "F1Score": f1_score(y_true, y_pred, average='weighted'),
        "PrecisionScore": precision_score(y_true, y_pred, average='weighted'),
        "RecallScore": recall_score(y_true, y_pred, average='weighted'),
    }


# Fit every candidate on the training split and report the same metrics for
# the training and test splits, so over-fitting is visible side by side.
#
# ROC-AUC is not reported here: for a multiclass target roc_auc_score expects
# per-class scores (e.g. predict_proba output) rather than the hard labels
# returned by predict(), and SVC() with default settings has no predict_proba.
for model_name, model in models_dict.items():
    model.fit(x_train, y_train)

    print(model_name)
    for split_label, features, labels in (
        ("Training", x_train, y_train),
        ("Test", x_test, y_test),
    ):
        scores = _weighted_scores(labels, model.predict(features))
        for metric_label, value in scores.items():
            print("{} {}:{:.4f}".format(split_label, metric_label, value))
    print('\n')


LogisticRegression
Training AccuracyScore:0.9335
Training F1Score:0.9335
Training PrecisionScore:0.9337
Training RecallScore:0.9335
Test AccuracyScore:0.9289
Test F1Score:0.9290
Test PrecisionScore:0.9294
Test RecallScore:0.9289


SupportVector
Training AccuracyScore:0.9417
Training F1Score:0.9418
Training PrecisionScore:0.9421
Training RecallScore:0.9417
Test AccuracyScore:0.9347
Test F1Score:0.9350
Test PrecisionScore:0.9356
Test RecallScore:0.9347


NaiveBayes
Training AccuracyScore:0.9067
Training F1Score:0.9065
Training PrecisionScore:0.9071
Training RecallScore:0.9067
Test AccuracyScore:0.9029
Test F1Score:0.9029
Test PrecisionScore:0.9034
Test RecallScore:0.9029


KnnClassifier
Training AccuracyScore:0.9569
Training F1Score:0.9569
Training PrecisionScore:0.9570
Training RecallScore:0.9569
Test AccuracyScore:0.9301
Test F1Score:0.9303
Test PrecisionScore:0.9306
Test RecallScore:0.9301


DecisionTree
Training AccuracyScore:1.0000
Training F1Score:1.0000
Training PrecisionScore:1.0



AdaBoost
Training AccuracyScore:0.7359
Training F1Score:0.7027
Training PrecisionScore:0.7608
Training RecallScore:0.7359
Test AccuracyScore:0.7265
Test F1Score:0.6921
Test PrecisionScore:0.7371
Test RecallScore:0.7265


GradientBoost
Training AccuracyScore:0.9572
Training F1Score:0.9572
Training PrecisionScore:0.9573
Training RecallScore:0.9572
Test AccuracyScore:0.9325
Test F1Score:0.9327
Test PrecisionScore:0.9330
Test RecallScore:0.9325


XgbClassifier
Training AccuracyScore:0.9998
Training F1Score:0.9998
Training PrecisionScore:0.9998
Training RecallScore:0.9998
Test AccuracyScore:0.9384
Test F1Score:0.9386
Test PrecisionScore:0.9391
Test RecallScore:0.9384




In [10]:
# Randomized hyper-parameter search for XGBoost.
# XGBClassifier, RandomizedSearchCV, randint and uniform come from the
# notebook's top import cell.

# Sampling distributions (not a fixed grid) for each hyper-parameter.
# scipy conventions:
#   randint(low, high)  -> integers in [low, high), i.e. `high` is excluded
#   uniform(loc, scale) -> floats in [loc, loc + scale]
param_dist = {
    'n_estimators': randint(50, 400),        # 50 .. 399
    'max_depth': randint(3, 25),             # 3 .. 24
    'learning_rate': uniform(0.01, 0.3),     # 0.01 .. 0.31
    'subsample': uniform(0.6, 0.4),          # 0.6 .. 1.0
    'colsample_bytree': uniform(0.6, 0.4),   # 0.6 .. 1.0
    'min_child_weight': randint(1, 10),      # 1 .. 9
    'gamma': uniform(0, 0.5),                # 0 .. 0.5
    'reg_alpha': uniform(0, 1),              # 0 .. 1
    'reg_lambda': uniform(0.1, 1)            # 0.1 .. 1.1
}

# Base estimator; multiclass log-loss is its evaluation metric.
xgb = XGBClassifier(eval_metric='mlogloss')

# 100 sampled candidates x 5-fold CV = 500 fits, run on all available cores.
# random_state fixes which candidates are drawn from param_dist.
random_search = RandomizedSearchCV(
    xgb,
    param_distributions=param_dist,
    n_iter=100,
    scoring='accuracy',
    n_jobs=-1,
    cv=5,
    verbose=1,
    random_state=42,
)

# The search only ever sees the training split.
random_search.fit(x_train, y_train)

# best_score_ is the mean cross-validated accuracy of the best candidate on
# the training folds -- it is not a held-out test accuracy.
print("Best parameters found: ", random_search.best_params_)
print("Best accuracy: ", random_search.best_score_)


Fitting 5 folds for each of 100 candidates, totalling 500 fits
Best parameters found:  {'colsample_bytree': np.float64(0.8738286191519333), 'gamma': np.float64(0.043934055621328405), 'learning_rate': np.float64(0.05164743203260611), 'max_depth': 11, 'min_child_weight': 1, 'n_estimators': 314, 'reg_alpha': np.float64(0.5956387406078443), 'reg_lambda': np.float64(0.5715761885501583), 'subsample': np.float64(0.7647363656589075)}
Best accuracy:  0.9470215203224385
