In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier,GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,confusion_matrix,precision_score,recall_score,f1_score,roc_auc_score,roc_curve


In [2]:
# Project-local preprocessing helpers; their implementations live in the
# Data_Cleaning / Data_Cleaning2 modules and are not shown in this notebook.
from Data_Cleaning import outlier_replacer
from Data_Cleaning2 import Data_Cleaner

# Read the raw dataset from the Excel workbook (path is relative to the
# kernel's working directory).
data = pd.read_excel('Dry_Bean_Dataset.xlsx')

In [3]:
# First preprocessing stage.
# NOTE(review): judging by the names, this presumably returns the feature
# matrix, the target, and the feature column names after outlier handling --
# confirm against Data_Cleaning.outlier_replacer.
x, y, x_cols = outlier_replacer(data)

# Second preprocessing stage: yields the train/test inputs and targets used by
# every later cell, plus three fitted helper objects.
# NOTE(review): `scaler`, `Encoder` and `pc` look like a feature scaler, a label
# encoder and a PCA transformer respectively -- confirm against
# Data_Cleaning2.Data_Cleaner.
(
    x_train,
    x_test,
    y_train,
    y_test,
    scaler,
    Encoder,
    pc,
) = Data_Cleaner(x, y, x_cols)

In [4]:
# One fixed seed for every estimator with internal randomness, so that a fresh
# "Restart Kernel -> Run All" reproduces the same scores run after run.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the label the evaluation loop prints.
# Apart from the seed, every hyperparameter is left at its library default.
models_dict = {
    "LogisticRegression": LogisticRegression(),
    "SupportVector": SVC(),
    "NaiveBayes": GaussianNB(),
    "KnnClassifier": KNeighborsClassifier(),
    "DecisionTree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "RandomForest": RandomForestClassifier(random_state=RANDOM_STATE),
    "AdaBoost": AdaBoostClassifier(random_state=RANDOM_STATE),
    "GradientBoost": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "XgbClassifier": XGBClassifier(random_state=RANDOM_STATE),
}

In [5]:
def _weighted_scores(y_true, y_pred):
    """Score one set of predictions against its ground truth.

    Returns a 4-tuple ``(accuracy, f1, precision, recall)``; the last three
    are computed with ``average='weighted'``.
    """
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='weighted'),
        precision_score(y_true, y_pred, average='weighted'),
        recall_score(y_true, y_pred, average='weighted'),
    )


# Labels for the tuple returned by _weighted_scores, in the same order.
_SCORE_LABELS = ("AccuracyScore", "F1Score", "PrecisionScore", "RecallScore")

# Fit every candidate on the training split and report the same four metrics
# on both splits, so the train/test gap (overfitting) is visible per model.
# Iterating .items() avoids rebuilding key/value lists on every pass.
for model_name, model in models_dict.items():
    model.fit(x_train, y_train)

    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)

    train_acc, train_f1, train_prec, train_recall = _weighted_scores(y_train, y_train_pred)
    test_acc, test_f1, test_prec, test_recall = _weighted_scores(y_test, y_test_pred)

    # ROC-AUC is intentionally not reported here: for a multiclass target
    # roc_auc_score needs per-class scores/probabilities, not the hard labels
    # returned by predict().

    print(model_name)
    for split_name, split_scores in (
        ("Training", (train_acc, train_f1, train_prec, train_recall)),
        ("Test", (test_acc, test_f1, test_prec, test_recall)),
    ):
        for label, score in zip(_SCORE_LABELS, split_scores):
            print("{} {}:{:.4f}".format(split_name, label, score))
    print('\n')


LogisticRegression
Training AccuracyScore:0.9320
Training F1Score:0.9320
Training PrecisionScore:0.9322
Training RecallScore:0.9320
Test AccuracyScore:0.9283
Test F1Score:0.9283
Test PrecisionScore:0.9288
Test RecallScore:0.9283


SupportVector
Training AccuracyScore:0.9429
Training F1Score:0.9430
Training PrecisionScore:0.9433
Training RecallScore:0.9429
Test AccuracyScore:0.9370
Test F1Score:0.9371
Test PrecisionScore:0.9376
Test RecallScore:0.9370


NaiveBayes
Training AccuracyScore:0.9050
Training F1Score:0.9048
Training PrecisionScore:0.9053
Training RecallScore:0.9050
Test AccuracyScore:0.9029
Test F1Score:0.9027
Test PrecisionScore:0.9037
Test RecallScore:0.9029


KnnClassifier
Training AccuracyScore:0.9564
Training F1Score:0.9565
Training PrecisionScore:0.9566
Training RecallScore:0.9564
Test AccuracyScore:0.9368
Test F1Score:0.9368
Test PrecisionScore:0.9373
Test RecallScore:0.9368


DecisionTree
Training AccuracyScore:1.0000
Training F1Score:1.0000
Training PrecisionScore:1.0



AdaBoost
Training AccuracyScore:0.6840
Training F1Score:0.6626
Training PrecisionScore:0.7363
Training RecallScore:0.6840
Test AccuracyScore:0.6844
Test F1Score:0.6590
Test PrecisionScore:0.7386
Test RecallScore:0.6844


GradientBoost
Training AccuracyScore:0.9566
Training F1Score:0.9566
Training PrecisionScore:0.9567
Training RecallScore:0.9566
Test AccuracyScore:0.9343
Test F1Score:0.9344
Test PrecisionScore:0.9348
Test RecallScore:0.9343


XgbClassifier
Training AccuracyScore:0.9990
Training F1Score:0.9990
Training PrecisionScore:0.9990
Training RecallScore:0.9990
Test AccuracyScore:0.9458
Test F1Score:0.9459
Test PrecisionScore:0.9463
Test RecallScore:0.9458




In [6]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform

# Sampling distributions for the randomized search.
# scipy's uniform(loc, scale) covers [loc, loc + scale], and randint(low, high)
# draws integers from [low, high).
params = dict(
    n_estimators=randint(50, 400),
    max_depth=randint(3, 25),
    learning_rate=uniform(0.01, 0.3),
    subsample=uniform(0.6, 0.4),
    colsample_bytree=uniform(0.6, 0.4),
    min_child_weight=randint(1, 10),
    gamma=uniform(0, 0.5),
    reg_alpha=uniform(0, 1),
    reg_lambda=uniform(0.1, 1),
)

# Base estimator whose hyperparameters are tuned.
xgb = XGBClassifier(eval_metric='mlogloss')

# 100 sampled configurations x 5-fold cross-validation, ranked by accuracy.
# random_state pins which configurations get sampled; n_jobs=-1 uses all cores.
random_search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=params,
    n_iter=100,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
    random_state=42,
)

# Run the search on the training split only.
random_search.fit(x_train, y_train)

# Report the winning configuration and its mean cross-validated accuracy.
print("Best parameters found: ", random_search.best_params_)
print("Best accuracy: ", random_search.best_score_)


Fitting 5 folds for each of 100 candidates, totalling 500 fits
Best parameters found:  {'colsample_bytree': np.float64(0.8950004992438994), 'gamma': np.float64(0.22610897044490358), 'learning_rate': np.float64(0.07738144688199458), 'max_depth': 14, 'min_child_weight': 3, 'n_estimators': 231, 'reg_alpha': np.float64(0.1763869865062233), 'reg_lambda': np.float64(0.5983677727394797), 'subsample': np.float64(0.7675701798018192)}
Best accuracy:  0.9450070031900463
