In [1]:
import numpy as np
import pandas as pd
import time
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score

data = pd.read_csv("E:/R(5)/Dataset_3.csv")
X = data.drop('label', axis=1)
y = data['label']

split_ratios = [0.7, 0.8, 0.9]

accuracy_results = []
confusion_matrix_results = []
auc_results = []
training_testing_times = []

for ratio in split_ratios:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1 - ratio, random_state=42)

    classifiers = {
        'Random Forest': RandomForestClassifier(random_state=42),
        'Gradient Boosting': GradientBoostingClassifier(random_state=42),
        'AdaBoost': AdaBoostClassifier(random_state=42),
        'Naive Bayes': GaussianNB(),
        'Support Vector Machine': SVC(probability=True),
        'XGBoost': XGBClassifier(random_state=42),
        'Decision Tree': DecisionTreeClassifier(random_state=42)
    }

    for clf_name, clf in classifiers.items():
        start_time = time.time()
        clf.fit(X_train, y_train)
        end_time = time.time()
        training_time = end_time - start_time

        start_time = time.time()
        y_pred = clf.predict(X_test)
        end_time = time.time()
        testing_time = end_time - start_time
        
        accuracy = accuracy_score(y_test, y_pred)
        accuracy_results.append({
            'Split Ratio': ratio,
            'Classifier': clf_name,
            'Accuracy': accuracy
        })
        
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
        tpr = tp / (tp + fn)
        fnr = fn / (tp + fn)
        tnr = tn / (tn + fp)
        fpr = fp / (tn + fp)
        confusion_matrix_results.append({
            'Split Ratio': ratio,
            'Classifier': clf_name,
            'True Positive': tp,
            'False Negative': fn,
            'True Negative': tn,
            'False Positive': fp,
            'True Positive Rate': tpr,
            'False Negative Rate': fnr,
            'True Negative Rate': tnr,
            'False Positive Rate': fpr
        })
        
        try:
            probas = clf.predict_proba(X_test)
            auc_value = roc_auc_score(y_test, probas[:, 1])
        except AttributeError:
            auc_value = None
        auc_results.append({
            'Split Ratio': ratio,
            'Classifier': clf_name,
            'AUC Value': auc_value
        })

        training_testing_times.append({
            'Split Ratio': ratio,
            'Classifier': clf_name,
            'Training Time (s)': training_time,
            'Testing Time (s)': testing_time
        })
accuracy_df = pd.DataFrame(accuracy_results)
confusion_matrix_df = pd.DataFrame(confusion_matrix_results)
auc_df = pd.DataFrame(auc_results)
training_testing_times_df = pd.DataFrame(training_testing_times)

In [2]:
# Print the DataFrames
print("Accuracy:")
accuracy_df

Accuracy:


Unnamed: 0,Split Ratio,Classifier,Accuracy
0,0.7,Random Forest,0.92562
1,0.7,Gradient Boosting,0.92562
2,0.7,AdaBoost,0.92562
3,0.7,Naive Bayes,0.909091
4,0.7,Support Vector Machine,0.92562
5,0.7,XGBoost,0.884298
6,0.7,Decision Tree,0.92562
7,0.8,Random Forest,0.95
8,0.8,Gradient Boosting,0.95
9,0.8,AdaBoost,0.95


In [3]:
print("\nConfusion Matrix Metrics:")
confusion_matrix_df


Confusion Matrix Metrics:


Unnamed: 0,Split Ratio,Classifier,True Positive,False Negative,True Negative,False Positive,True Positive Rate,False Negative Rate,True Negative Rate,False Positive Rate
0,0.7,Random Forest,68,9,44,0,0.883117,0.116883,1.0,0.0
1,0.7,Gradient Boosting,68,9,44,0,0.883117,0.116883,1.0,0.0
2,0.7,AdaBoost,68,9,44,0,0.883117,0.116883,1.0,0.0
3,0.7,Naive Bayes,68,9,42,2,0.883117,0.116883,0.954545,0.045455
4,0.7,Support Vector Machine,68,9,44,0,0.883117,0.116883,1.0,0.0
5,0.7,XGBoost,67,10,40,4,0.87013,0.12987,0.909091,0.090909
6,0.7,Decision Tree,68,9,44,0,0.883117,0.116883,1.0,0.0
7,0.8,Random Forest,48,4,28,0,0.923077,0.076923,1.0,0.0
8,0.8,Gradient Boosting,48,4,28,0,0.923077,0.076923,1.0,0.0
9,0.8,AdaBoost,48,4,28,0,0.923077,0.076923,1.0,0.0


In [4]:
print("\nAUC Values:")
auc_df


AUC Values:


Unnamed: 0,Split Ratio,Classifier,AUC Value
0,0.7,Random Forest,0.94392
1,0.7,Gradient Boosting,0.94392
2,0.7,AdaBoost,0.94392
3,0.7,Naive Bayes,0.919717
4,0.7,Support Vector Machine,0.939787
5,0.7,XGBoost,0.938607
6,0.7,Decision Tree,0.94392
7,0.8,Random Forest,0.967033
8,0.8,Gradient Boosting,0.967033
9,0.8,AdaBoost,0.967033


In [5]:
print("\nTraining Times:")
training_testing_times_df


Training Times:


Unnamed: 0,Split Ratio,Classifier,Training Time (s),Testing Time (s)
0,0.7,Random Forest,0.294727,0.020943
1,0.7,Gradient Boosting,0.186528,0.00399
2,0.7,AdaBoost,0.172539,0.025931
3,0.7,Naive Bayes,0.00399,0.00299
4,0.7,Support Vector Machine,0.017952,0.002992
5,0.7,XGBoost,0.060835,0.056848
6,0.7,Decision Tree,0.00399,0.001993
7,0.8,Random Forest,0.371007,0.035906
8,0.8,Gradient Boosting,0.277294,0.001995
9,0.8,AdaBoost,0.195874,0.02992
