In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score

In [3]:
def load_data(file_path):
    data = pd.read_csv(file_path)
    x = data.iloc[:, :-1]  # Features
    y = data['Judgement Status']  # Target
    return x, y

x_custom, y_custom = load_data('English_Abstractive.csv')

In [4]:
def split_data(x, y, test_size=0.2, random_state=42):
    return train_test_split(x, y, test_size=test_size, random_state=random_state)

x_train, x_test, y_train, y_test = split_data(x_custom, y_custom)

In [5]:
def tune_mlp_classifier(x_train, y_train):
    mlp = MLPClassifier(max_iter=1000, random_state=42)
    
    param_distributions = {
        'hidden_layer_sizes': [(5,), (10,), (20,), (50,), (100,)],
        'activation': ['logistic', 'tanh', 'relu'],
        'solver': ['adam', 'sgd'],
        'alpha': np.logspace(-5, 0, 10),
        'learning_rate': ['constant', 'adaptive']
    }
    
    search = RandomizedSearchCV(mlp, param_distributions, n_iter=50,
                                 scoring='accuracy', cv=3, random_state=42)
    search.fit(x_train, y_train)
    
    return search.best_estimator_, search.best_params_

best_mlp_model, best_mlp_params = tune_mlp_classifier(x_train, y_train)



In [9]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Function to evaluate classifier and calculate required metrics
def evaluate_classifier(classifier, x_train, y_train, x_test, y_test):
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)
    
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions, average='weighted')
    recall = recall_score(y_test, predictions, average='weighted')
    f1 = f1_score(y_test, predictions, average='weighted')
    
    return accuracy, precision, recall, f1

# List of classifiers to evaluate
classifiers = {
    "MLPClassifier": best_mlp_model,
    "Support Vector Machine": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "XGBoost": XGBClassifier(use_label_encoder=False),
    "Naive Bayes": GaussianNB()
}

results = []

# Evaluate each classifier and collect results
for name, clf in classifiers.items():
    accuracy, precision, recall, f1 = evaluate_classifier(clf, x_train, y_train, x_test, y_test)
    results.append({
        "Classifier": name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1
    })

# Create a DataFrame to display the results
results_df = pd.DataFrame(results)

# Display the results in a well-formatted table
print(results_df[['Classifier', 'Accuracy', 'Precision', 'Recall', 'F1-Score']].to_string(index=False))

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


TypeError: Cannot convert numpy.ndarray to numpy.ndarray