<a href="https://colab.research.google.com/github/sripriyakonjarla/Machine_Learning/blob/main/lab_Session_7ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load your dataset
data = pd.read_excel('training_mathbert.xlsx')
X = data.iloc[:, :-1]  # Features
y = data.iloc[:, -1]   # Target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameter grids for classifiers
param_grids = {
    'perceptron': {
        'alpha': [0.0001, 0.001, 0.01, 0.1],
        'max_iter': [1000, 2000, 3000],
        'tol': [1e-4, 1e-3]
    },
    'mlp': {
        'hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'activation': ['tanh', 'relu'],
        'alpha': [0.0001, 0.001, 0.01]
    },
    'svm': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto']
    },
    'decision_tree': {
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    },
    'random_forest': {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5]
    },
    'ada_boost': {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 1.0]
    },
    'xgboost': {
        'n_estimators': [50, 100],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.3]
    },
    'naive_bayes': {}
}

# Function to tune and evaluate models
def tune_and_evaluate(model, param_grid, X_train, y_train, X_test, y_test):
    search = RandomizedSearchCV(model, param_grid, n_iter=10, cv=10, random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test)

    return {
        'best_params': search.best_params_,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1_score': f1_score(y_test, y_pred, average='weighted'),
    }

# List to store results
results = []

# Classifiers to evaluate
classifiers = {
    'Perceptron': (Perceptron(), param_grids['perceptron']),
    'MLP': (MLPClassifier(max_iter=1000), param_grids['mlp']),
    'SVM': (SVC(probability=True), param_grids['svm']),
    'Decision Tree': (DecisionTreeClassifier(), param_grids['decision_tree']),
    'Random Forest': (RandomForestClassifier(), param_grids['random_forest']),
    'AdaBoost': (AdaBoostClassifier(), param_grids['ada_boost']),
    'XGBoost': (XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'), param_grids['xgboost']),
    'Naïve Bayes': (GaussianNB(), param_grids['naive_bayes'])
}

# Evaluate each classifier
for name, (model, params) in classifiers.items():
    metrics = tune_and_evaluate(model, params, X_train, y_train, X_test, y_test)
    metrics['Classifier'] = name
    results.append(metrics)

# Create DataFrame from results
results_df = pd.DataFrame(results)

# Format the output for better readability
results_df = results_df[['Classifier', 'best_params', 'accuracy', 'precision', 'recall', 'f1_score']]
results_df['accuracy'] = results_df['accuracy'].map('{:.2f}'.format)
results_df['precision'] = results_df['precision'].map('{:.2f}'.format)
results_df['recall'] = results_df['recall'].map('{:.2f}'.format)
results_df['f1_score'] = results_df['f1_score'].map('{:.2f}'.format)

# Styling the output
styled_results = results_df.style.format({
    'best_params': lambda x: str(x).replace(", ", ",\n"),
}).set_table_attributes('style="width: 80%; margin: auto;"') \
 .set_caption("Classifier Performance Metrics") \
 .set_properties(**{'text-align': 'center'})

# Display the styled output
styled_results


Parameters: { "use_label_encoder" } are not used.



Unnamed: 0,Classifier,best_params,accuracy,precision,recall,f1_score
0,Perceptron,"{'tol': 0.0001, 'max_iter': 2000, 'alpha': 0.001}",0.85,0.91,0.85,0.86
1,MLP,"{'hidden_layer_sizes': (50,), 'alpha': 0.01, 'activation': 'relu'}",0.93,0.93,0.93,0.93
2,SVM,"{'kernel': 'linear', 'gamma': 'auto', 'C': 10}",1.0,1.0,1.0,1.0
3,Decision Tree,"{'min_samples_split': 5, 'max_depth': 30}",1.0,1.0,1.0,1.0
4,Random Forest,"{'n_estimators': 200, 'min_samples_split': 2, 'max_depth': None}",0.97,0.97,0.97,0.97
5,AdaBoost,"{'n_estimators': 50, 'learning_rate': 0.01}",1.0,1.0,1.0,1.0
6,XGBoost,"{'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.01}",1.0,1.0,1.0,1.0
7,Naïve Bayes,{},0.8,0.79,0.8,0.79
