<a href="https://colab.research.google.com/github/sripriyakonjarla/Machine_Learning/blob/main/lab_Session_7ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the spreadsheet. By position, the last column is taken as the target and
# every column before it as a feature.
data = pd.read_excel('training_mathbert.xlsx')
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameter search spaces, one entry per classifier, assembled one model at
# a time. Each inner dict maps a constructor argument to the values to try.
param_grids = {}

# Linear models / neural network
# NOTE(review): scikit-learn's Perceptron only applies `alpha` when a penalty is
# configured -- confirm the estimator is built with one, otherwise this axis is inert.
param_grids['perceptron'] = {
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'max_iter': [1000, 2000, 3000],
    'tol': [1e-4, 1e-3],
}
param_grids['mlp'] = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'activation': ['tanh', 'relu'],
    'alpha': [0.0001, 0.001, 0.01],
}

# Kernel method
param_grids['svm'] = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto'],
}

# Single tree and tree ensembles
param_grids['decision_tree'] = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30],
    'max_features': ['sqrt', 'log2', None],
    'min_samples_leaf': [1, 2, 4],
    'min_samples_split': [2, 5, 10],
}
param_grids['random_forest'] = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
}
param_grids['ada_boost'] = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0],
}
param_grids['xgboost'] = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.3],
}

# Probabilistic baseline
param_grids['naive_bayes'] = {
    'var_smoothing': [1e-9, 1e-8, 1e-7],
}

def _count_candidates(param_grid):
    """Count the distinct settings described by ``param_grid``.

    Accepts a single mapping or a sequence of mappings. Returns ``None`` when
    any value is a distribution (has an ``rvs`` method), because the space is
    then not a finite list of combinations.
    """
    grids = [param_grid] if hasattr(param_grid, 'items') else list(param_grid)
    total = 0
    for grid in grids:
        combinations = 1
        for values in grid.values():
            if hasattr(values, 'rvs'):
                return None
            combinations *= len(values)
        total += combinations
    return total


def tune_and_evaluate(model, param_grid, X_train, y_train, X_test, y_test):
    """Tune ``model`` with a randomized search, then score it on the test split.

    Parameters
    ----------
    model : estimator
        Unfitted estimator handed to ``RandomizedSearchCV``.
    param_grid : dict (or list of dicts)
        Search space: parameter name -> list of values (or a distribution).
    X_train, y_train
        Data used for the 10-fold cross-validated search.
    X_test, y_test
        Held-out data used only for the reported metrics.

    Returns
    -------
    dict
        ``best_params`` plus ``accuracy`` and weighted ``precision``, ``recall``
        and ``f1_score`` of the best estimator's predictions on the test split.
    """
    # Budget: at most 10 sampled settings, but never more than the space holds.
    # The size of the space is the number of value *combinations*; counting the
    # parameter names instead (len(param_grid)) would, for example, evaluate a
    # single candidate for a one-parameter grid with three values.
    n_candidates = _count_candidates(param_grid)
    n_iter = 10 if n_candidates is None else max(1, min(10, n_candidates))

    search = RandomizedSearchCV(model, param_grid, n_iter=n_iter, cv=10, random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test)

    return {
        'best_params': search.best_params_,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1_score': f1_score(y_test, y_pred, average='weighted'),
    }

# Pair each estimator with its search space. `classifiers` has to be defined in
# this notebook for the loop below to work on a fresh kernel. Each key is the
# label printed in the report; each value is (estimator, parameter grid).
# Seeds are fixed where the estimator accepts one so reruns are comparable.
classifiers = {
    'Perceptron': (Perceptron(random_state=42), param_grids['perceptron']),
    'MLP': (MLPClassifier(random_state=42), param_grids['mlp']),
    'SVM': (SVC(), param_grids['svm']),
    'Decision Tree': (DecisionTreeClassifier(random_state=42), param_grids['decision_tree']),
    'Random Forest': (RandomForestClassifier(random_state=42), param_grids['random_forest']),
    'AdaBoost': (AdaBoostClassifier(random_state=42), param_grids['ada_boost']),
    'XGBoost': (XGBClassifier(random_state=42), param_grids['xgboost']),
    'Naïve Bayes': (GaussianNB(), param_grids['naive_bayes']),
}

# Tune and score every classifier, tagging each metrics dict with its label.
results = []

for name, (model, params) in classifiers.items():
    metrics = tune_and_evaluate(model, params, X_train, y_train, X_test, y_test)
    metrics['Classifier'] = name
    results.append(metrics)

# Print performance metrics
print("Performance Metrics:")
for result in results:
    print(f"Classifier: {result['Classifier']}")
    print(f"  Accuracy: {result['accuracy']:.4f}")
    print(f"  Precision: {result['precision']:.4f}")
    print(f"  Recall: {result['recall']:.4f}")
    print(f"  F1 Score: {result['f1_score']:.4f}")
    print()

# Print hyperparameters
print("Hyperparameters:")
for result in results:
    print(f"Classifier: {result['Classifier']}")
    print(f"  Best Hyperparameters: {result['best_params']}")
    print()


Performance Metrics:
Classifier: Perceptron
  Accuracy: 0.8496
  Precision: 0.9052
  Recall: 0.8496
  F1 Score: 0.8581

Classifier: MLP
  Accuracy: 0.9425
  Precision: 0.9419
  Recall: 0.9425
  F1 Score: 0.9420

Classifier: SVM
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Classifier: Decision Tree
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Classifier: Random Forest
  Accuracy: 0.9823
  Precision: 0.9823
  Recall: 0.9823
  F1 Score: 0.9822

Classifier: AdaBoost
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Classifier: XGBoost
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Classifier: Naïve Bayes
  Accuracy: 0.7965
  Precision: 0.7921
  Recall: 0.7965
  F1 Score: 0.7940

Hyperparameters:
Classifier: Perceptron
  Best Hyperparameters: {'tol': 0.0001, 'max_iter': 2000, 'alpha': 0.001}

Classifier: MLP
  Best Hyperparameters: {'hidden_layer_sizes': (50,), 'alpha': 0.000