<a href="https://colab.research.google.com/github/sripriyakonjarla/Machine_Learning/blob/main/lab_Session_7ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load your dataset
data = pd.read_excel('training_mathbert.xlsx')
X = data.iloc[:, :-1]  # Features
y = data.iloc[:, -1]   # Target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameter grids for each classifier
param_grids = {
    'perceptron': {
        'alpha': [0.0001, 0.001, 0.01, 0.1],
        'max_iter': [1000, 2000, 3000],
        'tol': [1e-4, 1e-3]
    },
    'mlp': {
        'hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'activation': ['tanh', 'relu'],
        'alpha': [0.0001, 0.001, 0.01]
    },
    'svm': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto']
    },
    'decision_tree': {
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 10, 20, 30],
        'max_features': ['sqrt', 'log2', None],
        'min_samples_leaf': [1, 2, 4],
        'min_samples_split': [2, 5, 10]
    },
    'random_forest': {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5]
    },
    'ada_boost': {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 1.0]
    },
    'xgboost': {
        'n_estimators': [50, 100],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.3]
    },
    'naive_bayes': {
        'var_smoothing': [1e-9, 1e-8, 1e-7]  # Added var_smoothing parameter
    }
}

def tune_and_evaluate(model, param_grid, X_train, y_train, X_test, y_test):
    n_iter = min(10, len(param_grid)) if len(param_grid) > 0 else 1
    search = RandomizedSearchCV(model, param_grid, n_iter=n_iter, cv=10, random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test)

    return {
        'best_params': search.best_params_,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1_score': f1_score(y_test, y_pred, average='weighted'),
    }

results = []

classifiers = {
    'Perceptron': (Perceptron(), param_grids['perceptron']),
    'MLP': (MLPClassifier(max_iter=1000), param_grids['mlp']),
    'SVM': (SVC(probability=True), param_grids['svm']),
    'Decision Tree': (DecisionTreeClassifier(), param_grids['decision_tree']),
    'Random Forest': (RandomForestClassifier(), param_grids['random_forest']),
    'AdaBoost': (AdaBoostClassifier(algorithm='SAMME'), param_grids['ada_boost']),
    'XGBoost': (XGBClassifier(eval_metric='mlogloss'), param_grids['xgboost']),
    'Naïve Bayes': (GaussianNB(), param_grids['naive_bayes'])
}

for name, (model, params) in classifiers.items():
    metrics = tune_and_evaluate(model, params, X_train, y_train, X_test, y_test)
    metrics['Classifier'] = name
    results.append(metrics)

results_df = pd.DataFrame(results)
results_df = results_df[['Classifier', 'best_params', 'accuracy', 'precision', 'recall', 'f1_score']]

# To display in Jupyter Notebook (if applicable)
styled_results

# If running in a standard Python script, use print
print(results_df)


      Classifier                                        best_params  accuracy  \
0     Perceptron  {'tol': 0.0001, 'max_iter': 2000, 'alpha': 0.001}  0.849558   
1            MLP  {'hidden_layer_sizes': (100,), 'alpha': 0.0001...  0.942478   
2            SVM     {'kernel': 'linear', 'gamma': 'auto', 'C': 10}  1.000000   
3  Decision Tree  {'min_samples_split': 2, 'min_samples_leaf': 4...  1.000000   
4  Random Forest  {'n_estimators': 100, 'min_samples_split': 2, ...  0.964602   
5       AdaBoost        {'n_estimators': 100, 'learning_rate': 1.0}  1.000000   
6        XGBoost  {'n_estimators': 50, 'max_depth': 3, 'learning...  1.000000   
7    Naïve Bayes                           {'var_smoothing': 1e-09}  0.796460   

   precision    recall  f1_score  
0   0.905156  0.849558  0.858139  
1   0.941892  0.942478  0.941969  
2   1.000000  1.000000  1.000000  
3   1.000000  1.000000  1.000000  
4   0.964564  0.964602  0.964178  
5   1.000000  1.000000  1.000000  
6   1.000000  1.000000  1

In [20]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load your dataset
data = pd.read_excel('training_mathbert.xlsx')
X = data.iloc[:, :-1]  # Features
y = data.iloc[:, -1]   # Target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameter grids for each classifier
param_grids = {
    'perceptron': {
        'alpha': [0.0001, 0.001, 0.01, 0.1],
        'max_iter': [1000, 2000, 3000],
        'tol': [1e-4, 1e-3]
    },
    'mlp': {
        'hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'activation': ['tanh', 'relu'],
        'alpha': [0.0001, 0.001, 0.01]
    },
    'svm': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto']
    },
    'decision_tree': {
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 10, 20, 30],
        'max_features': ['sqrt', 'log2', None],
        'min_samples_leaf': [1, 2, 4],
        'min_samples_split': [2, 5, 10]
    },
    'random_forest': {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5]
    },
    'ada_boost': {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 1.0]
    },
    'xgboost': {
        'n_estimators': [50, 100],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.3]
    },
    'naive_bayes': {
        'var_smoothing': [1e-9, 1e-8, 1e-7]  # Added var_smoothing parameter
    }
}

def tune_and_evaluate(model, param_grid, X_train, y_train, X_test, y_test):
    n_iter = min(10, len(param_grid)) if len(param_grid) > 0 else 1
    search = RandomizedSearchCV(model, param_grid, n_iter=n_iter, cv=10, random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test)

    return {
        'best_params': search.best_params_,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1_score': f1_score(y_test, y_pred, average='weighted'),
    }

results = []
hyperparams = []

classifiers = {
    'Perceptron': (Perceptron(), param_grids['perceptron']),
    'MLP': (MLPClassifier(max_iter=1000), param_grids['mlp']),
    'SVM': (SVC(probability=True), param_grids['svm']),
    'Decision Tree': (DecisionTreeClassifier(), param_grids['decision_tree']),
    'Random Forest': (RandomForestClassifier(), param_grids['random_forest']),
    'AdaBoost': (AdaBoostClassifier(algorithm='SAMME'), param_grids['ada_boost']),
    'XGBoost': (XGBClassifier(eval_metric='mlogloss'), param_grids['xgboost']),
    'Naïve Bayes': (GaussianNB(), param_grids['naive_bayes'])
}

for name, (model, params) in classifiers.items():
    metrics = tune_and_evaluate(model, params, X_train, y_train, X_test, y_test)
    metrics['Classifier'] = name
    results.append(metrics)
    hyperparams.append({
        'Classifier': name,
        'Best Hyperparameters': metrics['best_params']
    })

# Creating DataFrames for results and hyperparameters
results_df = pd.DataFrame(results)
results_df = results_df[['Classifier', 'accuracy', 'precision', 'recall', 'f1_score']]

hyperparams_df = pd.DataFrame(hyperparams)

# Displaying the results
print("Performance Metrics:")
print(results_df)
print("\nHyperparameters:")
print(hyperparams_df)


Performance Metrics:
      Classifier  accuracy  precision    recall  f1_score
0     Perceptron  0.849558   0.905156  0.849558  0.858139
1            MLP  0.946903   0.946415  0.946903  0.946267
2            SVM  1.000000   1.000000  1.000000  1.000000
3  Decision Tree  1.000000   1.000000  1.000000  1.000000
4  Random Forest  0.942478   0.943327  0.942478  0.940809
5       AdaBoost  1.000000   1.000000  1.000000  1.000000
6        XGBoost  1.000000   1.000000  1.000000  1.000000
7    Naïve Bayes  0.796460   0.792150  0.796460  0.794023

Hyperparameters:
      Classifier                               Best Hyperparameters
0     Perceptron  {'tol': 0.0001, 'max_iter': 2000, 'alpha': 0.001}
1            MLP  {'hidden_layer_sizes': (100,), 'alpha': 0.0001...
2            SVM     {'kernel': 'linear', 'gamma': 'auto', 'C': 10}
3  Decision Tree  {'min_samples_split': 2, 'min_samples_leaf': 4...
4  Random Forest  {'n_estimators': 100, 'min_samples_split': 2, ...
5       AdaBoost        {'n_est

In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load your dataset
data = pd.read_excel('training_mathbert.xlsx')
X = data.iloc[:, :-1]  # Features
y = data.iloc[:, -1]   # Target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define parameter grids for each classifier
param_grids = {
    'perceptron': {
        'alpha': [0.0001, 0.001, 0.01, 0.1],
        'max_iter': [1000, 2000, 3000],
        'tol': [1e-4, 1e-3]
    },
    'mlp': {
        'hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'activation': ['tanh', 'relu'],
        'alpha': [0.0001, 0.001, 0.01]
    },
    'svm': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto']
    },
    'decision_tree': {
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 10, 20, 30],
        'max_features': ['sqrt', 'log2', None],
        'min_samples_leaf': [1, 2, 4],
        'min_samples_split': [2, 5, 10]
    },
    'random_forest': {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20],
        'min_samples_split': [2, 5]
    },
    'ada_boost': {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 1.0]
    },
    'xgboost': {
        'n_estimators': [50, 100],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1, 0.3]
    },
    'naive_bayes': {
        'var_smoothing': [1e-9, 1e-8, 1e-7]
    }
}

def tune_and_evaluate(model, param_grid, X_train, y_train, X_test, y_test):
    n_iter = min(10, len(param_grid)) if len(param_grid) > 0 else 1
    search = RandomizedSearchCV(model, param_grid, n_iter=n_iter, cv=10, random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)
    best_model = search.best_estimator_
    y_pred = best_model.predict(X_test)

    return {
        'best_params': search.best_params_,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1_score': f1_score(y_test, y_pred, average='weighted'),
    }

results = []

for name, (model, params) in classifiers.items():
    metrics = tune_and_evaluate(model, params, X_train, y_train, X_test, y_test)
    metrics['Classifier'] = name
    results.append(metrics)

# Print performance metrics
print("Performance Metrics:")
for result in results:
    print(f"Classifier: {result['Classifier']}")
    print(f"  Accuracy: {result['accuracy']:.4f}")
    print(f"  Precision: {result['precision']:.4f}")
    print(f"  Recall: {result['recall']:.4f}")
    print(f"  F1 Score: {result['f1_score']:.4f}")
    print()

# Print hyperparameters
print("Hyperparameters:")
for result in results:
    print(f"Classifier: {result['Classifier']}")
    print(f"  Best Hyperparameters: {result['best_params']}")
    print()


Performance Metrics:
Classifier: Perceptron
  Accuracy: 0.8496
  Precision: 0.9052
  Recall: 0.8496
  F1 Score: 0.8581

Classifier: MLP
  Accuracy: 0.9425
  Precision: 0.9419
  Recall: 0.9425
  F1 Score: 0.9420

Classifier: SVM
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Classifier: Decision Tree
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Classifier: Random Forest
  Accuracy: 0.9823
  Precision: 0.9823
  Recall: 0.9823
  F1 Score: 0.9822

Classifier: AdaBoost
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Classifier: XGBoost
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

Classifier: Naïve Bayes
  Accuracy: 0.7965
  Precision: 0.7921
  Recall: 0.7965
  F1 Score: 0.7940

Hyperparameters:
Classifier: Perceptron
  Best Hyperparameters: {'tol': 0.0001, 'max_iter': 2000, 'alpha': 0.001}

Classifier: MLP
  Best Hyperparameters: {'hidden_layer_sizes': (50,), 'alpha': 0.000