# A1

In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the datasets
def load_data():
    abstractive_embeddings = pd.read_excel("C:/Users/Shanmukha Reddy/Desktop/ML/English_Abstractive_Embeddings_Fasttext.xlsx")
    return abstractive_embeddings

# A1: Train Perceptron and MLP and evaluate their performance
def train_models(X_train, y_train, X_test, y_test):
    # Initialize models
    perceptron = Perceptron()
    mlp = MLPClassifier(max_iter=500, early_stopping=True)  # Increase iterations and enable early stopping

    # Train models
    perceptron.fit(X_train, y_train)
    mlp.fit(X_train, y_train)

    # Evaluate models
    perceptron_pred = perceptron.predict(X_test)
    mlp_pred = mlp.predict(X_test)

    perceptron_acc = accuracy_score(y_test, perceptron_pred)
    mlp_acc = accuracy_score(y_test, mlp_pred)

    return {'Perceptron': perceptron_acc, 'MLP': mlp_acc}

# Main execution for A1
abstractive_embeddings = load_data()

X = abstractive_embeddings.iloc[:, :-1]  # all features except last column
y = abstractive_embeddings.iloc[:, -1]   # last column is target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model_results = train_models(X_train, y_train, X_test, y_test)
print("A1 - Perceptron and MLP Accuracy:\n", model_results)


A1 - Perceptron and MLP Accuracy:
 {'Perceptron': 0.25, 'MLP': 0.30833333333333335}


# A2

In [5]:
# Import necessary libraries
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron

# A2: Use RandomizedSearchCV to tune hyperparameters for Perceptron and MLP
def tune_hyperparameters(X, y):
    # Hyperparameter grids
    perceptron_param_grid = {
        'penalty': ['l2', 'l1', 'elasticnet'],
        'alpha': [0.0001, 0.001, 0.01, 0.1],
        'max_iter': [500, 1000, 1500],
        'tol': [1e-3, 1e-4]
    }

    mlp_param_grid = {
        'hidden_layer_sizes': [(100,), (50,50,), (100,50,30)],
        'activation': ['tanh', 'relu'],
        'solver': ['adam', 'sgd'],
        'learning_rate': ['constant', 'adaptive'],
        'learning_rate_init': [0.001, 0.01, 0.1],
        'max_iter': [500, 1000],
        'early_stopping': [True],
        'tol': [1e-3, 1e-4]
    }
    
    # Models
    perceptron = Perceptron()
    mlp = MLPClassifier()

    # Perform RandomizedSearchCV
    perceptron_search = RandomizedSearchCV(perceptron, perceptron_param_grid, n_iter=10, scoring='accuracy', cv=5, random_state=42)
    mlp_search = RandomizedSearchCV(mlp, mlp_param_grid, n_iter=10, scoring='accuracy', cv=5, random_state=42)

    # Fit the models
    perceptron_search.fit(X, y)
    mlp_search.fit(X, y)

    return perceptron_search.best_params_, mlp_search.best_params_

# Main execution for A2
perceptron_best_params, mlp_best_params = tune_hyperparameters(X, y)
print("A2 - Best Hyperparameters for Perceptron:\n", perceptron_best_params)
print("A2 - Best Hyperparameters for MLP:\n", mlp_best_params)


A2 - Best Hyperparameters for Perceptron:
 {'tol': 0.001, 'penalty': 'l2', 'max_iter': 500, 'alpha': 0.0001}
A2 - Best Hyperparameters for MLP:
 {'tol': 0.001, 'solver': 'adam', 'max_iter': 500, 'learning_rate_init': 0.1, 'learning_rate': 'constant', 'hidden_layer_sizes': (100, 50, 30), 'early_stopping': True, 'activation': 'relu'}


# A3

In [7]:
!pip install xgboost


Collecting xgboost
  Downloading xgboost-2.1.1-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.1-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/124.9 MB 281.8 kB/s eta 0:07:24
   ---------------------------------------- 0.1/124.9 MB 409.6 kB/s eta 0:05:05
   ---------------------------------------- 0.1/124.9 MB 605.3 kB/s eta 0:03:27
   ---------------------------------------- 0.3/124.9 MB 1.0 MB/s eta 0:02:02
   ---------------------------------------- 0.6/124.9 MB 1.8 MB/s eta 0:01:11
   ---------------------------------------- 1.1/124.9 MB 2.9 MB/s eta 0:00:43
    --------------------------------------- 1.7/124.9 MB 3.9 MB/s eta 0:00:32
    --------------------------------------- 1.7/124.9 MB 4.0 MB/s eta 0:00:31
    -----

In [8]:
!pip install catboost


Collecting catboost
  Downloading catboost-1.2.7-cp312-cp312-win_amd64.whl.metadata (1.2 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.20.3-py3-none-any.whl.metadata (12 kB)
Downloading catboost-1.2.7-cp312-cp312-win_amd64.whl (101.7 MB)
   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/101.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/101.7 MB 325.1 kB/s eta 0:05:13
   ---------------------------------------- 0.1/101.7 MB 409.6 kB/s eta 0:04:09
   ---------------------------------------- 0.1/101.7 MB 544.7 kB/s eta 0:03:07
   ---------------------------------------- 0.2/101.7 MB 740.8 kB/s eta 0:02:18
   ---------------------------------------- 0.3/101.7 MB 1.2 MB/s eta 0:01:27
   ---------------------------------------- 0.5/101.7 MB 1.7 MB/s eta 0:01:01
   ---------------------------------------- 0.9/101.7 MB 2.4 MB/s eta 0:00:42
   ---------------------------------------- 1.3

In [11]:
# Import necessary libraries
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pandas as pd

# A3: Tabulate results using different classifiers
def evaluate_classifiers(X_train, y_train, X_test, y_test):
    # Define classifiers to evaluate
    classifiers = {
        'SVM': SVC(),
        'Decision Tree': DecisionTreeClassifier(),
        'Random Forest': RandomForestClassifier(),
        'AdaBoost': AdaBoostClassifier(algorithm='SAMME'),  # Specify algorithm to avoid deprecation warning
        'XGBoost': XGBClassifier(),
        'CatBoost': CatBoostClassifier(silent=True),
        'Naive Bayes': GaussianNB()
    }

    # Dictionary to store the results
    results = {}

    # Evaluate each classifier
    for name, clf in classifiers.items():
        clf.fit(X_train, y_train)  # Train classifier
        pred = clf.predict(X_test)  # Predict on test set

        # Calculate performance metrics
        acc = accuracy_score(y_test, pred)
        f1 = f1_score(y_test, pred, average='weighted')
        precision = precision_score(y_test, pred, average='weighted')
        recall = recall_score(y_test, pred, average='weighted')

        # Store the results
        results[name] = {
            'Accuracy': acc,
            'F1 Score': f1,
            'Precision': precision,
            'Recall': recall
        }

    # Return results as a DataFrame
    return pd.DataFrame(results).T

# Main execution for A3
classifier_results = evaluate_classifiers(X_train, y_train, X_test, y_test)

# Display the classifier results in tabular format
print("A3 - Classifier Performance:\n", classifier_results)


A3 - Classifier Performance:
                Accuracy  F1 Score  Precision    Recall
SVM            0.258333  0.170279   0.157563  0.258333
Decision Tree  0.250000  0.232565   0.251406  0.250000
Random Forest  0.241667  0.214232   0.228452  0.241667
AdaBoost       0.266667  0.209796   0.207774  0.266667
XGBoost        0.241667  0.223333   0.239329  0.241667
CatBoost       0.208333  0.187780   0.195337  0.208333
Naive Bayes    0.208333  0.165123   0.264510  0.208333
