In [3]:
import os
import cv2
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [4]:
import os
from zipfile import ZipFile


!pip install kaggle


os.environ['KAGGLE_USERNAME'] = "cruk12"
os.environ['KAGGLE_KEY'] = "f6ac77148464b981fcc85de69da4347e"


!kaggle datasets download -d jessicali9530/lfw-dataset


with ZipFile('lfw-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('lfw-dataset')


os.remove('lfw-dataset.zip')


Downloading lfw-dataset.zip to /content
 86% 97.0M/112M [00:01<00:00, 48.2MB/s]
100% 112M/112M [00:01<00:00, 64.9MB/s] 


In [5]:

def extract_features(image_path, model):
    image = Image.open(image_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = preprocess(image)
    image = image.unsqueeze(0)
    with torch.no_grad():
        features = model(image)
    features = features.squeeze(0)
    return features


def compute_hog(img):
    resized_img = resize(img, (128*4, 64*4))
    fd, hog_image = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(2, 2), visualize=True, multichannel=True)
    return fd


def get_pixel(img, center, x, y):
    new_value = 0
    try:
        if img[x][y] >= center:
            new_value = 1
    except:
        pass
    return new_value

def lbp_calculated_pixel(img, x, y):
    center = img[x][y]
    val_ar = []
    val_ar.append(get_pixel(img, center, x-1, y+1))
    val_ar.append(get_pixel(img, center, x, y+1))
    val_ar.append(get_pixel(img, center, x+1, y+1))
    val_ar.append(get_pixel(img, center, x+1, y))
    val_ar.append(get_pixel(img, center, x+1, y-1))
    val_ar.append(get_pixel(img, center, x, y-1))
    val_ar.append(get_pixel(img, center, x-1, y-1))
    val_ar.append(get_pixel(img, center, x-1, y))

    power_val = [1, 2, 4, 8, 16, 32, 64, 128]
    val = 0
    for i in range(len(val_ar)):
        val += val_ar[i] * power_val[i]
    return val

def calcLBP(img):
    height, width, channel = img.shape
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_lbp = np.zeros((height, width,3), np.uint8)
    for i in range(0, height):
        for j in range(0, width):
             img_lbp[i, j] = lbp_calculated_pixel(img_gray, i, j)
    hist_lbp = cv2.calcHist([img_lbp], [0], None, [256], [0, 256])
    return hist_lbp.flatten()


In [6]:
resnet = models.resnet50(pretrained=True)
resnet = nn.Sequential(*list(resnet.children())[:-1])
resnet.eval()

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 123MB/s]


Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [7]:
lfw_folder = '/content/lfw-dataset/lfw-deepfunneled/lfw-deepfunneled'
X, y = [], []
for folder_name in os.listdir(lfw_folder):
    folder_path = os.path.join(lfw_folder, folder_name)
    if os.path.isdir(folder_path):
        num_images = len(os.listdir(folder_path))
        if num_images > 70:
            for image_name in os.listdir(folder_path):
                image_path = os.path.join(folder_path, image_name)
                image = imread(image_path)
                # hog_feature = compute_hog(image)
                # lbp_feature = calcLBP(image)
                cnn_feature = extract_features(image_path, resnet).numpy()
                # hog_feature = hog_feature.reshape(-1)
                # lbp_feature = lbp_feature.reshape(-1)
                cnn_feature = cnn_feature.flatten()
                # combined_feature = np.concatenate((cnn_feature))

                X.append(cnn_feature)
                y.append(folder_name)


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=30)



In [9]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [10]:
knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train, y_train_encoded)
y_pred_knn = knn_clf.predict(X_test)
accuracy_knn = accuracy_score(y_test_encoded, y_pred_knn)
print("KNN Classifier")
print("Accuracy:", accuracy_knn)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_knn, target_names=label_encoder.classes_))


KNN Classifier
Accuracy: 0.6317829457364341
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.75      0.38      0.50        16
     Colin_Powell       0.61      0.55      0.58        60
  Donald_Rumsfeld       0.48      0.48      0.48        21
    George_W_Bush       0.66      0.85      0.74       108
Gerhard_Schroeder       0.62      0.26      0.37        19
      Hugo_Chavez       1.00      0.58      0.74        12
       Tony_Blair       0.50      0.45      0.48        22

         accuracy                           0.63       258
        macro avg       0.66      0.51      0.55       258
     weighted avg       0.64      0.63      0.62       258



In [12]:
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold

In [13]:
seed = 42

knn_params = {
    "n_neighbors" : range(1,30,2),
    "weights" : ["uniform", "distance"],
    "metric" : ["euclidean", "manhattan", "minkowski"],
    "leaf_size" : range(1,50,5)
}

cv = RepeatedStratifiedKFold(n_splits = 10 , n_repeats = 3, random_state = seed)
grid_search = GridSearchCV(estimator=knn_clf, param_grid=knn_params, n_jobs = 1, cv = cv, scoring = "accuracy", error_score = 0)
grid_results = grid_search.fit(X_train, y_train)

final_model = knn_clf.set_params(**grid_results.best_params_)
final_model.fit(X_train, y_train)
y_pred = final_model.predict(X_test)

accuracy_knn_gd = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy_knn_gd)
print(grid_results.best_params_)


Accuracy: 0.6162790697674418
{'leaf_size': 1, 'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'distance'}


In [None]:
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)


knn_clf = KNeighborsClassifier()
knn_clf.fit(X_train_pca, y_train_encoded)
y_pred_knn = knn_clf.predict(X_test_pca)
accuracy_knn = accuracy_score(y_test_encoded, y_pred_knn)

print("KNN Classifier")
print("Accuracy:", accuracy_knn)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_knn, target_names=label_encoder.classes_))

KNN Classifier
Accuracy: 0.3875968992248062
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.09      0.06      0.07        16
     Colin_Powell       0.32      0.47      0.38        38
  Donald_Rumsfeld       0.19      0.15      0.17        26
    George_W_Bush       0.52      0.68      0.59       109
Gerhard_Schroeder       0.17      0.11      0.13        18
      Hugo_Chavez       0.00      0.00      0.00        21
       Tony_Blair       0.08      0.03      0.05        30

         accuracy                           0.39       258
        macro avg       0.19      0.22      0.20       258
     weighted avg       0.31      0.39      0.34       258



In [None]:
svm_clf = SVC()
svm_clf.fit(X_train, y_train_encoded)
y_pred_svm = svm_clf.predict(X_test)
accuracy_svm = accuracy_score(y_test_encoded, y_pred_svm)
print("SVM Classifier")
print("Accuracy:", accuracy_svm)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_svm, target_names=label_encoder.classes_))


SVM Classifier
Accuracy: 0.4263565891472868
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        16
     Colin_Powell       0.38      0.34      0.36        38
  Donald_Rumsfeld       0.00      0.00      0.00        26
    George_W_Bush       0.43      0.89      0.58       109
Gerhard_Schroeder       0.00      0.00      0.00        18
      Hugo_Chavez       0.00      0.00      0.00        21
       Tony_Blair       0.00      0.00      0.00        30

         accuracy                           0.43       258
        macro avg       0.12      0.18      0.13       258
     weighted avg       0.24      0.43      0.30       258



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
svm_clf = SVC()
svm_clf.fit(X_train_pca, y_train_encoded)
y_pred_svm = svm_clf.predict(X_test_pca)
accuracy_svm = accuracy_score(y_test_encoded, y_pred_svm)
print("SVM Classifier")
print("Accuracy:", accuracy_svm)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_svm, target_names=label_encoder.classes_))


SVM Classifier
Accuracy: 0.4263565891472868
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        16
     Colin_Powell       0.36      0.37      0.36        38
  Donald_Rumsfeld       0.67      0.08      0.14        26
    George_W_Bush       0.44      0.86      0.58       109
Gerhard_Schroeder       0.00      0.00      0.00        18
      Hugo_Chavez       0.00      0.00      0.00        21
       Tony_Blair       0.00      0.00      0.00        30

         accuracy                           0.43       258
        macro avg       0.21      0.19      0.15       258
     weighted avg       0.30      0.43      0.31       258



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.ensemble import AdaBoostClassifier
adaboost_clf = AdaBoostClassifier()
adaboost_clf.fit(X_train, y_train_encoded)
y_pred_adaboost = adaboost_clf.predict(X_test)
accuracy_adaboost = accuracy_score(y_test_encoded, y_pred_adaboost)

print("AdaBoost Classifier")
print("Accuracy:", accuracy_adaboost)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_adaboost, target_names=label_encoder.classes_))


AdaBoost Classifier
Accuracy: 0.49224806201550386
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        16
     Colin_Powell       0.43      0.47      0.45        38
  Donald_Rumsfeld       0.00      0.00      0.00        26
    George_W_Bush       0.53      0.85      0.65       109
Gerhard_Schroeder       0.83      0.28      0.42        18
      Hugo_Chavez       0.23      0.24      0.23        21
       Tony_Blair       0.55      0.20      0.29        30

         accuracy                           0.49       258
        macro avg       0.37      0.29      0.29       258
     weighted avg       0.43      0.49      0.43       258



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from sklearn.ensemble import AdaBoostClassifier
adaboost_clf = AdaBoostClassifier()
adaboost_clf.fit(X_train_pca, y_train_encoded)
y_pred_adaboost = adaboost_clf.predict(X_test_pca)
accuracy_adaboost = accuracy_score(y_test_encoded, y_pred_adaboost)

print("AdaBoost Classifier")
print("Accuracy:", accuracy_adaboost)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_adaboost, target_names=label_encoder.classes_))


AdaBoost Classifier
Accuracy: 0.32170542635658916
Classification Report:


  _warn_prf(average, modifier, msg_start, len(result))


                   precision    recall  f1-score   support

     Ariel_Sharon       0.20      0.06      0.10        16
     Colin_Powell       0.22      0.34      0.27        38
  Donald_Rumsfeld       0.07      0.04      0.05        26
    George_W_Bush       0.39      0.62      0.48       109
Gerhard_Schroeder       0.00      0.00      0.00        18
      Hugo_Chavez       0.00      0.00      0.00        21
       Tony_Blair       0.00      0.00      0.00        30

         accuracy                           0.32       258
        macro avg       0.13      0.15      0.13       258
     weighted avg       0.22      0.32      0.25       258



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
dt_clf = DecisionTreeClassifier()
dt_clf.fit(X_train, y_train_encoded)
y_pred_dt = dt_clf.predict(X_test)
accuracy_dt = accuracy_score(y_test_encoded, y_pred_dt)
print("Decision Tree Classifier")
print("Accuracy:", accuracy_dt)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_dt, target_names=label_encoder.classes_))


Decision Tree Classifier
Accuracy: 0.5542635658914729
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.30      0.38      0.33        16
     Colin_Powell       0.47      0.50      0.49        38
  Donald_Rumsfeld       0.45      0.35      0.39        26
    George_W_Bush       0.72      0.77      0.74       109
Gerhard_Schroeder       0.35      0.39      0.37        18
      Hugo_Chavez       0.33      0.29      0.31        21
       Tony_Blair       0.52      0.40      0.45        30

         accuracy                           0.55       258
        macro avg       0.45      0.44      0.44       258
     weighted avg       0.55      0.55      0.55       258



In [None]:
dt_clf = DecisionTreeClassifier()
dt_clf.fit(X_train_pca, y_train_encoded)
y_pred_dt = dt_clf.predict(X_test_pca)
accuracy_dt = accuracy_score(y_test_encoded, y_pred_dt)
print("Decision Tree Classifier")
print("Accuracy:", accuracy_dt)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_dt, target_names=label_encoder.classes_))


Decision Tree Classifier
Accuracy: 0.2713178294573643
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.06      0.06      0.06        16
     Colin_Powell       0.15      0.18      0.16        38
  Donald_Rumsfeld       0.19      0.15      0.17        26
    George_W_Bush       0.47      0.45      0.46       109
Gerhard_Schroeder       0.12      0.17      0.14        18
      Hugo_Chavez       0.18      0.10      0.12        21
       Tony_Blair       0.13      0.13      0.13        30

         accuracy                           0.27       258
        macro avg       0.18      0.18      0.18       258
     weighted avg       0.28      0.27      0.27       258



In [None]:
rf_clf = RandomForestClassifier()
rf_clf.fit(X_train, y_train_encoded)
y_pred_rf = rf_clf.predict(X_test)
accuracy_rf = accuracy_score(y_test_encoded, y_pred_rf)
print("Random Forest Classifier")
print("Accuracy:", accuracy_rf)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_rf, target_names=label_encoder.classes_))


Random Forest Classifier
Accuracy: 0.6472868217054264
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       1.00      0.06      0.12        16
     Colin_Powell       0.87      0.89      0.88        38
  Donald_Rumsfeld       1.00      0.31      0.47        26
    George_W_Bush       0.56      1.00      0.72       109
Gerhard_Schroeder       1.00      0.28      0.43        18
      Hugo_Chavez       0.00      0.00      0.00        21
       Tony_Blair       1.00      0.33      0.50        30

         accuracy                           0.65       258
        macro avg       0.78      0.41      0.45       258
     weighted avg       0.71      0.65      0.58       258



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
rt_clf = RandomForestClassifier()
rt_clf.fit(X_train_pca, y_train_encoded)
y_pred_dt = dt_clf.predict(X_test_pca)
accuracy_dt = accuracy_score(y_test_encoded, y_pred_dt)
print("Decision Tree Classifier")
print("Accuracy:", accuracy_dt)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_dt, target_names=label_encoder.classes_))


Decision Tree Classifier
Accuracy: 0.29069767441860467
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.06      0.06      0.06        16
     Colin_Powell       0.23      0.29      0.26        38
  Donald_Rumsfeld       0.21      0.23      0.22        26
    George_W_Bush       0.48      0.44      0.46       109
Gerhard_Schroeder       0.07      0.11      0.09        18
      Hugo_Chavez       0.25      0.10      0.14        21
       Tony_Blair       0.17      0.17      0.17        30

         accuracy                           0.29       258
        macro avg       0.21      0.20      0.20       258
     weighted avg       0.31      0.29      0.30       258



In [None]:
lr_clf = LogisticRegression(max_iter=1000)
lr_clf.fit(X_train, y_train_encoded)
y_pred_lr = lr_clf.predict(X_test)
accuracy_lr = accuracy_score(y_test_encoded, y_pred_lr)
print("Logistic Regression Classifier")
print("Accuracy:", accuracy_lr)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_lr, target_names=label_encoder.classes_))


Logistic Regression Classifier
Accuracy: 0.5077519379844961
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.58      0.44      0.50        16
     Colin_Powell       0.47      0.53      0.49        38
  Donald_Rumsfeld       0.30      0.12      0.17        26
    George_W_Bush       0.59      0.81      0.68       109
Gerhard_Schroeder       0.22      0.22      0.22        18
      Hugo_Chavez       0.29      0.10      0.14        21
       Tony_Blair       0.39      0.23      0.29        30

         accuracy                           0.51       258
        macro avg       0.40      0.35      0.36       258
     weighted avg       0.47      0.51      0.47       258



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
lr_clf = LogisticRegression(max_iter=500)
lr_clf.fit(X_train_pca, y_train_encoded)
y_pred_lr = lr_clf.predict(X_test_pca)
accuracy_lr = accuracy_score(y_test_encoded, y_pred_lr)
print("Logistic Regression Classifier")
print("Accuracy:", accuracy_lr)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_lr, target_names=label_encoder.classes_))


Logistic Regression Classifier
Accuracy: 0.4263565891472868
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        16
     Colin_Powell       0.37      0.42      0.40        38
  Donald_Rumsfeld       0.38      0.12      0.18        26
    George_W_Bush       0.44      0.83      0.58       109
Gerhard_Schroeder       0.00      0.00      0.00        18
      Hugo_Chavez       0.00      0.00      0.00        21
       Tony_Blair       0.00      0.00      0.00        30

         accuracy                           0.43       258
        macro avg       0.17      0.20      0.16       258
     weighted avg       0.28      0.43      0.32       258



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
nb_clf = GaussianNB()
nb_clf.fit(X_train, y_train_encoded)
y_pred_nb = nb_clf.predict(X_test)
accuracy_nb = accuracy_score(y_test_encoded, y_pred_nb)
print("Naive Bayes Classifier")
print("Accuracy:", accuracy_nb)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_nb, target_names=label_encoder.classes_))


Naive Bayes Classifier
Accuracy: 0.49612403100775193
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.64      0.44      0.52        16
     Colin_Powell       0.54      0.71      0.61        38
  Donald_Rumsfeld       0.24      0.88      0.37        26
    George_W_Bush       0.97      0.28      0.43       109
Gerhard_Schroeder       0.67      0.33      0.44        18
      Hugo_Chavez       0.83      0.48      0.61        21
       Tony_Blair       0.52      0.83      0.64        30

         accuracy                           0.50       258
        macro avg       0.63      0.56      0.52       258
     weighted avg       0.73      0.50      0.50       258



In [None]:
nb_clf = GaussianNB()
nb_clf.fit(X_train_pca, y_train_encoded)
y_pred_nb = nb_clf.predict(X_test_pca)
accuracy_nb = accuracy_score(y_test_encoded, y_pred_nb)
print("Naive Bayes Classifier")
print("Accuracy:", accuracy_nb)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_nb, target_names=label_encoder.classes_))


Naive Bayes Classifier
Accuracy: 0.40310077519379844
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        16
     Colin_Powell       0.37      0.42      0.40        38
  Donald_Rumsfeld       0.21      0.23      0.22        26
    George_W_Bush       0.44      0.75      0.55       109
Gerhard_Schroeder       0.00      0.00      0.00        18
      Hugo_Chavez       0.00      0.00      0.00        21
       Tony_Blair       0.00      0.00      0.00        30

         accuracy                           0.40       258
        macro avg       0.15      0.20      0.17       258
     weighted avg       0.26      0.40      0.31       258



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
mlp_clf = MLPClassifier()
mlp_clf.fit(X_train, y_train_encoded)
y_pred_mlp = mlp_clf.predict(X_test)
accuracy_mlp = accuracy_score(y_test_encoded, y_pred_mlp)
print("MLP Classifier")
print("Accuracy:", accuracy_mlp)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_mlp, target_names=label_encoder.classes_))

MLP Classifier
Accuracy: 0.1511627906976744
Classification Report:
                   precision    recall  f1-score   support

     Ariel_Sharon       0.00      0.00      0.00        16
     Colin_Powell       0.00      0.00      0.00        38
  Donald_Rumsfeld       0.00      0.00      0.00        26
    George_W_Bush       0.00      0.00      0.00       109
Gerhard_Schroeder       0.16      0.50      0.24        18
      Hugo_Chavez       0.00      0.00      0.00        21
       Tony_Blair       0.15      1.00      0.26        30

         accuracy                           0.15       258
        macro avg       0.04      0.21      0.07       258
     weighted avg       0.03      0.15      0.05       258



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
mlp_clf = MLPClassifier()
mlp_clf.fit(X_train_pca, y_train_encoded)
y_pred_mlp = mlp_clf.predict(X_test_pca)
accuracy_mlp = accuracy_score(y_test_encoded, y_pred_mlp)
print("MLP Classifier")
print("Accuracy:", accuracy_mlp)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_mlp, target_names=label_encoder.classes_))

NameError: name 'MLPClassifier' is not defined

In [None]:
from xgboost import XGBClassifier

xgb_clf = XGBClassifier()
xgb_clf.fit(X_train, y_train_encoded)
y_pred_xgb = xgb_clf.predict(X_test)
accuracy_xgb = accuracy_score(y_test_encoded, y_pred_xgb)
print("XGBoost Classifier")
print("Accuracy:", accuracy_xgb)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_xgb, target_names=label_encoder.classes_))


In [None]:
from xgboost import XGBClassifier

xgb_clf = XGBClassifier()
xgb_clf.fit(X_train_pca, y_train_encoded)
y_pred_xgb = xgb_clf.predict(X_test_pca)
accuracy_xgb = accuracy_score(y_test_encoded, y_pred_xgb)
print("XGBoost Classifier")
print("Accuracy:", accuracy_xgb)
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_xgb, target_names=label_encoder.classes_))
