In [65]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [66]:
df = pd.read_csv("C:/Users/91727/OneDrive/Desktop/filtered_landmarks2.csv")

In [67]:
df['landmarks'] = df['landmarks'].apply(lambda x: np.array(x.strip('[]').split(',')).astype(float))

In [68]:
X = np.stack(df['landmarks'].values)
y = df['category'].values

In [69]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [70]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [71]:
model = XGBClassifier(use_label_encoder=True, eval_metric='mlogloss')

In [84]:
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.



In [85]:
y_pred = model.predict(X_test)

In [86]:
accuracy = accuracy_score(y_test, y_pred)
print(f'XGBoost Accuracy: {accuracy * 100:.2f}%')
print(classification_report(y_test, y_pred,target_names=label_encoder.classes_))

XGBoost Accuracy: 54.30%
              precision    recall  f1-score   support

       angry       0.44      0.35      0.39       347
   disgusted       0.81      0.33      0.47        39
     fearful       0.37      0.28      0.32       366
       happy       0.72      0.81      0.77       719
     neutral       0.45      0.57      0.50       459
         sad       0.41      0.39      0.40       454
   surprised       0.67      0.67      0.67       290

    accuracy                           0.54      2674
   macro avg       0.55      0.49      0.50      2674
weighted avg       0.53      0.54      0.53      2674



In [80]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [95]:
df = pd.read_csv("C:/Users/91727/OneDrive/Desktop/filtered_landmarks.csv")

In [96]:
df['landmarks'] = df['landmarks'].apply(lambda x: np.array(x.strip('[]').split(',')).astype(float))

In [97]:
X = np.stack(df['landmarks'].values)
y = df['category'].values

In [98]:
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [99]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [100]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [101]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(
    #C=1.0, 
    #penalty='l2', 
    #solver='lbfgs', 
    max_iter=1000, 
    #class_weight='balanced'
)

In [102]:
model.fit(X_train_scaled, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [103]:
y_pred = model.predict(X_test_scaled)

In [104]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Logistic Regression Accuracy: {accuracy * 100:.2f}%')
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Logistic Regression Accuracy: 65.57%
              precision    recall  f1-score   support

       angry       0.55      0.49      0.51       708
   disgusted       0.56      0.12      0.20        81
       happy       0.79      0.85      0.82      1363
     neutral       0.56      0.63      0.60       992
         sad       0.54      0.48      0.51       889
   surprised       0.78      0.77      0.78       576

    accuracy                           0.66      4609
   macro avg       0.63      0.56      0.57      4609
weighted avg       0.65      0.66      0.65      4609



In [79]:
import pandas as pd

# Load the dataset
file_path = "C:/Users/91727/OneDrive/Desktop/filtered_landmarks.csv"  # Adjust this path if necessary
data = pd.read_csv(file_path)


# Remove the 'fearful' category
data_filtered = data[data['category'] != 'disgusted']



# Save the updated dataset to a new CSV file
output_file_path = "C:/Users/91727/OneDrive/Desktop/filtered_landmarks2.csv"
data_filtered.to_csv(output_file_path, index=False)

print(f"\nFiltered data saved to {output_file_path}.")



Filtered data saved to C:/Users/91727/OneDrive/Desktop/filtered_landmarks2.csv.


In [12]:
models = {
    'SVM': SVC(),
    'Multinomial Naive Bayes': MultinomialNB(),
    'K Nearest Neighbors': KNeighborsClassifier()
}

In [13]:
for model_name, model in models.items():
    # Use scaled data for SVM and KNN
    if model_name in ['SVM', 'K Nearest Neighbors']:
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print(f'{model_name} Accuracy: {accuracy * 100:.2f}%')
    
    # Print classification report with zero_division parameter
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_, zero_division=0))

SVM Accuracy: 59.32%
              precision    recall  f1-score   support

       angry       0.56      0.31      0.40       708
   disgusted       0.00      0.00      0.00        81
       happy       0.74      0.82      0.78      1363
     neutral       0.47      0.64      0.55       992
         sad       0.44      0.42      0.43       889
   surprised       0.76      0.66      0.71       576

    accuracy                           0.59      4609
   macro avg       0.50      0.48      0.48      4609
weighted avg       0.59      0.59      0.58      4609

Multinomial Naive Bayes Accuracy: 38.73%
              precision    recall  f1-score   support

       angry       0.19      0.01      0.02       708
   disgusted       0.00      0.00      0.00        81
       happy       0.50      0.71      0.59      1363
     neutral       0.31      0.22      0.26       992
         sad       0.35      0.34      0.34       889
   surprised       0.27      0.51      0.36       576

    accuracy   

In [14]:
from sklearn.model_selection import train_test_split, GridSearchCV

In [15]:
log_reg = LogisticRegression(max_iter=1000)

In [16]:
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],  # Regularization strength
    'penalty': ['l1', 'l2'],        # Regularization type
    'solver': ['liblinear', 'saga'] # Solvers to use
}

In [None]:
grid_search = GridSearchCV(estimator=log_reg, param_grid=param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train_scaled, y_train)



In [None]:
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print(f'Best Parameters: {best_params}')
print(f'Best Cross-validation Accuracy: {best_score:.2f}')

In [None]:
best_log_reg = grid_search.best_estimator_
best_log_reg.fit(X_train_scaled, y_train)

In [None]:
y_pred = best_log_reg.predict(X_test_scaled)

In [None]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Logistic Regression Accuracy: {accuracy * 100:.2f}%')
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))