In [61]:
import pandas as pd
import numpy as np
import os
from PIL import Image
from tqdm import tqdm
def get_maxima_coordinates(image):
    """
    Locate the largest value in ``image``.

    Returns a list holding one index per axis of ``image`` (``[row, col]``
    for a 2D numpy array). If the maximum occurs several times, the first
    occurrence in flattened order is reported, as ``np.argmax`` does.
    """
    flat_position = np.argmax(image)
    per_axis_indices = np.unravel_index(flat_position, image.shape)
    return [axis_index for axis_index in per_axis_indices]

def is_point_in_region(binary_image, point):
    """
    Report whether ``point`` lands on a cell equal to 1 in ``binary_image``.

    Parameters
    ----------
    binary_image : numpy.ndarray
        Mask array; cells equal to 1 mark the region.
    point : sequence of int
        Index into ``binary_image`` in NumPy axis order, i.e. ``(row, col)``
        for a 2D mask (not ``(x, y)``). Tuples, lists and 1-D arrays are all
        accepted.

    Returns
    -------
    bool
        ``True`` when the addressed cell equals 1, ``False`` otherwise.

    Raises
    ------
    IndexError
        If ``point`` addresses a cell outside ``binary_image``.
    """
    # Indexing with a list/array would trigger NumPy fancy indexing (selecting
    # whole rows) instead of a single cell, so always index with a tuple.
    index = tuple(point)
    # bool() turns numpy.bool_ into a plain Python bool for callers.
    return bool(binary_image[index] == 1)

def process_dataframe(df, heat_dir, img_dir, heat_shape=(64, 64), frame_shape=(720, 1280)):
    """
    Add a binary ``gazein`` column to ``df``.

    For every entry of ``df['imgID']`` the function loads
    ``<heat_dir>/<stem>.png`` and ``<img_dir>/<stem>.npy`` (``stem`` is the
    imgID without its file extension), finds the position of the heat-map
    maximum, rescales that position to the mask resolution and records 1 if
    the mask equals 1 there, otherwise 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``imgID`` column of file names.
    heat_dir : str
        Directory holding the heat-map PNG files.
    img_dir : str
        Directory holding the mask ``.npy`` files.
    heat_shape : tuple of int, optional
        ``(rows, cols)`` of the heat-map grid. Defaults to ``(64, 64)``.
    frame_shape : tuple of int, optional
        ``(rows, cols)`` of the mask the peak is projected onto. Defaults to
        ``(720, 1280)``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; the ``gazein`` column is added in place.
    """
    heat_rows, heat_cols = heat_shape
    frame_rows, frame_cols = frame_shape
    gazein_values = []

    for imgID in df['imgID']:
        # Strip the extension whatever its length (e.g. '.jpg' or '.jpeg').
        stem = os.path.splitext(imgID)[0]
        heat_image_path = os.path.join(heat_dir, stem + '.png')
        binary_image_path = os.path.join(img_dir, stem + '.npy')

        # Read the pixels while the file is open, then release the handle.
        with Image.open(heat_image_path) as heat_file:
            heat_image = np.array(heat_file)

        # Peak position on the heat-map grid, rescaled to mask resolution.
        # int() truncates toward zero, so a peak in the last heat-map cell
        # still maps to an index inside the mask.
        maxima_coordinates = get_maxima_coordinates(heat_image)
        maxima_coordinates[0] = int(maxima_coordinates[0] * frame_rows / heat_rows)
        maxima_coordinates[1] = int(maxima_coordinates[1] * frame_cols / heat_cols)

        binary_image = np.load(binary_image_path)

        # 1 when the rescaled peak lies on a mask cell equal to 1, else 0.
        gazein = 1 if is_point_in_region(binary_image, tuple(maxima_coordinates)) else 0
        gazein_values.append(gazein)

    # Add the new column to the dataframe (mutates the caller's frame).
    df['gazein'] = gazein_values

    return df

# Example usage: the next cell loads the train/validation CSVs and passes them through process_dataframe.



In [62]:
import pandas as pd

# Raw strings keep the Windows backslashes literal. In an ordinary string,
# sequences such as '\D' or '\p' are invalid escapes that newer Python
# versions warn about.
# Note: train_dir / valid_dir hold CSV *file* paths despite the "_dir" suffix.
train_dir = r'C:\Datasets\labels_and_features_TRAIN.csv'
valid_dir = r'C:\Datasets\labels_and_features_VAL.csv'
heat_dir = r'C:\Datasets\Engagement\predict_heatmap'
img_dir = r'C:\Datasets\Engagement\studentnpy'

train_data = pd.read_csv(train_dir)
validate_data = pd.read_csv(valid_dir)

# Append the 'gazein' column computed from the heat-maps and masks.
train_data = process_dataframe(train_data, heat_dir, img_dir)
validate_data = process_dataframe(validate_data, heat_dir, img_dir)

In [63]:


# Columns removed from the feature matrix: identifiers, the target 'gaze',
# and the per-index gaze/distance/relaxation/gesture columns.
NON_FEATURE_COLUMNS = [
    'teacherID', 'gaze0', 'gaze1', 'gaze2', 'gaze', 'imgID',
    "distance0", "distance1", "distance2",
    "relaxation0", "relaxation1", "relaxation2",
    "gesture0", "gesture1", "gesture2",
]

# Label merge applied to the target: 1 -> 3 and 4 -> 2.
GAZE_LABEL_MERGE = {1: 3, 4: 2}

X_train = train_data.drop(columns=NON_FEATURE_COLUMNS)
X_validate = validate_data.drop(columns=NON_FEATURE_COLUMNS)

# Write the merged labels back to the frames explicitly, so that
# train_data['gaze'] and y_train agree regardless of pandas Copy-on-Write
# mode. An in-place replace on an extracted column only reaches the parent
# frame as a mode-dependent side effect. Re-running this cell is harmless:
# the mapping leaves already-merged labels (2, 3) untouched.
train_data['gaze'] = train_data['gaze'].replace(GAZE_LABEL_MERGE)
validate_data['gaze'] = validate_data['gaze'].replace(GAZE_LABEL_MERGE)

y_train = train_data['gaze']
y_validate = validate_data['gaze']

In [64]:
# Count distinct labels on y_train, the vector actually used for training,
# rather than on train_data['gaze'], which only carries the merged labels if
# the earlier in-place replace propagated back to the frame.
# pd.Series(...) keeps this working if y_train has already been turned into
# a NumPy array by a later cell.
num_classes = pd.Series(y_train).nunique()
print("Number of classes:", num_classes)
y_train

Number of classes: 2


0       3
1       2
2       3
3       3
4       3
       ..
2446    3
2447    3
2448    3
2449    2
2450    2
Name: gaze, Length: 2451, dtype: int64

In [65]:
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

import lightgbm as lgb

# Map the gaze labels to consecutive integers 0..k-1; the encoder is fitted
# on the training labels only and reused for validation.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_validate = label_encoder.transform(y_validate)

d_train = lgb.Dataset(X_train, label=y_train)
params = {
    'objective': 'multiclass',
    # Derived from the encoder so the setting follows the data instead of a
    # hard-coded count.
    'num_class': len(label_encoder.classes_),
    # Other parameters
}
model = lgb.train(params, d_train)

# With the multiclass objective, predict() yields one probability column per
# class; argmax over axis 1 selects the encoded label.
y_pred = model.predict(X_validate)
y_pred_labels = y_pred.argmax(axis=1)  # Get the predicted labels
# Predictions mapped back to the original gaze labels.
original_labels = label_encoder.inverse_transform(y_pred_labels)

# Rows of the report are keyed by the *encoded* labels.
report = classification_report(y_validate, y_pred_labels)
print('Accuracy Report:\n', report)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002022 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 15553
[LightGBM] [Info] Number of data points in the train set: 2451, number of used features: 61
[LightGBM] [Info] Start training from score -1.078018
[LightGBM] [Info] Start training from score -0.415924
Accuracy Report:
               precision    recall  f1-score   support

           0       0.91      0.64      0.75       223
           1       0.82      0.96      0.89       382

    accuracy                           0.84       605
   macro avg       0.86      0.80      0.82       605
weighted avg       0.85      0.84      0.84       605



In [66]:
from sklearn.base import BaseEstimator, ClassifierMixin

class LGBMClassifierWrapper(BaseEstimator, ClassifierMixin):
    """A scikit-learn wrapper around an already-trained LightGBM model.

    The wrapper never trains anything: it only forwards ``predict`` calls to
    the wrapped model, so that the model can be used where a scikit-learn
    classifier is expected (e.g. inside a ``VotingClassifier``).

    Parameters
    ----------
    lgb_model : object
        Trained model exposing ``predict(X)`` that returns class
        probabilities.
    """

    def __init__(self, lgb_model):
        self.lgb_model = lgb_model

    def fit(self, X, y):
        """No-op fit: the LightGBM model is already trained.

        Records the labels seen in ``y`` as ``classes_`` and returns ``self``,
        as the scikit-learn estimator contract expects.
        NOTE(review): assumes ``y`` uses the same 0..k-1 encoding the wrapped
        model was trained with - confirm against the training cell.
        """
        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the index of the most probable class."""
        return self.predict_proba(X).argmax(axis=1)

    def predict_proba(self, X):
        """Return class probabilities with shape ``(n_samples, n_classes)``."""
        proba = np.asarray(self.lgb_model.predict(X))
        if proba.ndim == 1:
            # A model that emits a single probability per row (positive class
            # only) is expanded to the two-column [P(0), P(1)] layout.
            proba = np.column_stack((1.0 - proba, proba))
        return proba

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Wrap the LightGBM model
lgb_wrapper = LGBMClassifierWrapper(model)

# Fixed seed so the stochastic estimators give the same scores on every
# Restart & Run All.
ENSEMBLE_SEED = 42

# Initialize the models
xgb_model = XGBClassifier(random_state=ENSEMBLE_SEED)
rf_model = RandomForestClassifier(random_state=ENSEMBLE_SEED)
# probability=True is required for soft voting (predict_proba).
svm_model = SVC(probability=True, random_state=ENSEMBLE_SEED)

# Train the models
xgb_model.fit(X_train, y_train)
print("xgb_model")
rf_model.fit(X_train, y_train)
print("rf_model")
svm_model.fit(X_train, y_train)
print("svm_model")

# Predictions and accuracies
y_pred_xgb = xgb_model.predict(X_validate)
y_pred_rf = rf_model.predict(X_validate)
y_pred_svm = svm_model.predict(X_validate)

acc_xgb = accuracy_score(y_validate, y_pred_xgb)
acc_rf = accuracy_score(y_validate, y_pred_rf)
acc_svm = accuracy_score(y_validate, y_pred_svm)
# y_pred_labels are the LightGBM predictions computed in the training cell.
acc_lgb = accuracy_score(y_validate, y_pred_labels)
print("XGBoost Accuracy:", acc_xgb)
print("Random Forest Accuracy:", acc_rf)
print("SVM Accuracy:", acc_svm)
print("LGB Accuracy:", acc_lgb)
# NOTE(review): in the recorded run the SVM accuracy equals the share of the
# larger validation class (382 of 605), which suggests it predicts a single
# class. The features are passed unscaled - consider standardising them
# before the SVC.

# Ensemble
# Soft voting averages predict_proba across the members. VotingClassifier.fit
# clones and refits every estimator, so the fits above are only used for the
# individual scores.
ensemble = VotingClassifier(estimators=[
    ('xgb', xgb_model),
    ('rf', rf_model),
    ('svm', svm_model),
    ('lgb', lgb_wrapper)
], voting='soft')

ensemble.fit(X_train, y_train)
y_pred_ensemble = ensemble.predict(X_validate)
acc_ensemble = accuracy_score(y_validate, y_pred_ensemble)

print("Ensemble Accuracy:", acc_ensemble)


xgb_model
rf_model
svm_model
XGBoost Accuracy: 0.8429752066115702
Random Forest Accuracy: 0.824793388429752
SVM Accuracy: 0.631404958677686
LGB Accuracy: 0.8429752066115702
Ensemble Accuracy: 0.8347107438016529
