
1. Load and preprocess the tabular data, excluding the columns you mentioned as unnecessary.
2. Resize and preprocess the images to feed into a convolutional neural network (CNN) for feature extraction.
3. Concatenate the CNN-extracted image features with the tabular data.
4. Encode the `gaze` labels using `LabelEncoder`.
5. Train the LightGBM model on the combined dataset.
6. Validate the model on the validation set.


In [None]:
!pip install tensorflow


In [15]:

 
### Step 1: Preprocessing Tabular Data


import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import numpy as np
from keras.preprocessing import image
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.models import Model
# Raw strings keep the Windows backslashes literal; in a normal string,
# sequences such as '\D' or '\l' are invalid escapes and trigger warnings on
# recent Python versions.
train_dir = r'C:\Datasets\labels_and_features_TRAIN.csv'
valid_dir = r'C:\Datasets\labels_and_features_VAL.csv'

# Load the dataset
train_df = pd.read_csv(train_dir)
validate_df = pd.read_csv(valid_dir)

# Drop unwanted columns
columns_to_drop = ['teacherID', 'gaze0', 'gaze1', 'gaze2']
train_df = train_df.drop(columns=columns_to_drop)
validate_df = validate_df.drop(columns=columns_to_drop)

# Encode the 'gaze' column. The encoder is fitted on the training labels only
# and reused for validation so both splits share one label -> integer mapping.
le = LabelEncoder()
train_df['gaze'] = le.fit_transform(train_df['gaze'])
validate_df['gaze'] = le.transform(validate_df['gaze'])

# Separate features and labels.
# 'imgID' stays in train_df / validate_df because it is used later to build
# the image paths, but it is an identifier rather than a feature, so it is
# excluded from the tabular feature matrices.
# NOTE(review): imgID looks like a file name (string); confirm. A string column
# would make the concatenated feature matrix non-numeric.
non_feature_columns = ['gaze', 'imgID']
X_train_tabular = train_df.drop(columns=non_feature_columns)
y_train = train_df['gaze']
X_validate_tabular = validate_df.drop(columns=non_feature_columns)
y_validate = validate_df['gaze']


### Step 2: Preprocessing Images


# Load and preprocess images
def load_and_preprocess_image(img_path):
    """Load one image and prepare it as a single-item batch for ResNet50.

    The image at ``img_path`` is loaded at 224x224, converted to an array,
    given a leading axis of length 1, and passed through the ResNet50
    ``preprocess_input`` function.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The preprocessed array with a leading batch axis of size 1.
    """
    pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
    single_batch = pixels[np.newaxis, ...]
    return preprocess_input(single_batch)

# Extract features using a pre-trained CNN.
# pooling='avg' applies global average pooling to the last convolutional block,
# so each image yields one 2048-value vector. Without it the 7x7x2048 feature
# map flattens to 100,352 values per image, which is very memory hungry and
# ties every feature to a spatial position.
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
model = Model(inputs=base_model.input, outputs=base_model.output)

def extract_features(img_paths, batch_size=32):
    """Extract one flattened CNN feature vector per image.

    Images are preprocessed with ``load_and_preprocess_image`` and pushed
    through the module-level ``model`` in batches. Calling ``model.predict``
    once per image is slow and prints a progress bar for every call, so the
    images are grouped and predicted ``batch_size`` at a time with logging off.

    Args:
        img_paths: Iterable of image paths.
        batch_size: Number of images sent to the model per ``predict`` call.

    Returns:
        A 2-D array with one row per image, in input order. For empty input an
        empty 1-D array is returned (same as ``np.array([])``).
    """
    img_paths = list(img_paths)
    all_features = []
    for start in range(0, len(img_paths), batch_size):
        chunk = img_paths[start:start + batch_size]
        # Each loaded image already has a leading batch axis of size 1.
        batch = np.concatenate(
            [load_and_preprocess_image(img_path) for img_path in chunk], axis=0
        )
        batch_features = model.predict(batch, verbose=0)
        # Flatten everything except the batch axis: one row per image.
        all_features.append(batch_features.reshape(len(chunk), -1))
    if not all_features:
        return np.array(all_features)
    return np.concatenate(all_features, axis=0)

# Every imgID value is appended to the same frames folder to form the image path.
frames_path_template = 'C:\\Datasets\\Talis_frames15_v2\\{}'
train_img_paths = [frames_path_template.format(img_id) for img_id in train_df['imgID']]
validate_img_paths = [frames_path_template.format(img_id) for img_id in validate_df['imgID']]


In [None]:

# Run both image sets through the CNN feature extractor
# (training paths first, then validation paths).
X_train_images, X_validate_images = map(
    extract_features, (train_img_paths, validate_img_paths)
)


### Step 3: Combine Image Features with Tabular Data


# Concatenate tabular features with image features (column-wise, row-aligned).
# The training matrix keeps every row so it stays aligned with y_train: slicing
# only the features (previously `[:100]`) leaves the labels at full length and
# makes LightGBM reject the dataset. To subsample, slice features AND labels.
X_train_combined = np.concatenate((X_train_tabular, X_train_images), axis=1)
X_validate_combined = np.concatenate((X_validate_tabular, X_validate_images), axis=1)

# Fail early, with a clear message, if features and labels ever get out of step.
assert len(X_train_combined) == len(y_train), 'train features/labels row count mismatch'
assert len(X_validate_combined) == len(y_validate), 'validation features/labels row count mismatch'


### Step 4: Train LightGBM Model


# Wrap the combined training matrix and the encoded labels for LightGBM
d_train = lgb.Dataset(data=X_train_combined, label=y_train)

# Multiclass objective with one class per distinct encoded label in y_train
n_classes = len(np.unique(y_train))
params = dict(
    objective='multiclass',
    num_class=n_classes,
    learning_rate=0.1,
    metric='multi_logloss',
)

# Fit the booster for 100 rounds
lgb_model = lgb.train(params=params, train_set=d_train, num_boost_round=100)




In [18]:
# Score the validation rows, then keep the highest-scoring class index per row
y_pred = lgb_model.predict(X_validate_combined)
y_pred_labels = y_pred.argmax(axis=1)


### Step 5: Validate the Model


# Share of validation rows whose predicted class equals the encoded label
accuracy = accuracy_score(y_true=y_validate, y_pred=y_pred_labels)
print('Accuracy:', accuracy)


NameError: name 'lgb_model' is not defined