# 2. Feature Engineering

## Objectives
- Extract facial features using OpenCV and dlib
- Process text data (tokenization, stemming, etc.)
- Extract text features (TF-IDF vectors, BERT embeddings, sentiment scores)
- Combine features from both modalities

In [None]:
# Import libraries
import cv2
import dlib
from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import AutoTokenizer
import numpy as np

In [None]:
# Example: Extract facial landmarks
# Cache of (detector, predictor) pairs keyed by predictor model path.
# dlib.shape_predictor reads its model file from disk, so build each pair
# once per path instead of on every call to extract_landmarks.
_LANDMARK_MODELS = {}


def _load_landmark_models(predictor_path):
    """Return a cached ``(detector, predictor)`` pair for *predictor_path*."""
    models = _LANDMARK_MODELS.get(predictor_path)
    if models is None:
        models = (
            dlib.get_frontal_face_detector(),
            dlib.shape_predictor(predictor_path),
        )
        _LANDMARK_MODELS[predictor_path] = models
    return models


def extract_landmarks(
    image,
    predictor_path='shape_predictor_68_face_landmarks.dat',
):
    """Extract facial landmark coordinates for the first detected face.

    Args:
        image: Image array as produced by OpenCV. Either a colour image in
            BGR channel order or an already-grayscale 2-D array. ``None``
            (what ``cv2.imread`` returns for an unreadable file) is
            accepted and treated as "no landmarks".
        predictor_path: Path to the dlib shape-predictor model file.
            Defaults to the 68-point model expected in the working
            directory.

    Returns:
        A NumPy array with one ``(x, y)`` row per landmark point of the
        first face returned by the detector, or ``None`` when the image is
        ``None`` or no face is detected.
    """
    # Bail out before loading any model: there is nothing to analyse.
    if image is None:
        return None

    detector, predictor = _load_landmark_models(predictor_path)

    # A 2-D array is already single-channel; COLOR_BGR2GRAY would reject it.
    if getattr(image, 'ndim', None) == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    faces = detector(gray)
    if len(faces) == 0:
        return None

    # Only the first detection is used; any further faces are ignored.
    landmarks = predictor(gray, faces[0])
    return np.array([(p.x, p.y) for p in landmarks.parts()])

## Feature Extraction Pipeline
1. Image features:
   - Facial landmarks
   - HOG features
   - Deep features from pre-trained models

2. Text features:
   - TF-IDF vectors
   - BERT embeddings
   - Sentiment scores