This file creates labels and assigns them to the frames that were extracted from the videos.

**Create a JSON file**, each JSON object will contain
- Image file name
- Facial coordinates
- Eye Gazing coordinates
- emotion
- emotion_confidence
- is_drowsy
- gazing_at_screen
- attention_score
- head_direction
- eye_direction
- computer_label

**Create a CSV file**

In [None]:
# !pip install opencv-python

In [None]:
# !pip install tensorflow

In [None]:
# !pip install deepface

In [None]:
# pip install mediapipe

First upload the zip file that contains all the frames; it then needs to be unzipped.
The code below extracts the contents of the zip file into a folder named `dataset`, which then holds all the frames that were extracted from the videos.

In [None]:
from zipfile import ZipFile
import os

# Archive that holds every extracted frame, and the directory it is unpacked into.
zip_filename = './All_extracted_frames.zip'
extract_path = './dataset'

# Make sure the destination exists before unpacking the whole archive into it.
os.makedirs(extract_path, exist_ok=True)
with ZipFile(zip_filename, mode='r') as archive:
    archive.extractall(path=extract_path)

print(f"Unzipped {zip_filename} to {extract_path}")

In [None]:
import os

# Folder that holds the extracted frames.
folder_path = "./dataset/All_extracted_frames/"

# File suffixes treated as images; names are lower-cased before comparing.
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

# Tally the directory entries whose name ends with one of the image suffixes.
image_count = sum(
    1
    for entry in os.listdir(folder_path)
    if entry.lower().endswith(image_extensions)
)

print(f"Total images in '{folder_path}': {image_count}")

In [None]:
import cv2
import os
import json
import numpy as np
import mediapipe as mp
from deepface import DeepFace

# One shared Face Mesh instance, configured for independent still images,
# a single face per image, and refined landmarks.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
)

# Head-pose reference model: each face-mesh landmark id is paired with the 3D
# point it is matched against when solving for the head pose.
# NOTE(review): the ids presumably correspond to nose tip, chin, outer eye
# corners and mouth corners - confirm against the MediaPipe landmark map.
_HEAD_POSE_MODEL = [
    (1, (0.0, 0.0, 0.0)),
    (152, (0.0, -330.0, -65.0)),
    (33, (-225.0, 170.0, -135.0)),
    (263, (225.0, 170.0, -135.0)),
    (61, (-150.0, -150.0, -125.0)),
    (291, (150.0, -150.0, -125.0)),
]
FACE_3D = np.array([xyz for _, xyz in _HEAD_POSE_MODEL], dtype=np.float64)
LANDMARK_IDS = [landmark_id for landmark_id, _ in _HEAD_POSE_MODEL]

def convert_to_serializable(obj):
    """Recursively convert numpy values to native Python types for JSON.

    Handles every numpy boolean, integer and floating scalar (not only the
    32/64-bit variants), numpy arrays, and numpy values nested inside dicts,
    lists and tuples.

    Args:
        obj: Any value; containers are walked recursively.

    Returns:
        The same structure with numpy scalars replaced by ``bool``/``int``/
        ``float`` and arrays replaced by nested lists. Tuples stay tuples,
        and values of any other type are returned unchanged.
    """
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {k: convert_to_serializable(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_to_serializable(v) for v in obj]
    if isinstance(obj, tuple):
        # Tuples (e.g. an eye 'center') may also carry numpy scalars.
        return tuple(convert_to_serializable(v) for v in obj)
    return obj

# Configuration
# Folder scanned for the frames to label.
image_folder = './dataset/All_extracted_frames/'
# File the per-frame results are written to as JSON.
output_json = 'all_attention_results.json'
max_images = 2400  # cap on the number of frames the labelling loop processes in one run

# Drowsiness detection parameters
EYE_AR_THRESH = 0.25  # is_drowsy() reports drowsiness when the eye aspect ratio is below this value

def detect_emotion(face_roi):
    """Classify the dominant emotion of a cropped face with DeepFace.

    Args:
        face_roi: Face crop as a BGR numpy array (a slice of an image read
            with ``cv2.imread``).

    Returns:
        ``(emotion, confidence)`` where ``emotion`` is DeepFace's dominant
        emotion label and ``confidence`` its score scaled to 0-1.
        ``(None, 0.0)`` is returned when the crop is empty, when DeepFace
        returns nothing usable, or when any error occurs (the error is
        printed; this function never raises).
    """
    # An empty crop (e.g. a box that fell outside the frame) cannot be analysed.
    if face_roi is None or getattr(face_roi, 'size', 0) == 0:
        return None, 0.0

    try:
        # DeepFace documents numpy input as BGR, so the OpenCV crop is passed
        # through unchanged instead of being converted to RGB first.
        analysis = DeepFace.analyze(face_roi, actions=['emotion'], enforce_detection=False, silent=True)

        # Some DeepFace versions return a single dict rather than a list of dicts.
        if isinstance(analysis, dict):
            analysis = [analysis]

        if analysis and isinstance(analysis, list):
            first_face = analysis[0]
            emotion = first_face['dominant_emotion']
            # DeepFace reports emotion scores as percentages.
            confidence = float(first_face['emotion'][emotion]) / 100
            return emotion, confidence
        return None, 0.0
    except Exception as e:
        print(f"Emotion detection error: {str(e)}")
        return None, 0.0

def is_drowsy(eye_aspect_ratio, threshold=None):
    """Decide whether an eye aspect ratio indicates drowsiness.

    Args:
        eye_aspect_ratio: Measured ratio, or ``None`` when no measurement
            is available.
        threshold: Ratio below which the eyes count as closed. Defaults to
            the module-level ``EYE_AR_THRESH``.

    Returns:
        ``True`` when the ratio is strictly below the threshold; ``False``
        otherwise, including when no measurement is available.
    """
    # Only a missing measurement means "cannot tell". A ratio of exactly 0.0
    # is a valid reading (fully closed) and must not be treated as missing.
    if eye_aspect_ratio is None:
        return False
    if threshold is None:
        threshold = EYE_AR_THRESH
    return bool(eye_aspect_ratio < threshold)

def calculate_attention_score(features):
    """Fold the per-frame cues into a single attention score in [0, 1].

    Contributions, applied in this order:
        +0.30  the face is frontal
        +0.25  the person is gazing at the screen
        +0.10  emotion is happy / neutral / surprise
        -0.05  emotion is angry / disgust / fear / sad
        +0.25  the person is not drowsy (a missing flag counts as drowsy)

    A record without a detected face scores 0.0.
    """
    face = features.get('face')
    if not face:
        return 0.0

    engaged_emotions = ('happy', 'neutral', 'surprise')
    disengaged_emotions = ('angry', 'disgust', 'fear', 'sad')

    total = 0.0

    # Face orientation
    if face.get('is_frontal', False):
        total += 0.3

    # Eye gaze
    if features.get('gazing_at_screen', False):
        total += 0.25

    # Emotion: small bonus or small penalty, nothing for anything else
    mood = features.get('emotion')
    if mood in engaged_emotions:
        total += 0.1
    elif mood in disengaged_emotions:
        total -= 0.05

    # Drowsiness
    drowsy = features.get('is_drowsy', True)
    if not drowsy:
        total += 0.25

    # Clamp into the [0, 1] range.
    if total < 0.0:
        total = 0.0
    elif total > 1.0:
        total = 1.0
    return float(total)

def _landmark_distance(mesh, first, second, w, h):
    """Return the pixel distance between landmarks ``first`` and ``second``.

    Landmark coordinates are normalised, so they are scaled by the image
    width ``w`` and height ``h`` before measuring.
    """
    dx = (mesh[first].x - mesh[second].x) * w
    dy = (mesh[first].y - mesh[second].y) * h
    return float(np.hypot(dx, dy))


def detect_face_and_gaze(image):
    """Locate the face in ``image`` and derive head pose, gaze and drowsiness.

    Args:
        image: BGR frame as a numpy array; it is converted to RGB before
            being passed to the module-level ``face_mesh``.

    Returns:
        dict with the keys
            box: ``(x1, y1, x2, y2)`` face bounds in pixels, clamped to the
                image, or ``None`` when no face was found.
            is_frontal: ``True`` when the head yaw is within +/-15.
            gazing_at_screen: ``True`` when the iris offset is within +/-30 px.
            eyes: one dict per eye with ``x``, ``y``, ``width``, ``height``
                and ``center``.
            head_direction: ``"left"``, ``"right"``, ``"front"``, or ``None``
                when the pose could not be solved.
            eye_direction: ``"left"``, ``"right"`` or ``"center"``.
            drowsy: result of ``is_drowsy`` on the mean eye aspect ratio.
        When no face is detected the defaults (``None`` / ``False`` / ``[]``)
        are returned.
    """
    h, w = image.shape[:2]
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = face_mesh.process(rgb)

    face_data = {
        'box': None,
        'is_frontal': False,
        'gazing_at_screen': False,
        'eyes': [],
        'head_direction': None,
        'eye_direction': None,
        'drowsy': False
    }

    if not result.multi_face_landmarks:
        return face_data

    mesh = result.multi_face_landmarks[0].landmark

    # Head pose estimation: match the reference 3D points to their landmarks,
    # using an approximate camera (focal length = image width, centre = image centre).
    image_points = np.array([(int(mesh[i].x * w), int(mesh[i].y * h)) for i in LANDMARK_IDS], dtype="double")
    cam_matrix = np.array([[w, 0, w / 2],
                           [0, w, h / 2],
                           [0, 0, 1]], dtype=np.float64)
    dist_coeffs = np.zeros((4, 1))
    success, rvec, tvec = cv2.solvePnP(FACE_3D, image_points, cam_matrix, dist_coeffs)

    # Only classify the head direction when the pose was actually solved;
    # otherwise head_direction stays None and is_frontal stays False.
    if success:
        rmat, _ = cv2.Rodrigues(rvec)
        angles = cv2.RQDecomp3x3(rmat)[0]
        yaw = angles[1]  # horizontal head rotation

        if yaw < -15:
            face_data['head_direction'] = "left"
        elif yaw > 15:
            face_data['head_direction'] = "right"
        else:
            face_data['head_direction'] = "front"
            face_data['is_frontal'] = True

    # Face bounding box, clamped to the frame: landmarks of a partly visible
    # face can lie outside the image, and negative values would wrap around
    # when the box is later used to slice the image.
    x_coords = [min(max(int(lm.x * w), 0), w) for lm in mesh]
    y_coords = [min(max(int(lm.y * h), 0), h) for lm in mesh]
    face_data['box'] = (min(x_coords), min(y_coords), max(x_coords), max(y_coords))

    # Iris-based eye direction: mean iris x against the midpoint of landmarks 33 and 263.
    # NOTE(review): the +/-30 px threshold is absolute and does not scale with face size.
    left_iris = mesh[468]
    right_iris = mesh[473]
    iris_x = (left_iris.x + right_iris.x) / 2 * w
    face_center_x = (mesh[33].x + mesh[263].x) / 2 * w
    gaze_offset = iris_x - face_center_x

    if gaze_offset < -30:
        face_data['eye_direction'] = "left"
    elif gaze_offset > 30:
        face_data['eye_direction'] = "right"
    else:
        face_data['eye_direction'] = "center"
        face_data['gazing_at_screen'] = True

    # Per eye: (corner, corner, upper lid, lower lid) landmark indices.
    # NOTE(review): lid indices 159/145 and 386/374 follow the commonly used
    # MediaPipe eye contour points - confirm against the landmark map.
    eye_landmarks = [(33, 133, 159, 145), (362, 263, 386, 374)]

    aspect_ratios = []
    for corner_a, corner_b, upper_lid, lower_lid in eye_landmarks:
        points = [mesh[i] for i in (corner_a, corner_b, upper_lid, lower_lid)]
        x1 = int(min(p.x for p in points) * w)
        y1 = int(min(p.y for p in points) * h)
        x2 = int(max(p.x for p in points) * w)
        y2 = int(max(p.y for p in points) * h)

        face_data['eyes'].append({
            'x': x1,
            'y': y1,
            'width': x2 - x1,
            'height': y2 - y1,
            'center': ((x1 + x2) // 2, (y1 + y2) // 2)
        })

        # Eye aspect ratio = lid opening / eye width. The eye corners alone
        # only reveal head roll, so the lid landmarks are required here.
        eye_width = _landmark_distance(mesh, corner_a, corner_b, w, h)
        if eye_width > 0:
            aspect_ratios.append(_landmark_distance(mesh, upper_lid, lower_lid, w, h) / eye_width)

    # Drowsiness from the mean aspect ratio of the eyes that could be measured.
    if aspect_ratios:
        eye_ar = float(sum(aspect_ratios) / len(aspect_ratios))
        face_data['drowsy'] = is_drowsy(eye_ar)

    return face_data

results = []

# os.listdir returns entries in arbitrary order, so sort them to make the
# processed subset reproducible. Filtering first means max_images caps the
# number of image files rather than the number of directory entries.
image_files = sorted(
    name for name in os.listdir(image_folder)
    if name.lower().endswith(('.png', '.jpg', '.jpeg'))
)

for filename in image_files[:max_images]:
    img_path = os.path.join(image_folder, filename)
    img = cv2.imread(img_path)
    if img is None:
        print(f"Could not read image: {filename}")
        continue

    # Defaults describe a frame in which no face was found, so every record
    # carries the full set of keys, including computer_label.
    features = {
        'image_name': filename,
        'face': None,
        'eyes': [],
        'emotion': None,
        'emotion_confidence': 0.0,
        'is_drowsy': False,
        'gazing_at_screen': False,
        'attention_score': 0.0,
        'head_direction': None,
        'eye_direction': None,
        'computer_label': 'Not Attentive'
    }

    try:
        # Detect face and gaze using MediaPipe
        face_data = detect_face_and_gaze(img)

        if face_data['box']:
            img_h, img_w = img.shape[:2]
            x1, y1, x2, y2 = face_data['box']
            # Keep the box inside the frame; a negative coordinate would wrap
            # around when slicing and yield a wrong or empty crop.
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(img_w, x2), min(img_h, y2)
            w = max(0, x2 - x1)
            h = max(0, y2 - y1)

            # Store face features
            features['face'] = {
                'x': x1,
                'y': y1,
                'width': w,
                'height': h,
                'is_frontal': face_data['is_frontal']
            }

            features['eyes'] = face_data['eyes']
            features['gazing_at_screen'] = face_data['gazing_at_screen']
            features['is_drowsy'] = face_data['drowsy']
            features['head_direction'] = face_data['head_direction']
            features['eye_direction'] = face_data['eye_direction']

            # Emotion detection, only when the crop is non-empty
            if w > 0 and h > 0:
                emotion, confidence = detect_emotion(img[y1:y2, x1:x2])
                features['emotion'] = emotion
                features['emotion_confidence'] = confidence

            # Calculate attention score
            features['attention_score'] = calculate_attention_score(features)

            # Binary label.
            # NOTE: update_json_records() later in this file re-labels the
            # saved records with its own cut-off.
            if features['attention_score'] >= 0.75:
                features['computer_label'] = 'Attentive'
            else:
                features['computer_label'] = 'Not Attentive'

    except Exception as e:
        # Best effort: keep whatever was gathered for this frame and move on.
        print(f"Error processing {filename}: {str(e)}")

    # Add to results (with numpy -> native type conversion)
    results.append(convert_to_serializable(features))

# Save results; default=str is a last-resort fallback for any value that is
# still not JSON-serialisable after conversion.
with open(output_json, 'w') as f:
    json.dump(results, f, indent=2, default=str)

print(f"Analysis complete! Processed {len([r for r in results if r.get('face')])} faces with {len(results)} total entries.")
print(f"Results saved to {output_json}")
if results:
    # Prefer showing a record that actually contains a face.
    valid_results = [r for r in results if r.get('face')]
    sample = valid_results[0] if valid_results else results[0]
    print("\nSample output:")
    print(json.dumps(sample, indent=2))

In [None]:
This code stores the features extracted from each image, such as head position, eye detection, and the drowsy / not-drowsy classification, i.e. all the features that define whether the person is sleepy or not.
import json

def _apply_computer_label(record, threshold):
    """Set ``record['computer_label']`` from its attention score.

    Returns ``True`` when the record was labelled, and ``False`` when it is
    not a dict or has no numeric ``attention_score``.
    """
    if not isinstance(record, dict):
        return False
    score = record.get('attention_score')
    if isinstance(score, bool) or not isinstance(score, (int, float)):
        return False
    record['computer_label'] = 'Attentive' if score >= threshold else 'Not Attentive'
    return True


def update_json_records(filepath, threshold=0.50, sample_size=50):
    """Re-label the records of a JSON results file in place.

    Every record with a numeric ``attention_score`` gets
    ``computer_label = 'Attentive'`` when the score is at least ``threshold``
    and ``'Not Attentive'`` otherwise. Records without a usable score are
    left untouched instead of aborting the whole update. The file is then
    rewritten with the updated data.

    Args:
        filepath: Path of the JSON file, holding either a list of records
            or a single record (dict).
        threshold: Minimum attention score labelled as ``'Attentive'``.
        sample_size: Number of updated records printed after a list update.

    Returns:
        None. Progress and errors are reported with ``print``; a missing
        file, invalid JSON or any other error is reported, not raised.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as file:
            data = json.load(file)

        if isinstance(data, list):
            print(f"Successfully loaded {len(data)} records from '{filepath}'")

            # Count only the records that actually received a label.
            updated_count = sum(
                1 for record in data if _apply_computer_label(record, threshold)
            )
            print(f"Updated {updated_count} records with computer_label")

            # Save the updated data back to the file
            with open(filepath, 'w', encoding='utf-8') as file:
                json.dump(data, file, indent=2)
            print(f"Successfully saved updated records to '{filepath}'")

            # Print the first `sample_size` records as a sample
            print("\nSample updated records:")
            for i, record in enumerate(data[:sample_size]):
                print(f"\n--- Record {i+1} ---")
                print(record)

        elif isinstance(data, dict):
            print("The JSON file contains a single record (dictionary).")
            if _apply_computer_label(data, threshold):
                with open(filepath, 'w', encoding='utf-8') as file:
                    json.dump(data, file, indent=2)
                print(f"Updated single record with computer_label and saved to '{filepath}'")
                print("\nUpdated record:")
                print(data)
            else:
                print("Record doesn't contain attention_score - no updates made")
        else:
            print(f"The JSON file '{filepath}' contains data of an unexpected type: {type(data)}")

    except FileNotFoundError:
        print(f"Error: The file '{filepath}' was not found.")
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from '{filepath}'. Check file format.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Main execution: when this code runs as a script, re-label the results file
# in the current working directory with update_json_records().
if __name__ == "__main__":
    json_file_name = "./all_attention_results.json"
    update_json_records(json_file_name)

In [None]:
This code converts the JSON file to a CSV file.
import csv
import json
import os
import base64
from PIL import Image

# Input JSON, the folder the frames live in, and the CSV to produce.
json_file = "all_attention_results.json"
folder_path = "./dataset/All_extracted_frames/"
output_csv = "all_attention_results.csv"

# Read every per-frame record.
with open(json_file) as f:
    data = json.load(f)

# One CSV row per record: running number (from 1), image name, label
# ("Unknown" when the record has none) and the image path inside folder_path.
# Embedding the image itself as Base64 is intentionally not done.
with open(output_csv, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Serial No', 'Image Name', 'Computer Label', 'Image Path'])
    writer.writerows(
        [
            serial_no,
            record['image_name'],
            record.get('computer_label', 'Unknown'),
            os.path.join(folder_path, record['image_name']),
        ]
        for serial_no, record in enumerate(data, start=1)
    )

print(f"CSV file created successfully: {output_csv}")

In [None]:
# This code helps us visualize each image with its detected features, along with the metrics that determine "Attentive" or "Not Attentive".
import cv2
import os
import json
import random
import numpy as np
from matplotlib import pyplot as plt

# Configuration
results_file = 'all_attention_results.json'
image_folder = './dataset/All_extracted_frames/'
output_folder = './visualized_results/'
num_images_to_visualize = 25  # Number of random images to visualize
display_first_n = 25  # Number of images to display in notebook

# Create output directory
os.makedirs(output_folder, exist_ok=True)

# Color definitions. The overlays are drawn on an image that has already been
# converted to RGB, so every triple below is (R, G, B).
COLORS = {
    'face': (0, 255, 255),          # Cyan
    'eyes': (0, 255, 0),            # Green
    'gaze_line': (255, 0, 255),     # Magenta
    'text': (255, 255, 255),        # White
    'attention_high': (0, 255, 0),  # Green
    'attention_med': (255, 255, 0), # Yellow
    'attention_low': (255, 0, 0),   # Red
    'metric_label': (200, 200, 0),  # Muted yellow
    'metric_value': (200, 255, 255) # Light cyan
}

# Load results
with open(results_file) as f:
    results = json.load(f)

# Create a dictionary mapping image names to their results
result_dict = {result['image_name']: result for result in results}

# List the available images. os.listdir returns entries in arbitrary order,
# so the names are sorted; otherwise the fixed seed below would not select
# the same images on every machine or run.
all_images = sorted(
    f for f in os.listdir(image_folder)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Filter to only images that have analysis results
valid_images = [img for img in all_images if img in result_dict]

# Randomly select images
random.seed(42)  # For reproducibility
selected_images = random.sample(valid_images, min(num_images_to_visualize, len(valid_images)))

def visualize_result(result):
    """Render one analysis record on top of its source frame.

    Draws the face box, the eye boxes and the line joining the two eye
    centres, then a semi-transparent panel listing the record's metrics.

    Args:
        result: One record from the results JSON; ``image_name`` selects the
            file inside ``image_folder``.

    Returns:
        The annotated image as an RGB array, or ``None`` (after printing a
        message) when the image file cannot be read.
    """
    frame = cv2.imread(os.path.join(image_folder, result['image_name']))
    if frame is None:
        print(f"Could not read image: {result['image_name']}")
        return None

    # matplotlib expects RGB, so all drawing below happens on the RGB copy.
    img_viz = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # --- Face bounding box ---------------------------------------------------
    face = result.get('face')
    if face:
        top_left = (face['x'], face['y'])
        bottom_right = (face['x'] + face['width'], face['y'] + face['height'])
        cv2.rectangle(img_viz, top_left, bottom_right, COLORS['face'], 2)

    # --- Eye boxes and the line between the first two eye centres ------------
    eyes = result.get('eyes')
    if eyes and len(eyes) >= 2:
        for eye in eyes:
            eye_top_left = (eye['x'], eye['y'])
            eye_bottom_right = (eye['x'] + eye['width'], eye['y'] + eye['height'])
            cv2.rectangle(img_viz, eye_top_left, eye_bottom_right, COLORS['eyes'], 1)

        first_center = eyes[0]['center']
        second_center = eyes[1]['center']
        cv2.line(img_viz,
                 (first_center[0], first_center[1]),
                 (second_center[0], second_center[1]),
                 COLORS['gaze_line'], 1)

    # --- Semi-transparent backdrop for the metrics ----------------------------
    backdrop = img_viz.copy()
    cv2.rectangle(backdrop, (10, 10), (300, 300), (50, 50, 50), -1)
    cv2.addWeighted(backdrop, 0.7, img_viz, 0.3, 0, img_viz)

    # --- Collect the rows to print: (label, value text, value colour) --------
    default_color = COLORS['metric_value']
    rows = []

    if face:
        rows.append(("Face Position", f"{face['x']}, {face['y']}", default_color))
        rows.append(("Face Size", f"{face['width']}x{face['height']}", default_color))
        rows.append(("Frontal Face", str(face['is_frontal']), default_color))

    if eyes:
        rows.append(("Eyes Detected", str(len(eyes)), default_color))
        if len(eyes) >= 2:
            rows.append(("Gazing at Screen", str(result.get('gazing_at_screen', False)), default_color))

    if result.get('emotion'):
        rows.append(("Emotion", result['emotion'], default_color))
        rows.append(("Confidence", f"{result['emotion_confidence']*100:.1f}%", default_color))

    if 'is_drowsy' in result:
        if result['is_drowsy']:
            drowsy_color = COLORS['attention_low']
        else:
            drowsy_color = COLORS['attention_high']
        rows.append(("Drowsy", str(result['is_drowsy']), drowsy_color))

    if 'attention_score' in result:
        score = result['attention_score']
        if score > 0.7:
            score_color = COLORS['attention_high']
        elif score > 0.4:
            score_color = COLORS['attention_med']
        else:
            score_color = COLORS['attention_low']
        rows.append(("Attention Score", f"{result['attention_score']:.2f}", score_color))

    if 'computer_label' in result:
        if result['computer_label'] == 'Attentive':
            label_color = COLORS['attention_high']
        else:
            label_color = COLORS['attention_low']
        rows.append(("Computer Label", result['computer_label'], label_color))

    # --- Print the rows: first baseline at y=30, then 25 px per row ----------
    for row_index, (label, value, value_color) in enumerate(rows):
        baseline_y = 30 + row_index * 25
        cv2.putText(img_viz, f"{label}:",
                    (15, baseline_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS['metric_label'], 1)
        cv2.putText(img_viz, f"{value}",
                    (150, baseline_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, value_color, 1)

    return img_viz

# Render, save and (for the first few) display each selected image.
print(f"Selected {len(selected_images)} random images for visualization")
for i, img_name in enumerate(selected_images, start=1):
    record = result_dict[img_name]
    print(f"\nProcessing {i}/{len(selected_images)}: {img_name}")

    rendered = visualize_result(record)
    if rendered is not None:
        # The rendering is RGB; OpenCV writes BGR, so convert back before saving.
        target_path = os.path.join(output_folder, f"viz_{img_name}")
        cv2.imwrite(target_path, cv2.cvtColor(rendered, cv2.COLOR_RGB2BGR))

        # Show only the first display_first_n renderings inline.
        if i <= display_first_n:
            plt.figure(figsize=(15, 10))
            plt.imshow(rendered)
            plt.axis('off')
            plt.title(f"Analysis Visualization: {img_name}")
            plt.show()

print("\n" + "="*80)
print(f"Visualization complete! Processed {len(selected_images)} random images")
print(f"Saved visualizations to {output_folder}")
print("="*80)