### **NEW**

In [None]:
pip install tensorflow pandas numpy scikit-learn matplotlib opencv-python


In [None]:
import os
import json
import numpy as np
import pandas as pd
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the dataset folders below are reachable.
drive.mount('/content/drive')

# Adjust the path to point to your specific dataset location.
# NOTE(review): LEFT_EYE_DIR and RIGHT_EYE_DIR are assigned again in later cells
# with a different path ('MyDrive' and a nested 'UnityEyes_Windows/UnityEyes_Windows'
# folder), so the values set here are overwritten when the notebook runs top to bottom.
LEFT_EYE_DIR = '/content/drive/My Drive/AllGalaxy/allgalaxy-webgazer/Data collection/Data/UnityEyes_Windows/left_eye/'
RIGHT_EYE_DIR = '/content/drive/My Drive/AllGalaxy/allgalaxy-webgazer/Data collection/Data/UnityEyes_Windows/right_eye/'


In [None]:
from google.colab import drive
# Mount Drive again at the same mount point, passing force_remount=True.
# NOTE(review): the earlier cell already mounts /content/drive; presumably this
# cell was added to refresh a stale mount — confirm whether both are needed.
drive.mount('/content/drive', force_remount=True)


In [None]:
# Dataset folders for the left-eye and right-eye samples on the mounted Drive.
# These assignments replace the values given to the same names in the earlier cell.
LEFT_EYE_DIR = '/content/drive/MyDrive/AllGalaxy/allgalaxy-webgazer/Data collection/Data/UnityEyes_Windows/UnityEyes_Windows/left_eye/'
RIGHT_EYE_DIR = '/content/drive/MyDrive/AllGalaxy/allgalaxy-webgazer/Data collection/Data/UnityEyes_Windows/UnityEyes_Windows/right_eye/'


In [None]:
import os
import random
import json
import cv2
import numpy as np
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Define Google Drive paths
# (same folders as the previous cell, written without the trailing slash)
LEFT_EYE_DIR = '/content/drive/MyDrive/AllGalaxy/allgalaxy-webgazer/Data collection/Data/UnityEyes_Windows/UnityEyes_Windows/left_eye'
RIGHT_EYE_DIR = '/content/drive/MyDrive/AllGalaxy/allgalaxy-webgazer/Data collection/Data/UnityEyes_Windows/UnityEyes_Windows/right_eye'

# Initialize lists to store data
# These module-level lists are appended to by process_random_images (one entry
# per successfully loaded image) and are converted to numpy arrays at the end
# of this cell.
images = []
json_features = []
labels = []

# Function to parse JSON features
def parse_json(json_path):
    """Read one annotation JSON file and return its five numeric features.

    The feature vector is ``[head_pose[0], head_pose[1], head_pose[2],
    pupil_size, iris_size]``. ``head_pose`` is read as a string of the form
    ``"(a,b,c)"`` and the two sizes are read from the ``eye_details`` mapping;
    keys that are absent fall back to zero.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        A list of five floats. If the file cannot be opened, is not valid
        JSON, or holds values that cannot be turned into exactly five numbers,
        the error is printed and ``[0.0, 0.0, 0.0, 0.0, 0.0]`` is returned, so
        every sample keeps the same feature length.
    """
    try:
        with open(json_path, 'r') as f:
            data = json.load(f)
        head_pose = [
            float(angle)
            for angle in data.get('head_pose', "(0,0,0)").strip("()").split(",")
        ]
        # A different number of angles would give this sample a different
        # feature length and break the later conversion to a 2-D array.
        if len(head_pose) != 3:
            raise ValueError(f"expected 3 head_pose values, got {len(head_pose)}")
        eye_details = data.get('eye_details', {})
        pupil_size = float(eye_details.get('pupil_size', 0.0))
        iris_size = float(eye_details.get('iris_size', 0.0))
        return head_pose + [pupil_size, iris_size]
    # json.JSONDecodeError is a ValueError and FileNotFoundError is an OSError,
    # so the original failure modes are still covered; float() conversion
    # errors and unexpected JSON shapes (e.g. a list payload) are covered too.
    except (OSError, ValueError, TypeError, AttributeError) as e:
        print(f"Error loading JSON file {json_path}: {e}")
        return [0.0, 0.0, 0.0, 0.0, 0.0]

# Function to process a single file (image and corresponding JSON)
def process_file(directory, file):
    """Load one image and the features of its sibling JSON file.

    Args:
        directory: Folder that contains both the image and its JSON file.
        file: File name of the image inside ``directory``.

    Returns:
        ``(img, features, label)`` where ``img`` is the RGB image resized to
        224x224 and scaled to [0, 1], ``features`` is whatever ``parse_json``
        returns for the JSON file, and ``label`` is the placeholder ``[0, 0]``.
        ``(None, None, None)`` is returned when the image cannot be read.
    """
    img_path = os.path.join(directory, file)
    # Swap only the final extension; str.replace would also rewrite any other
    # '.jpg' occurring earlier in the file name.
    json_path = os.path.join(directory, os.path.splitext(file)[0] + '.json')

    # Load and preprocess image
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error loading image {img_path}")
        return None, None, None
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))  # Resize to match model input size
    img = img / 255.0  # Normalize to range [0, 1]

    # Load and parse JSON features
    features = parse_json(json_path)

    # Placeholder label (replace with actual labels if available)
    # NOTE(review): every sample receives the same [0, 0] target, so a model
    # fitted on these labels has no gaze signal to learn from — TODO supply
    # real targets.
    label = [0, 0]

    return img, features, label

# Function to randomly select and process 1000 images
def process_random_images(directory, num_images=500):
    """Load a random subset of the ``.jpg`` files found in ``directory``.

    At most ``num_images`` files are drawn with ``random.sample``; when the
    directory holds no more than that, every ``.jpg`` file is used. Each file
    goes through ``process_file`` and, unless its image failed to load, the
    result is appended to the module-level ``images``, ``json_features`` and
    ``labels`` lists.

    Args:
        directory: Folder to scan for ``.jpg`` files.
        num_images: Upper bound on the number of files to load (default 500).

    Returns:
        None. An ``OSError`` raised while working on the directory is printed
        rather than propagated.
    """
    try:
        jpg_names = [name for name in os.listdir(directory) if name.endswith('.jpg')]
        # Sample only when there is a surplus; otherwise take everything.
        chosen = (
            random.sample(jpg_names, num_images)
            if len(jpg_names) > num_images
            else jpg_names
        )

        for name in chosen:
            img, features, label = process_file(directory, name)
            if img is None:
                # process_file already reported the unreadable image.
                continue
            images.append(img)
            json_features.append(features)
            labels.append(label)
    except OSError as e:
        print(f"Error processing files in directory {directory}: {e}")

# Sample up to 500 images from each eye directory; the results accumulate in
# the module-level `images`, `json_features` and `labels` lists.
for eye_name, eye_dir in (('left_eye', LEFT_EYE_DIR), ('right_eye', RIGHT_EYE_DIR)):
    print(f"Processing 500 random images from {eye_name} directory")
    process_random_images(eye_dir, num_images=500)

# Turn the accumulated Python lists into numpy arrays (same names, rebound)
images, json_features, labels = (
    np.array(images),
    np.array(json_features),
    np.array(labels),
)

# Report the resulting shapes for confirmation
for title, array in (('Images', images), ('JSON features', json_features), ('Labels', labels)):
    print(f'{title} shape: {array.shape}')


In [None]:
# Standardize JSON features
# The fitted `scaler` is reused later in the notebook to transform the features
# of a single sample before prediction.
# NOTE(review): the scaler is fitted on all samples before the train/val/test
# split in the next cell, so validation and test statistics influence the
# scaling — consider fitting on the training portion only.
# NOTE(review): `json_features` is overwritten with its scaled version; running
# this cell a second time would fit the scaler on already-scaled data.
scaler = StandardScaler()
json_features = scaler.fit_transform(json_features)


In [None]:
# Split into training and testing sets
# First split: 70% of the samples go to training, 30% to a temporary hold-out.
# Images, JSON features and labels are split together so rows stay aligned.
X_img_train, X_img_temp, X_json_train, X_json_temp, y_train, y_temp = train_test_split(
    images, json_features, labels, test_size=0.3, random_state=42
)

# Further split temp into validation and testing
# (half of the hold-out each, i.e. about 15% / 15% of the full data set)
X_img_val, X_img_test, X_json_val, X_json_test, y_val, y_test = train_test_split(
    X_img_temp, X_json_temp, y_temp, test_size=0.5, random_state=42
)

# Print the image array shapes of the three partitions for confirmation
print(f'Training images: {X_img_train.shape}')
print(f'Validation images: {X_img_val.shape}')
print(f'Testing images: {X_img_test.shape}')


In [None]:
# Image input: 224x224 RGB images scaled to [0, 1], as produced by the loading cell
image_input = layers.Input(shape=(224, 224, 3), name='image_input')

# The ImageNet ResNet50 weights were trained on inputs passed through
# `resnet50.preprocess_input`, which expects pixel values in [0, 255].
# The notebook feeds [0, 1] images, so the scaling is undone here and the
# ResNet preprocessing is applied inside the model; callers keep passing
# [0, 1] RGB images exactly as before.
x = layers.Rescaling(255.0, name='to_pixel_range')(image_input)
x = tf.keras.applications.resnet50.preprocess_input(x)

# Pre-trained CNN for image feature extraction
base_model = tf.keras.applications.ResNet50(
    include_top=False, weights='imagenet', input_shape=(224, 224, 3)
)
base_model.trainable = False  # Freeze the base model
# training=False keeps the frozen backbone in inference mode while the head trains
x = base_model(x, training=False)

# Pool the backbone feature map and project it to a 256-d image embedding
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.5)(x)
image_features = layers.Dense(256, activation='relu')(x)

# JSON features input (width taken from the training feature matrix)
json_input = layers.Input(shape=(X_json_train.shape[1],), name='json_input')
y = layers.Dense(128, activation='relu')(json_input)
y = layers.Dropout(0.3)(y)
json_features_dense = layers.Dense(64, activation='relu')(y)

# Combine image and JSON features
combined = layers.concatenate([image_features, json_features_dense])

# Add fully connected layers
z = layers.Dense(256, activation='relu')(combined)
z = layers.Dropout(0.5)(z)
z = layers.Dense(128, activation='relu')(z)

# Output layer
# For regression (e.g., gaze x and y coordinates)
output = layers.Dense(2, activation='linear', name='output')(z)

# Define the model: two named inputs, one 2-value regression output
model = models.Model(inputs=[image_input, json_input], outputs=output)

# Compile the model
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='mean_squared_error',
    metrics=['mae']
)

model.summary()


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define ImageDataGenerator for augmentation
# (small rotations and zooms, horizontal flips, and brightness changes)
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    horizontal_flip=True,
    brightness_range=[0.8,1.2]
)

# Example of applying augmentation
# Note: When using multiple inputs, custom generators might be needed
# NOTE(review): `datagen` is not passed to model.fit in the training cell, so
# no augmentation is applied to the training data as the notebook stands.
# NOTE(review): horizontal_flip mirrors the image without touching the
# regression target — presumably unsuitable for gaze coordinates; confirm
# before wiring this generator in.


In [None]:
# Callbacks: keep the model with the lowest validation loss on disk, and stop
# once validation loss has not improved for 10 epochs (restoring best weights).
checkpoint = callbacks.ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    restore_best_weights=True,
    patience=10,
)

# Inputs are keyed by the names of the model's two Input layers
train_inputs = {'image_input': X_img_train, 'json_input': X_json_train}
val_inputs = {'image_input': X_img_val, 'json_input': X_json_val}

# Train the model
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    batch_size=32,
    epochs=100,
    callbacks=[checkpoint, early_stop],
)


In [None]:
# Restore the weights written by the checkpoint callback
model.load_weights('best_model.h5')

# Score the held-out test partition (loss first, then the MAE metric)
test_inputs = {'image_input': X_img_test, 'json_input': X_json_test}
test_loss, test_mae = model.evaluate(test_inputs, y_test)

for metric_name, metric_value in (('Loss', test_loss), ('MAE', test_mae)):
    print(f'Test {metric_name}: {metric_value}')


In [None]:
# Plot training & validation curves side by side: loss on the left, MAE on the right.
# Each entry: (history key, short name used in labels/title, y-axis label)
panels = (
    ('loss', 'Loss', 'MSE Loss'),
    ('mae', 'MAE', 'Mean Absolute Error'),
)

plt.figure(figsize=(12, 4))

for position, (key, short_name, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[key], label=f'Train {short_name}')
    plt.plot(history.history[f'val_{key}'], label=f'Validation {short_name}')
    plt.title(f'Model {short_name}')
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(loc='upper right')

plt.tight_layout()
plt.show()


In [None]:
import os
import random
import cv2
import numpy as np
import json

# Paths to your left and right eye data in Google Drive
# (same values as assigned in the data-loading cell; repeated so this cell can
# be read on its own)
LEFT_EYE_DIR = '/content/drive/MyDrive/AllGalaxy/allgalaxy-webgazer/Data collection/Data/UnityEyes_Windows/UnityEyes_Windows/left_eye'
RIGHT_EYE_DIR = '/content/drive/MyDrive/AllGalaxy/allgalaxy-webgazer/Data collection/Data/UnityEyes_Windows/UnityEyes_Windows/right_eye'

# Function to parse JSON features
def parse_json(json_path):
    """Read one annotation JSON file and return its five numeric features.

    The feature vector is ``[head_pose[0], head_pose[1], head_pose[2],
    pupil_size, iris_size]``. ``head_pose`` is read as a string of the form
    ``"(a,b,c)"`` and the two sizes are read from the ``eye_details`` mapping;
    keys that are absent fall back to zero.

    NOTE: a function with the same name is defined in the data-loading cell,
    where failures yield a zero vector. Once this cell has run, this version
    (which returns ``None`` on failure) is the one in effect.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        A list of five floats, or ``None`` (after printing the error) when the
        file cannot be opened, is not valid JSON, or holds values that cannot
        be turned into exactly five numbers.
    """
    try:
        with open(json_path, 'r') as f:
            data = json.load(f)
        head_pose = [
            float(angle)
            for angle in data.get('head_pose', "(0,0,0)").strip("()").split(",")
        ]
        # The scaler and the model expect exactly five features per sample.
        if len(head_pose) != 3:
            raise ValueError(f"expected 3 head_pose values, got {len(head_pose)}")
        eye_details = data.get('eye_details', {})
        pupil_size = float(eye_details.get('pupil_size', 0.0))
        iris_size = float(eye_details.get('iris_size', 0.0))
        return head_pose + [pupil_size, iris_size]
    # json.JSONDecodeError is a ValueError and FileNotFoundError is an OSError,
    # so the original failure modes are still covered; float() conversion
    # errors and unexpected JSON shapes are reported the same way.
    except (OSError, ValueError, TypeError, AttributeError) as e:
        print(f"Error loading JSON file {json_path}: {e}")
        return None

# Function to load and preprocess an image
def load_and_preprocess_image(image_path):
    """Read an image file and shape it as a one-image batch for the model.

    The image is converted from OpenCV's BGR order to RGB, resized to
    224x224, scaled to [0, 1] and given a leading batch axis.

    Args:
        image_path: Path of the image file to read.

    Returns:
        An array of shape (1, 224, 224, 3), or ``None`` (after printing an
        error) when OpenCV cannot read the file.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        print(f"Error loading image at {image_path}")
        return None

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    # Resize to the model input size, then normalise pixel values to [0, 1]
    scaled = cv2.resize(rgb, (224, 224)) / 255.0
    # Leading axis of length 1: the model expects a batch
    return np.expand_dims(scaled, axis=0)

# Select a random file from the left_eye directory
def get_random_image_and_json(directory):
    """Pick one random ``.jpg`` in ``directory`` together with its JSON file.

    Args:
        directory: Folder to search for ``.jpg`` files.

    Returns:
        ``(image_path, json_path)`` for a randomly chosen image whose JSON
        file (same name, ``.json`` extension) exists. ``(None, None)`` is
        returned, after printing a message, when the folder has no ``.jpg``
        files, when the chosen image has no JSON file, or when any error
        occurs while inspecting the folder.
    """
    try:
        # Get all .jpg files in the directory
        files = [f for f in os.listdir(directory) if f.endswith('.jpg')]
        if not files:
            print(f"No images found in directory: {directory}")
            return None, None

        # Select a random file
        random_file = random.choice(files)

        # Get the corresponding JSON path. Only the final extension is
        # swapped; str.replace would also rewrite a '.jpg' occurring earlier
        # in the file name and point at a file that does not exist.
        image_path = os.path.join(directory, random_file)
        json_path = os.path.join(directory, os.path.splitext(random_file)[0] + '.json')

        # Check if the corresponding JSON file exists
        if not os.path.exists(json_path):
            print(f"Error: Corresponding JSON file {json_path} not found.")
            return None, None

        return image_path, json_path
    except Exception as e:
        # Best effort by design: report the problem and let the caller skip prediction.
        print(f"Error in get_random_image_and_json: {e}")
        return None, None

# Pick a random left-eye sample and run it through the trained model
image_path, json_path = get_random_image_and_json(LEFT_EYE_DIR)

if not (image_path and json_path):
    print("Error: Could not retrieve a random image and its corresponding JSON.")
else:
    # Image as a (1, 224, 224, 3) batch; JSON as a raw feature list
    new_image = load_and_preprocess_image(image_path)
    new_json = parse_json(json_path)

    if new_image is None or new_json is None:
        print("Error: Could not load image or JSON data.")
    else:
        # Scale the features with the scaler fitted earlier in the notebook
        new_json = scaler.transform([new_json])

        # Inputs are keyed by the names of the model's two Input layers
        prediction = model.predict({'image_input': new_image, 'json_input': new_json})
        print(f'Predicted Gaze Coordinates: {prediction[0]}')


In [None]:
# Install necessary libraries (if not pre-installed)
!pip install opencv-python-headless
import cv2
import numpy as np
import tensorflow as tf
from google.colab.patches import cv2_imshow  # Import cv2_imshow for displaying in Colab

# Load your pre-trained model (upload the model file first to Colab)
# NOTE: this rebinds the notebook-level `model` name to the model stored in
# the file, replacing whatever model object was in memory before.
model = tf.keras.models.load_model('/content/best_model.h5')

# Function to preprocess video frame before feeding to the model
def preprocess_frame(frame, target_size=(64, 64), grayscale=True):
    """Convert a BGR video frame into a one-image batch scaled to [0, 1].

    Args:
        frame: Frame as returned by OpenCV (BGR channel order).
        target_size: ``(width, height)`` handed to ``cv2.resize``.
        grayscale: When True (default) the frame is converted to a single
            gray channel; when False it is converted to 3-channel RGB.

    Returns:
        An array of shape ``(1, height, width, 1)`` for grayscale output or
        ``(1, height, width, 3)`` for RGB output. With the default arguments
        the result is a 64x64 grayscale batch.
    """
    color_code = cv2.COLOR_BGR2GRAY if grayscale else cv2.COLOR_BGR2RGB
    converted = cv2.cvtColor(frame, color_code)
    resized = cv2.resize(converted, target_size)
    normalized = resized / 255.0  # Normalize the pixel values
    if grayscale:
        # Gray frames are 2-D; add the channel axis back
        normalized = np.expand_dims(normalized, axis=-1)
    return np.expand_dims(normalized, axis=0)

# Function to process video stream and track eye movements
def process_and_track_eye(video_source=0, save_to_file='eye_tracking_data.csv'):
    """Run the model on every frame of a video source and log the predictions.

    For each frame the predicted (x, y) pair is treated as normalized
    coordinates, scaled to the frame size, drawn as a green dot, displayed
    with ``cv2_imshow`` and written as one CSV row.

    Args:
        video_source: Anything ``cv2.VideoCapture`` accepts (0 selects the
            default camera).
        save_to_file: Path of the CSV file to write. It is always created
            with its header, even when no frame can be read.

    Returns:
        None.
    """
    # Initialize video capture (video_source=0 for webcam)
    cap = cv2.VideoCapture(video_source)
    try:
        if not cap.isOpened():
            print(f"Could not open video source {video_source}")

        # Open a CSV file to store the tracking data
        with open(save_to_file, 'w') as f:
            f.write('Frame, Eye_X, Eye_Y\n')  # CSV header

            # Counted locally: CAP_PROP_POS_FRAMES is not meaningful for every
            # capture backend (live cameras in particular).
            frame_number = 0
            while True:
                ret, frame = cap.read()  # Read each frame
                if not ret:
                    break
                frame_number += 1

                # The model built in this notebook takes a 224x224 RGB image
                # in [0, 1] plus five JSON features, so both inputs are
                # supplied by name.
                image_batch = preprocess_frame(frame, target_size=(224, 224), grayscale=False)
                # No annotation file exists for a live frame; zeros are used
                # as neutral features (the training features were
                # standardized, so zero is their mean).
                json_batch = np.zeros((1, 5))

                # Predict eye position (assumed output is normalized (x, y) coordinates)
                eye_position = model.predict(
                    {'image_input': image_batch, 'json_input': json_batch},
                    verbose=0,
                )

                # Convert normalized eye coordinates to actual frame coordinates
                eye_x = int(eye_position[0][0] * frame.shape[1])
                eye_y = int(eye_position[0][1] * frame.shape[0])

                # Draw a green dot at the predicted eye position
                cv2.circle(frame, (eye_x, eye_y), 5, (0, 255, 0), -1)

                # Display the frame with the green dot
                cv2_imshow(frame)  # Use cv2_imshow in Colab

                # Save the eye-tracking data (frame number, eye_x, eye_y) to the CSV file
                f.write(f'{frame_number}, {eye_x}, {eye_y}\n')

                # Stop the loop manually by breaking out (optional)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release the capture even if prediction or file writing fails
        cap.release()

# Run the eye tracking on webcam stream
# NOTE(review): the default video_source=0 asks OpenCV for a camera attached to
# the machine running the kernel. Presumably a hosted Colab runtime has none;
# in that case the first read fails, the loop exits at once and only the CSV
# header is written — confirm on the target runtime.
process_and_track_eye()


In [None]:
from IPython.display import display, Javascript
from google.colab.output import eval_js
import base64
import cv2
import numpy as np
import tensorflow as tf
import time

# JavaScript to handle the video stream and snapshot
def video_stream():
    """Inject the browser-side JavaScript that streams the user's webcam.

    The script creates a container with a <video> element fed by
    getUserMedia, an <img> element (window.imgElement), a 224x224 canvas
    (window.captureCanvas) and a text label (window.labelElement). On every
    animation frame the current video image is drawn onto the canvas.
    It also defines window.takeSnapshot(), which returns the canvas content
    as a JPEG data URL (quality 0.8); get_frame() calls it from Python.

    NOTE(review): the script waits on a promise whose resolver is stored in
    `pendingResolve`, and nothing in the visible code ever calls it, so the
    shutdown / removeDom path is not reached from this notebook.
    """
    js = Javascript('''
    var video;
    var div = null;
    var stream;

    async function streamVideo() {
        div = document.createElement('div');
        document.body.appendChild(div);
        div.style.textAlign = 'center';

        video = document.createElement('video');
        video.style.display = 'block';
        div.appendChild(video);

        stream = await navigator.mediaDevices.getUserMedia({video: true});
        video.srcObject = stream;
        await video.play();

        window.imgElement = document.createElement('img');
        div.appendChild(window.imgElement);

        window.captureCanvas = document.createElement('canvas');
        captureCanvas.width = 224;
        captureCanvas.height = 224;
        captureCanvas.style.display = 'block';
        div.appendChild(captureCanvas);

        window.labelElement = document.createElement('div');
        labelElement.innerText = 'Model output will appear here';
        div.appendChild(labelElement);

        var shutdown = false;
        var pendingResolve = null;

        function removeDom() {
            stream.getVideoTracks()[0].stop();
            if (div !== null) {
                div.remove();
                div = null;
                video = null;
                captureCanvas = null;
                imgElement = null;
                labelElement = null;
            }
        }

        function onAnimationFrame() {
            if (!shutdown) {
                captureCanvas.getContext('2d').drawImage(video, 0, 0, 224, 224);
                requestAnimationFrame(onAnimationFrame);
            }
        }

        requestAnimationFrame(onAnimationFrame);

        // Define takeSnapshot as a method of window
        window.takeSnapshot = async function() {
            return captureCanvas.toDataURL('image/jpeg', 0.8);
        };

        await new Promise((resolve) => {
            pendingResolve = resolve;
        });
        shutdown = true;
        removeDom();
    }

    streamVideo();
    ''')
    # Displaying the Javascript object makes the notebook front end execute it
    display(js)

def get_frame():
    """Fetch the current webcam snapshot from the browser as an OpenCV image.

    Calls the page's ``takeSnapshot()`` through ``eval_js``, takes the part of
    the returned data URL after the first comma as base64 JPEG data, and
    decodes it with ``cv2.imdecode`` in colour mode.

    Returns:
        Whatever ``cv2.imdecode`` returns for the decoded JPEG bytes.
    """
    snapshot_url = eval_js('takeSnapshot()')
    encoded_jpeg = snapshot_url.split(',')[1]
    jpeg_bytes = np.frombuffer(base64.b64decode(encoded_jpeg), dtype=np.uint8)
    return cv2.imdecode(jpeg_bytes, flags=cv2.IMREAD_COLOR)

# Load your pre-trained model
model = tf.keras.models.load_model('/content/best_model.h5')

# Start streaming video from webcam
video_stream()

# Wait a bit to let JavaScript code initialize
time.sleep(2)

# Perform a continuous loop to process the camera frames
# (interrupt the cell to stop it)
try:
    while True:
        frame = get_frame()
        if frame is None:
            # The snapshot could not be decoded; wait for the next one
            time.sleep(0.1)
            continue

        # Match the training-time preprocessing: the model was fitted on RGB
        # images resized to 224x224 and scaled to [0, 1], whereas decoded
        # frames arrive in OpenCV's BGR order with values in [0, 255].
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_frame = cv2.resize(rgb_frame, (224, 224)) / 255.0
        input_frame = np.expand_dims(input_frame, axis=0)

        # Generate JSON input (dummy for example purposes).
        # Named json_batch so the notebook-level `json_input` Keras tensor
        # from the model-definition cell is not overwritten.
        json_batch = np.zeros((1, 5))

        # Combine inputs and predict (image first, JSON features second)
        prediction = model.predict([input_frame, json_batch], verbose=0)

        # Assume model predicts x, y coordinates scaled [0, 1]
        pred_x = int(prediction[0][0] * frame.shape[1])
        pred_y = int(prediction[0][1] * frame.shape[0])

        # Draw prediction on frame
        cv2.circle(frame, (pred_x, pred_y), 10, (0, 255, 0), -1)

        # Convert frame to JPEG format and push it to the <img> element in the page
        _, jpeg_image = cv2.imencode('.jpg', frame)
        data_url_image = 'data:image/jpeg;base64,' + base64.b64encode(jpeg_image).decode('utf-8')
        eval_js(f'imgElement.src="{data_url_image}"; labelElement.innerText="Model predicted coordinates: ({pred_x}, {pred_y})";')

        # Add a short delay to control frame rate
        time.sleep(0.1)

except KeyboardInterrupt:
    # Interrupting the cell is the intended way to end the loop
    pass
