# "FaceDrive: Hands-Free Vehicle Control Using Facial Gestures in CARLA"
This project implements a real-time facial gesture control system for driving in the CARLA simulator. Using MediaPipe, it detects head tilts, mouth movements, and nods to control steering, acceleration, braking, and gear shifting—enabling intuitive, hands-free vehicle interaction with potential use in assistive driving.

## Imports

In [2]:
import mediapipe as mp
import cv2
import numpy as np

import carla
from carla import ColorConverter as cc

import datetime
import logging
import math
import random
import re
import weakref
import os
import sys
import pygame

try:
    import pygame
    from pygame.locals import KMOD_CTRL
    from pygame.locals import K_BACKSPACE
    from pygame.locals import K_COMMA
    from pygame.locals import K_DOWN
    from pygame.locals import K_ESCAPE
    from pygame.locals import K_LEFT
    from pygame.locals import K_PERIOD
    from pygame.locals import K_RIGHT
    from pygame.locals import K_SPACE
    from pygame.locals import K_UP
    from pygame.locals import K_a
    from pygame.locals import K_d
    from pygame.locals import K_m
    from pygame.locals import K_q
    from pygame.locals import K_s
    from pygame.locals import K_w
    from pygame.locals import K_f
except ImportError:
    raise RuntimeError('cannot import pygame, make sure pygame package is installed')


pygame 2.6.1 (SDL 2.28.4, Python 3.9.21)
Hello from the pygame community. https://www.pygame.org/contribute.html


## Gesture detection 

The `FaseGestureDetector` class analyzes facial gesture and features using Mediapipe landmarks. 
It provides methods to extract 2D coordinates of ears, nose, and lips, and determines head direction 
(`left`, `right`, `center`, or `undifined`) and lips position ( either mouth is open or close, or `undifined`).

In [29]:
class FaseGestureDetector():
    def __init__(self, head_dir_fact=0.5, lips_pos_fact = 1, frown_fact=1):
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_holistic = mp.solutions.holistic
        self.holistic = self.mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

        self.draw_fasemesh_landmarks = False
        self.draw_pose_landmarks = True
        self.head_dir_fact = head_dir_fact  # determines the head direction detection sensitivity. (0< and >1) 
        self.lips_pos_fact = lips_pos_fact  # determins the lips position detection sensitivity. (<0 )
        self.frown_fact = frown_fact  # determines the frowning detection sensitivity. (0<)
    
    def eye(self, facemesh_landmarks, image_shape):
        """ Returns 2D coordinates of left and right eyes based on the facemesh landmarks estimated by mediapipe"""
        if facemesh_landmarks:
            landmarks = facemesh_landmarks.landmark
            left_eye_2d = np.array([landmarks[159].x * image_shape[1],
                                landmarks[159].y * image_shape[0]], dtype=int)      # Index 33 in the facemesh landmarks belongs to the left eye
            right_eye_2d = np.array([landmarks[386].x * image_shape[1],
                                landmarks[386].y * image_shape[0]], dtype=int)     # Index 263 in the facemesh landmarks belongs to the right eye
            return left_eye_2d, right_eye_2d
        return None, None
    
    def eye_brows(self, facemesh_landmarks, image_shape):
        """ Returns 2D coordinates of left and right eyebrows based on the facemesh landmarks estimated by mediapipe"""
        if facemesh_landmarks:
            landmarks = facemesh_landmarks.landmark
            left_eyebrow_2d = np.array([landmarks[107].x * image_shape[1],
                                landmarks[107].y * image_shape[0]], dtype=int)      # Index 107 in the facemesh landmarks belongs to the left eyebrow
            right_eyebrow_2d = np.array([landmarks[336].x * image_shape[1],
                                landmarks[336].y * image_shape[0]], dtype=int)     # Index 336 in the facemesh landmarks belongs to the right eyebrow
            return left_eyebrow_2d, right_eyebrow_2d
        return None, None
    
    def ears(self, pose_landmarks, image_shape):
        """ Returns 2D coordinates of left and right ears based on the pose landmarks estimated by mediapipe"""
        if pose_landmarks:
            landmarks = pose_landmarks.landmark
            left_ear_2d = np.array([landmarks[mp.solutions.pose.PoseLandmark.LEFT_EAR].x * image_shape[1],
                                landmarks[mp.solutions.pose.PoseLandmark.LEFT_EAR].y * image_shape[0]], dtype=int)
            right_ear_2d = np.array([landmarks[mp.solutions.pose.PoseLandmark.RIGHT_EAR].x * image_shape[1],
                                landmarks[mp.solutions.pose.PoseLandmark.RIGHT_EAR].y * image_shape[0]], dtype=int)
            return left_ear_2d, right_ear_2d
        return None, None
    
    def nose(self, pose_landmarks, image_shape):
        """ Returns 2D coordinates of the nose based on the pose landmarks estimated by mediapipe"""
        if pose_landmarks:
            landmarks = pose_landmarks.landmark
            nose_2d = np.array([landmarks[mp.solutions.pose.PoseLandmark.NOSE].x * image_shape[1],
                                landmarks[mp.solutions.pose.PoseLandmark.NOSE].y * image_shape[0]], dtype=int)
            return nose_2d
        return None
    
    def lips(self, facemesh_landmarks, image_shape):
        """ Returns 2D coordinates of upper and lower lips base of the facemesh landmarks estimated by mediapipe"""
        if facemesh_landmarks:
            landmarks = facemesh_landmarks.landmark
            upper_lip_2d = np.array([landmarks[0].x * image_shape[1],
                                landmarks[0].y * image_shape[0]], dtype=int)      # Index 0 in the facemesh landmarks belongs to the upper lip
            lower_lip_2d = np.array([landmarks[17].x * image_shape[1],
                                landmarks[17].y * image_shape[0]], dtype=int)     # Index 17 in the facemesh landmarks belongs to the lower lip
            return upper_lip_2d, lower_lip_2d
        return None, None
    
    def detect_face_gesture(self, frame):
        """
        Detects the face gesture based on the pose and facemesh landmarks provided by Mediapipe.

        Returns:
            A dictionary containing:
            - 'head_detected' : boolean
            - 'face_direction': The direction of the head ('left', 'right', 'center', or None).
            - 'mouth_open': if True, mouth is open.
            An Image with the drawings of the landmarks.
        """
        face_gesture = {'face_detected': False,
                        'face_direction': None, 
                        'mouth_open': None,
                        'eye_brows_up': None,}
        
        # Recolor the frame from GBR to RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Make Detections
        results = self.holistic.process(frame)
        # Recolor image back to BGR for rendering
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        # (image height, image width) of the image in pixels
        image_shape = (frame.shape[0], frame.shape[1])
        
        if not results.pose_landmarks:
            return face_gesture, frame
        
        # drawing the landmarks on the image
        if self.draw_pose_landmarks:
            self.mp_drawing.draw_landmarks(frame, results.pose_landmarks, self.mp_holistic.POSE_CONNECTIONS)
        if self.draw_fasemesh_landmarks:
            self.mp_drawing.draw_landmarks(frame, results.face_landmarks, self.mp_holistic.FACEMESH_TESSELATION)
        
        # 2D coordinates of the left and right ears in pixels: (px, py)
        left_ear_2d, right_ear_2d = self.ears(results.pose_landmarks, image_shape)
        # 2D coordinates of the nose in pixels: (px, py)
        nose_2d = self.nose(results.pose_landmarks, image_shape)
        # 2D coordinates of the upper and lower lips in pixels: (px, py)
        upper_lip_2d, lower_lip_2d = self.lips(results.face_landmarks, image_shape)
        # 2D coordinates of the left and right eyebrows in pixels: (px, py)
        left_eyebrow_2d, right_eyebrow_2d = self.eye_brows(results.face_landmarks, image_shape)
        # 2D coordinates of the left and right eyes in pixels: (px, py)
        left_eye_2d, right_eye_2d = self.eye(results.face_landmarks, image_shape)

        # detecting the head direction
        if left_ear_2d is not None and right_ear_2d is not None and nose_2d is not None: 
            face_gesture['face_detected'] = True
            # when the face is turned to left, from camera perspective (x-y plane), left ear is closer to the nose
            if abs(left_ear_2d[0] - nose_2d[0]) / abs(right_ear_2d[0] - nose_2d[0]) < 1 - self.head_dir_fact:
                face_gesture['face_direction'] = "left"
            # when the face is turned to write, from camera perspective (x-y plane), right ear is closer to the nose
            elif abs(left_ear_2d[0] - nose_2d[0]) / abs(right_ear_2d[0] - nose_2d[0]) > 1 + self.head_dir_fact:
                face_gesture['face_direction'] = "right"
            # when the face is looking forward, left and right ears are in a same distance from the noce from the camera perspective
            else:
                face_gesture['face_direction'] = "center"   
        else: 
            face_gesture['face_detected'] = False
            face_gesture['face_direction'] = None

        # detecting the lips' position
        if upper_lip_2d is not None and lower_lip_2d is not None:
            if lower_lip_2d[1] - upper_lip_2d[1] > 30 * self.lips_pos_fact:
                face_gesture['mouth_open'] = True
            else:
                face_gesture['mouth_open'] = False
        else:
            face_gesture['mouth_open'] = None

        # detecting the eyebrows up 
        if left_eyebrow_2d is not None and right_eyebrow_2d is not None:
            if abs(left_eyebrow_2d[1] - left_eye_2d[1]) > 25  or abs(right_eyebrow_2d[1] - right_eye_2d[1]) > 25:
                face_gesture['eye_brows_up'] = True
            else:
                face_gesture['eye_brows_up'] = False
        else:
            face_gesture['eye_brows_up'] = None
        return face_gesture, frame
    

The belowing cell, just takes the frames from the camera, detects the fase gesture and shows the results.

In [30]:
# Initialize the face gesture detector
face_gesture_detector = FaseGestureDetector()
face_gesture_detector.draw_pose_landmarks = True
face_gesture_detector.draw_fasemesh_landmarks = False

cap = cv2.VideoCapture(0)
    
while cap.isOpened():
    ret, frame = cap.read()
        
    face_gesture, image = face_gesture_detector.detect_face_gesture(frame)

    if face_gesture['face_detected']:
        cv2.putText(image, f"face direction: {face_gesture['face_direction']}", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2)
    if face_gesture['mouth_open'] is not None and face_gesture['face_detected']:
        cv2.putText(image, "mouth position: open" if face_gesture['mouth_open'] else "mouth position: close", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2)
    if face_gesture['eye_brows_up'] is not None and face_gesture['face_detected']:
        cv2.putText(image, "eye_brows position: up" if face_gesture['eye_brows_up'] else "eye_brows position: down", (20, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2)
    cv2.imshow('Raw Webcam Feed', image)

    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

KeyboardInterrupt: 

In [31]:
cap.release()
cv2.destroyAllWindows()

## Carla

### Core Components

In [32]:
###############################
###     Global Functions    ###
###############################    

def get_actor_display_name(actor, truncate=250):
    name = ' '.join(actor.type_id.replace('_', '.').title().split('.')[1:])
    return (name[:truncate - 1] + u'\u2026') if len(name) > truncate else name

def get_actor_blueprints(world, filter, generation):
    bps = world.get_blueprint_library().filter(filter)

    if generation.lower() == "all":
        return bps

    # If the filter returns only one bp, we assume that this one needed
    # and therefore, we ignore the generation
    if len(bps) == 1:
        return bps

    try:
        int_generation = int(generation)
        # Check if generation is in available generations
        if int_generation in [1, 2, 3]:
            bps = [x for x in bps if int(x.get_attribute('generation')) == int_generation]
            return bps
        else:
            print("   Warning! Actor Generation is not valid. No actor will be spawned.")
            return []
    except:
        print("   Warning! Actor Generation is not valid. No actor will be spawned.")
        return []

###############################
###     Camera Manager      ###
###############################   

class CameraManager(object):
    def __init__(self, parent_actor, hud, gamma_correction):
        self.sensor = None
        self.surface = None
        self._parent = parent_actor
        self.hud = hud
        self.recording = False
        bound_x = 0.5 + self._parent.bounding_box.extent.x
        bound_y = 0.5 + self._parent.bounding_box.extent.y
        bound_z = 0.5 + self._parent.bounding_box.extent.z
        Attachment = carla.AttachmentType

        
        self._camera_transforms = [
            (carla.Transform(carla.Location(x=-2.0*bound_x, y=+0.0*bound_y, z=2.0*bound_z), carla.Rotation(pitch=8.0)), Attachment.SpringArmGhost),
            (carla.Transform(carla.Location(x=+0.8*bound_x, y=+0.0*bound_y, z=1.3*bound_z)), Attachment.Rigid),
            (carla.Transform(carla.Location(x=+1.9*bound_x, y=+1.0*bound_y, z=1.2*bound_z)), Attachment.SpringArmGhost),
            (carla.Transform(carla.Location(x=-2.8*bound_x, y=+0.0*bound_y, z=4.6*bound_z), carla.Rotation(pitch=6.0)), Attachment.SpringArmGhost),
            (carla.Transform(carla.Location(x=-1.0, y=-1.0*bound_y, z=0.4*bound_z)), Attachment.Rigid)]
        

        self.transform_index = 1
        self.sensors = [
            ['sensor.camera.rgb', cc.Raw, 'Camera RGB', {}]
        ]

        world = self._parent.get_world()
        bp_library = world.get_blueprint_library()
        for item in self.sensors:
            bp = bp_library.find(item[0])
            if item[0].startswith('sensor.camera'):
                bp.set_attribute('image_size_x', str(hud.dim[0]))
                bp.set_attribute('image_size_y', str(hud.dim[1]))
                if bp.has_attribute('gamma'):
                    bp.set_attribute('gamma', str(gamma_correction))
                for attr_name, attr_value in item[3].items():
                    bp.set_attribute(attr_name, attr_value)

                for attr_name, attr_value in item[3].items():
                    bp.set_attribute(attr_name, attr_value)
                    if attr_name == 'range':
                        self.lidar_range = float(attr_value)

            item.append(bp)
        self.index = None

    def toggle_camera(self):
        self.transform_index = (self.transform_index + 1) % len(self._camera_transforms)
        self.set_sensor(self.index, notify=False, force_respawn=True)

    def set_sensor(self, index, notify=True, force_respawn=False):
        index = index % len(self.sensors)
        needs_respawn = True if self.index is None else \
            (force_respawn or (self.sensors[index][2] != self.sensors[self.index][2]))
        if needs_respawn:
            if self.sensor is not None:
                self.sensor.destroy()
                self.surface = None
            self.sensor = self._parent.get_world().spawn_actor(
                self.sensors[index][-1],
                self._camera_transforms[self.transform_index][0],
                attach_to=self._parent,
                attachment_type=self._camera_transforms[self.transform_index][1])
            # We need to pass the lambda a weak reference to self to avoid
            # circular reference.
            weak_self = weakref.ref(self)
            self.sensor.listen(lambda image: CameraManager._parse_image(weak_self, image))
        if notify:
            self.hud.notification(self.sensors[index][2])
        self.index = index

    def next_sensor(self):
        self.set_sensor(self.index + 1)

    def toggle_recording(self):
        self.recording = not self.recording
        self.hud.notification('Recording %s' % ('On' if self.recording else 'Off'))

    def render(self, display):
        if self.surface is not None:
            display.blit(self.surface, (0, 0))

    @staticmethod
    def _parse_image(weak_self, image):
        self = weak_self()
        if not self:
            return
        
        image.convert(self.sensors[self.index][1])
        array = np.frombuffer(image.raw_data, dtype=np.dtype("uint8"))
        array = np.reshape(array, (image.height, image.width, 4))
        array = array[:, :, :3]
        array = array[:, :, ::-1]
        self.surface = pygame.surfarray.make_surface(array.swapaxes(0, 1))

        if self.recording:
            image.save_to_disk('_out/%08d' % image.frame)

###############################
###          HUD            ###
############################### 

class FadingText(object):
    def __init__(self, font, dim, pos):
        self.font = font
        self.dim = dim
        self.pos = pos
        self.seconds_left = 0
        self.surface = pygame.Surface(self.dim)

    def set_text(self, text, color=(255, 255, 255), seconds=2.0):
        text_texture = self.font.render(text, True, color)
        self.surface = pygame.Surface(self.dim)
        self.seconds_left = seconds
        self.surface.fill((0, 0, 0, 0))
        self.surface.blit(text_texture, (10, 11))

    def tick(self, _, clock):
        delta_seconds = 1e-3 * clock.get_time()
        self.seconds_left = max(0.0, self.seconds_left - delta_seconds)
        self.surface.set_alpha(500.0 * self.seconds_left)

    def render(self, display):
        display.blit(self.surface, self.pos)

class HelpText(object):
    """Helper class to handle text output using pygame"""
    def __init__(self, font, width, height):
        lines = __doc__.split('\n')
        self.font = font
        self.line_space = 18
        self.dim = (780, len(lines) * self.line_space + 12)
        self.pos = (0.5 * width - 0.5 * self.dim[0], 0.5 * height - 0.5 * self.dim[1])
        self.seconds_left = 0
        self.surface = pygame.Surface(self.dim)
        self.surface.fill((0, 0, 0, 0))
        for n, line in enumerate(lines):
            text_texture = self.font.render(line, True, (255, 255, 255))
            self.surface.blit(text_texture, (22, n * self.line_space))
            self._render = False
        self.surface.set_alpha(220)

    def toggle(self):
        self._render = not self._render

    def render(self, display):
        if self._render:
            display.blit(self.surface, self.pos)

class HUD(object):
    def __init__(self, width, height):
        self.dim = (width, height)
        font = pygame.font.Font(pygame.font.get_default_font(), 20)
        font_name = 'courier' if os.name == 'nt' else 'mono'
        fonts = [x for x in pygame.font.get_fonts() if font_name in x]
        default_font = 'ubuntumono'
        mono = default_font if default_font in fonts else fonts[0]
        mono = pygame.font.match_font(mono)
        self._font_mono = pygame.font.Font(mono, 12 if os.name == 'nt' else 14)
        self._notifications = FadingText(font, (width, 40), (0, height - 40))
        self.help = HelpText(pygame.font.Font(mono, 16), width, height)
        self.server_fps = 0
        self.frame = 0
        self.simulation_time = 0
        self._show_info = True
        self._info_text = []
        self._server_clock = pygame.time.Clock()

        self._ackermann_control = carla.VehicleAckermannControl()

    def on_world_tick(self, timestamp):
        self._server_clock.tick()
        self.server_fps = self._server_clock.get_fps()
        self.frame = timestamp.frame
        self.simulation_time = timestamp.elapsed_seconds

    def tick(self, world, clock):
        self._notifications.tick(world, clock)
        if not self._show_info:
            return
        t = world.player.get_transform()
        v = world.player.get_velocity()
        c = world.player.get_control()

        self._info_text = [
            'Server:  % 16.0f FPS' % self.server_fps,
            'Client:  % 16.0f FPS' % clock.get_fps(),
            '',
            'Vehicle: % 20s' % get_actor_display_name(world.player, truncate=20),
            'Map:     % 20s' % world.map.name.split('/')[-1],
            'Simulation time: % 12s' % datetime.timedelta(seconds=int(self.simulation_time)),
            '',
            'Speed:   % 15.0f km/h' % (3.6 * math.sqrt(v.x**2 + v.y**2 + v.z**2)),
            'Location:% 20s' % ('(% 5.1f, % 5.1f)' % (t.location.x, t.location.y)),
            'Height:  % 18.0f m' % t.location.z,
            '']
        
        self._info_text += [
            ('Throttle:', c.throttle, 0.0, 1.0),
            ('Steer:', c.steer, -1.0, 1.0),
            ('Brake:', c.brake, 0.0, 1.0),
            ('Reverse:', c.reverse),
            ('Manual:', c.manual_gear_shift),
            'Gear:        %s' % {-1: 'R', 0: 'N'}.get(c.gear, c.gear)]

    def update_ackermann_control(self, ackermann_control):
        self._ackermann_control = ackermann_control

    def toggle_info(self):
        self._show_info = not self._show_info

    def notification(self, text, seconds=2.0):
        self._notifications.set_text(text, seconds=seconds)

    def error(self, text):
        self._notifications.set_text('Error: %s' % text, (255, 0, 0))

    def render(self, display):
        if self._show_info:
            info_surface = pygame.Surface((220, self.dim[1]))
            info_surface.set_alpha(100)
            display.blit(info_surface, (0, 0))
            v_offset = 4
            bar_h_offset = 100
            bar_width = 106
            for item in self._info_text:
                if v_offset + 18 > self.dim[1]:
                    break
                if isinstance(item, list):
                    if len(item) > 1:
                        points = [(x + 8, v_offset + 8 + (1.0 - y) * 30) for x, y in enumerate(item)]
                        pygame.draw.lines(display, (255, 136, 0), False, points, 2)
                    item = None
                    v_offset += 18
                elif isinstance(item, tuple):
                    if isinstance(item[1], bool):
                        rect = pygame.Rect((bar_h_offset, v_offset + 8), (6, 6))
                        pygame.draw.rect(display, (255, 255, 255), rect, 0 if item[1] else 1)
                    else:
                        rect_border = pygame.Rect((bar_h_offset, v_offset + 8), (bar_width, 6))
                        pygame.draw.rect(display, (255, 255, 255), rect_border, 1)
                        f = (item[1] - item[2]) / (item[3] - item[2])
                        if item[2] < 0.0:
                            rect = pygame.Rect((bar_h_offset + f * (bar_width - 6), v_offset + 8), (6, 6))
                        else:
                            rect = pygame.Rect((bar_h_offset, v_offset + 8), (f * bar_width, 6))
                        pygame.draw.rect(display, (255, 255, 255), rect)
                    item = item[0]
                if item:  # At this point has to be a str.
                    surface = self._font_mono.render(item, True, (255, 255, 255))
                    display.blit(surface, (8, v_offset))
                v_offset += 18
        self._notifications.render(display)
        self.help.render(display)

###############################
###         World           ###
############################### 

class World(object):
    def __init__(self, carla_world, hud, args):
        self.world = carla_world
        self.sync = args['sync']
        self.render_camera = args['rend_cam']
        try:
            self.map = self.world.get_map()
        except RuntimeError as error:
            print('RuntimeError: {}'.format(error))
            print('  The server could not send the OpenDRIVE (.xodr) file:')
            print('  Make sure it exists, has the same name of your town, and is correct.')
            sys.exit(1)
        self.hud = hud
        self.player = None
        self.camera_manager = None
        self._actor_filter = args['filter']
        self._actor_generation = args['generation']
        self._gamma = args['gamma']
        self.restart()
        self.world.on_tick(hud.on_world_tick)

    def restart(self):
        self.player_max_speed = 1.589
        self.player_max_speed_fast = 3.713
        # Keep same camera config if the camera manager exists.
        cam_index = self.camera_manager.index if self.camera_manager is not None else 0
        cam_pos_index = self.camera_manager.transform_index if self.camera_manager is not None else 0
        # Get a random blueprint.
        blueprint_list = get_actor_blueprints(self.world, self._actor_filter, self._actor_generation)
        if not blueprint_list:
            raise ValueError("Couldn't find any blueprints with the specified filters")
        blueprint = random.choice(blueprint_list)
        if blueprint.has_attribute('terramechanics'):
            blueprint.set_attribute('terramechanics', 'true')
        if blueprint.has_attribute('color'):
            color = random.choice(blueprint.get_attribute('color').recommended_values)
            blueprint.set_attribute('color', color)
        if blueprint.has_attribute('driver_id'):
            driver_id = random.choice(blueprint.get_attribute('driver_id').recommended_values)
            blueprint.set_attribute('driver_id', driver_id)
        if blueprint.has_attribute('is_invincible'):
            blueprint.set_attribute('is_invincible', 'true')
        # set the max speed
        if blueprint.has_attribute('speed'):
            self.player_max_speed = float(blueprint.get_attribute('speed').recommended_values[1])
            self.player_max_speed_fast = float(blueprint.get_attribute('speed').recommended_values[2])

        # Spawn the player.
        if self.player is not None:
            spawn_point = self.player.get_transform()
            spawn_point.location.z += 2.0
            spawn_point.rotation.roll = 0.0
            spawn_point.rotation.pitch = 0.0
            self.destroy()
            self.player = self.world.try_spawn_actor(blueprint, spawn_point)
            self.modify_vehicle_physics(self.player)
        while self.player is None:
            if not self.map.get_spawn_points():
                print('There are no spawn points available in your map/town.')
                print('Please add some Vehicle Spawn Point to your UE4 scene.')
                sys.exit(1)
            spawn_points = self.map.get_spawn_points()
            spawn_point = random.choice(spawn_points) if spawn_points else carla.Transform()
            self.player = self.world.try_spawn_actor(blueprint, spawn_point)
            self.show_vehicle_telemetry = False
            self.modify_vehicle_physics(self.player)
        # set sensors
        self.camera_manager = CameraManager(self.player, self.hud, self._gamma)
        self.camera_manager.transform_index = cam_pos_index
        self.camera_manager.set_sensor(cam_index, notify=False)
        actor_type = get_actor_display_name(self.player)
        self.hud.notification(actor_type)

        if self.sync:
            self.world.tick()
        else:
            self.world.wait_for_tick()

    def modify_vehicle_physics(self, actor):
        #If actor is not a vehicle, we cannot use the physics control
        try:
            physics_control = actor.get_physics_control()
            physics_control.use_sweep_wheel_collision = True
            actor.apply_physics_control(physics_control)
        except Exception:
            pass

    def tick(self, clock):
        self.hud.tick(self, clock)

    def render(self, display):
        if self.render_camera:
            self.camera_manager.render(display)
        self.hud.render(display)

    def destroy_sensors(self):
        self.camera_manager.sensor.destroy()
        self.camera_manager.sensor = None
        self.camera_manager.index = None

    def destroy(self):
        sensors = [self.camera_manager.sensor]
        for sensor in sensors:
            if sensor is not None:
                sensor.stop()
                sensor.destroy()
        if self.player is not None:
            self.player.destroy()

###############################
###   Face Controller   ###
############################### 

class FaceControl(object):
    """Class that handles keyboard input."""
    def __init__(self, world):
        self._ackermann_enabled = False
        self._ackermann_reverse = 1
        # face control flag
        self._face_control_enabled = False

        self._control = carla.VehicleControl()
        self._ackermann_control = carla.VehicleAckermannControl()
        self._lights = carla.VehicleLightState.NONE

        world.player.set_light_state(self._lights)
        
        self._steer_cache = 0.0
        world.hud.notification("Press 'H' or '?' for help.", seconds=4.0)

        # Initialize the face gesture detector
        self.face_gesture_detector = FaseGestureDetector()
        self.face_gesture_detector.draw_pose_landmarks = True
        self.face_gesture_detector.draw_fasemesh_landmarks = False

    def parse_events(self, frame, world, clock):
        # Detecting the face gesture with mediapipe 
        face_gesture, image = self.face_gesture_detector.detect_face_gesture(frame)
        cv2.putText(image, f"head direction: {face_gesture['face_direction']}", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2)
        cv2.putText(image, f"lips position: {face_gesture['mouth_open']}", (20, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2)
        cv2.imshow('Raw Webcam Feed', image)

        current_lights = self._lights

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                return True
            elif event.type == pygame.KEYUP:
                # quit
                if self._is_quit_shortcut(event.key):
                    return True
                
                elif event.key == K_q:
                    if not self._ackermann_enabled:
                        self._control.gear = 1 if self._control.reverse else -1
                    else:
                        self._ackermann_reverse *= -1
                        # Reset ackermann control
                        self._ackermann_control = carla.VehicleAckermannControl()
                
                # Switch the manuel gear shift
                elif event.key == K_m:
                    self._control.manual_gear_shift = not self._control.manual_gear_shift
                    self._control.gear = world.player.get_control().gear
                    world.hud.notification('%s Transmission' % ('Manual' if self._control.manual_gear_shift else 'Automatic'))
                # shift down
                elif self._control.manual_gear_shift and event.key == K_COMMA:
                    self._control.gear = max(-1, self._control.gear - 1)
                # shift up
                elif self._control.manual_gear_shift and event.key == K_PERIOD:
                    self._control.gear = self._control.gear + 1

                # switch the face control enable
                elif event.key == K_f:
                    self._face_control_enabled = not self._face_control_enabled
        
        # if face_drive is enabled, use face control, otherwise use manuel control 
        if not self._face_control_enabled:
            self._parse_vehicle_keys(pygame.key.get_pressed(), clock.get_time())
        else:
            self._parse_face_gesture(face_gesture, clock.get_time())

        self._control.reverse = self._control.gear < 0
        # Set automatic control-related vehicle lights
        if self._control.brake:
            current_lights |= carla.VehicleLightState.Brake
        else: # Remove the Brake flag
            current_lights &= ~carla.VehicleLightState.Brake
        if self._control.reverse:
            current_lights |= carla.VehicleLightState.Reverse
        else: # Remove the Reverse flag
            current_lights &= ~carla.VehicleLightState.Reverse
        if current_lights != self._lights: # Change the light state only if necessary
            self._lights = current_lights
            world.player.set_light_state(carla.VehicleLightState(self._lights))
        # Apply control
        if not self._ackermann_enabled:
            world.player.apply_control(self._control)
        else:
            world.player.apply_ackermann_control(self._ackermann_control)
            # Update control to the last one applied by the ackermann controller.
            self._control = world.player.get_control()
            # Update hud with the newest ackermann control
            world.hud.update_ackermann_control(self._ackermann_control)

    def _parse_face_gesture(self, face_gesture, milliseconds): 
        # if any face is detected, accelerate
        if face_gesture['mouth_open'] and not face_gesture['eye_brows_up']:
            if not self._ackermann_enabled:
                self._control.throttle = min(self._control.throttle + 0.1, 1.00)
            else:
                self._ackermann_control.speed += round(milliseconds * 0.005, 2) * self._ackermann_reverse
        else:
            if not self._ackermann_enabled:
                self._control.throttle = 0.0
        # if mouth is open, break
        if face_gesture['eye_brows_up']:
            if not self._ackermann_enabled:
                self._control.brake = min(self._control.brake + 0.2, 1)
            else:
                self._ackermann_control.speed -= min(abs(self._ackermann_control.speed), round(milliseconds * 0.005, 2)) * self._ackermann_reverse
                self._ackermann_control.speed = max(0, abs(self._ackermann_control.speed)) * self._ackermann_reverse
        else:
            if not self._ackermann_enabled:
                self._control.brake = 0

        steer_increment = 5e-4 * milliseconds
        # if face is looking to the left, steer to the left
        if face_gesture['face_detected'] and face_gesture['face_direction'] == 'left':
            if self._steer_cache > 0:
                self._steer_cache = 0
            else:
                self._steer_cache -= steer_increment
        # if face is looking to the right, steer to the right
        elif face_gesture['face_detected'] and face_gesture['face_direction'] == 'right':
            if self._steer_cache < 0:
                self._steer_cache = 0
            else:
                self._steer_cache += steer_increment
        else:
            self._steer_cache = 0.0
        self._steer_cache = min(0.7, max(-0.7, self._steer_cache))
        if not self._ackermann_enabled:
            self._control.steer = round(self._steer_cache, 1)
        else:
            self._ackermann_control.steer = round(self._steer_cache, 1)

    def _parse_vehicle_keys(self, keys, milliseconds):
        if keys[K_UP] or keys[K_w]:
            if not self._ackermann_enabled:
                self._control.throttle = min(self._control.throttle + 0.1, 1.00)
            else:
                self._ackermann_control.speed += round(milliseconds * 0.005, 2) * self._ackermann_reverse
        else:
            if not self._ackermann_enabled:
                self._control.throttle = 0.0

        if keys[K_DOWN] or keys[K_s]:
            if not self._ackermann_enabled:
                self._control.brake = min(self._control.brake + 0.2, 1)
            else:
                self._ackermann_control.speed -= min(abs(self._ackermann_control.speed), round(milliseconds * 0.005, 2)) * self._ackermann_reverse
                self._ackermann_control.speed = max(0, abs(self._ackermann_control.speed)) * self._ackermann_reverse
        else:
            if not self._ackermann_enabled:
                self._control.brake = 0

        steer_increment = 5e-4 * milliseconds
        if keys[K_LEFT] or keys[K_a]:
            if self._steer_cache > 0:
                self._steer_cache = 0
            else:
                self._steer_cache -= steer_increment
        elif keys[K_RIGHT] or keys[K_d]:
            if self._steer_cache < 0:
                self._steer_cache = 0
            else:
                self._steer_cache += steer_increment
        else:
            self._steer_cache = 0.0
        self._steer_cache = min(0.7, max(-0.7, self._steer_cache))
        if not self._ackermann_enabled:
            self._control.steer = round(self._steer_cache, 1)
            self._control.hand_brake = keys[K_SPACE]
        else:
            self._ackermann_control.steer = round(self._steer_cache, 1)

    @staticmethod
    def _is_quit_shortcut(key):
        return (key == K_ESCAPE) or (key == K_q and pygame.key.get_mods() & KMOD_CTRL)
    


Use ARROWS or WASD keys for control.

    Up           : throttle
    Down         : brake
    Left/Right   : steer left/right
    Q            : toggle reverse
    Space        : hand-brake
    M            : toggle manual transmission
    ,/.          : gear up/down
    CTRL + W     : toggle constant velocity mode at 60 km/h

    F1           : toggle HUD
    H/?          : toggle help
    ESC          : quit


### Game loop

In [None]:
#################################
###   Face drive game loop    ###
#################################

def face_drive_game_loop(args):
    pygame.init()
    pygame.font.init()
    world = None
    original_settings = None

    try:
        client = carla.Client(args['host'], args['port'])
        client.set_timeout(2000.0)

        sim_world = client.get_world()
        
        display = pygame.display.set_mode(
            (args['width'], args['height']),
            pygame.HWSURFACE | pygame.DOUBLEBUF)
        display.fill((0,0,0))
        pygame.display.flip()

        hud = HUD(args['width'], args['height'])
        world = World(sim_world, hud, args)

        controller = FaceControl(world)
        
        sim_world.wait_for_tick()

        clock = pygame.time.Clock()
        
        cap = cv2.VideoCapture(0)
    
        while cap.isOpened():
            ret, frame = cap.read()

            clock.tick_busy_loop(60)

            if controller.parse_events(frame, world, clock):
                return
            
            world.tick(clock)
            world.render(display)
            pygame.display.flip()

    finally:
        if original_settings:
            sim_world.apply_settings(original_settings)

        if world is not None:
            world.destroy()
        
        cap.release()
        cv2.destroyAllWindows()

        pygame.quit()

### Main

In [9]:
def main():
    args = {'debug': False,
            'host': '35.199.85.236',
            'port': 2000,
            'res': '640x360',
            'filter': 'vehicle.*',
            'generation': '2',
            'gamma': 2.2,
            'sync': False,
            'rend_cam' : True}
    
    args['width'], args['height'] = [int(x) for x in args['res'].split('x')]

    log_level = logging.DEBUG if args['debug'] else logging.INFO
    logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

    logging.info('listening to server %s:%s', args['host'], args['port'])

    print(__doc__)

    try:
        #manuel_game_loop(args)
        face_drive_game_loop(args)
        
    except KeyboardInterrupt:
        print('\nCancelled by user. Bye!')

In [None]:
main()

INFO: listening to server 35.199.85.236:2000


Automatically created module for IPython interactive environment

Cancelled by user. Bye!


: 

NameError: name 'cap' is not defined