In [None]:
import logging

import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from deep_translator import GoogleTranslator
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Runtime configuration for this cell: the detector, the capture device and the
# settings used when text is rendered.
# NOTE(review): both file paths below are absolute paths on one machine and
# must be adjusted before the cell can run anywhere else.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")
cap = cv2.VideoCapture(0)  # capture device index 0

# font_face, thickness, font_scale and font_thickness are not referenced by
# any live code in this cell (only by a commented-out cv2.getTextSize call).
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (0, 255, 0)  # passed as ``fill`` when the text is drawn with PIL
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"
font_size = 32  # Size handed to ImageFont.truetype; adjust as needed
# OpenCV-style font properties (see the note above)
font_scale = 1
font_thickness = 2

# Set up logging: INFO and above go to the root logger
logging.basicConfig(level=logging.INFO)


# Translations already fetched, keyed by the lower-cased source text, so each
# distinct label costs at most one request to the translation service.
_TRANSLATION_CACHE = {}


def translate_to_arabic(text):
    """
    Translate ``text`` into Arabic, letting the service detect the source language.

    The text is lower-cased before it is sent. Results are memoised because
    this function is called for every detection of every frame; without the
    cache each call builds a new ``GoogleTranslator`` and performs a request.

    :param text: text to translate
    :return: whatever ``GoogleTranslator.translate`` returns for the lower-cased text
    """
    key = text.lower()
    if key not in _TRANSLATION_CACHE:
        _TRANSLATION_CACHE[key] = GoogleTranslator(source='auto', target='ar').translate(key)
    return _TRANSLATION_CACHE[key]


def convert_text_to_image(text, background_color=(255, 255, 255)):
    """
    Render ``text`` centred on a solid-colour canvas and return it as an array.

    The canvas is at least 100x100 pixels and grows when the rendered text
    plus a small margin would not fit, so long strings are not clipped.

    :param text: string to draw
    :param background_color: fill colour of the RGB canvas
    :return: NumPy array of the rendered RGB image, shape (height, width, 3)
    """
    min_side = 100  # the original fixed canvas size, kept as the minimum
    margin = 10     # room left around text that forces the canvas to grow

    font = ImageFont.truetype(font_path, font_size)

    # ImageDraw.textsize was removed in Pillow 10; textbbox is its replacement.
    # Measure on a throw-away image because the canvas size depends on the result.
    probe = ImageDraw.Draw(Image.new('RGB', (1, 1)))
    left, top, right, bottom = probe.textbbox((0, 0), text, font=font)
    text_width = int(right - left)
    text_height = int(bottom - top)

    canvas_size = (max(min_side, text_width + margin), max(min_side, text_height + margin))
    image = Image.new('RGB', canvas_size, background_color)
    draw = ImageDraw.Draw(image)

    # The bounding box does not start at the drawing origin, so shift by
    # (left, top) to centre the visible glyphs rather than the origin.
    text_x = (image.width - text_width) // 2 - left
    text_y = (image.height - text_height) // 2 - top
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image)


def get_arabic_text(text):
    """
    Reshape ``text`` with arabic_reshaper, then reorder it with ``get_display``.

    :param text: text to prepare for drawing
    :return: the reshaped text after bidi reordering
    """
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """
    Paste ``text_image`` into the centre of ``frame``, in place.

    A patch larger than the frame is cropped to the part that falls inside
    the frame instead of raising a shape-mismatch error.

    :param frame: destination image array, modified in place
    :param text_image: patch to paste; must have the same channel count as ``frame``
    :return: ``frame`` (the same object that was passed in)
    """
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = text_image.shape[:2]

    # Top-left corner of the centred patch; negative when the patch is larger.
    x = (frame_w - patch_w) // 2
    y = (frame_h - patch_h) // 2

    # Intersection of the patch rectangle with the frame, in frame coordinates.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + patch_w, frame_w), min(y + patch_h, frame_h)

    if x0 < x1 and y0 < y1:
        frame[y0:y1, x0:x1] = text_image[y0 - y:y1 - y, x0 - x:x1 - x]

    return frame


def detect_objects(frame):
    """
    Run the YOLO model on ``frame``, overlay the Arabic label of each
    confident detection and show the frame.

    Detections below 0.5 confidence are skipped one by one; the frame is
    always displayed, even when no detection is confident enough.

    :param frame: image array to run detection on; the overlay is written into it
    :return: None
    """
    min_confidence = 0.5

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame
    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        conf = round(box.conf[0].item(), 2)

        # Skip only this detection. Returning here would abandon the whole
        # frame, so the window would not be refreshed. Checking before the
        # translation also avoids a translator call for labels never drawn.
        if conf < min_confidence:
            logging.info("Skipping low-confidence detection: {} ({})".format(class_id, conf))
            continue

        expected_text = get_arabic_text(translate_to_arabic(class_id))
        logging.info("Object type:{} {}".format(expected_text, class_id))
        logging.info("Coordinates: {}".format(cords))
        logging.info("Probability: {}".format(conf))

        # Every label is pasted at the same spot, so the last one drawn wins.
        text_image = convert_text_to_image(expected_text)
        frame_with_text = overlay_text_on_frame(frame, text_image)

    # Display the (possibly modified) frame
    cv2.imshow("Object Detection", frame_with_text)


def process_frames():
    """
    Read one frame from the camera, bring it to 800x600 and run detection on it.

    :raises RuntimeError: if the capture device does not deliver a frame
    :return: None
    """
    target_size = (800, 600)  # (width, height), the order cv2.resize expects

    ret, frame = cap.read()
    if not ret or frame is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("Could not read a frame from the camera")

    # frame.shape is (height, width, channels); compare in (width, height)
    # order so an already correctly sized frame is not resized again.
    if (frame.shape[1], frame.shape[0]) != target_size:
        frame = cv2.resize(frame, target_size)

    # Perform object detection and modification on the frame
    detect_objects(frame)


# Main loop: process camera frames until 'q' is pressed in the OpenCV window.
try:
    while True:
        process_frames()
        # waitKey also gives the OpenCV window time to repaint.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close the window even when a frame
    # fails, so the camera is not left held by this process.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import logging

import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont


# Runtime configuration for this cell: the detector, the capture device and the
# settings used when text is rendered.
# NOTE(review): both file paths below are absolute paths on one machine and
# must be adjusted before the cell can run anywhere else.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")
cap = cv2.VideoCapture(0)  # capture device index 0

# font_face, thickness, font_scale and font_thickness are not referenced by
# any function in this cell.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # passed as ``fill`` when the text is drawn with PIL
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"
font_size = 32  # Size handed to ImageFont.truetype; adjust as needed
# OpenCV-style font properties (see the note above)
font_scale = 1
font_thickness = 2

# Set up logging: INFO and above go to the root logger
logging.basicConfig(level=logging.INFO)

# Lookup table from a detected class name to the Arabic text shown for it.
# translate_to_arabic() queries it with the name taken from ``result.names``
# and falls back to the name itself when there is no entry.
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}
 


def translate_to_arabic(text):
    """
    Look ``text`` up in ``translation_dict``.

    :param text: class name to translate
    :return: the mapped Arabic text, or ``text`` itself when it has no entry
    """
    # If the word is not in the dictionary, the original text is used.
    return translation_dict.get(text, text)


def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` on a canvas sized to fit it and return it as an RGB array.

    The canvas is the text's bounding box plus 10 pixels in each dimension,
    with the text centred. Drawing happens on an RGBA image that is then
    converted to RGB, so the default background ends up black.

    :param text: string to draw
    :param background_color: RGBA fill colour of the canvas
    :return: NumPy array of the rendered RGB image, shape (height, width, 3)
    """
    padding = 10

    font = ImageFont.truetype(font_path, font_size)

    # ImageDraw.textsize was removed in Pillow 10; textbbox is its replacement.
    # Measure on a throw-away image because the canvas size depends on the result.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    left, top, right, bottom = probe.textbbox((0, 0), text, font=font)
    text_width = int(right - left)
    text_height = int(bottom - top)

    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # The bounding box does not start at the drawing origin, so shift by
    # (left, top) to centre the visible glyphs rather than the origin.
    text_x = (image.width - text_width) // 2 - left
    text_y = (image.height - text_height) // 2 - top
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """
    Reshape ``text`` with arabic_reshaper, then reorder it with ``get_display``.

    :param text: text to prepare for drawing
    :return: the reshaped text after bidi reordering
    """
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """
    Paste ``text_image`` into ``frame`` in place, horizontally centred and
    10 pixels above the bottom edge.

    A patch that does not fit is cropped to the part that falls inside the
    frame instead of raising a shape-mismatch error.

    :param frame: destination image array, modified in place
    :param text_image: patch to paste; must have the same channel count as ``frame``
    :return: ``frame`` (the same object that was passed in)
    """
    padding = 10
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = text_image.shape[:2]

    # Top-left corner of the patch; negative when the patch does not fit.
    y = frame_h - patch_h - padding
    x = (frame_w - patch_w) // 2

    # Intersection of the patch rectangle with the frame, in frame coordinates.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + patch_w, frame_w), min(y + patch_h, frame_h)

    if x0 < x1 and y0 < y1:
        frame[y0:y1, x0:x1] = text_image[y0 - y:y1 - y, x0 - x:x1 - x]

    return frame

def detect_objects(frame):
    """
    Run the YOLO model on ``frame``, overlay the Arabic label of each
    confident detection and show the frame.

    Every detection is logged. Detections below 0.5 confidence are skipped
    one by one; the frame is always displayed, even when no detection is
    confident enough.

    :param frame: image array to run detection on; the overlay is written into it
    :return: None
    """
    min_confidence = 0.5

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame
    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        expected_text = get_arabic_text(translate_to_arabic(class_id))
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:{} {}".format(expected_text, class_id))
        logging.info("Coordinates: {}".format(cords))
        logging.info("Probability: {}".format(conf))

        # Skip only this detection. Returning here would abandon the whole
        # frame, so the window would not be refreshed.
        if conf < min_confidence:
            continue

        # Every label is pasted at the same spot, so the last one drawn wins.
        text_image = convert_text_to_image(expected_text)
        frame_with_text = overlay_text_on_frame(frame, text_image)

    # Display the (possibly modified) frame
    cv2.imshow("Object Detection", frame_with_text)


def process_frames():
    """
    Read one frame from the camera, bring it to 800x600 and run detection on it.

    :raises RuntimeError: if the capture device does not deliver a frame
    :return: None
    """
    target_size = (800, 600)  # (width, height), the order cv2.resize expects

    ret, frame = cap.read()
    if not ret or frame is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("Could not read a frame from the camera")

    # frame.shape is (height, width, channels); compare in (width, height)
    # order so an already correctly sized frame is not resized again.
    if (frame.shape[1], frame.shape[0]) != target_size:
        frame = cv2.resize(frame, target_size)

    # Perform object detection and modification on the frame
    detect_objects(frame)


# Main loop: process camera frames until 'q' is pressed in the OpenCV window.
try:
    while True:
        process_frames()
        # waitKey also gives the OpenCV window time to repaint.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close the window even when a frame
    # fails, so the camera is not left held by this process.
    cap.release()
    cv2.destroyAllWindows()



In [None]:
import logging
import time

import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from gtts import gTTS
import os

# Runtime configuration for this cell: the detector, the capture device and the
# settings used when text is rendered.
# NOTE(review): both file paths below are absolute paths on one machine and
# must be adjusted before the cell can run anywhere else.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")
cap = cv2.VideoCapture(0)  # capture device index 0

# font_face, thickness, font_scale and font_thickness are not referenced by
# any function in this cell.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # passed as ``fill`` when the text is drawn with PIL
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"
font_size = 32  # Size handed to ImageFont.truetype; adjust as needed
# OpenCV-style font properties (see the note above)
font_scale = 1
font_thickness = 2

# Set up logging: INFO and above go to the root logger
logging.basicConfig(level=logging.INFO)

# Lookup table from a detected class name to the Arabic text shown for it.
# translate_to_arabic() queries it with the name taken from ``result.names``
# and falls back to the name itself when there is no entry.
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}

# Time limit for word formation (in seconds); detect_objects() logs and
# clears the word once this much time has passed since ``start_time``.
formation_time_limit = 1

# Module-level state that detect_objects() updates through ``global``.
formed_word = ""  # Variable to store the formed word
start_time = time.time()  # Variable to track the start time for word formation


def translate_to_arabic(text):
    """
    Look ``text`` up in ``translation_dict``.

    :param text: class name to translate
    :return: the mapped Arabic text, or ``text`` itself when it has no entry
    """
    # If the word is not in the dictionary, the original text is used.
    return translation_dict.get(text, text)


def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` on a canvas sized to fit it and return it as an RGB array.

    The canvas is the text's bounding box plus 10 pixels in each dimension,
    with the text centred. Drawing happens on an RGBA image that is then
    converted to RGB, so the default background ends up black.

    :param text: string to draw
    :param background_color: RGBA fill colour of the canvas
    :return: NumPy array of the rendered RGB image, shape (height, width, 3)
    """
    padding = 10

    font = ImageFont.truetype(font_path, font_size)

    # ImageDraw.textsize was removed in Pillow 10; textbbox is its replacement.
    # Measure on a throw-away image because the canvas size depends on the result.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    left, top, right, bottom = probe.textbbox((0, 0), text, font=font)
    text_width = int(right - left)
    text_height = int(bottom - top)

    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # The bounding box does not start at the drawing origin, so shift by
    # (left, top) to centre the visible glyphs rather than the origin.
    text_x = (image.width - text_width) // 2 - left
    text_y = (image.height - text_height) // 2 - top
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """
    Reshape ``text`` with arabic_reshaper, then reorder it with ``get_display``.

    :param text: text to prepare for drawing
    :return: the reshaped text after bidi reordering
    """
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """
    Paste ``text_image`` into ``frame`` in place, horizontally centred and
    10 pixels above the bottom edge.

    A patch that does not fit is cropped to the part that falls inside the
    frame instead of raising a shape-mismatch error.

    :param frame: destination image array, modified in place
    :param text_image: patch to paste; must have the same channel count as ``frame``
    :return: ``frame`` (the same object that was passed in)
    """
    padding = 10
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = text_image.shape[:2]

    # Top-left corner of the patch; negative when the patch does not fit.
    y = frame_h - patch_h - padding
    x = (frame_w - patch_w) // 2

    # Intersection of the patch rectangle with the frame, in frame coordinates.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + patch_w, frame_w), min(y + patch_h, frame_h)

    if x0 < x1 and y0 < y1:
        frame[y0:y1, x0:x1] = text_image[y0 - y:y1 - y, x0 - x:x1 - x]

    return frame

def detect_objects(frame):
    """
    Detect signs in ``frame``, append each confident one to the word being
    formed, draw that word on the frame and show it.

    The word is accumulated in logical (unshaped) order and reshaped as a
    whole when it is drawn; concatenating letters that were already reshaped
    one at a time leaves them unjoined and in the wrong visual order. Once
    ``formation_time_limit`` seconds have passed since ``start_time``, the
    next confident detection logs the word and starts a new one.

    :param frame: image array to run detection on; the overlay is written into it
    :return: None
    """
    global formed_word, start_time  # both are updated as the word grows or resets

    min_confidence = 0.8

    result = yolo_model.predict(frame)[0]
    word_to_show = formed_word
    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        translated_text = translate_to_arabic(class_id)
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:{} {}".format(get_arabic_text(translated_text), class_id))
        logging.info("Coordinates: {}".format(cords))
        logging.info("Probability: {}".format(conf))

        # Skip only this detection. Returning here would abandon the whole
        # frame, so the window would not be refreshed.
        if conf < min_confidence:
            continue

        # NOTE(review): a sign held across several frames is appended once
        # per frame; confirm whether repeats should be collapsed.
        formed_word += translated_text
        word_to_show = formed_word

        # Check if the formed word has reached the formation time limit
        if time.time() - start_time >= formation_time_limit:
            logging.info("Formed Word: {}".format(formed_word))
            formed_word = ""
            start_time = time.time()

    # Draw after the loop so the letter detected in this frame is visible.
    if word_to_show:
        text_image = convert_text_to_image(get_arabic_text(word_to_show))
        overlay_text_on_frame(frame, text_image)

    # Display the (possibly modified) frame
    cv2.imshow("Object Detection", frame)


    
def process_frames():
    """
    Read one frame from the camera, bring it to 800x600 and run detection on it.

    :raises RuntimeError: if the capture device does not deliver a frame
    :return: None
    """
    target_size = (800, 600)  # (width, height), the order cv2.resize expects

    ret, frame = cap.read()
    if not ret or frame is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("Could not read a frame from the camera")

    # frame.shape is (height, width, channels); compare in (width, height)
    # order so an already correctly sized frame is not resized again.
    if (frame.shape[1], frame.shape[0]) != target_size:
        frame = cv2.resize(frame, target_size)

    # Perform object detection and modification on the frame
    detect_objects(frame)


# Main loop: process camera frames until 'q' is pressed in the OpenCV window.
try:
    while True:
        process_frames()
        # waitKey also gives the OpenCV window time to repaint.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close the window even when a frame
    # fails, so the camera is not left held by this process.
    cap.release()
    cv2.destroyAllWindows()

## Assembly: combining the pieces

In [None]:
import concurrent
import logging

import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from deep_translator import GoogleTranslator
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Runtime configuration for this cell: the detector, the capture device and the
# settings used when text is rendered.
# NOTE(review): all file paths below are absolute paths on one machine and
# must be adjusted before the cell can run anywhere else.
# NOTE(review): the second positional argument is a dataset YAML; in current
# ultralytics releases the second parameter of YOLO() appears to be ``task``,
# so confirm that this argument is accepted and has the intended effect.
yolo_model = YOLO("/Users/mernaziad/Downloads/yolo8/runs/detect/train/weights/best.pt",
                  "/Users/mernaziad/Downloads/yolo8/datasets/Alphabets-1/data.yaml")
cap = cv2.VideoCapture(0)  # capture device index 0

# font_face, thickness, font_scale and font_thickness are not referenced by
# any live code in this cell (only by a commented-out cv2.getTextSize call).
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (0, 255, 0)  # passed as ``fill`` when the text is drawn with PIL
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"
font_size = 32  # Size handed to ImageFont.truetype; adjust as needed
# OpenCV-style font properties (see the note above)
font_scale = 1
font_thickness = 2

# Set up logging: INFO and above go to the root logger
logging.basicConfig(level=logging.INFO)


# Translations already fetched, keyed by the lower-cased source text, so each
# distinct label costs at most one request to the translation service.
_TRANSLATION_CACHE = {}


def translate_to_arabic(text):
    """
    Translate ``text`` into Arabic, letting the service detect the source language.

    The text is lower-cased before it is sent. Results are memoised because
    this function is called for every detection of every frame; without the
    cache each call builds a new ``GoogleTranslator`` and performs a request.

    :param text: text to translate
    :return: whatever ``GoogleTranslator.translate`` returns for the lower-cased text
    """
    key = text.lower()
    if key not in _TRANSLATION_CACHE:
        _TRANSLATION_CACHE[key] = GoogleTranslator(source='auto', target='ar').translate(key)
    return _TRANSLATION_CACHE[key]


def convert_text_to_image(text, background_color=(255, 255, 255)):
    """
    Render ``text`` centred on a solid-colour canvas and return it as an array.

    The canvas is at least 100x100 pixels and grows when the rendered text
    plus a small margin would not fit, so long strings are not clipped.

    :param text: string to draw
    :param background_color: fill colour of the RGB canvas
    :return: NumPy array of the rendered RGB image, shape (height, width, 3)
    """
    min_side = 100  # the original fixed canvas size, kept as the minimum
    margin = 10     # room left around text that forces the canvas to grow

    font = ImageFont.truetype(font_path, font_size)

    # ImageDraw.textsize was removed in Pillow 10; textbbox is its replacement.
    # Measure on a throw-away image because the canvas size depends on the result.
    probe = ImageDraw.Draw(Image.new('RGB', (1, 1)))
    left, top, right, bottom = probe.textbbox((0, 0), text, font=font)
    text_width = int(right - left)
    text_height = int(bottom - top)

    canvas_size = (max(min_side, text_width + margin), max(min_side, text_height + margin))
    image = Image.new('RGB', canvas_size, background_color)
    draw = ImageDraw.Draw(image)

    # The bounding box does not start at the drawing origin, so shift by
    # (left, top) to centre the visible glyphs rather than the origin.
    text_x = (image.width - text_width) // 2 - left
    text_y = (image.height - text_height) // 2 - top
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image)


def get_arabic_text(text):
    """
    Reshape ``text`` with arabic_reshaper, then reorder it with ``get_display``.

    :param text: text to prepare for drawing
    :return: the reshaped text after bidi reordering
    """
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """
    Paste ``text_image`` into the centre of ``frame``, in place.

    A patch larger than the frame is cropped to the part that falls inside
    the frame instead of raising a shape-mismatch error.

    :param frame: destination image array, modified in place
    :param text_image: patch to paste; must have the same channel count as ``frame``
    :return: ``frame`` (the same object that was passed in)
    """
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = text_image.shape[:2]

    # Top-left corner of the centred patch; negative when the patch is larger.
    x = (frame_w - patch_w) // 2
    y = (frame_h - patch_h) // 2

    # Intersection of the patch rectangle with the frame, in frame coordinates.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + patch_w, frame_w), min(y + patch_h, frame_h)

    if x0 < x1 and y0 < y1:
        frame[y0:y1, x0:x1] = text_image[y0 - y:y1 - y, x0 - x:x1 - x]

    return frame


def detect_objects(frame):
    """
    Run the YOLO model on ``frame``, overlay the Arabic label of each
    confident detection and show the frame.

    Detections below 0.5 confidence are skipped one by one; the frame is
    always displayed, even when no detection is confident enough.

    :param frame: image array to run detection on; the overlay is written into it
    :return: None
    """
    min_confidence = 0.5

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame
    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        conf = round(box.conf[0].item(), 2)

        # Skip only this detection. Returning here would abandon the whole
        # frame, so the window would not be refreshed. Checking before the
        # translation also avoids a translator call for labels never drawn.
        if conf < min_confidence:
            logging.info("Skipping low-confidence detection: {} ({})".format(class_id, conf))
            continue

        expected_text = get_arabic_text(translate_to_arabic(class_id))
        logging.info("Object type:{} {}".format(expected_text, class_id))
        logging.info("Coordinates: {}".format(cords))
        logging.info("Probability: {}".format(conf))

        # Every label is pasted at the same spot, so the last one drawn wins.
        text_image = convert_text_to_image(expected_text)
        frame_with_text = overlay_text_on_frame(frame, text_image)

    # Display the (possibly modified) frame
    cv2.imshow("Object Detection", frame_with_text)


def process_frames():
    """
    Read one frame from the camera, bring it to 800x600 and run detection on it.

    :raises RuntimeError: if the capture device does not deliver a frame
    :return: None
    """
    target_size = (800, 600)  # (width, height), the order cv2.resize expects

    ret, frame = cap.read()
    if not ret or frame is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("Could not read a frame from the camera")

    # frame.shape is (height, width, channels); compare in (width, height)
    # order so an already correctly sized frame is not resized again.
    if (frame.shape[1], frame.shape[0]) != target_size:
        frame = cv2.resize(frame, target_size)

    # Perform object detection and modification on the frame
    detect_objects(frame)


# Main loop: process camera frames until 'q' is pressed in the OpenCV window.
try:
    while True:
        process_frames()
        # waitKey also gives the OpenCV window time to repaint.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close the window even when a frame
    # fails, so the camera is not left held by this process.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import logging
import time

import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from gtts import gTTS
import os

# Runtime configuration for this cell: the detector, the capture device and the
# settings used when text is rendered.
# NOTE(review): the weights path is an absolute path on one machine and must
# be adjusted before the cell can run anywhere else.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")
cap = cv2.VideoCapture(0)  # capture device index 0

# font_face, thickness, font_scale and font_thickness are not referenced by
# any function in this cell.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # passed as ``fill`` when the text is drawn with PIL
thickness = 2
# NOTE(review): placeholder path; it has to point at a real TTF file before
# ImageFont.truetype can load it.
font_path = "/path/to/arabic_font.ttf"
font_size = 32  # Size handed to ImageFont.truetype; adjust as needed
# OpenCV-style font properties (see the note above)
font_scale = 1
font_thickness = 2

# Set up logging: INFO and above go to the root logger
logging.basicConfig(level=logging.INFO)

# Lookup table from a detected class name to the Arabic text shown for it.
# translate_to_arabic() queries it with the name taken from ``result.names``
# and falls back to the name itself when there is no entry.
translation_dict = {
    '0': '0',
    '1': '1',
    '2': '2',
    '3': '3',
    '4': '4',
    '5': '5',
    '6': '6',
    '7': '7',
    '8': '8',
    '9': '9',
    'ALIF': 'أ',
    'AYN': 'ع',
    'Atman lak 7aya sa3eeda': 'أتمنى لك حياة سعيدة',
    'BAA': 'ب',
    'DAD': 'ض',
    'DELL': 'د',
    'DHAA': 'ظ',
    'DHELL': 'ذ',
    'FAA': 'ف',
    'GHAYN': 'غ',
    'HA': 'هـ',
    'HAA': 'ح',
    'JEEM': 'ج',
    'KAAF': 'ك',
    'KHAA': 'خ',
    'LAAM': 'ل',
    'MEEM': 'م',
    'QAAF': 'ق',
    'RAA': 'ر',
    'SAD': 'ص',
    'SEEN': 'س',
    'SHEEN': 'ش',
    'TA': 'ت',
    'TAA': 'ط',
    'THA': 'ث',
    'WAW': 'و',
    'YA': 'ي',
    'ZAY': 'ز',
    'bad': 'سيء',
    'del': 'حذف',
    'eqtibas': 'اقتباس',
    'good': 'جيد',
    'law sama7t': 'لو سمحت',
    'merhaba': 'مرحبا',
    'nothing': 'لا شيء',
    'o7ebok': 'أحبك',
    'oraqebak': 'أراقبك',
    'space': 'مسافة',
    'you': 'أنت',
}

# Time limit for word formation (in seconds); detect_objects() logs and
# clears the word once this much time has passed since ``start_time``.
formation_time_limit = 5

# Module-level state that detect_objects() updates through ``global``.
formed_word = ""  # Variable to store the formed word
start_time = time.time()  # Variable to track the start time for word formation


def translate_to_arabic(text):
    """
    Look ``text`` up in ``translation_dict``.

    :param text: class name to translate
    :return: the mapped Arabic text, or ``text`` itself when it has no entry
    """
    # If the word is not in the dictionary, the original text is used.
    return translation_dict.get(text, text)


def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` on a canvas sized to fit it and return it as an RGB array.

    The canvas is the text's bounding box plus 10 pixels in each dimension,
    with the text centred. Drawing happens on an RGBA image that is then
    converted to RGB, so the default background ends up black.

    :param text: string to draw
    :param background_color: RGBA fill colour of the canvas
    :return: NumPy array of the rendered RGB image, shape (height, width, 3)
    """
    padding = 10

    font = ImageFont.truetype(font_path, font_size)

    # ImageDraw.textsize was removed in Pillow 10; textbbox is its replacement.
    # Measure on a throw-away image because the canvas size depends on the result.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    left, top, right, bottom = probe.textbbox((0, 0), text, font=font)
    text_width = int(right - left)
    text_height = int(bottom - top)

    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # The bounding box does not start at the drawing origin, so shift by
    # (left, top) to centre the visible glyphs rather than the origin.
    text_x = (image.width - text_width) // 2 - left
    text_y = (image.height - text_height) // 2 - top
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))


def get_arabic_text(text):
    """
    Reshape ``text`` with arabic_reshaper, then reorder it with ``get_display``.

    :param text: text to prepare for drawing
    :return: the reshaped text after bidi reordering
    """
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """
    Paste ``text_image`` into ``frame`` in place, horizontally centred and
    10 pixels above the bottom edge.

    A patch that does not fit is cropped to the part that falls inside the
    frame instead of raising a shape-mismatch error.

    :param frame: destination image array, modified in place
    :param text_image: patch to paste; must have the same channel count as ``frame``
    :return: ``frame`` (the same object that was passed in)
    """
    padding = 10
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = text_image.shape[:2]

    # Top-left corner of the patch; negative when the patch does not fit.
    y = frame_h - patch_h - padding
    x = (frame_w - patch_w) // 2

    # Intersection of the patch rectangle with the frame, in frame coordinates.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + patch_w, frame_w), min(y + patch_h, frame_h)

    if x0 < x1 and y0 < y1:
        frame[y0:y1, x0:x1] = text_image[y0 - y:y1 - y, x0 - x:x1 - x]

    return frame


def detect_objects(frame):
    """
    Detect signs in ``frame``, speak each confident one, append it to the
    word being formed and draw that word on the frame.

    A ``(frame, texts)`` tuple is returned on every path; low-confidence
    detections are skipped one by one rather than ending the call with a
    bare ``return``. The word is accumulated in logical (unshaped) order and
    reshaped as a whole when it is drawn; concatenating letters that were
    already reshaped one at a time leaves them unjoined and in the wrong
    visual order.

    :param frame: image array to run detection on; the overlay is written into it
    :return: tuple ``(frame_with_text, texts_to_speak)`` where the second
             item lists the Arabic text of every confident detection
    """
    global formed_word, start_time  # both are updated as the word grows or resets

    min_confidence = 0.5
    # NOTE(review): placeholder path; it has to be a writable location.
    audio_file = "/path/to/text_to_speech.mp3"

    result = yolo_model.predict(frame)[0]
    texts_to_speak = []
    word_to_show = formed_word
    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        translated_text = translate_to_arabic(class_id)
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:{} {}".format(get_arabic_text(translated_text), class_id))
        logging.info("Coordinates: {}".format(cords))
        logging.info("Probability: {}".format(conf))

        if conf < min_confidence:
            continue

        # Convert the Arabic text to speech and play it. This blocks until
        # the player exits, so the video stalls while the audio plays.
        tts = gTTS(text=translated_text, lang='ar')
        tts.save(audio_file)
        os.system('mpg123 ' + audio_file)
        os.remove(audio_file)

        texts_to_speak.append(translated_text)

        # NOTE(review): a sign held across several frames is appended once
        # per frame; confirm whether repeats should be collapsed.
        formed_word += translated_text
        word_to_show = formed_word

        # Check if the time limit for word formation has been exceeded
        if time.time() - start_time >= formation_time_limit:
            logging.info("Formed Word: {}".format(formed_word))
            formed_word = ""
            start_time = time.time()

    # Draw once, after the loop, so the letter detected in this frame is
    # visible and a finished word is shown before it is cleared.
    frame_with_text = frame
    if word_to_show:
        text_image = convert_text_to_image(get_arabic_text(word_to_show))
        frame_with_text = overlay_text_on_frame(frame, text_image)

    return frame_with_text, texts_to_speak


def main():
    """
    Capture frames, run detection on each and display the result until 'q'
    is pressed or the camera stops delivering frames.

    The capture device and the window are released on every exit path.

    :return: None
    """
    try:
        while True:
            ret, frame = cap.read()
            if not ret or frame is None:
                logging.error("Could not read a frame from the camera; stopping")
                break

            # Resize the frame to a suitable size for processing
            resized_frame = cv2.resize(frame, (640, 480))

            # Convert the frame from BGR to RGB
            # NOTE(review): the RGB frame is also what the detector receives;
            # confirm which channel order the model expects for array input.
            rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)

            # detect_objects may return None instead of a tuple; fall back
            # to the unmodified frame rather than failing on the unpacking.
            result = detect_objects(rgb_frame)
            processed_frame = rgb_frame if result is None else result[0]

            # Convert the frame back to BGR for display
            bgr_frame = cv2.cvtColor(processed_frame, cv2.COLOR_RGB2BGR)
            cv2.imshow("Frame", bgr_frame)

            # Exit the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the video capture and destroy the windows
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()


In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from gtts import gTTS
import os

# Runtime configuration for this cell: the detector, the capture device and the
# settings used when text is rendered.
# NOTE(review): both file paths below are absolute paths on one machine and
# must be adjusted before the cell can run anywhere else.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")
cap = cv2.VideoCapture(0)  # capture device index 0

# font_face, thickness, font_scale and font_thickness are not referenced by
# any function in this cell.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # passed as ``fill`` when the text is drawn with PIL
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"
font_size = 32  # Size handed to ImageFont.truetype; adjust as needed
# OpenCV-style font properties (see the note above)
font_scale = 1
font_thickness = 2

# Set up logging: INFO and above go to the root logger
logging.basicConfig(level=logging.INFO)




# Lookup table from a detected class name to the Arabic text shown for it.
# translate_to_arabic() queries it with the name taken from ``result.names``
# and falls back to the name itself when there is no entry.
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}

# # Time limit for word formation (in seconds)
# formation_time_limit = 5

# formed_word = ""  # Variable to store the formed word
# start_time = time.time()  # Variable to track the start time for word formation


def translate_to_arabic(text):
    """
    Look ``text`` up in ``translation_dict``.

    :param text: class name to translate
    :return: the mapped Arabic text, or ``text`` itself when it has no entry
    """
    # If the word is not in the dictionary, the original text is used.
    return translation_dict.get(text, text)


def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` on a canvas sized to fit it and return it as an RGB array.

    The canvas is the text's bounding box plus 10 pixels in each dimension,
    with the text centred. Drawing happens on an RGBA image that is then
    converted to RGB, so the default background ends up black.

    :param text: string to draw
    :param background_color: RGBA fill colour of the canvas
    :return: NumPy array of the rendered RGB image, shape (height, width, 3)
    """
    padding = 10

    font = ImageFont.truetype(font_path, font_size)

    # ImageDraw.textsize was removed in Pillow 10; textbbox is its replacement.
    # Measure on a throw-away image because the canvas size depends on the result.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    left, top, right, bottom = probe.textbbox((0, 0), text, font=font)
    text_width = int(right - left)
    text_height = int(bottom - top)

    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # The bounding box does not start at the drawing origin, so shift by
    # (left, top) to centre the visible glyphs rather than the origin.
    text_x = (image.width - text_width) // 2 - left
    text_y = (image.height - text_height) // 2 - top
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """
    Reshape ``text`` with arabic_reshaper, then reorder it with ``get_display``.

    :param text: text to prepare for drawing
    :return: the reshaped text after bidi reordering
    """
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """
    Paste ``text_image`` into ``frame`` in place, horizontally centred and
    10 pixels above the bottom edge.

    A patch that does not fit is cropped to the part that falls inside the
    frame instead of raising a shape-mismatch error.

    :param frame: destination image array, modified in place
    :param text_image: patch to paste; must have the same channel count as ``frame``
    :return: ``frame`` (the same object that was passed in)
    """
    padding = 10
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = text_image.shape[:2]

    # Top-left corner of the patch; negative when the patch does not fit.
    y = frame_h - patch_h - padding
    x = (frame_w - patch_w) // 2

    # Intersection of the patch rectangle with the frame, in frame coordinates.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + patch_w, frame_w), min(y + patch_h, frame_h)

    if x0 < x1 and y0 < y1:
        frame[y0:y1, x0:x1] = text_image[y0 - y:y1 - y, x0 - x:x1 - x]

    return frame

# Define variables for word formation.
# NOTE(review): detect_objects() in this cell declares formed_word and
# start_time as global but never reads or writes them, and
# formation_time_limit is not referenced by any function in this cell.
formation_time_limit = 2  # Time limit for word formation in seconds
formed_word = ""  # Variable to store the formed word
start_time = time.time()  # Variable to track the start time for word formation

def detect_objects(frame):
    """
    Run the YOLO model on ``frame``; for each confident detection overlay
    its Arabic label and speak it, then show the frame.

    Every detection is logged. Detections below 0.5 confidence are skipped
    one by one; the frame is always displayed, even when no detection is
    confident enough.

    :param frame: image array to run detection on; the overlay is written into it
    :return: None
    """
    min_confidence = 0.5
    audio_file = "/Users/mernaziad/Desktop/text_to_speech.mp3"

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame
    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        translated_text = translate_to_arabic(class_id)
        expected_text = get_arabic_text(translated_text)
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:{} {}".format(expected_text, class_id))
        logging.info("Coordinates: {}".format(cords))
        logging.info("Probability: {}".format(conf))

        # Skip only this detection. Returning here would abandon the whole
        # frame, so the window would not be refreshed.
        if conf < min_confidence:
            continue

        # Every label is pasted at the same spot, so the last one drawn wins.
        text_image = convert_text_to_image(expected_text)
        frame_with_text = overlay_text_on_frame(frame, text_image)

        # Convert the Arabic text to speech and play it. This blocks until
        # the player exits, so the video stalls while the audio plays.
        tts = gTTS(text=translated_text, lang='ar')
        tts.save(audio_file)
        os.system('mpg123 ' + audio_file)
        os.remove(audio_file)

    # Display the (possibly modified) frame
    cv2.imshow("Object Detection", frame_with_text)


    
def process_frames():
    """
    Read one frame from the camera, bring it to 800x600 and run detection on it.

    :raises RuntimeError: if the capture device does not deliver a frame
    :return: None
    """
    target_size = (800, 600)  # (width, height), the order cv2.resize expects

    ret, frame = cap.read()
    if not ret or frame is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("Could not read a frame from the camera")

    # frame.shape is (height, width, channels); compare in (width, height)
    # order so an already correctly sized frame is not resized again.
    if (frame.shape[1], frame.shape[0]) != target_size:
        frame = cv2.resize(frame, target_size)

    # Perform object detection and modification on the frame
    detect_objects(frame)


# Main loop: process camera frames until 'q' is pressed in the OpenCV window.
try:
    while True:
        process_frames()
        # waitKey also gives the OpenCV window time to repaint.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close the window even when a frame
    # fails, so the camera is not left held by this process.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from gtts import gTTS
import os

# Load the YOLO weights from a hard-coded local path and open capture device 0.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")
cap = cv2.VideoCapture(0)

# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the code visible in this cell - confirm before removing.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # TrueType font loaded by convert_text_to_image
font_size = 32  # Adjust the font size as needed
# Set the font properties
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)




# Lookup table used by translate_to_arabic: keys are the labels it receives
# (the detector's class names), values are the Arabic text to show.
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}

# Variables for word formation
formation_time_limit = 2  # Time limit for word formation in seconds
formed_word = ""  # Variable to store the formed word
start_time = time.time()  # Variable to track the start time for word formation
previous_character = ""  # Variable to store the previous character

def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*.

    If the label is not in the dictionary, the original text is returned
    unchanged.
    """
    return translation_dict.get(text, text)


def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """Render *text* with the configured TrueType font into an RGB NumPy array.

    The canvas is the measured text extent plus 10 px in each dimension.  It
    is created as RGBA with *background_color* and converted to RGB before
    being returned, so the result has no alpha channel.

    :param text: string to draw.
    :param background_color: RGBA fill for the canvas.
    :return: ``numpy.ndarray`` of shape (height, width, 3).
    """
    padding = 10

    # Load the specified font
    font = ImageFont.truetype(font_path, font_size)

    # Measure on a throw-away 1x1 canvas.  ImageDraw.textsize() was removed
    # in Pillow 10, so prefer textbbox() and fall back only on old releases.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # Take the right/bottom edges (not right-left / bottom-top) so the
        # glyphs still fit when drawn from the origin used below.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    # Canvas with the padded dimensions and the requested background
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Same placement as before: horizontally centred and flush with the top
    # edge (the former ``image.height - text_height - padding`` was always 0).
    text_x = (image.width - text_width) // 2
    text_y = 0
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    # Drop the alpha channel and hand back a NumPy array
    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and reorder it with ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """Paste *text_image* onto *frame*, centred horizontally, 10 px above the bottom.

    *frame* is modified in place and also returned.  Any part of the text
    image that would fall outside the frame (for example a rendered
    sentence wider than the frame) is cropped instead of raising a
    broadcasting error or wrapping to a negative index.

    :param frame: target image array, indexed as (row, column, ...).
    :param text_image: image array to paste.
    :return: the same *frame* object.
    """
    padding = 10
    frame_height, frame_width = frame.shape[:2]
    text_height, text_width = text_image.shape[:2]

    top = frame_height - text_height - padding
    left = (frame_width - text_width) // 2

    # Intersect the target rectangle with the frame
    y0, x0 = max(top, 0), max(left, 0)
    y1 = min(top + text_height, frame_height)
    x1 = min(left + text_width, frame_width)
    if y0 >= y1 or x0 >= x1:
        return frame

    frame[y0:y1, x0:x1] = text_image[y0 - top:y1 - top, x0 - left:x1 - left]
    return frame

# Define variables for word formation
# NOTE(review): these three assignments repeat the word-formation variables
# already assigned earlier in this cell with the same values.
formation_time_limit = 2  # Time limit for word formation in seconds
formed_word = ""  # Variable to store the formed word
start_time = time.time()  # Variable to track the start time for word formation

def _speak_arabic(text, audio_file="/Users/mernaziad/Desktop/text_to_speech.mp3"):
    """Synthesize *text* as Arabic speech with gTTS and play it with mpg123.

    The MP3 is written to *audio_file*, played synchronously, and removed
    afterwards even if playback fails.
    """
    import subprocess  # local import: only needed for playback

    gTTS(text=text, lang='ar').save(audio_file)
    try:
        # Argument list instead of a shell string, so the path is never
        # re-parsed by a shell.
        subprocess.run(['mpg123', audio_file], check=False)
    except FileNotFoundError:
        # os.system() failed silently when mpg123 was missing; keep that
        # best-effort behaviour but leave a trace in the log.
        logging.warning("mpg123 not found; skipping audio playback")
    finally:
        os.remove(audio_file)


def detect_objects(frame):
    """Run YOLO on *frame*, overlay and speak each confident detection, show the frame.

    The global ``formed_word`` is reset to an empty string on every call.
    For every detected box the class name is translated with
    ``translate_to_arabic`` and passed through ``get_arabic_text``; the label,
    coordinates and confidence are logged.  Boxes with confidence below 0.5
    are skipped.  For the others the text is rendered, pasted onto the frame
    and spoken aloud (blocking) before the next box is handled.  The frame is
    always displayed in the "Object Detection" window.

    :param frame: image array passed to ``yolo_model.predict``.
    :return: None
    """
    global formed_word  # Indicate that we want to modify the global variable

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame
    formed_word = ""  # Reset the formed word for each frame

    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        translated_text = translate_to_arabic(class_id)
        expected_text = get_arabic_text(translated_text)
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:%s %s", expected_text, class_id)
        logging.info("Coordinates: %s", cords)
        logging.info("Probability: %s", conf)

        if conf < 0.5:
            # Skip only this box.  Returning here would drop the remaining
            # boxes and leave the window without a refreshed frame.
            continue

        text_image = convert_text_to_image(expected_text)
        frame_with_text = overlay_text_on_frame(frame, text_image)

        # Convert the Arabic text to speech and play it
        _speak_arabic(translated_text)

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)


    
def process_frames():
    """Grab one frame from the camera, bring it to 800x600, and run detection.

    :raises RuntimeError: if the camera does not deliver a frame.
    """
    ret, frame = cap.read()
    if not ret or frame is None:
        # cap.read() reports failure through ``ret``; the frame is unusable then.
        raise RuntimeError("Failed to read a frame from the camera")

    # ndarray.shape is (height, width, ...) while cv2.resize takes
    # (width, height): an 800x600 frame has shape[:2] == (600, 800).
    if frame.shape[:2] != (600, 800):
        frame = cv2.resize(frame, (800, 600))

    # Perform object detection and modification on the frame
    detect_objects(frame)


try:
    while True:
        process_frames()
        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close the window even when a frame
    # fails to process, so the camera handle is not left open.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the YOLO weights from a hard-coded local path.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open capture device 0.
cap = cv2.VideoCapture(0)

# Define the font properties
# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the code visible in this cell - confirm before removing.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary used by translate_to_arabic: keys are the labels it
# receives (the detector's class names), values are the Arabic text to show.
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}

# Time limit for word formation (in seconds)
# NOTE(review): not referenced by the code visible in this cell;
# detect_objects uses sentence_formation_time_limit instead.
formation_time_limit = 5

formed_sentence = ""  # Holds the sentence assembled so far
start_time = time.time()  # Tracks when the first letter of the sentence appeared
sentence_formation_time_limit = 5  # Time allowed for assembling the sentence (in seconds)

def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*.

    If the word is not in the translation dictionary, the original text is
    returned unchanged.
    """
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """Render *text* with the configured TrueType font into an RGB NumPy array.

    The canvas is the measured text extent plus 10 px in each dimension.  It
    is created as RGBA with *background_color* and converted to RGB before
    being returned, so the result has no alpha channel.

    :param text: string to draw.
    :param background_color: RGBA fill for the canvas.
    :return: ``numpy.ndarray`` of shape (height, width, 3).
    """
    padding = 10

    # Load the specified font
    font = ImageFont.truetype(font_path, font_size)

    # Measure on a throw-away 1x1 canvas.  ImageDraw.textsize() was removed
    # in Pillow 10, so prefer textbbox() and fall back only on old releases.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # Take the right/bottom edges (not right-left / bottom-top) so the
        # glyphs still fit when drawn from the origin used below.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    # Canvas with the padded dimensions and the requested background
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Same placement as before: horizontally centred and flush with the top
    # edge (the former ``image.height - text_height - padding`` was always 0).
    text_x = (image.width - text_width) // 2
    text_y = 0
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    # Drop the alpha channel and hand back a NumPy array
    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and reorder it with ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))

def overlay_text_on_frame(frame, text_image):
    """Paste *text_image* onto *frame*, centred horizontally, 10 px above the bottom.

    *frame* is modified in place and also returned.  Any part of the text
    image that would fall outside the frame (for example a rendered
    sentence wider than the frame) is cropped instead of raising a
    broadcasting error or wrapping to a negative index.

    :param frame: target image array, indexed as (row, column, ...).
    :param text_image: image array to paste.
    :return: the same *frame* object.
    """
    padding = 10
    frame_height, frame_width = frame.shape[:2]
    text_height, text_width = text_image.shape[:2]

    top = frame_height - text_height - padding
    left = (frame_width - text_width) // 2

    # Intersect the target rectangle with the frame
    y0, x0 = max(top, 0), max(left, 0)
    y1 = min(top + text_height, frame_height)
    x1 = min(left + text_width, frame_width)
    if y0 >= y1 or x0 >= x1:
        return frame

    frame[y0:y1, x0:x1] = text_image[y0 - top:y1 - top, x0 - left:x1 - left]
    return frame

def detect_objects(frame):
    """Run YOLO on *frame*, overlay confident detections, and extend the sentence.

    Every box is logged (label, coordinates, confidence).  Boxes with
    confidence of at least 0.5 have their text rendered and pasted onto the
    frame, and their translated text is appended to the global
    ``formed_sentence``.  Once ``sentence_formation_time_limit`` seconds
    have passed since ``start_time``, the sentence is logged and both
    globals are reset.  The frame is shown in the "Object Detection" window.

    :param frame: image array passed to ``yolo_model.predict``.
    :return: None
    """
    global formed_sentence, start_time  # both are reassigned below

    detection = yolo_model.predict(frame)[0]
    annotated = frame

    for box in detection.boxes:
        cords = [round(value) for value in box.xyxy[0].tolist()]
        class_id = detection.names[box.cls[0].item()]
        translated_text = translate_to_arabic(class_id)
        expected_text = get_arabic_text(translated_text)
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:{} {}".format(expected_text, class_id))
        logging.info("Coordinates: {}".format(cords))
        logging.info("Probability: {}".format(conf))

        if conf >= 0.5:
            annotated = overlay_text_on_frame(frame, convert_text_to_image(expected_text))

            # Only extend the sentence when there is a translated character
            if translated_text != "":
                formed_sentence += translated_text

                # Has the allotted time for this sentence run out?
                if time.time() - start_time >= sentence_formation_time_limit:
                    logging.info("Formed Sentence: {}".format(formed_sentence))
                    # Hook for whatever should happen with a finished sentence
                    # ...
                    # Reset the state for the next sentence
                    formed_sentence = ""
                    start_time = time.time()

    # Display the modified frame
    cv2.imshow("Object Detection", annotated)

def process_frames():
    """Process one camera frame; return False when the loop should stop.

    :return: ``False`` if no frame could be read or 'q' was pressed,
        otherwise ``True``.
    """
    ret, frame = cap.read()
    if not ret or frame is None:
        # cap.read() reports failure through ``ret``; stop instead of
        # crashing on ``None.shape``.
        logging.error("Failed to read a frame from the camera")
        return False

    # ndarray.shape is (height, width, ...) while cv2.resize takes
    # (width, height): an 800x600 frame has shape[:2] == (600, 800).
    if frame.shape[:2] != (600, 800):
        frame = cv2.resize(frame, (800, 600))

    # Perform object detection and modification on the frame
    detect_objects(frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        return False

    return True

try:
    # Keep going until process_frames() returns False
    while process_frames():
        pass
finally:
    # Release the video capture and close the window even when a frame
    # fails to process, so the camera handle is not left open.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import logging
import concurrent.futures
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont
from gtts import gTTS
import os

# Load the YOLO weights from a hard-coded local path and open capture device 0.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")
cap = cv2.VideoCapture(0)

# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the code visible in this cell - confirm before removing.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # TrueType font loaded by convert_text_to_image
font_size = 32  # Adjust the font size as needed
# Set the font properties
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)

# Lookup table used by translate_to_arabic: keys are the labels it receives
# (the detector's class names), values are the Arabic text to show.
translation_dict = {
    '0': '0',
    '1': '1',
    '2': '2',
    '3': '3',
    '4': '4',
    '5': '5',
    '6': '6',
    '7': '7',
    '8': '8',
    '9': '9',
    'ALIF': 'أ',
    'AYN': 'ع',
    'Atman lak 7aya sa3eeda': 'أتمنى لك حياة سعيدة',
    'BAA': 'ب',
    'DAD': 'ض',
    'DELL': 'د',
    'DHAA': 'ظ',
    'DHELL': 'ذ',
    'FAA': 'ف',
    'GHAYN': 'غ',
    'HA': 'هـ',
    'HAA': 'ح',
    'JEEM': 'ج',
    'KAAF': 'ك',
    'KHAA': 'خ',
    'LAAM': 'ل',
    'MEEM': 'م',
    'QAAF': 'ق',
    'RAA': 'ر',
    'SAD': 'ص',
    'SEEN': 'س',
    'SHEEN': 'ش',
    'TA': 'ت',
    'TAA': 'ط',
    'THA': 'ث',
    'WAW': 'و',
    'YA': 'ي',
    'ZAY': 'ز',
    'bad': 'سيء',
    'del': 'حذف',
    'eqtibas': 'اقتباس',
    'good': 'جيد',
    'law sama7t': 'لو سمحت',
    'merhaba': 'مرحبا',
    'nothing': 'لا شيء',
    'o7ebok': 'أحبك',
    'oraqebak': 'أراقبك',
    'space': 'مسافة',
    'you': 'أنت',
}


def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*.

    If the label is not in the dictionary, the original text is returned
    unchanged.
    """
    return translation_dict.get(text, text)


def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """Render *text* with the configured TrueType font into an RGB NumPy array.

    The canvas is the measured text extent plus 10 px in each dimension.  It
    is created as RGBA with *background_color* and converted to RGB before
    being returned, so the result has no alpha channel.

    :param text: string to draw.
    :param background_color: RGBA fill for the canvas.
    :return: ``numpy.ndarray`` of shape (height, width, 3).
    """
    padding = 10

    # Load the specified font
    font = ImageFont.truetype(font_path, font_size)

    # Measure on a throw-away 1x1 canvas.  ImageDraw.textsize() was removed
    # in Pillow 10, so prefer textbbox() and fall back only on old releases.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # Take the right/bottom edges (not right-left / bottom-top) so the
        # glyphs still fit when drawn from the origin used below.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    # Canvas with the padded dimensions and the requested background
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Same placement as before: horizontally centred and flush with the top
    # edge (the former ``image.height - text_height - padding`` was always 0).
    text_x = (image.width - text_width) // 2
    text_y = 0
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    # Drop the alpha channel and hand back a NumPy array
    return np.array(image.convert("RGB"))


def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and reorder it with ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """Paste *text_image* onto *frame*, centred horizontally, 10 px above the bottom.

    *frame* is modified in place and also returned.  Any part of the text
    image that would fall outside the frame (for example a rendered
    sentence wider than the frame) is cropped instead of raising a
    broadcasting error or wrapping to a negative index.

    :param frame: target image array, indexed as (row, column, ...).
    :param text_image: image array to paste.
    :return: the same *frame* object.
    """
    padding = 10
    frame_height, frame_width = frame.shape[:2]
    text_height, text_width = text_image.shape[:2]

    top = frame_height - text_height - padding
    left = (frame_width - text_width) // 2

    # Intersect the target rectangle with the frame
    y0, x0 = max(top, 0), max(left, 0)
    y1 = min(top + text_height, frame_height)
    x1 = min(left + text_width, frame_width)
    if y0 >= y1 or x0 >= x1:
        return frame

    frame[y0:y1, x0:x1] = text_image[y0 - top:y1 - top, x0 - left:x1 - left]
    return frame


def _speak_arabic(text, audio_file="/Users/mernaziad/Desktop/text_to_speech.mp3"):
    """Synthesize *text* as Arabic speech with gTTS and play it with mpg123.

    The MP3 is written to *audio_file*, played synchronously, and removed
    afterwards even if playback fails.
    """
    import subprocess  # local import: only needed for playback

    gTTS(text=text, lang='ar').save(audio_file)
    try:
        # Argument list instead of a shell string, so the path is never
        # re-parsed by a shell.
        subprocess.run(['mpg123', audio_file], check=False)
    except FileNotFoundError:
        # os.system() failed silently when mpg123 was missing; keep that
        # best-effort behaviour but leave a trace in the log.
        logging.warning("mpg123 not found; skipping audio playback")
    finally:
        os.remove(audio_file)


def detect_objects(frame):
    """Run YOLO on *frame*, overlay and speak each confident detection, show the frame.

    For every detected box the class name is translated with
    ``translate_to_arabic`` and passed through ``get_arabic_text``; the label,
    coordinates and confidence are logged.  Boxes with confidence below 0.5
    are skipped.  For the others the text is rendered, pasted onto the frame
    and spoken aloud (blocking) before the next box is handled.  The frame is
    always displayed in the "Object Detection" window.

    :param frame: image array passed to ``yolo_model.predict``.
    :return: None
    """
    result = yolo_model.predict(frame)[0]
    frame_with_text = frame

    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        translated_text = translate_to_arabic(class_id)
        expected_text = get_arabic_text(translated_text)
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:%s %s", expected_text, class_id)
        logging.info("Coordinates: %s", cords)
        logging.info("Probability: %s", conf)

        if conf < 0.5:
            # Skip only this box.  Returning here would drop the remaining
            # boxes and leave the window without a refreshed frame.
            continue

        text_image = convert_text_to_image(expected_text)
        frame_with_text = overlay_text_on_frame(frame, text_image)

        # Convert the Arabic text to speech and play it
        _speak_arabic(translated_text)

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)


def process_frames():
    """Grab one frame from the camera, bring it to 800x600, and run detection.

    :raises RuntimeError: if the camera does not deliver a frame.
    """
    ret, frame = cap.read()
    if not ret or frame is None:
        # cap.read() reports failure through ``ret``; the frame is unusable then.
        raise RuntimeError("Failed to read a frame from the camera")

    # ndarray.shape is (height, width, ...) while cv2.resize takes
    # (width, height): an 800x600 frame has shape[:2] == (600, 800).
    if frame.shape[:2] != (600, 800):
        frame = cv2.resize(frame, (800, 600))

    # Perform object detection and modification on the frame
    detect_objects(frame)


try:
    while True:
        process_frames()
        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close the window even when a frame
    # fails to process, so the camera handle is not left open.
    cap.release()
    cv2.destroyAllWindows()



In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the YOLO weights from a hard-coded local path.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open capture device 0.
cap = cv2.VideoCapture(0)

# Define the font properties
# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the code visible in this cell - confirm before removing.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary used by translate_to_arabic: keys are the labels it
# receives (the detector's class names), values are the Arabic text to show.
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}



def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*.

    If the word is not in the translation dictionary, the original text is
    returned unchanged.
    """
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """Render *text* with the configured TrueType font into an RGB NumPy array.

    The canvas is the measured text extent plus 10 px in each dimension.  It
    is created as RGBA with *background_color* and converted to RGB before
    being returned, so the result has no alpha channel.

    :param text: string to draw.
    :param background_color: RGBA fill for the canvas.
    :return: ``numpy.ndarray`` of shape (height, width, 3).
    """
    padding = 10

    # Load the specified font
    font = ImageFont.truetype(font_path, font_size)

    # Measure on a throw-away 1x1 canvas.  ImageDraw.textsize() was removed
    # in Pillow 10, so prefer textbbox() and fall back only on old releases.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # Take the right/bottom edges (not right-left / bottom-top) so the
        # glyphs still fit when drawn from the origin used below.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    # Canvas with the padded dimensions and the requested background
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Same placement as before: horizontally centred and flush with the top
    # edge (the former ``image.height - text_height - padding`` was always 0).
    text_x = (image.width - text_width) // 2
    text_y = 0
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    # Drop the alpha channel and hand back a NumPy array
    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and reorder it with ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))

def _paste_clipped(frame, image, x, y):
    """Copy *image* into *frame* with its top-left corner at (x, y), cropping what falls outside."""
    frame_height, frame_width = frame.shape[:2]
    y0, x0 = max(y, 0), max(x, 0)
    y1 = min(y + image.shape[0], frame_height)
    x1 = min(x + image.shape[1], frame_width)
    if y0 < y1 and x0 < x1:
        frame[y0:y1, x0:x1] = image[y0 - y:y1 - y, x0 - x:x1 - x]


def overlay_text_on_frame(frame, words):
    """Stack a rendered image of each word above the bottom edge of *frame*.

    Words are drawn top-to-bottom in the given order with 10 px between
    them; all share the left edge that centres the widest entry.  *frame*
    is modified in place and returned.

    Each word is rendered once.  When the whole list does not fit
    vertically, only the last words that fit are drawn, and images wider
    than the frame are cropped, so a growing word list can no longer push
    the paste position outside the frame.

    :param frame: target image array, indexed as (row, column, ...).
    :param words: sequence of strings to render with ``convert_text_to_image``.
    :return: the same *frame* object.
    """
    padding = 10
    frame_height, frame_width = frame.shape[:2]

    # Walk backwards so the most recent words win when space runs out
    stack = []
    stack_height = 0
    for word in reversed(list(words)):
        image = convert_text_to_image(word)
        needed = image.shape[0] + padding
        if stack_height + needed > frame_height:
            break
        stack.append(image)
        stack_height += needed

    if not stack:
        return frame
    stack.reverse()

    x = max((frame_width - max(image.shape[1] for image in stack)) // 2, 0)
    y = frame_height - stack_height
    for image in stack:
        _paste_clipped(frame, image, x, y)
        y += image.shape[0] + padding

    return frame
# Insertion-ordered set of every sentence prefix formed so far (values unused)
word_combinations = {}
# Concatenation of the translated text of all confident detections so far
formed_sentence = ""


def detect_objects(frame):
    """Run YOLO on *frame*, draw the collected words, and extend the sentence.

    Every box is logged (label, coordinates, confidence).  For each box
    with confidence of at least 0.5, the words collected so far are drawn
    onto the frame, the translated text is appended to the global
    ``formed_sentence``, and - once that holds two or more characters - the
    sentence is recorded as a key of ``word_combinations``.  The frame is
    shown in the "Object Detection" window.

    :param frame: image array passed to ``yolo_model.predict``.
    :return: None
    """
    global formed_sentence

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame

    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        translated_text = translate_to_arabic(class_id)
        expected_text = get_arabic_text(translated_text)
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:%s %s", expected_text, class_id)
        logging.info("Coordinates: %s", cords)
        logging.info("Probability: %s", conf)

        if conf < 0.5:
            continue

        # The per-box text image that used to be rendered here was never
        # drawn; only the collected words are overlaid.
        frame_with_text = overlay_text_on_frame(frame_with_text, list(word_combinations))

        if translated_text != "":
            formed_sentence += translated_text
            if len(formed_sentence) >= 2:  # Check if there are at least two characters in the formed sentence
                word_combinations[formed_sentence] = True

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

def process_frames():
    """Process one camera frame; return False when the loop should stop.

    :return: ``False`` if no frame could be read or 'q' was pressed,
        otherwise ``True``.
    """
    ret, frame = cap.read()
    if not ret or frame is None:
        # cap.read() reports failure through ``ret``; stop instead of
        # crashing on ``None.shape``.
        logging.error("Failed to read a frame from the camera")
        return False

    # ndarray.shape is (height, width, ...) while cv2.resize takes
    # (width, height): an 800x600 frame has shape[:2] == (600, 800).
    if frame.shape[:2] != (600, 800):
        frame = cv2.resize(frame, (800, 600))

    # Perform object detection and modification on the frame.
    # detect_objects() already calls cv2.imshow, so an overlay applied
    # after it would never be displayed and is not repeated here.
    detect_objects(frame)

    print("Combined Words:", ' '.join(word_combinations))

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        return False

    return True

try:
    # Keep going until process_frames() returns False
    while process_frames():
        pass
finally:
    # Release the video capture and close the window even when a frame
    # fails to process, so the camera handle is not left open.
    cap.release()
    cv2.destroyAllWindows()

# Combine the words from the dictionary
combined_words = ' '.join(word_combinations)
print("Combined Words:", combined_words)


In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the YOLO weights from a hard-coded local path.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open capture device 0.
cap = cv2.VideoCapture(0)

# Define the font properties
# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the code visible in this cell - confirm before removing.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary used by translate_to_arabic: keys are the labels it
# receives (the detector's class names), values are the Arabic text to show.
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}



def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*.

    If the word is not in the translation dictionary, the original text is
    returned unchanged.
    """
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """Render *text* with the configured TrueType font into an RGB NumPy array.

    The canvas is the measured text extent plus 10 px in each dimension.  It
    is created as RGBA with *background_color* and converted to RGB before
    being returned, so the result has no alpha channel.

    :param text: string to draw.
    :param background_color: RGBA fill for the canvas.
    :return: ``numpy.ndarray`` of shape (height, width, 3).
    """
    padding = 10

    # Load the specified font
    font = ImageFont.truetype(font_path, font_size)

    # Measure on a throw-away 1x1 canvas.  ImageDraw.textsize() was removed
    # in Pillow 10, so prefer textbbox() and fall back only on old releases.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # Take the right/bottom edges (not right-left / bottom-top) so the
        # glyphs still fit when drawn from the origin used below.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    # Canvas with the padded dimensions and the requested background
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Same placement as before: horizontally centred and flush with the top
    # edge (the former ``image.height - text_height - padding`` was always 0).
    text_x = (image.width - text_width) // 2
    text_y = 0
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    # Drop the alpha channel and hand back a NumPy array
    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and reorder it with ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))

def _paste_clipped(frame, image, x, y):
    """Copy *image* into *frame* with its top-left corner at (x, y), cropping what falls outside."""
    frame_height, frame_width = frame.shape[:2]
    y0, x0 = max(y, 0), max(x, 0)
    y1 = min(y + image.shape[0], frame_height)
    x1 = min(x + image.shape[1], frame_width)
    if y0 < y1 and x0 < x1:
        frame[y0:y1, x0:x1] = image[y0 - y:y1 - y, x0 - x:x1 - x]


def overlay_text_on_frame(frame, words):
    """Draw *words* as a vertical stack with their space-joined form underneath.

    The combined line (all words joined by a space) is centred horizontally
    10 px above the bottom edge; the individual words are stacked above it
    with 10 px between them, sharing the left edge that centres the widest
    entry.  *frame* is modified in place and returned; an empty *words*
    leaves it untouched.

    Previously the combined line was positioned below the last stacked
    word, i.e. always past the bottom of the frame, so every call failed
    with a broadcasting error.  Room for it is now reserved first, only the
    last words that fit are stacked, and anything wider than the frame is
    cropped.

    :param frame: target image array, indexed as (row, column, ...).
    :param words: sequence of strings to render with ``convert_text_to_image``.
    :return: the same *frame* object.
    """
    padding = 10
    frame_height, frame_width = frame.shape[:2]

    words = list(words)
    if not words:
        return frame

    # The combined line sits at the bottom; reserve its room first
    combined_words_image = convert_text_to_image(' '.join(words))
    stack_bottom = frame_height - combined_words_image.shape[0] - padding

    # Walk backwards so the most recent words win when space runs out
    stack = []
    stack_height = 0
    for word in reversed(words):
        image = convert_text_to_image(word)
        needed = image.shape[0] + padding
        if stack_height + needed > stack_bottom:
            break
        stack.append(image)
        stack_height += needed
    stack.reverse()

    if stack:
        x = max((frame_width - max(image.shape[1] for image in stack)) // 2, 0)
        y = stack_bottom - stack_height
        for image in stack:
            _paste_clipped(frame, image, x, y)
            y += image.shape[0] + padding

    # Overlay the combined words on the frame
    combined_words_x = (frame_width - combined_words_image.shape[1]) // 2
    _paste_clipped(frame, combined_words_image, combined_words_x, stack_bottom)

    return frame
# Insertion-ordered set of every sentence prefix formed so far (values unused)
word_combinations = {}
# Concatenation of the translated text of all confident detections so far.
# detect_objects() appends to it, so it must exist before the first call.
formed_sentence = ""


def detect_objects(frame):
    """Run YOLO on *frame*, draw the collected words, and extend the sentence.

    Every box is logged (label, coordinates, confidence).  For each box
    with confidence of at least 0.5, the words collected so far are drawn
    onto the frame, the translated text is appended to the global
    ``formed_sentence``, and - once that holds two or more characters - the
    sentence is recorded as a key of ``word_combinations``.  The frame is
    shown in the "Object Detection" window.

    :param frame: image array passed to ``yolo_model.predict``.
    :return: None
    """
    global formed_sentence

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame

    for box in result.boxes:
        cords = [round(x) for x in box.xyxy[0].tolist()]
        class_id = result.names[box.cls[0].item()]
        translated_text = translate_to_arabic(class_id)
        expected_text = get_arabic_text(translated_text)
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:%s %s", expected_text, class_id)
        logging.info("Coordinates: %s", cords)
        logging.info("Probability: %s", conf)

        if conf < 0.5:
            continue

        # overlay_text_on_frame() in this cell takes a list of words, not a
        # rendered image array, so pass the collected words.
        frame_with_text = overlay_text_on_frame(frame_with_text, list(word_combinations))

        if translated_text != "":
            formed_sentence += translated_text
            if len(formed_sentence) >= 2:  # Check if there are at least two characters in the formed sentence
                word_combinations[formed_sentence] = True

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

def process_frames():
    """Process one camera frame; return False when the loop should stop.

    :return: ``False`` if no frame could be read or 'q' was pressed,
        otherwise ``True``.
    """
    ret, frame = cap.read()
    if not ret or frame is None:
        # cap.read() reports failure through ``ret``; stop instead of
        # crashing on ``None.shape``.
        logging.error("Failed to read a frame from the camera")
        return False

    # ndarray.shape is (height, width, ...) while cv2.resize takes
    # (width, height): an 800x600 frame has shape[:2] == (600, 800).
    if frame.shape[:2] != (600, 800):
        frame = cv2.resize(frame, (800, 600))

    # Perform object detection and modification on the frame.
    # detect_objects() already calls cv2.imshow, so an overlay applied
    # after it would never be displayed and is not repeated here.
    detect_objects(frame)

    print("Combined Words:", ' '.join(word_combinations))

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        return False

    return True

try:
    # Keep going until process_frames() returns False
    while process_frames():
        pass
finally:
    # Release the video capture and close the window even when a frame
    # fails to process, so the camera handle is not left open.
    cap.release()
    cv2.destroyAllWindows()

# Combine the words from the dictionary
combined_words = ' '.join(word_combinations)
print("Combined Words:", combined_words)


In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the YOLO weights from a hard-coded local path.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open capture device 0.
cap = cv2.VideoCapture(0)

# Define the font properties
# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the visible part of this cell - confirm before removing.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary used by translate_to_arabic: keys are the labels
# passed to it, values are the Arabic text it returns.
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}

# Word dictionary
# NOTE(review): presumably the set of accepted Arabic words (values are all
# True); its use is not shown in the visible part of this cell - confirm.
word_dictionary = {
    'أب': True,
    'أبي': True,
}

def translate_to_arabic(text):
    """
    Look up the Arabic text for a label in ``translation_dict``.

    :param text: label to translate
    :return: the mapped Arabic string, or ``text`` itself when it has no entry
    """
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` with the configured TrueType font into a small RGB image.

    The canvas is the measured text extent plus 10 pixels in each dimension.
    It is created as RGBA and converted to RGB before being returned, so the
    alpha component of ``background_color`` is discarded.

    :param text: string to draw
    :param background_color: RGBA fill used for the canvas
    :return: NumPy array of shape (height, width, 3)
    """
    font = ImageFont.truetype(font_path, font_size)

    # A throwaway 1x1 canvas is enough to measure the text.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # ImageDraw.textsize() was removed in Pillow 10. The right/bottom edge
        # of the bounding box anchored at the origin gives the equivalent extent.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    padding = 10
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Placement is unchanged: horizontally centred; the vertical formula
    # always evaluates to 0, so the padding ends up below the text.
    text_x = (image.width - text_width) // 2
    text_y = image.height - text_height - padding
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """
    Prepare Arabic text for drawing.

    The text is reshaped with ``arabic_reshaper`` and the result is passed
    through ``get_display`` from the bidi package.

    :param text: Arabic string to prepare
    :return: the reshaped, display-ordered string
    """
    return get_display(arabic_reshaper.reshape(text))

def _paste_clipped(frame, patch, x, y):
    """Copy ``patch`` into ``frame`` with its top-left corner at (x, y), dropping any part outside the frame."""
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = patch.shape[:2]
    top, left = max(y, 0), max(x, 0)
    bottom, right = min(y + patch_h, frame_h), min(x + patch_w, frame_w)
    if top >= bottom or left >= right:
        return
    frame[top:bottom, left:right] = patch[top - y:bottom - y, left - x:right - x]


def overlay_text_on_frame(frame, words):
    """
    Draw each word, followed by one line with all words joined, at the bottom of the frame.

    The word images are stacked vertically (10 px apart) and share a left edge
    chosen so that the widest one is centred; the joined line is centred on its
    own and placed last. Anything that does not fit inside the frame is clipped
    instead of raising. The previous version positioned the joined line below
    the bottom edge of the frame, which made the slice assignment fail.

    Side effect kept from the original: when ``words`` is non-empty and the
    global ``formed_sentence`` (if it exists) is a key of ``word_dictionary``,
    it is recorded in ``word_combinations``.

    :param frame: image array of shape (height, width, 3); modified in place
    :param words: list of strings to draw
    :return: the same ``frame`` object
    """
    if not words:
        return frame

    padding = 10

    # Render each line exactly once.
    word_images = [convert_text_to_image(word) for word in words]
    combined_image = convert_text_to_image(' '.join(words))

    # The global may not have been defined yet; treat that as "no sentence".
    sentence = globals().get("formed_sentence")
    if sentence in word_dictionary:
        word_combinations[sentence] = True

    # Reserve room for the joined line as well so the whole stack ends at the bottom edge.
    stack_height = sum(image.shape[0] + padding for image in word_images)
    stack_height += combined_image.shape[0] + padding
    y = frame.shape[0] - stack_height
    x = (frame.shape[1] - max(image.shape[1] for image in word_images)) // 2

    for image in word_images:
        _paste_clipped(frame, image, x, y)
        y += image.shape[0] + padding

    combined_x = (frame.shape[1] - combined_image.shape[1]) // 2
    _paste_clipped(frame, combined_image, combined_x, y)

    return frame

# Words collected so far; overlay_text_on_frame adds keys with the value True,
# and callers only read the keys.
word_combinations = {}

def detect_objects(frame):
    """
    Run the YOLO model on one frame, overlay the labels of confident detections and show the frame.

    Every detection is logged. Detections with a rounded confidence below 0.5
    are then skipped; for the others the display-ready Arabic label is drawn
    onto ``frame`` via ``overlay_text_on_frame`` and the translated label is
    appended to the global ``formed_sentence``. The frame is finally shown in
    the "Object Detection" window.

    :param frame: image array to analyse; the overlay draws on it in place
    :return: None
    """
    global formed_sentence

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame

    for box in result.boxes:
        cords = [round(value) for value in box.xyxy[0].tolist()]
        # .item() yields a float; cast it so the names lookup uses an integer index.
        class_id = result.names[int(box.cls[0].item())]
        translated_text = translate_to_arabic(class_id)
        expected_text = get_arabic_text(translated_text)

        conf = round(box.conf[0].item(), 2)
        logging.info("Object type:%s %s", expected_text, class_id)
        logging.info("Coordinates: %s", cords)
        logging.info("Probability: %s", conf)

        if conf < 0.5:
            continue

        frame_with_text = overlay_text_on_frame(frame, [expected_text])

        if translated_text != "":
            formed_sentence += translated_text

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

def process_frames():
    """
    Capture, annotate and display camera frames until 'q' is pressed or no frame can be read.

    Each frame is resized to 800x600 if necessary, passed to
    ``detect_objects``, overlaid with the keys of ``word_combinations`` and
    shown in the "Object Detection" window; the joined keys are also printed.
    The capture is released and the windows are closed on the way out, even
    when an exception interrupts the loop.

    :return: None
    """
    target_size = (800, 600)  # (width, height), the order cv2.resize expects

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # cap.read() signals failure through ret; frame is unusable then.
                logging.warning("Could not read a frame from the camera; stopping.")
                break

            # frame.shape is (height, width, channels), the reverse of target_size.
            if frame.shape[:2] != target_size[::-1]:
                frame = cv2.resize(frame, target_size)

            # Perform object detection and modification on the frame
            detect_objects(frame)
            frame_with_text = overlay_text_on_frame(frame, list(word_combinations))

            # Print the word combinations
            combined_words = ' '.join(word_combinations)
            print("Combined Words:", combined_words)

            # Display the modified frame
            cv2.imshow("Object Detection", frame_with_text)

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close windows
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    # Module-level sentence buffer: extended by detect_objects and compared
    # against word_dictionary in overlay_text_on_frame.
    formed_sentence = ""
    # Start timestamp; declared global in detect_objects but not otherwise used in this cell.
    start_time = time.time()
    # Runs the capture loop; returns once 'q' is pressed.
    process_frames()


In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Replace with the path to your YOLO model weights
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")

# Open video capture device 0.
cap = cv2.VideoCapture(0)

# Define the font properties
# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the functions defined in this cell.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32  # size passed to ImageFont.truetype
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary: label string -> Arabic text returned by
# translate_to_arabic (labels without an entry are returned unchanged).
translation_dict = {
    '0': '0',
    '1': '1',
    '2': '2',
    '3': '3',
    '4': '4',
    '5': '5',
    '6': '6',
    '7': '7',
    '8': '8',
    '9': '9',
    'ALIF': 'أ',
    'AYN': 'ع',
    'Atman lak 7aya sa3eeda': 'أتمنى لك حياة سعيدة',
    'BAA': 'ب',
    'DAD': 'ض',
    'DELL': 'د',
    'DHAA': 'ظ',
    'DHELL': 'ذ',
    'FAA': 'ف',
    'GHAYN': 'غ',
    'HA': 'هـ',
    'HAA': 'ح',
    'JEEM': 'ج',
    'KAAF': 'ك',
    'KHAA': 'خ',
    'LAAM': 'ل',
    'MEEM': 'م',
    'QAAF': 'ق',
    'RAA': 'ر',
    'SAD': 'ص',
    'SEEN': 'س',
    'SHEEN': 'ش',
    'TA': 'ت',
    'TAA': 'ط',
    'THA': 'ث',
    'WAW': 'و',
    'YA': 'ي',
    'ZAY': 'ز',
    'bad': 'سيء',
    'del': 'حذف',
    'eqtibas': 'اقتباس',
    'good': 'جيد',
    'law sama7t': 'لو سمحت',
    'merhaba': 'مرحبا',
    'nothing': 'لا شيء',
    'o7ebok': 'أحبك',
    'oraqebak': 'أراقبك',
    'space': 'مسافة',
    'you': 'أنت',
}

# Word dictionary: Arabic words that overlay_text_on_frame compares
# formed_sentence against (only the keys matter; values are True).
word_dictionary = {
    'أب': True,
    'أبي': True,
}

def translate_to_arabic(text):
    """
    Look up the Arabic text for a label in ``translation_dict``.

    :param text: label to translate
    :return: the mapped Arabic string, or ``text`` itself when it has no entry
    """
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` with the configured TrueType font into a small RGB image.

    The canvas is the measured text extent plus 10 pixels in each dimension.
    It is created as RGBA and converted to RGB before being returned, so the
    alpha component of ``background_color`` is discarded.

    :param text: string to draw
    :param background_color: RGBA fill used for the canvas
    :return: NumPy array of shape (height, width, 3)
    """
    font = ImageFont.truetype(font_path, font_size)

    # A throwaway 1x1 canvas is enough to measure the text.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # ImageDraw.textsize() was removed in Pillow 10. The right/bottom edge
        # of the bounding box anchored at the origin gives the equivalent extent.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    padding = 10
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Placement is unchanged: horizontally centred; the vertical formula
    # always evaluates to 0, so the padding ends up below the text.
    text_x = (image.width - text_width) // 2
    text_y = image.height - text_height - padding
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """
    Prepare Arabic text for drawing.

    The text is reshaped with ``arabic_reshaper`` and the result is passed
    through ``get_display`` from the bidi package.

    :param text: Arabic string to prepare
    :return: the reshaped, display-ordered string
    """
    return get_display(arabic_reshaper.reshape(text))

def _paste_clipped(frame, patch, x, y):
    """Copy ``patch`` into ``frame`` with its top-left corner at (x, y), dropping any part outside the frame."""
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = patch.shape[:2]
    top, left = max(y, 0), max(x, 0)
    bottom, right = min(y + patch_h, frame_h), min(x + patch_w, frame_w)
    if top >= bottom or left >= right:
        return
    frame[top:bottom, left:right] = patch[top - y:bottom - y, left - x:right - x]


def overlay_text_on_frame(frame, words):
    """
    Draw each word, followed by one line with all words joined, at the bottom of the frame.

    The word images are stacked vertically (10 px apart) and share a left edge
    chosen so that the widest one is centred; the joined line is centred on its
    own and placed last. Anything that does not fit inside the frame is clipped
    instead of raising. The previous version positioned the joined line below
    the bottom edge of the frame, which made the slice assignment fail, and it
    read a global ``formed_sentence`` that this cell never defines.

    Side effect kept from the original: when ``words`` is non-empty and a
    global ``formed_sentence`` exists and is a key of ``word_dictionary``, it
    is recorded in ``word_combinations``.

    :param frame: image array of shape (height, width, 3); modified in place
    :param words: list of strings to draw
    :return: the same ``frame`` object
    """
    if not words:
        return frame

    padding = 10

    # Render each line exactly once.
    word_images = [convert_text_to_image(word) for word in words]
    combined_image = convert_text_to_image(' '.join(words))

    # The global may not have been defined; treat that as "no sentence".
    sentence = globals().get("formed_sentence")
    if sentence in word_dictionary:
        word_combinations[sentence] = True

    # Reserve room for the joined line as well so the whole stack ends at the bottom edge.
    stack_height = sum(image.shape[0] + padding for image in word_images)
    stack_height += combined_image.shape[0] + padding
    y = frame.shape[0] - stack_height
    x = (frame.shape[1] - max(image.shape[1] for image in word_images)) // 2

    for image in word_images:
        _paste_clipped(frame, image, x, y)
        y += image.shape[0] + padding

    combined_x = (frame.shape[1] - combined_image.shape[1]) // 2
    _paste_clipped(frame, combined_image, combined_x, y)

    return frame

# Words collected so far; overlay_text_on_frame adds keys with the value True.
word_combinations = {}

def detect_objects(frame):
    """
    Run the YOLO model on a frame and return the detected boxes with their translated labels.

    The previous version read ``results.xyxy``, which is not available on the
    value an Ultralytics model returns, and translated the stringified numeric
    class index instead of the class name.

    :param frame: image array passed to the model
    :return: tuple ``(boxes, object_labels)``: a list of ``[x1, y1, x2, y2]``
             lists and the translated class names in the same order
    """
    results = yolo_model(frame)
    # The model returns one result per input image; only one image is passed in.
    result = results[0] if isinstance(results, (list, tuple)) else results

    detections = result.boxes
    boxes = detections.xyxy.tolist()

    # Map each class index to its name before looking up the translation.
    object_labels = [
        translate_to_arabic(result.names[int(class_index)])
        for class_index in detections.cls.tolist()
    ]

    return boxes, object_labels

def process_frame(frame):
    """
    Detect objects in a frame and overlay their translated labels on it.

    The joined labels are stored in the global ``formed_sentence``, which
    ``overlay_text_on_frame`` compares against ``word_dictionary``; previously
    the value was assigned to an unused local.

    :param frame: BGR image array from the capture; the overlay draws on it
    :return: the frame returned by ``overlay_text_on_frame``
    """
    global formed_sentence

    # Convert the frame from BGR to RGB
    # NOTE(review): Ultralytics documents NumPy inputs as BGR; confirm this
    # conversion is actually wanted before detection.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Detect objects in the frame (the boxes are not needed here)
    _boxes, object_labels = detect_objects(rgb_frame)
    detected_objects = list(object_labels)

    # Combine the detected words into a sentence
    formed_sentence = ' '.join(detected_objects)

    # Overlay the detected words on the original (BGR) frame
    return overlay_text_on_frame(frame, detected_objects)



In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Replace with the path to your YOLO model weights
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")

# Open video capture device 0.
cap = cv2.VideoCapture(0)

# Define the font properties
# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the functions defined in this cell.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32  # size passed to ImageFont.truetype
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary: label string -> Arabic text returned by
# translate_to_arabic (labels without an entry are returned unchanged).
translation_dict = {
    '0': '0',
    '1': '1',
    '2': '2',
    '3': '3',
    '4': '4',
    '5': '5',
    '6': '6',
    '7': '7',
    '8': '8',
    '9': '9',
    'ALIF': 'أ',
    'AYN': 'ع',
    'Atman lak 7aya sa3eeda': 'أتمنى لك حياة سعيدة',
    'BAA': 'ب',
    'DAD': 'ض',
    'DELL': 'د',
    'DHAA': 'ظ',
    'DHELL': 'ذ',
    'FAA': 'ف',
    'GHAYN': 'غ',
    'HA': 'هـ',
    'HAA': 'ح',
    'JEEM': 'ج',
    'KAAF': 'ك',
    'KHAA': 'خ',
    'LAAM': 'ل',
    'MEEM': 'م',
    'QAAF': 'ق',
    'RAA': 'ر',
    'SAD': 'ص',
    'SEEN': 'س',
    'SHEEN': 'ش',
    'TA': 'ت',
    'TAA': 'ط',
    'THA': 'ث',
    'WAW': 'و',
    'YA': 'ي',
    'ZAY': 'ز',
    'bad': 'سيء',
    'del': 'حذف',
    'eqtibas': 'اقتباس',
    'good': 'جيد',
    'law sama7t': 'لو سمحت',
    'merhaba': 'مرحبا',
    'nothing': 'لا شيء',
    'o7ebok': 'أحبك',
    'oraqebak': 'أراقبك',
    'space': 'مسافة',
    'you': 'أنت',
}

# Word dictionary: Arabic words that overlay_text_on_frame compares
# formed_sentence against (only the keys matter; values are True).
word_dictionary = {
    'أب': True,
    'أبي': True,
}

def translate_to_arabic(text):
    """
    Look up the Arabic text for a label in ``translation_dict``.

    :param text: label to translate
    :return: the mapped Arabic string, or ``text`` itself when it has no entry
    """
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` with the configured TrueType font into a small RGB image.

    The canvas is the measured text extent plus 10 pixels in each dimension.
    It is created as RGBA and converted to RGB before being returned, so the
    alpha component of ``background_color`` is discarded.

    :param text: string to draw
    :param background_color: RGBA fill used for the canvas
    :return: NumPy array of shape (height, width, 3)
    """
    font = ImageFont.truetype(font_path, font_size)

    # A throwaway 1x1 canvas is enough to measure the text.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # ImageDraw.textsize() was removed in Pillow 10. The right/bottom edge
        # of the bounding box anchored at the origin gives the equivalent extent.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    padding = 10
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Placement is unchanged: horizontally centred; the vertical formula
    # always evaluates to 0, so the padding ends up below the text.
    text_x = (image.width - text_width) // 2
    text_y = image.height - text_height - padding
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """
    Prepare Arabic text for drawing.

    The text is reshaped with ``arabic_reshaper`` and the result is passed
    through ``get_display`` from the bidi package.

    :param text: Arabic string to prepare
    :return: the reshaped, display-ordered string
    """
    return get_display(arabic_reshaper.reshape(text))

def _paste_clipped(frame, patch, x, y):
    """Copy ``patch`` into ``frame`` with its top-left corner at (x, y), dropping any part outside the frame."""
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = patch.shape[:2]
    top, left = max(y, 0), max(x, 0)
    bottom, right = min(y + patch_h, frame_h), min(x + patch_w, frame_w)
    if top >= bottom or left >= right:
        return
    frame[top:bottom, left:right] = patch[top - y:bottom - y, left - x:right - x]


def overlay_text_on_frame(frame, words):
    """
    Draw each word, followed by one line with all words joined, at the bottom of the frame.

    The word images are stacked vertically (10 px apart) and share a left edge
    chosen so that the widest one is centred; the joined line is centred on its
    own and placed last. Anything that does not fit inside the frame is clipped
    instead of raising. The previous version positioned the joined line below
    the bottom edge of the frame, which made the slice assignment fail, and it
    read a global ``formed_sentence`` that this cell never defines.

    Side effect kept from the original: when ``words`` is non-empty and a
    global ``formed_sentence`` exists and is a key of ``word_dictionary``, it
    is recorded in ``word_combinations``.

    :param frame: image array of shape (height, width, 3); modified in place
    :param words: list of strings to draw
    :return: the same ``frame`` object
    """
    if not words:
        return frame

    padding = 10

    # Render each line exactly once.
    word_images = [convert_text_to_image(word) for word in words]
    combined_image = convert_text_to_image(' '.join(words))

    # The global may not have been defined; treat that as "no sentence".
    sentence = globals().get("formed_sentence")
    if sentence in word_dictionary:
        word_combinations[sentence] = True

    # Reserve room for the joined line as well so the whole stack ends at the bottom edge.
    stack_height = sum(image.shape[0] + padding for image in word_images)
    stack_height += combined_image.shape[0] + padding
    y = frame.shape[0] - stack_height
    x = (frame.shape[1] - max(image.shape[1] for image in word_images)) // 2

    for image in word_images:
        _paste_clipped(frame, image, x, y)
        y += image.shape[0] + padding

    combined_x = (frame.shape[1] - combined_image.shape[1]) // 2
    _paste_clipped(frame, combined_image, combined_x, y)

    return frame

# Words collected so far; overlay_text_on_frame adds keys with the value True.
word_combinations = {}

def detect_objects(frame):
    """
    Run the YOLO model on a frame and return the detected boxes with their translated labels.

    The previous version read ``xyxy`` directly from the result object instead
    of from its ``boxes``, and translated the stringified numeric class index
    instead of the class name.

    :param frame: image array passed to the model
    :return: tuple ``(boxes, object_labels)``: a list of ``[x1, y1, x2, y2]``
             lists and the translated class names in the same order
    """
    results = yolo_model(frame)

    # The model returns one result per input image; only one image is passed in.
    result = results[0] if isinstance(results, (list, tuple)) else results

    detections = result.boxes
    boxes = detections.xyxy.tolist()

    # Map each class index to its name before looking up the translation.
    object_labels = [
        translate_to_arabic(result.names[int(class_index)])
        for class_index in detections.cls.tolist()
    ]

    return boxes, object_labels

def process_frame(frame):
    """
    Detect objects in a frame and overlay their translated labels on it.

    The joined labels are stored in the global ``formed_sentence``, which
    ``overlay_text_on_frame`` compares against ``word_dictionary``; previously
    the value was assigned to an unused local.

    :param frame: BGR image array from the capture; the overlay draws on it
    :return: the frame returned by ``overlay_text_on_frame``
    """
    global formed_sentence

    # Convert the frame from BGR to RGB
    # NOTE(review): Ultralytics documents NumPy inputs as BGR; confirm this
    # conversion is actually wanted before detection.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Detect objects in the frame (the boxes are not needed here)
    _boxes, object_labels = detect_objects(rgb_frame)
    detected_objects = list(object_labels)

    # Combine the detected words into a sentence
    formed_sentence = ' '.join(detected_objects)

    # Overlay the detected words on the original (BGR) frame
    return overlay_text_on_frame(frame, detected_objects)

# Main loop: read, process and show frames until the capture fails or 'q' is pressed.
while True:
    # Read a frame from the video capture
    ret, frame = cap.read()

    # ret is falsy when no frame could be read; stop in that case
    if not ret:
        break

    # Process the frame
    processed_frame = process_frame(frame)

    # Display the frame
    cv2.imshow('Frame', processed_frame)

    # Check for key press (waits 1 ms; only the low byte of the key code is compared)
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()







In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the detector from the trained weights file.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open video capture device 0.
cap = cv2.VideoCapture(0)

# Define the font properties
# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the functions defined in this cell.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32  # size passed to ImageFont.truetype
font_scale = 1
font_thickness = 2
# Shared state updated by detect_objects: the collected words (keys) and the
# text accumulated for the next word.
word_combinations = {}
formed_sentence = ""
# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary: label string -> Arabic text returned by
# translate_to_arabic (labels without an entry are returned unchanged).
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}



def translate_to_arabic(text):
    """
    Look up the Arabic text for a label in ``translation_dict``.

    :param text: label to translate
    :return: the mapped Arabic string, or ``text`` itself when it has no entry
    """
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` with the configured TrueType font into a small RGB image.

    The canvas is the measured text extent plus 10 pixels in each dimension.
    It is created as RGBA and converted to RGB before being returned, so the
    alpha component of ``background_color`` is discarded.

    :param text: string to draw
    :param background_color: RGBA fill used for the canvas
    :return: NumPy array of shape (height, width, 3)
    """
    font = ImageFont.truetype(font_path, font_size)

    # A throwaway 1x1 canvas is enough to measure the text.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # ImageDraw.textsize() was removed in Pillow 10. The right/bottom edge
        # of the bounding box anchored at the origin gives the equivalent extent.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    padding = 10
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Placement is unchanged: horizontally centred; the vertical formula
    # always evaluates to 0, so the padding ends up below the text.
    text_x = (image.width - text_width) // 2
    text_y = image.height - text_height - padding
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """
    Prepare Arabic text for drawing.

    The text is reshaped with ``arabic_reshaper`` and the result is passed
    through ``get_display`` from the bidi package.

    :param text: Arabic string to prepare
    :return: the reshaped, display-ordered string
    """
    return get_display(arabic_reshaper.reshape(text))

# Define the dictionary of combinable words
# overlay_text_on_frame draws the mapped value instead of a word that appears
# here as a key.
# NOTE(review): in this cell the words passed to overlay_text_on_frame are
# built from translated labels; confirm these Latin-letter keys can ever match.
combinable_words = {
    'ALEFBAAYAA': 'أبي',
    'ALEFBAA': 'أب',
   
}

def _paste_clipped(frame, patch, x, y):
    """Copy ``patch`` into ``frame`` with its top-left corner at (x, y), dropping any part outside the frame."""
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = patch.shape[:2]
    top, left = max(y, 0), max(x, 0)
    bottom, right = min(y + patch_h, frame_h), min(x + patch_w, frame_w)
    if top >= bottom or left >= right:
        return
    frame[top:bottom, left:right] = patch[top - y:bottom - y, left - x:right - x]


def overlay_text_on_frame(frame, words):
    """
    Stack the given words as text images at the bottom of the frame.

    A word that is a key of ``combinable_words`` is replaced by its mapped
    text. Each image is rendered once, the stack (10 px between images) ends at
    the bottom edge, and all images share a left edge chosen so that the widest
    drawn image is centred. Parts that fall outside the frame are clipped
    rather than raising, which the plain slice assignment did once the stack
    outgrew the frame.

    :param frame: image array of shape (height, width, 3); modified in place
    :param words: list of strings to draw
    :return: the same ``frame`` object (untouched when ``words`` is empty)
    """
    padding = 10

    images = [convert_text_to_image(combinable_words.get(word, word)) for word in words]
    if not images:
        return frame

    y = frame.shape[0] - sum(image.shape[0] + padding for image in images)
    # Centre on the images that are actually drawn, not on the unmapped words.
    x = (frame.shape[1] - max(image.shape[1] for image in images)) // 2

    for image in images:
        _paste_clipped(frame, image, x, y)
        y += image.shape[0] + padding

    return frame




def detect_objects(frame):
    """
    Run the YOLO model on one frame, update the collected words and show the annotated frame.

    For every detection whose rounded confidence is at least 0.5, the words
    collected so far are overlaid on a copy of the frame and the translated
    label is appended to the global ``formed_sentence``. As soon as that buffer
    holds two or more characters it is stored as a key of
    ``word_combinations`` and reset. The copy is then shown in the
    "Object Detection" window.

    The per-detection text image and box coordinates computed by the earlier
    version were never used and are no longer produced.

    :param frame: image array to analyse (left unmodified; a copy is drawn on)
    :return: None
    """
    global formed_sentence

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame.copy()

    for box in result.boxes:
        if round(box.conf[0].item(), 2) < 0.5:
            continue

        # .item() yields a float; cast it so the names lookup uses an integer index.
        class_id = result.names[int(box.cls[0].item())]
        translated_text = translate_to_arabic(class_id)

        frame_with_text = overlay_text_on_frame(frame_with_text, list(word_combinations))

        if translated_text != "":
            formed_sentence += translated_text
            if len(formed_sentence) >= 2:  # at least two characters collected
                word_combinations[formed_sentence] = True
                formed_sentence = ""  # Reset the formed sentence for the next combination

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

def process_frames():
    """
    Capture one camera frame, run detection on it and display it with the collected words.

    The frame is resized to 800x600 when needed, passed to ``detect_objects``
    and, if ``word_combinations`` is not empty, overlaid with its keys before
    being shown in the "Object Detection" window.

    :return: ``False`` when no frame could be read or 'q' was pressed,
             ``True`` otherwise
    """
    ret, frame = cap.read()
    if not ret:
        # No frame available; tell the caller to stop instead of failing on frame.shape.
        logging.warning("Could not read a frame from the camera; stopping.")
        return False

    target_size = (800, 600)  # (width, height), the order cv2.resize expects
    # frame.shape is (height, width, channels), the reverse of target_size.
    if frame.shape[:2] != target_size[::-1]:
        frame = cv2.resize(frame, target_size)

    # Perform object detection and modification on the frame
    detect_objects(frame)

    words = list(word_combinations)
    frame_with_text = overlay_text_on_frame(frame, words) if words else frame

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

    # Stop when 'q' is pressed
    return (cv2.waitKey(1) & 0xFF) != ord('q')
# Main loop: process one frame per iteration until process_frames() returns a falsy value.
while True:
    if not process_frames():
        break

# Release the video capture and close the window
cap.release()
cv2.destroyAllWindows()

# Combine the words from the dictionary
# (joins the keys of word_combinations with single spaces and prints them once)
combined_words = ' '.join(word_combinations.keys())
print("Combined Words:", combined_words)


In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the detector from the trained weights file.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open video capture device 0.
cap = cv2.VideoCapture(0)

# Define the font properties
# NOTE(review): font_face, thickness, font_scale and font_thickness are not
# referenced by the functions defined in this cell.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)  # fill colour used by convert_text_to_image
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32  # size passed to ImageFont.truetype
font_scale = 1
font_thickness = 2
# Shared state updated by detect_objects: the collected words (keys) and the
# text accumulated for the next word.
word_combinations = {}
formed_sentence = ""
# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary: label string -> Arabic text returned by
# translate_to_arabic (labels without an entry are returned unchanged).
translation_dict = {
    '0':'0',
    '1':'1', 
    '2':'2', 
    '3':'3', 
    '4':'4', 
    '5':'5', 
    '6':'6', 
    '7':'7', 
    '8':'8', 
    '9':'9', 
    'ALIF':'أ', 
    'AYN':'ع', 
    'Atman lak 7aya sa3eeda' :'أتمنى لك حياة سعيدة', 
    'BAA':'ب', 
    'DAD':'ض', 
    'DELL':'د', 
    'DHAA':'ظ', 
    'DHELL':'ذ', 
    'FAA':'ف', 
    'GHAYN':'غ', 
    'HA':'هـ', 
    'HAA':'ح', 
    'JEEM':'ج', 
    'KAAF':'ك', 
    'KHAA':'خ', 
    'LAAM':'ل', 
    'MEEM':'م', 
    'QAAF':'ق', 
    'RAA':'ر', 
    'SAD':'ص', 
    'SEEN':'س', 
    'SHEEN':'ش', 
    'TA':'ت', 
    'TAA':'ط', 
    'THA':'ث', 
    'WAW':'و', 
    'YA':'ي', 
    'ZAY':'ز', 
    'bad':'سيء', 
    'del':'حذف', 
    'eqtibas':'اقتباس', 
    'good':'جيد', 
    'law sama7t':'لو سمحت', 
    'merhaba':'مرحبا', 
    'nothing':'لا شيء', 
    'o7ebok':'أحبك', 
    'oraqebak':'أراقبك', 
    'space':'مسافة', 
    'you':'أنت', 
}

def translate_to_arabic(text):
    """
    Look up the Arabic text for a label in ``translation_dict``.

    :param text: label to translate
    :return: the mapped Arabic string, or ``text`` itself when it has no entry
    """
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """
    Render ``text`` with the configured TrueType font into a small RGB image.

    The canvas is the measured text extent plus 10 pixels in each dimension.
    It is created as RGBA and converted to RGB before being returned, so the
    alpha component of ``background_color`` is discarded.

    :param text: string to draw
    :param background_color: RGBA fill used for the canvas
    :return: NumPy array of shape (height, width, 3)
    """
    font = ImageFont.truetype(font_path, font_size)

    # A throwaway 1x1 canvas is enough to measure the text.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # ImageDraw.textsize() was removed in Pillow 10. The right/bottom edge
        # of the bounding box anchored at the origin gives the equivalent extent.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        text_width, text_height = probe.textsize(text, font=font)

    padding = 10
    image = Image.new('RGBA', (text_width + padding, text_height + padding), background_color)
    draw = ImageDraw.Draw(image)

    # Placement is unchanged: horizontally centred; the vertical formula
    # always evaluates to 0, so the padding ends up below the text.
    text_x = (image.width - text_width) // 2
    text_y = image.height - text_height - padding
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """
    Prepare Arabic text for drawing.

    The text is reshaped with ``arabic_reshaper`` and the result is passed
    through ``get_display`` from the bidi package.

    :param text: Arabic string to prepare
    :return: the reshaped, display-ordered string
    """
    return get_display(arabic_reshaper.reshape(text))

# Define the dictionary of combinable words
# overlay_text_on_frame draws the mapped value instead of a word that appears
# here as a key; each of the three single letters maps to the same word.
combinable_words = {
    'أ': 'أبي',
    'ب': 'أبي',
    'ي': 'أبي',
    # You can add more compound words here
}

def _paste_clipped(frame, patch, x, y):
    """Copy ``patch`` into ``frame`` with its top-left corner at (x, y), dropping any part outside the frame."""
    frame_h, frame_w = frame.shape[:2]
    patch_h, patch_w = patch.shape[:2]
    top, left = max(y, 0), max(x, 0)
    bottom, right = min(y + patch_h, frame_h), min(x + patch_w, frame_w)
    if top >= bottom or left >= right:
        return
    frame[top:bottom, left:right] = patch[top - y:bottom - y, left - x:right - x]


def overlay_text_on_frame(frame, words):
    """
    Stack the given words as text images at the bottom of the frame.

    A word that is a key of ``combinable_words`` is replaced by its mapped
    text. Each image is rendered once, the stack (10 px between images) ends at
    the bottom edge, and all images share a left edge chosen so that the widest
    drawn image is centred. Parts that fall outside the frame are clipped
    rather than raising, which the plain slice assignment did once the stack
    outgrew the frame.

    :param frame: image array of shape (height, width, 3); modified in place
    :param words: list of strings to draw
    :return: the same ``frame`` object (untouched when ``words`` is empty)
    """
    padding = 10

    images = [convert_text_to_image(combinable_words.get(word, word)) for word in words]
    if not images:
        return frame

    y = frame.shape[0] - sum(image.shape[0] + padding for image in images)
    # Centre on the images that are actually drawn, not on the unmapped words.
    x = (frame.shape[1] - max(image.shape[1] for image in images)) // 2

    for image in images:
        _paste_clipped(frame, image, x, y)
        y += image.shape[0] + padding

    return frame

def detect_objects(frame):
    """
    Run the YOLO model on one frame, update the collected words and show the annotated frame.

    For every detection whose rounded confidence is at least 0.5, the words
    collected so far are overlaid on a copy of the frame. If the translated
    label is non-empty and already a key of ``word_combinations``, it is
    appended to the global ``formed_sentence``; once that buffer holds two or
    more characters it is stored as a new key and reset. The copy is then
    shown in the "Object Detection" window.

    The per-detection text image and box coordinates computed by the earlier
    version were never used and are no longer produced.

    :param frame: image array to analyse (left unmodified; a copy is drawn on)
    :return: None
    """
    global formed_sentence

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame.copy()

    for box in result.boxes:
        if round(box.conf[0].item(), 2) < 0.5:
            continue

        # .item() yields a float; cast it so the names lookup uses an integer index.
        class_id = result.names[int(box.cls[0].item())]
        translated_text = translate_to_arabic(class_id)

        frame_with_text = overlay_text_on_frame(frame_with_text, list(word_combinations))

        # NOTE(review): word_combinations starts empty in this cell and is only
        # extended inside this branch, so the condition looks like it can never
        # become true; confirm which dictionary was meant.
        if translated_text and translated_text in word_combinations:
            formed_sentence += translated_text
            if len(formed_sentence) >= 2:
                word_combinations[formed_sentence] = True
                formed_sentence = ""

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

def process_frames():
    """
    Capture one camera frame, run detection on it and display it with the collected words.

    The frame is resized to 800x600 when needed, passed to ``detect_objects``
    and, if ``word_combinations`` is not empty, overlaid with its keys before
    being shown in the "Object Detection" window.

    :return: ``False`` when no frame could be read or 'q' was pressed,
             ``True`` otherwise
    """
    ret, frame = cap.read()
    if not ret:
        # No frame available; tell the caller to stop instead of failing on frame.shape.
        logging.warning("Could not read a frame from the camera; stopping.")
        return False

    target_size = (800, 600)  # (width, height), the order cv2.resize expects
    # frame.shape is (height, width, channels), the reverse of target_size.
    if frame.shape[:2] != target_size[::-1]:
        frame = cv2.resize(frame, target_size)

    # Perform object detection and modification on the frame
    detect_objects(frame)

    words = list(word_combinations)
    frame_with_text = overlay_text_on_frame(frame, words) if words else frame

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

    # Stop when 'q' is pressed
    return (cv2.waitKey(1) & 0xFF) != ord('q')

# Main loop: process one frame per iteration until process_frames() returns a falsy value.
while True:
    if not process_frames():
        break

# Release the video capture and close the window
cap.release()
cv2.destroyAllWindows()

# Combine the words from the dictionary
# (joins the keys of word_combinations with single spaces and prints them once)
combined_words = ' '.join(word_combinations.keys())
print("Combined Words:", combined_words)
    



In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the trained YOLO detection weights from a hard-coded absolute path.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open capture device index 0 as the frame source.
cap = cv2.VideoCapture(0)

# Define the font properties
# OpenCV text-drawing settings (font face, scale, thickness) plus the
# TrueType font path/size and fill colour used for rendering text.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32
font_scale = 1
font_thickness = 2
# Words collected so far: keys are the words, every value is True.
word_combinations = {}
# Buffer of translated text that is accumulated before being stored in word_combinations.
formed_sentence = ""
# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary
# Lookup table from a detected class label to the Arabic text to display.
translation_dict = {
    # Digit labels translate to themselves.
    **{str(digit): str(digit) for digit in range(10)},
    "ALIF": "أ",
    "AYN": "ع",
    "Atman lak 7aya sa3eeda": "أتمنى لك حياة سعيدة",
    "BAA": "ب",
    "DAD": "ض",
    "DELL": "د",
    "DHAA": "ظ",
    "DHELL": "ذ",
    "FAA": "ف",
    "GHAYN": "غ",
    "HA": "هـ",
    "HAA": "ح",
    "JEEM": "ج",
    "KAAF": "ك",
    "KHAA": "خ",
    "LAAM": "ل",
    "MEEM": "م",
    "QAAF": "ق",
    "RAA": "ر",
    "SAD": "ص",
    "SEEN": "س",
    "SHEEN": "ش",
    "TA": "ت",
    "TAA": "ط",
    "THA": "ث",
    "WAW": "و",
    "YA": "ي",
    "ZAY": "ز",
    "bad": "سيء",
    "del": "حذف",
    "eqtibas": "اقتباس",
    "good": "جيد",
    "law sama7t": "لو سمحت",
    "merhaba": "مرحبا",
    "nothing": "لا شيء",
    "o7ebok": "أحبك",
    "oraqebak": "أراقبك",
    "space": "مسافة",
    "you": "أنت",
}



def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*, or *text* itself when there is none."""
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """Render *text* with the configured TrueType font and return it as an RGB array.

    Args:
        text: String to draw (already reshaped / reordered by the caller).
        background_color: RGBA fill of the canvas before conversion to RGB.

    Returns:
        numpy.ndarray: the rendered image, sized to the text plus 10 px of padding.
    """
    # Load the specified font
    font = ImageFont.truetype(font_path, font_size)

    # A throw-away 1x1 canvas is enough to get a drawing context for measuring.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # ``textsize`` was removed in Pillow 10.  The right/bottom edges of the
        # bounding box measured from the origin include the glyph offset, which
        # is the extent ``textsize`` used to report.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        # Older Pillow releases without ``textbbox``.
        text_width, text_height = probe.textsize(text, font=font)

    # Add some padding to the text size
    padding = 10
    image_width = text_width + padding
    image_height = text_height + padding

    # Create the real canvas with the adjusted dimensions and background color
    image = Image.new('RGBA', (image_width, image_height), background_color)
    draw = ImageDraw.Draw(image)

    # Horizontally centred; vertically this evaluates to 0 because the canvas
    # is exactly ``text_height + padding`` tall, so the padding ends up below.
    text_x = (image.width - text_width) // 2
    text_y = image.height - text_height - padding

    # Draw the text on the image
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    # Drop the alpha channel and hand back a NumPy array
    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and pass the result through ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))

# Define the dictionary of combinable words
# NOTE(review): every value is the *string* 'True', and overlay_text_on_frame
# renders combinable_words[word] as the text to draw — confirm this is intended.
combinable_words = dict.fromkeys(('أبي', 'أب'), 'True')

def overlay_text_on_frame(frame, words):
    """Draw *words* side by side along the bottom of *frame* (modified in place).

    Words that are keys of ``combinable_words`` are replaced by the mapped
    value before rendering.  The row of text is centred horizontally; when it
    is wider than the frame it starts at the left edge and whatever does not
    fit is clipped instead of raising on a mismatched slice assignment.

    Args:
        frame: Image array to draw on.
        words: Iterable of strings to render.

    Returns:
        The same *frame* object, for convenience.
    """
    padding = 10

    # Render each word exactly once and reuse the result for layout and drawing.
    text_images = [convert_text_to_image(combinable_words.get(word, word)) for word in words]
    if not text_images:
        return frame

    frame_height, frame_width = frame.shape[:2]
    total_text_width = sum(image.shape[1] + padding for image in text_images)
    max_text_height = max(image.shape[0] for image in text_images)

    # Starting position: centred, but never outside the frame.
    x = max((frame_width - total_text_width) // 2, 0)
    y = max(frame_height - max_text_height - padding, 0)

    for image in text_images:
        if x >= frame_width:
            break  # nothing more fits on this row
        height = min(image.shape[0], frame_height - y)
        width = min(image.shape[1], frame_width - x)
        frame[y:y + height, x:x + width] = image[:height, :width]
        x += image.shape[1] + padding

    return frame






def detect_objects(frame):
    """Run YOLO on *frame*, accumulate confident detections and show a preview.

    Each detection with confidence >= 0.5 is translated and appended to the
    module-level ``formed_sentence``; once that buffer holds at least two
    characters it is stored as a key of ``word_combinations`` and reset.

    Args:
        frame: Image array handed to ``yolo_model.predict``.
    """
    global formed_sentence

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame.copy()

    for box in result.boxes:
        # Gate on confidence first so rejected boxes cost nothing further.
        if round(box.conf[0].item(), 2) < 0.5:
            continue

        class_name = result.names[int(box.cls[0].item())]
        translated_text = translate_to_arabic(class_name)

        # Draw the words collected before this detection onto the preview copy.
        frame_with_text = overlay_text_on_frame(frame_with_text, list(word_combinations.keys()))

        if translated_text != "":
            formed_sentence += translated_text
            if len(formed_sentence) >= 2:  # at least two characters collected
                word_combinations[formed_sentence] = True
                formed_sentence = ""  # start the next combination

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

def process_frames():
    """Grab one camera frame, run detection on it and show the annotated result.

    Returns:
        bool: ``False`` when no frame could be read or the user pressed ``q``
        (the caller should stop looping); ``True`` otherwise.
    """
    ret, frame = cap.read()
    # A failed read yields no usable frame; stop cleanly instead of crashing on
    # ``frame.shape`` below.
    if not ret or frame is None:
        logging.warning("Could not read a frame from the capture device; stopping.")
        return False

    # NumPy shape is (height, width) while cv2.resize takes (width, height),
    # so an 800x600 frame has shape[:2] == (600, 800).
    if frame.shape[:2] != (600, 800):
        frame = cv2.resize(frame, (800, 600))

    # Perform object detection and modification on the frame
    detect_objects(frame)

    if word_combinations:
        frame_with_text = overlay_text_on_frame(frame, list(word_combinations.keys()))
    else:
        frame_with_text = frame

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

    # Keep looping unless 'q' is pressed
    return cv2.waitKey(1) & 0xFF != ord('q')
# Run the capture loop until process_frames() asks to stop.  The try/finally
# guarantees the camera and the preview window are released even when a frame
# raises or the kernel is interrupted.
try:
    while process_frames():
        pass
finally:
    # Release the video capture and close the window
    cap.release()
    cv2.destroyAllWindows()

# Combine the words from the dictionary
combined_words = ' '.join(word_combinations.keys())
print("Combined Words:", combined_words)


In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the trained YOLO detection weights from a hard-coded absolute path.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open capture device index 0 as the frame source.
cap = cv2.VideoCapture(0)

# Define the font properties
# OpenCV text-drawing settings (font face, scale, thickness) plus the
# TrueType font path/size and a text colour.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32
font_scale = 1
font_thickness = 2
# Words collected so far: keys are the words, every value is True.
word_combinations = {}
# Buffer of translated text for the word currently being assembled.
formed_word = ""
time_threshold = 5  # Time threshold in seconds
# Wall-clock timestamp (time.time()) that the elapsed-time check compares against.
last_char_time = time.time()

# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary
# Lookup table from a detected class label to the Arabic text to display.
translation_dict = {
    # Digit labels translate to themselves.
    **{str(digit): str(digit) for digit in range(10)},
    "ALIF": "أ",
    "AYN": "ع",
    "Atman lak 7aya sa3eeda": "أتمنى لك حياة سعيدة",
    "BAA": "ب",
    "DAD": "ض",
    "DELL": "د",
    "DHAA": "ظ",
    "DHELL": "ذ",
    "FAA": "ف",
    "GHAYN": "غ",
    "HA": "هـ",
    "HAA": "ح",
    "JEEM": "ج",
    "KAAF": "ك",
    "KHAA": "خ",
    "LAAM": "ل",
    "MEEM": "م",
    "QAAF": "ق",
    "RAA": "ر",
    "SAD": "ص",
    "SEEN": "س",
    "SHEEN": "ش",
    "TA": "ت",
    "TAA": "ط",
    "THA": "ث",
    "WAW": "و",
    "YA": "ي",
    "ZAY": "ز",
    "bad": "سيء",
    "del": "حذف",
    "eqtibas": "اقتباس",
    "good": "جيد",
    "law sama7t": "لو سمحت",
    "merhaba": "مرحبا",
    "nothing": "لا شيء",
    "o7ebok": "أحبك",
    "oraqebak": "أراقبك",
    "space": "مسافة",
    "you": "أنت",
}

# Define the dictionary of combinable words
# Each combined word maps to the same letters separated by single spaces.
combinable_words = {
    "أبي": "أ ب ي",
    "أمي": "أ م ي",
    "أخي": "أ خ ي",
    "أختي": "أ خ ت ي",
    # Add more combined words here
}
def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*, or *text* itself when there is none."""
    return translation_dict.get(text, text)
    
def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and pass the result through ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))


def convert_text_to_image(text):
    """Render *text* as white-on-black and return it as an RGB array.

    The text is reshaped and reordered for display here, so callers pass the
    logical (unshaped) string.

    Args:
        text: String to render.

    Returns:
        numpy.ndarray: image cropped to the measured text extent.
    """
    bidi_text = get_display(arabic_reshaper.reshape(text))
    font = ImageFont.truetype(font_path, font_size)

    if hasattr(font, "getbbox"):
        # ``getsize`` was removed in Pillow 10.  The right/bottom edges of the
        # bounding box include the glyph offset, which is the extent
        # ``getsize`` used to report.
        _, _, right, bottom = font.getbbox(bidi_text)
        text_width, text_height = int(right), int(bottom)
    else:
        # Older Pillow releases without ``getbbox``.
        text_width, text_height = font.getsize(bidi_text)

    image = Image.new('RGB', (text_width, text_height), color=(0, 0, 0))
    ImageDraw.Draw(image).text((0, 0), bidi_text, font=font, fill=(255, 255, 255))
    return np.array(image)

def overlay_text_on_frame(frame, words):
    """Draw the combinable entries of *words* along the bottom of *frame* (in place).

    Only words that are keys of ``combinable_words`` are drawn; the text that
    is rendered is the mapped value.  The row is centred horizontally; when it
    is wider than the frame it starts at the left edge and whatever does not
    fit is clipped instead of raising on a mismatched slice assignment.

    Args:
        frame: Image array to draw on.
        words: Iterable of candidate words.

    Returns:
        The same *frame* object, for convenience.
    """
    padding = 10

    # Render each relevant word exactly once and reuse it for layout and drawing.
    text_images = [
        convert_text_to_image(combinable_words[word])
        for word in words
        if word in combinable_words
    ]
    if not text_images:
        return frame

    frame_height, frame_width = frame.shape[:2]
    total_text_width = sum(image.shape[1] + padding for image in text_images)
    max_text_height = max(image.shape[0] for image in text_images)

    # Starting position: centred, but never outside the frame.
    x = max((frame_width - total_text_width) // 2, 0)
    y = max(frame_height - max_text_height - padding, 0)

    for image in text_images:
        if x >= frame_width:
            break  # nothing more fits on this row
        height = min(image.shape[0], frame_height - y)
        width = min(image.shape[1], frame_width - x)
        frame[y:y + height, x:x + width] = image[:height, :width]
        x += image.shape[1] + padding

    return frame

def detect_objects(frame):
    """Run YOLO on *frame* and fold confident detections into ``formed_word``.

    Every detection with confidence above 0.5 appends its translation to the
    module-level ``formed_word``.  When more than ``time_threshold`` seconds
    have passed since the previous confident detection, the buffer is stored
    as a key of ``word_combinations`` (mapped through ``combinable_words``
    when it is a key there) and reset.

    Args:
        frame: Image array handed to ``yolo_model.predict``.
    """
    global formed_word, last_char_time

    # One inference per frame is enough; the earlier extra call on a PIL copy
    # had its result overwritten immediately.
    result = yolo_model.predict(frame)[0]

    for box in result.boxes:
        # Gate on confidence first so rejected boxes cost nothing further.
        if round(box.conf[0].item(), 2) <= 0.5:
            continue

        class_name = result.names[int(box.cls[0].item())]
        formed_word += translate_to_arabic(class_name)

        now = time.time()
        # Check if enough time has passed since the last character
        if now - last_char_time > time_threshold:
            # NOTE(review): the mapped *value* is stored here, while
            # overlay_text_on_frame only draws words that are *keys* of
            # combinable_words — confirm which side is intended.
            word_combinations[combinable_words.get(formed_word, formed_word)] = True
            formed_word = ""

        last_char_time = now


def process_frames():
    """Handle one camera frame: read, resize to 800x600, detect, display.

    Returns:
        bool: ``False`` when the read failed or ``q`` was pressed, else ``True``.
    """
    ok, frame = cap.read()
    if not ok:
        return False

    frame = cv2.resize(frame, (800, 600))
    detect_objects(frame)

    # Overlay the collected words only when there is at least one.
    if word_combinations:
        shown = overlay_text_on_frame(frame, list(word_combinations.keys()))
    else:
        shown = frame
    cv2.imshow('Frame', shown)

    return (cv2.waitKey(1) & 0xFF) != ord('q')


# Run the capture loop until process_frames() asks to stop.  The try/finally
# guarantees the camera and the preview window are released even when a frame
# raises or the kernel is interrupted.
try:
    while process_frames():
        pass
finally:
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import logging
import time
import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# Load the trained YOLO detection weights from a hard-coded absolute path.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")  # Replace with the path to your YOLO model weights

# Open capture device index 0 as the frame source.
cap = cv2.VideoCapture(0)

# Define the font properties
# OpenCV text-drawing settings (font face, scale, thickness) plus the
# TrueType font path/size and fill colour used for rendering text.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"  # Replace with the path to your Arabic font file
font_size = 32
font_scale = 1
font_thickness = 2
# Words collected so far: keys are the words, every value is True.
word_combinations = {}
# Buffer of translated text that is accumulated before being stored in word_combinations.
formed_sentence = ""
# Set up logging
logging.basicConfig(level=logging.INFO)

# Translation dictionary
# Lookup table from a detected class label to the Arabic text to display.
translation_dict = {
    # Digit labels translate to themselves.
    **{str(digit): str(digit) for digit in range(10)},
    "ALIF": "أ",
    "AYN": "ع",
    "Atman lak 7aya sa3eeda": "أتمنى لك حياة سعيدة",
    "BAA": "ب",
    "DAD": "ض",
    "DELL": "د",
    "DHAA": "ظ",
    "DHELL": "ذ",
    "FAA": "ف",
    "GHAYN": "غ",
    "HA": "هـ",
    "HAA": "ح",
    "JEEM": "ج",
    "KAAF": "ك",
    "KHAA": "خ",
    "LAAM": "ل",
    "MEEM": "م",
    "QAAF": "ق",
    "RAA": "ر",
    "SAD": "ص",
    "SEEN": "س",
    "SHEEN": "ش",
    "TA": "ت",
    "TAA": "ط",
    "THA": "ث",
    "WAW": "و",
    "YA": "ي",
    "ZAY": "ز",
    "bad": "سيء",
    "del": "حذف",
    "eqtibas": "اقتباس",
    "good": "جيد",
    "law sama7t": "لو سمحت",
    "merhaba": "مرحبا",
    "nothing": "لا شيء",
    "o7ebok": "أحبك",
    "oraqebak": "أراقبك",
    "space": "مسافة",
    "you": "أنت",
}

def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*, or *text* itself when there is none."""
    return translation_dict.get(text, text)

def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """Render *text* with the configured TrueType font and return it as an RGB array.

    Args:
        text: String to draw (already reshaped / reordered by the caller).
        background_color: RGBA fill of the canvas before conversion to RGB.

    Returns:
        numpy.ndarray: the rendered image, sized to the text plus 10 px of padding.
    """
    # Load the specified font
    font = ImageFont.truetype(font_path, font_size)

    # A throw-away 1x1 canvas is enough to get a drawing context for measuring.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # ``textsize`` was removed in Pillow 10.  The right/bottom edges of the
        # bounding box measured from the origin include the glyph offset, which
        # is the extent ``textsize`` used to report.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        # Older Pillow releases without ``textbbox``.
        text_width, text_height = probe.textsize(text, font=font)

    # Add some padding to the text size
    padding = 10
    image_width = text_width + padding
    image_height = text_height + padding

    # Create the real canvas with the adjusted dimensions and background color
    image = Image.new('RGBA', (image_width, image_height), background_color)
    draw = ImageDraw.Draw(image)

    # Horizontally centred; vertically this evaluates to 0 because the canvas
    # is exactly ``text_height + padding`` tall, so the padding ends up below.
    text_x = (image.width - text_width) // 2
    text_y = image.height - text_height - padding

    # Draw the text on the image
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    # Drop the alpha channel and hand back a NumPy array
    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and pass the result through ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))

# Define the dictionary of combinable words
# Concatenated label names mapped to the Arabic word they spell.
# NOTE(review): the keys use 'ALEF'/'YAA' while translation_dict uses
# 'ALIF'/'YA' — confirm which spelling the detector labels actually use.
combinable_words = dict(
    ALEFBAAYAA="أبي",
    ALEFBAA="أب",
)

def overlay_text_on_frame(frame, words):
    """Draw *words* left to right in a strip along the bottom of *frame* (in place).

    Words that are keys of ``combinable_words`` are replaced by the mapped
    value before rendering.  Every rendered word is resized to the height of
    the tallest one and padded left/right; drawing stops at the first word
    that would not fit inside the frame width.

    Args:
        frame: Image array to draw on (height x width x channels).
        words: Iterable of strings to render.

    Returns:
        The same *frame* object, for convenience.
    """
    padding = 10

    # Render each word exactly once and reuse the result for layout and drawing.
    text_images = [convert_text_to_image(combinable_words.get(word, word)) for word in words]
    text_height = max((image.shape[0] for image in text_images), default=0)
    if text_height == 0:
        return frame

    frame_height, frame_width = frame.shape[:2]
    # The strip occupies the last ``text_height`` rows; slicing with
    # ``frame[text_height:]`` instead selects ``frame_height - text_height``
    # rows and makes the assignment below fail for ordinary frame sizes.
    text_height = min(text_height, frame_height)
    top = frame_height - text_height

    x = 0
    for image in text_images:
        image = cv2.resize(image, (image.shape[1], text_height))  # common strip height
        image = np.pad(image, ((0, 0), (padding, padding), (0, 0)), constant_values=0)
        width = image.shape[1]

        if x + width > frame_width:
            break

        # The glyphs are already rasterised with the TrueType font above;
        # cv2.putText is deliberately not applied on top because its Hershey
        # fonts cannot draw Arabic and would overwrite the rendered text.
        frame[top:, x:x + width] = image
        x += width

    return frame


def detect_objects(frame):
    """Run YOLO on *frame*, accumulate confident detections and show a preview.

    Each detection with confidence >= 0.5 is translated and appended to the
    module-level ``formed_sentence``.  Once the buffer holds at least two
    characters, it is stored in ``word_combinations`` when it equals one of
    the ``combinable_words`` values; otherwise only the latest translation is
    stored.  The buffer is reset in both cases.

    Args:
        frame: Image array handed to ``yolo_model.predict``.
    """
    global formed_sentence

    result = yolo_model.predict(frame)[0]
    frame_with_text = frame.copy()

    for box in result.boxes:
        # Gate on confidence first so rejected boxes cost nothing further.
        if round(box.conf[0].item(), 2) < 0.5:
            continue

        class_name = result.names[int(box.cls[0].item())]
        translated_text = translate_to_arabic(class_name)

        # Draw the words collected before this detection onto the preview copy.
        frame_with_text = overlay_text_on_frame(frame_with_text, list(word_combinations.keys()))

        if translated_text != "":
            formed_sentence += translated_text
            if len(formed_sentence) >= 2:  # at least two characters collected
                if formed_sentence in combinable_words.values():
                    word_combinations[formed_sentence] = True
                else:
                    word_combinations[translated_text] = True
                formed_sentence = ""  # start the next combination

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

def process_frames():
    """Grab one camera frame, run detection on it and show the annotated result.

    Returns:
        bool: ``False`` when no frame could be read or the user pressed ``q``
        (the caller should stop looping); ``True`` otherwise.
    """
    ret, frame = cap.read()
    # A failed read yields no usable frame; stop cleanly instead of crashing on
    # ``frame.shape`` below.
    if not ret or frame is None:
        logging.warning("Could not read a frame from the capture device; stopping.")
        return False

    # NumPy shape is (height, width) while cv2.resize takes (width, height),
    # so an 800x600 frame has shape[:2] == (600, 800).
    if frame.shape[:2] != (600, 800):
        frame = cv2.resize(frame, (800, 600))

    # Perform object detection and modification on the frame
    detect_objects(frame)

    if word_combinations:
        frame_with_text = overlay_text_on_frame(frame, list(word_combinations.keys()))
    else:
        frame_with_text = frame

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)

    # Keep looping unless 'q' is pressed
    return cv2.waitKey(1) & 0xFF != ord('q')


# Run the capture loop until process_frames() asks to stop.  The try/finally
# guarantees the camera and the preview window are released even when a frame
# raises or the kernel is interrupted.
try:
    while process_frames():
        pass
finally:
    # Release the video capture and close the window
    cap.release()
    cv2.destroyAllWindows()

# Combine the words from the dictionary
combined_words = ' '.join(word_combinations.keys())
print("Combined Words:", combined_words)




In [None]:
import logging

import arabic_reshaper
import cv2
from bidi.algorithm import get_display
from ultralytics import YOLO

import numpy as np
from PIL import Image, ImageDraw, ImageFont


# Load the trained YOLO detection weights from a hard-coded absolute path.
yolo_model = YOLO("/Users/mernaziad/Desktop/final_train/runs/detect/train/weights/best.pt")
# Open capture device index 0 as the frame source.
cap = cv2.VideoCapture(0)

# OpenCV text-drawing settings (font face, colour, thickness) and the
# TrueType font file used for rendering text.
font_face = cv2.FONT_HERSHEY_SIMPLEX
font_color = (255, 255, 255)
thickness = 2
font_path = "/Users/mernaziad/PycharmProjects/GraduationProject/arabic_font.ttf"
font_size = 32  # Adjust the font size as needed
# Set the font properties
font_scale = 1
font_thickness = 2

# Set up logging
logging.basicConfig(level=logging.INFO)

# Lookup table from a detected class label to the Arabic text to display.
translation_dict = {
    "ALIF": "أ",
    "AYN": "ع",
    "Atman lak 7aya sa3eeda": "أتمنى لك حياة سعيدة",
    "BAA": "ب",
    "DAD": "ض",
    "DELL": "د",
    "DHAA": "ظ",
    "DHELL": "ذ",
    "FAA": "ف",
    "GHAYN": "غ",
    "HA": "هـ",
    "HAA": "ح",
    "JEEM": "ج",
    "KAAF": "ك",
    "KHAA": "خ",
    "LAAM": "ل",
    "MEEM": "م",
    "QAAF": "ق",
    "RAA": "ر",
    "SAD": "ص",
    "SEEN": "س",
    "SHEEN": "ش",
    "TA": "ت",
    "TAA": "ط",
    "THA": "ث",
    "WAW": "و",
    "YA": "ي",
    "ZAY": "ز",
    "bad": "سيء",
    "del": "حذف",
    "eqtibas": "اقتباس",
    "good": "جيد",
    "law sama7t": "لو سمحت",
    "merhaba": "مرحبا",
    "nothing": "لا شيء",
    "o7ebok": "أحبك",
    "oraqebak": "أراقبك",
    "space": "مسافة",
    "you": "أنت",
}
 


def translate_to_arabic(text):
    """Return the ``translation_dict`` entry for *text*, or *text* itself when there is none."""
    # Labels missing from the dictionary fall back to the original text.
    return translation_dict.get(text, text)


def convert_text_to_image(text, background_color=(0, 0, 0, 0)):
    """Render *text* with the configured TrueType font and return it as an RGB array.

    Args:
        text: String to draw (already reshaped / reordered by the caller).
        background_color: RGBA fill of the canvas before conversion to RGB.

    Returns:
        numpy.ndarray: the rendered image, sized to the text plus 10 px of padding.
    """
    # Load the specified font
    font = ImageFont.truetype(font_path, font_size)

    # A throw-away 1x1 canvas is enough to get a drawing context for measuring.
    probe = ImageDraw.Draw(Image.new('RGBA', (1, 1), background_color))
    if hasattr(probe, "textbbox"):
        # ``textsize`` was removed in Pillow 10.  The right/bottom edges of the
        # bounding box measured from the origin include the glyph offset, which
        # is the extent ``textsize`` used to report.
        _, _, right, bottom = probe.textbbox((0, 0), text, font=font)
        text_width, text_height = int(right), int(bottom)
    else:
        # Older Pillow releases without ``textbbox``.
        text_width, text_height = probe.textsize(text, font=font)

    # Add some padding to the text size
    padding = 10
    image_width = text_width + padding
    image_height = text_height + padding

    # Create the real canvas with the adjusted dimensions and background color
    image = Image.new('RGBA', (image_width, image_height), background_color)
    draw = ImageDraw.Draw(image)

    # Horizontally centred; vertically this evaluates to 0 because the canvas
    # is exactly ``text_height + padding`` tall, so the padding ends up below.
    text_x = (image.width - text_width) // 2
    text_y = image.height - text_height - padding

    # Draw the text on the image
    draw.text((text_x, text_y), text, font=font, fill=font_color)

    # Drop the alpha channel and hand back a NumPy array
    return np.array(image.convert("RGB"))

def get_arabic_text(text):
    """Reshape *text* with ``arabic_reshaper`` and pass the result through ``get_display``."""
    return get_display(arabic_reshaper.reshape(text))


def overlay_text_on_frame(frame, text_image):
    """Paste *text_image* near the bottom centre of *frame* (modified in place).

    The image is placed 10 px above the bottom edge and centred horizontally.
    When it does not fit, the position is clamped to the top/left edge and the
    image is cropped to the frame, instead of raising on a mismatched slice
    assignment.

    Args:
        frame: Destination image array (height x width [x channels]).
        text_image: Source image array with the same number of channels.

    Returns:
        The same *frame* object, for convenience.
    """
    padding = 10
    frame_height, frame_width = frame.shape[:2]
    image_height, image_width = text_image.shape[:2]

    # Clamp so a large image or a short frame never produces a negative index.
    y = max(frame_height - image_height - padding, 0)
    x = max((frame_width - image_width) // 2, 0)

    # Crop to whatever part of the image actually fits.
    height = min(image_height, frame_height - y)
    width = min(image_width, frame_width - x)
    frame[y:y + height, x:x + width] = text_image[:height, :width]

    return frame

def detect_objects(frame):
    """Run YOLO on *frame*, overlay the text of confident detections and show it.

    Every detection is logged.  Detections with confidence below 0.5 are
    skipped; the remaining boxes are still processed and the frame is always
    displayed.

    Args:
        frame: Image array handed to ``yolo_model.predict``; drawn on in place.
    """
    result = yolo_model.predict(frame)[0]
    frame_with_text = frame

    for box in result.boxes:
        cords = [round(value) for value in box.xyxy[0].tolist()]
        class_id = result.names[int(box.cls[0].item())]
        expected_text = get_arabic_text(translate_to_arabic(class_id))
        conf = round(box.conf[0].item(), 2)

        logging.info("Object type:%s %s", expected_text, class_id)
        logging.info("Coordinates: %s", cords)
        logging.info("Probability: %s", conf)

        # Skip only this box: returning here would drop the remaining
        # detections and leave the window without an updated frame.
        if conf < 0.5:
            continue

        text_image = convert_text_to_image(expected_text)
        frame_with_text = overlay_text_on_frame(frame, text_image)

    # Display the modified frame
    cv2.imshow("Object Detection", frame_with_text)


def process_frames():
    """Grab one camera frame, bring it to 800x600 and run detection on it.

    Returns:
        bool: ``False`` when no frame could be read, ``True`` otherwise.
    """
    ret, frame = cap.read()
    # A failed read yields no usable frame; skip it instead of crashing on
    # ``frame.shape`` below.
    if not ret or frame is None:
        logging.warning("Could not read a frame from the capture device.")
        return False

    # NumPy shape is (height, width) while cv2.resize takes (width, height),
    # so an 800x600 frame has shape[:2] == (600, 800).
    if frame.shape[:2] != (600, 800):
        frame = cv2.resize(frame, (800, 600))

    # Perform object detection and modification on the frame
    detect_objects(frame)
    return True


# Run the capture loop until 'q' is pressed.  A return value of exactly False
# from process_frames() also ends the loop; any other value (including None)
# keeps it running.  The try/finally guarantees the camera and the preview
# window are released even when a frame raises or the kernel is interrupted.
try:
    while True:
        if process_frames() is False:
            break
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close the window
    cap.release()
    cv2.destroyAllWindows()

