## CODE TO TRAIN THE OBJECT DETECTION MODEL

In [None]:
!pip install ultralytics

In [2]:

from ultralytics import YOLO


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
from google.colab import drive
drive.mount('/content/drive')



Mounted at /content/drive


In [4]:
!ls "/content/drive/MyDrive/cs478ModelTraining"

 300_epochs_model_results	   failing_image_OD_training
 300epochs_predictions		   failing_image_retrained_results
 collection_2			   final_predictions
'Collection 3'			   gradescope_final_results
'CS 473 ERD Paper.gdoc'		   gradescope_final_results_retrained
'CS 473 ERD Paper.pdf'		   gradescope_predictions_first_model
 CS473_ObjectDetection_OCR.ipynb   gradescope_predictions_retrained_2nd_model
 CS473_ObjectDetection.py	   TA_graded_final
 CS473_Stage2.ipynb		   test2.v1i.yolov8
'CS473 Stage 2 Presentation.mp4'


In [5]:
project_dir = '/drive/MyDrive/cs478ModelTraining'
def runModel(runName, predictionName,  epochs, batch_size, early_stop):

  # TRAINING THE MODEL

  model = YOLO('yolov8n.yaml')
  model.train(data='/content/drive/MyDrive/cs478ModelTraining/test2.v1i.yolov8/data.yaml', epochs = epochs,
            imgsz=640,
            project = '/content/drive/MyDrive/cs478ModelTraining',
            name = runName,
            resume=True,
            exist_ok=True,
            patience=early_stop)
  model.export(format='onnx')

  # RUNNING MODEL ON TEST

  results = model.predict(
    source='drive/MyDrive/cs478ModelTraining/test2.v1i.yolov8/test/images',
    save=True,
    save_conf=False,
    save_txt=True,
    show_conf = True,
    show_labels = True,
    show_boxes = True,
    exist_ok = True,
    name=predictionName,
  )


  # GETTING METRICS

  # Validate the model and print metrics
  metrics = model.val()

  print(dir(metrics))
  # Assuming `maps` holds mAP values for each class
  print("mAP for each class:", metrics.maps)
  print(dir(metrics.box))
  print(f"mAP@0.5: {metrics.box.map50}")
  print(f"mAP@0.5:0.95: {metrics.box.map}")
  print(f"Precision: {metrics.box.p}")
  print(f"Recall: {metrics.box.r}")


In [6]:
# Final function
# input: path to an ERD image
# output: bounding box on image + coordinates
def predictOneImage(image_path, model, predictionName):

  results = model.predict(
    source = image_path,
    imgsz = 640,
    save=True,
    save_txt=True,
    show_conf = True,
    show_labels = True,
    show_boxes = True,
    name=predictionName,
  )

In [7]:
# TRAINING CELL
# runModel('300_epochs_model_results', '300_epochs_predictions',  300, 0, 15)

In [8]:
# TRAINING CELL
# model = YOLO("/content/drive/MyDrive/cs478ModelTraining/300_epochs_model_results/weights/last.pt")
# model.train(resume=True)
# model.export(format='onnx')

In [None]:
# TRAINING CELL
# model = YOLO('yolov8n.yaml')
# model.train(data='/content/drive/MyDrive/cs478ModelTraining/failing_image_OD_training/data.yaml', epochs = 300,
#             imgsz=640,
#             project = '/content/drive/MyDrive/cs478ModelTraining',
#             name = "failing_image_retrained_results",
#             resume=True,
#             exist_ok=True,
#             patience=15)
# model.export(format='onnx')

In [10]:
# TRAINING CELL
# model = YOLO("/content/drive/MyDrive/cs478ModelTraining/failing_image_retrained_results/weights/last.pt")
# model.train(resume=True, patience=15)
# model.export(format='onnx')


## LOADING FINAL OBJECT DETECTION MODEL WITH BEST WEIGHTS & GETTING RESULTS

In [11]:
model = YOLO("/content/drive/MyDrive/cs478ModelTraining/failing_image_retrained_results/weights/best.pt")


In [None]:
# RUNNING MODEL ON TEST


results = model.predict(
  source='drive/MyDrive/cs478ModelTraining/test2.v1i.yolov8/test/images',
  save=True,
  save_conf=False,
  save_txt=True,
  show_conf = True,
  show_labels = True,
  show_boxes = True,
  exist_ok = True,
  name="300epochs_predictions",
)


# GETTING METRICS

# Validate the model and print metrics
metrics = model.val()

print(dir(metrics))
# Assuming `maps` holds mAP values for each class
print("mAP for each class:", metrics.maps)
print(dir(metrics.box))
print(f"mAP@0.5: {metrics.box.map50}")
print(f"mAP@0.5:0.95: {metrics.box.map}")
print(f"Precision: {metrics.box.p}")
print(f"Recall: {metrics.box.r}")

## OCR TO EXTRACT TEXT FROM DIAGRAMS

In [None]:
pip install git+https://github.com/JaidedAI/EasyOCR.git

In [None]:
!pip install easyocr


In [15]:
import os
import cv2
import easyocr

In [None]:
reader = easyocr.Reader(['en'])

In [18]:
# Function to get image dimensions
def get_image_dimensions(image_path):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Unable to load the image at {image_path}")
        return None
    height, width = image.shape[:2]
    return width, height

In [19]:
# Function to read YOLO prediction files (extracts class ID and bounding box)
def read_yolo_predictions(file_path):
    predictions = []
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split()
            class_id = int(parts[0])
            x_center, y_center, width, height = map(float, parts[1:])
            predictions.append({
                'class_id': class_id,
                'x_center': x_center,
                'y_center': y_center,
                'width': width,
                'height': height
            })
    return predictions

In [20]:
# Function to crop image using YOLO bounding box
def crop_image_using_yolo(image, x_center, y_center, width, height, img_width, img_height):
  x1 = int((x_center - width / 2) * img_width)
  y1 = int((y_center - height / 2) * img_height)
  x2 = int((x_center + width / 2) * img_width)
  y2 = int((y_center + height / 2) * img_height)

  x1, y1 = max(0, x1), max(0, y1)
  x2, y2 = min(img_width, x2), min(img_height, y2)

  cropped_image = image[y1:y2, x1:x2]

  return cropped_image

In [21]:
# Function to apply OCR on the cropped image
def ocr_on_cropped_image(cropped_image):
  ocr_result = reader.readtext(cropped_image, min_size=5, contrast_ths=0.7, adjust_contrast=0.5, add_margin=0.2)
  text = [res[1] for res in ocr_result]
  return text

In [22]:
# Load the trained YOLOv8 model with our weights
model = YOLO('/content/drive/MyDrive/cs478ModelTraining/failing_image_retrained_results/weights/best.pt')


In [23]:
results = model.predict(
    source='drive/MyDrive/cs478ModelTraining/collection_2',
    save=True,
    save_conf=False,
    save_txt=True,
    show_conf = True,
    show_labels = True,
    show_boxes = True,
    exist_ok = True,
    project="drive/MyDrive/cs478ModelTraining/",
    name="gradescope_predictions_retrained_2nd_model",
)



image 1/8 /content/drive/MyDrive/cs478ModelTraining/collection_2/1.png: 224x640 3 entitys, 2 rels, 7 rel_attrs, 87.6ms
image 2/8 /content/drive/MyDrive/cs478ModelTraining/collection_2/2.png: 576x640 3 entitys, 5 rels, 1 weak_entity, 203.1ms
image 3/8 /content/drive/MyDrive/cs478ModelTraining/collection_2/3.png: 544x640 4 entitys, 1 ident_rel, 3 rels, 10 rel_attrs, 1 weak_entity, 176.1ms
image 4/8 /content/drive/MyDrive/cs478ModelTraining/collection_2/4.png: 640x640 3 entitys, 6 ident_rels, 3 rels, 3 rel_attrs, 3 weak_entitys, 196.6ms
image 5/8 /content/drive/MyDrive/cs478ModelTraining/collection_2/5.png: 640x640 3 entitys, 2 ident_rels, 3 rels, 3 weak_entitys, 196.4ms
image 6/8 /content/drive/MyDrive/cs478ModelTraining/collection_2/6.png: 320x640 3 entitys, 5 rels, 2 weak_entitys, 118.3ms
image 7/8 /content/drive/MyDrive/cs478ModelTraining/collection_2/7.png: 640x640 4 entitys, 1 ident_rel, 5 rels, 1 weak_entity, 319.0ms
image 8/8 /content/drive/MyDrive/cs478ModelTraining/collection_2

In [24]:
import os

# THE FILE PATH TO FOLDER OF OUTPUT OF YOLOV8 MODEL THAT IS LOADED in ABOVE CELL. FORMAT IS OBJECT NUMBER DETECTED, then the coordinates of the object
prediction_folder = '/content/drive/MyDrive/cs478ModelTraining/gradescope_predictions_retrained_2nd_model/labels/'
# THE FILE PATH TO FOLDER OF IMAGES YOU WANT TO RUN OCR ON
image_folder = '/content/drive/MyDrive/cs478ModelTraining/collection_2/'
# WHERE TO STORE RESULTS FROM OCR
results_folder = '/content/drive/MyDrive/cs478ModelTraining/TA_graded_final'

if os.path.exists(prediction_folder):
    print("Folder exists!")
else:
    print("Folder does not exist!")

if os.path.exists(image_folder):
    print("Folder exists!")
else:
    print("Folder does not exist!")

os.makedirs(results_folder, exist_ok=True)
# Dictionary mapping Class IDs to labels
class_id_to_label = {
    0: 'entity',
    1: 'ident_rel',
    2: 'rel',
    3: 'rel_attr',
    4: 'weak_entity'
}

Folder exists!
Folder exists!


# Going through all test images and combining results from Object Detection and OCR to get all the different entities, relationships, and attributes AND the text in the diagrams

In [25]:
prediction_files = [f for f in os.listdir(prediction_folder) if f.endswith('.txt')]
j = 0
for prediction_file in prediction_files:
    prediction_file_path = os.path.join(prediction_folder, prediction_file)
    print(prediction_folder)
    print(prediction_file)
    print(prediction_file_path)
    image_file = prediction_file.replace('.txt', '.png')
    image_path1 = os.path.join(image_folder, image_file)

    if os.path.exists(image_path1):
        image_dimensions = get_image_dimensions(image_path1)
        if image_dimensions is None:
            continue
        image_width, image_height = image_dimensions

        predictions = read_yolo_predictions(prediction_file_path)

        image = cv2.imread(image_path1)

        ocr_results = []

        i = 0
        # Iterate over all predictions and crop based on bounding boxes
        print(predictions)
        for prediction in predictions:
            x_center = prediction['x_center']
            y_center = prediction['y_center']
            width = prediction['width']
            height = prediction['height']

            # Step 1: Crop the image based on YOLO bounding boxes
            cropped_image1 = crop_image_using_yolo(image, x_center, y_center, width, height, image_width, image_height)

            # Step 2: Apply OCR on the cropped image
            text = ocr_on_cropped_image(cropped_image1)

            ocr_results.append({
                'class_id': prediction['class_id'],
                'coordinates': (x_center, y_center, width, height),
                'text': text
            })

            i = i + 1

        j = j + 1

        # 1. Create a new file with image_file name replaced with .txt at end and save it into results_folder
        result_file_name = image_file.replace('.png', '.txt')
        result_file_path = os.path.join(results_folder, result_file_name)
        print(ocr_results)
        with open(result_file_path, 'w') as file:
          print(f"OCR results for {image_file}:")
          for result in ocr_results:
              # get the class label as text from the map
              class_label = class_id_to_label.get(result['class_id'], 'unknown')

              # POST PROCESSING STEP 1 --> remove leading and trailing white spaces
              result['text'] = [text.strip() for text in result['text']]

              # POST PROCESSING STEP 2 --> if a rel, rel_attr or ident_rel got split into 2 words, combine into one
              if ((class_label == "rel" or class_label == "rel_attr" or class_label == "ident_rel") and (len(result['text']) > 1)):
                  final_string = result['text'][0]
                  for text in result['text'][1:]:
                    # if previous word ends with - or _ OR current word starts with those
                    if (final_string and text and (final_string[-1] == '_' or final_string[-1] == '-' or text[0] == '_' or text[0] == '-')):
                      # if previous ends with _ or - and current starts with that
                      if ((final_string[-1] == '_' and text[0] == '_') or (final_string[-1] == '-' and text[0] == '-')):
                        # don't double count the - or _
                        final_string = final_string + text[1:]
                      else:
                        final_string = final_string + text
                    else:
                      final_string = final_string + " " + text

                  result['text'] = [final_string]

              # format the final text
              formatted_text = f"['{class_label}', " + ", ".join(f"'{text}'" for text in result['text']) + "]\n"
              # write the final results to the file
              file.write(formatted_text)

              print(f"Class ID: {result['class_id']}, Coordinates: {result['coordinates']}, Extracted Text: {result['text']}")
          print("-" * 40)
    else:
        print(f"Image not found for {prediction_file}")

['1.txt', '2.txt', '3.txt', '4.txt', '5.txt', '6.txt', '7.txt', '8.txt']
/content/drive/MyDrive/cs478ModelTraining/gradescope_predictions_retrained_2nd_model/labels/
1.txt
/content/drive/MyDrive/cs478ModelTraining/gradescope_predictions_retrained_2nd_model/labels/1.txt
[{'class_id': 3, 'x_center': 0.250663, 'y_center': 0.719573, 'width': 0.0886889, 'height': 0.174882}, {'class_id': 3, 'x_center': 0.379076, 'y_center': 0.726323, 'width': 0.0889798, 'height': 0.169576}, {'class_id': 3, 'x_center': 0.775246, 'y_center': 0.0848242, 'width': 0.0887994, 'height': 0.169648}, {'class_id': 3, 'x_center': 0.674847, 'y_center': 0.081588, 'width': 0.087662, 'height': 0.163176}, {'class_id': 3, 'x_center': 0.319521, 'y_center': 0.918596, 'width': 0.0905732, 'height': 0.161333}, {'class_id': 0, 'x_center': 0.904086, 'y_center': 0.437399, 'width': 0.177846, 'height': 0.314782}, {'class_id': 0, 'x_center': 0.0982464, 'y_center': 0.443289, 'width': 0.1958, 'height': 0.332732}, {'class_id': 3, 'x_center