In [83]:
# import libraries
import os 
import shutil
import random
import pandas as pd
import cv2
import yaml
import numpy as np
import matplotlib.pyplot as plt

In [30]:
# Root folder for every dataset used below; the relative path is resolved
# against the notebook's current working directory.
data_dir = '../data'

In [31]:
# The data root is split into two sub-folders: 'raw' (where the downloaded
# dataset repository lives) and 'processed' (where the datasets generated by
# this notebook are written).
raw_data_dir, processed_data_dir = (
    os.path.join(data_dir, subfolder) for subfolder in ('raw', 'processed')
)

In [32]:
# Create both data folders up front. exist_ok=True makes re-running this
# cell harmless when the folders are already there.
os.makedirs(raw_data_dir, exist_ok=True)
os.makedirs(processed_data_dir, exist_ok=True)

In [33]:
# Location of the cloned sudoku_dataset repository inside the raw data folder.
sudoku_dataset_dir = os.path.join(raw_data_dir, 'sudoku_dataset')
# One-time setup: uncomment the line below to download the dataset into that folder.
# ! git clone https://github.com/wichtounet/sudoku_dataset.git ../data/raw/sudoku_dataset

In [34]:
# Output folder for the row-image OCR dataset written by create_ocr_dataset().
sudoku_ocr_dataset_dir = os.path.join(processed_data_dir, 'sudoku_ocr_dataset')
os.makedirs(sudoku_ocr_dataset_dir, exist_ok=True)

In [44]:
# Root folder of the puzzle-localization dataset. The cells below place the
# images, label files, train/val lists and dataset.yaml inside it.
localization_dataset_dir = os.path.join(processed_data_dir, 'localization_dataset')
os.makedirs(localization_dataset_dir, exist_ok=True)

In [51]:
# Sibling 'images' and 'labels' folders of the localization dataset:
# create_annotations() copies each picture into the first and writes the
# matching .txt label into the second.
localization_image_dir, localization_label_dir = (
    os.path.join(localization_dataset_dir, subfolder)
    for subfolder in ('images', 'labels')
)

os.makedirs(localization_image_dir, exist_ok=True)
os.makedirs(localization_label_dir, exist_ok=True)

In [35]:
# Sanity check: show the top-level entries of the cloned dataset repository.
os.listdir(sudoku_dataset_dir)

['.git',
 '.gitignore',
 'baptiste.sh',
 'datasets',
 'images',
 'jean.sh',
 'mixed',
 'mixed_incomplete',
 'mixed_natural',
 'original',
 'outlines_sorted.csv',
 'README.rst',
 'tools',
 'wip']

In [36]:
# Folder inside the dataset repository from which the functions below read
# each puzzle image and its same-named '.dat' file.
image_dir = os.path.join(sudoku_dataset_dir, 'images')

In [37]:
# Load the outline annotations: one row per image with its relative file path
# and four (x, y) points p1..p4 (presumably the corners of the puzzle outline,
# in pixels - verify against the dataset README).
annotation_path = os.path.join(sudoku_dataset_dir, 'outlines_sorted.csv')
annotation_df = pd.read_csv(annotation_path)
annotation_df.head()

Unnamed: 0,filepath,p1_x,p1_y,p2_x,p2_y,p3_x,p3_y,p4_x,p4_y
0,./images/image32.jpg,112,35,583,35,600,435,105,444
1,./images/image1082.jpg,101,270,885,272,872,1060,105,1053
2,./images/image125.jpg,13,11,409,0,423,415,10,427
3,./images/image50.jpg,41,10,552,4,568,443,30,446
4,./images/image188.jpg,14,10,534,9,538,447,20,457


In [38]:
# Images (keyed by file name without extension) whose cropped puzzle has to be
# rotated before it is cut into rows. The value is handed to cv2.rotate() as
# its rotateCode: 1 is cv2.ROTATE_180, 2 is cv2.ROTATE_90_COUNTERCLOCKWISE.
orientation_dict = {"image1083": 1}
orientation_dict.update(
    dict.fromkeys(
        (
            "image1024",
            "image1031",
            "image1036",
            "image1037",
            "image1039",
            "image1040",
        ),
        2,
    )
)

In [40]:
def create_ocr_dataset():
    """Cut every annotated puzzle into 9 horizontal strips and save them as OCR samples.

    For each row of ``annotation_df`` the function:

    1. loads the image and the same-named ``.dat`` file from ``image_dir``
       (the first two lines of the ``.dat`` file are skipped; the following
       lines are presumably the 9 grid rows of digits - verify against the
       dataset README),
    2. crops the image to the axis-aligned bounding box of the four outline
       points ``p1..p4``,
    3. rotates the crop when the image is listed in ``orientation_dict``,
    4. slices the crop into 9 equal-height strips and writes strip ``i`` to
       ``sudoku_ocr_dataset_dir`` as ``<digits of line i>_<annotation index>.jpg``.

    NOTE(review): two strips of the same puzzle with identical digit strings
    map to the same file name, so the later one overwrites the earlier one.

    Raises:
        FileNotFoundError: if an image cannot be read by OpenCV, or if the
            matching ``.dat`` file does not exist.
    """
    for ann_idx, row in annotation_df.iterrows():
        base_name = os.path.basename(row['filepath'])
        puzzle_name = os.path.splitext(base_name)[0]
        image_path = os.path.join(image_dir, base_name)
        data_file_path = os.path.join(image_dir, f'{puzzle_name}.dat')

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')

        data = pd.read_csv(data_file_path, delimiter='\t', skiprows=2, names=['data'])

        # Axis-aligned bounding box around the four outline points.
        xs = [row[f'p{corner}_x'] for corner in range(1, 5)]
        ys = [row[f'p{corner}_y'] for corner in range(1, 5)]
        # Clamp the origin at 0: a negative start index would be interpreted
        # by numpy as "count from the end" and produce a wrong or empty crop.
        x_min, x_max = max(0, min(xs)), max(xs)
        y_min, y_max = max(0, min(ys)), max(ys)
        image = image[y_min:y_max, x_min:x_max]

        orientation = orientation_dict.get(puzzle_name)
        if orientation is not None:
            image = cv2.rotate(image, orientation)

        # Height of one grid row after cropping (and rotating, if any).
        row_height = image.shape[0] // 9

        for idx in range(9):
            digits = data['data'].iloc[idx].replace(' ', '')
            row_image_path = os.path.join(sudoku_ocr_dataset_dir, f'{digits}_{ann_idx}.jpg')
            row_image = image[idx * row_height:(idx + 1) * row_height, :]
            cv2.imwrite(row_image_path, row_image)
        

In [11]:
def get_width_height_stat(df=None):
    """Add bounding-box ``width``/``height`` columns and print their averages.

    The bounding box of a row is the axis-aligned box around its four outline
    points ``p1..p4``; ``width`` is ``max(x) - min(x)`` and ``height`` is
    ``max(y) - min(y)``.

    Args:
        df: DataFrame with the columns ``p1_x .. p4_x`` and ``p1_y .. p4_y``.
            Defaults to the notebook-level ``annotation_df``. The frame is
            modified in place (the two columns are added or overwritten).

    Returns:
        None. The two averages are printed.
    """
    if df is None:
        df = annotation_df

    x_cols = [f'p{corner}_x' for corner in range(1, 5)]
    y_cols = [f'p{corner}_y' for corner in range(1, 5)]

    # Row-wise extent of the four points, computed for all rows at once.
    df['width'] = df[x_cols].max(axis=1) - df[x_cols].min(axis=1)
    df['height'] = df[y_cols].max(axis=1) - df[y_cols].min(axis=1)

    print('average width:', df['width'].mean())
    print('average height:', df['height'].mean())

In [64]:
def create_annotations():
    """Build the localization dataset: one label file and one image copy per annotation.

    For each row of ``annotation_df`` the axis-aligned bounding box around the
    four outline points ``p1..p4`` is converted to the single-line format
    ``<class> <x_center> <y_center> <width> <height>``, with all four numbers
    divided by the image width/height so they are fractions of the image
    size. The line is written to ``localization_label_dir/<name>.txt`` and
    the source image is copied to ``localization_image_dir``.

    Raises:
        FileNotFoundError: if a source image cannot be read by OpenCV.
    """
    for ann_idx, row in annotation_df.iterrows():
        base_name = os.path.basename(row['filepath'])
        file_name = os.path.splitext(base_name)[0]
        source_path = os.path.join(image_dir, base_name)
        destination_path = os.path.join(localization_image_dir, base_name)

        # The image is only needed for its size; cv2.imread returns None
        # (rather than raising) when the file is missing or unreadable.
        image = cv2.imread(source_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {source_path}')
        H, W, _ = image.shape

        xs = [row[f'p{corner}_x'] for corner in range(1, 5)]
        ys = [row[f'p{corner}_y'] for corner in range(1, 5)]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        # Single-class problem: every box is class 0 (the puzzle).
        ann_class = 0

        # Box centre and size in pixels, then normalised by the image size.
        x_center = (x_min + (x_max - x_min) / 2) / W
        y_center = (y_min + (y_max - y_min) / 2) / H
        width = (x_max - x_min) / W
        height = (y_max - y_min) / H

        label_path = os.path.join(localization_label_dir, f'{file_name}.txt')
        # The context manager closes the file even if the write fails.
        with open(label_path, 'w') as fh:
            fh.write(f'{ann_class} {x_center} {y_center} {width} {height}')

        shutil.copyfile(src=source_path, dst=destination_path)

In [91]:
def split_dataset(seed=None):
    """Randomly split the localization images into train (~90%) and val (~10%) lists.

    Every image file found in ``localization_image_dir`` is assigned to
    exactly one of ``train.txt`` / ``val.txt`` inside
    ``localization_dataset_dir``; each file lists one absolute image path per
    line. The validation list receives ``len(images) // 10`` entries.

    Only files with a common image extension are considered, so stray
    entries (hidden folders, caches, ...) are ignored without having to drop
    an arbitrary directory entry by position.

    Args:
        seed: Optional seed for a reproducible split. With the default
            ``None`` the module-level ``random`` state is used, so every call
            yields a different split.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')

    # os.listdir returns entries in arbitrary order; sorting first makes a
    # seeded shuffle reproducible across machines and file systems.
    image_files = sorted(
        name for name in os.listdir(localization_image_dir)
        if name.lower().endswith(image_extensions)
    )

    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(image_files)

    val_count = len(image_files) // 10
    val_images, train_images = image_files[:val_count], image_files[val_count:]

    for list_name, names in (('train.txt', train_images), ('val.txt', val_images)):
        with open(os.path.join(localization_dataset_dir, list_name), 'w') as fh:
            for name in names:
                fh.write(f'{os.path.abspath(os.path.join(localization_image_dir, name))}\n')

In [92]:
# split_dataset()

In [102]:
def create_yaml_file():
    """Write ``dataset.yaml`` into the localization dataset folder.

    The file holds the absolute paths of ``train.txt`` and ``val.txt`` (both
    located in ``localization_dataset_dir``) and the class-id to class-name
    mapping, which contains the single class ``0: 'puzzle'``.
    """
    def _absolute(name):
        # Absolute path of a file that sits in the localization dataset folder.
        return os.path.abspath(os.path.join(localization_dataset_dir, name))

    config = dict(
        train=_absolute('train.txt'),
        val=_absolute('val.txt'),
        names={0: 'puzzle'},
    )

    with open(os.path.join(localization_dataset_dir, 'dataset.yaml'), 'w') as stream:
        yaml.dump(config, stream)

In [103]:
# create_yaml_file()

In [105]:
from ultralytics import YOLO

# Start from the "yolo11n.pt" weights and train on the localization dataset
# described by dataset.yaml, using a single epoch at 640 px input size.
model = YOLO("yolo11n.pt")

results = model.train(
    data=os.path.abspath(os.path.join(localization_dataset_dir, 'dataset.yaml')),
    imgsz=640,
    epochs=1,
)

New https://pypi.org/project/ultralytics/8.3.70 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.23  Python-3.12.1 torch-2.2.0+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=C:\Users\shbnd\Desktop\Work\sudoku-solver\data\processed\localization_dataset\dataset.yaml, epochs=1, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train7, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnost

[34m[1mtrain: [0mScanning C:\Users\shbnd\Desktop\Work\sudoku-solver\data\processed\localization_dataset\labels... 182 images, 0 backgrounds, 0 corrupt: 100%|████████[0m

[34m[1mtrain: [0mNew cache created: C:\Users\shbnd\Desktop\Work\sudoku-solver\data\processed\localization_dataset\labels.cache



[34m[1mval: [0mScanning C:\Users\shbnd\Desktop\Work\sudoku-solver\data\processed\localization_dataset\labels... 20 images, 0 backgrounds, 0 corrupt: 100%|██████████|[0m

[34m[1mval: [0mNew cache created: C:\Users\shbnd\Desktop\Work\sudoku-solver\data\processed\localization_dataset\labels.cache





Plotting labels to runs\detect\train7\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train7[0m
Starting training for 1 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/1         0G     0.6736      2.518      1.237         40        640:  58%|█████▊    | 7/12 [01:00<00:43,  8.60s/it]


KeyboardInterrupt: 

'C:\\Users\\shbnd\\Desktop\\Work\\sudoku-solver\\data\\processed\\localization_dataset\\dataset.yaml'