In [None]:
import cv2
import json
import base64
import torch
from PIL import Image
from io import BytesIO
from ultralytics import YOLO
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from collections import Counter
import yaml
import os
import pandas as pd
import os
import yaml
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt
from IPython.display import display

### Data Analysis

In [None]:
dataset_path = 'DataSet/Yolo'
output_path = 'DataAnalysis'
os.makedirs(output_path, exist_ok=True) 

with open(os.path.join(dataset_path, 'data.yaml'), 'r') as file:
    data = yaml.safe_load(file)
classes = data['names']

image_counts = {'train': 0, 'test': 0, 'valid': 0}
label_counter = Counter()
split_class_counts = {'train': Counter(), 'test': Counter(), 'valid': Counter()}

splits = ['train', 'test', 'valid']

for split in splits:
    annotations_path = os.path.join(dataset_path, split, 'labels')
    if os.path.exists(annotations_path):
        for filename in os.listdir(annotations_path):
            if filename.endswith('.txt'):
                image_counts[split] += 1  
                with open(os.path.join(annotations_path, filename), 'r') as file:
                    for line in file:
                        class_id = int(line.split()[0])
                        label_counter[class_id] += 1
                        split_class_counts[split][class_id] += 1

total_images_df = pd.DataFrame(list(image_counts.items()), columns=['Split', 'Image Count'])
labels_df = pd.DataFrame.from_dict(label_counter, orient='index', columns=['Count'])
labels_df.index = labels_df.index.map(lambda x: classes[x])


total_images_df.to_csv(os.path.join(output_path, 'total_images_by_split.csv'))
plt.figure(figsize=(8, 5))
plt.bar(total_images_df['Split'], total_images_df['Image Count'], color='lightcoral')
plt.title('Total Number of Images by Split')
plt.ylabel('Image Count')
plt.savefig(os.path.join(output_path, 'total_images_by_split.png'))
plt.close()  

print("Total Image Count by Split:")
display(total_images_df)


labels_df.to_csv(os.path.join(output_path, 'class_distribution_all_splits.csv'))
plt.figure(figsize=(10, 6))
labels_df.sort_values(by='Count', ascending=False).plot(kind='bar', legend=False, color='skyblue')
plt.title('Class Distribution Across All Splits')
plt.xlabel('Class')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.savefig(os.path.join(output_path, 'class_distribution_all_splits.png'))
plt.close()

print("Class Distribution Across All Splits:")
display(labels_df)

for split, df in split_class_dfs.items():
    df.to_csv(os.path.join(output_path, f'class_distribution_{split}.csv'))
    
    plt.figure(figsize=(10, 6))
    df['Count'].plot(kind='bar', color='orange', legend=False)
    plt.title(f'Class Distribution in {split.capitalize()} Split')
    plt.xlabel('Class')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.savefig(os.path.join(output_path, f'class_distribution_{split}.png'))
    plt.close()
    print(f"Class Distribution in {split.capitalize()} Split:")
    display(df[['Count']])

class_density = {}
for class_id, count in label_counter.items():
    class_name = classes[class_id]
    avg_instances = count / image_counts['train']
    class_density[class_name] = {
        'Average Instances': avg_instances,
        'Median Instances': count / 2  
}


class_density_df = pd.DataFrame.from_dict(class_density, orient='index')
class_density_df.to_csv(os.path.join(output_path, 'class_density.csv'))


print("Class Density (Average and Median Instances per Image):")
display(class_density_df)


plt.figure(figsize=(12, 6))


plt.hist(class_density_df['Average Instances'], bins=10, color='green', alpha=0.5, label='Average Instances')


plt.hist(class_density_df['Median Instances'], bins=10, color='purple', alpha=0.5, label='Median Instances')

plt.title('Histogram of Average and Median Instances per Class')
plt.xlabel('Instances per Image')
plt.ylabel('Frequency')
plt.legend()
plt.savefig(os.path.join(output_path, 'histogram_average_median_instances.png'))
plt.close()


Total Image Count by Split:


Unnamed: 0,Split,Image Count
0,train,2817
1,test,317
2,valid,621


Class Distribution Across All Splits:


Unnamed: 0,Count
Paper-Carton,3091
Metal,1899
Plastic,2015


Class Distribution in Train Split:


Unnamed: 0_level_0,Count
Class Name,Unnamed: 1_level_1
Paper-Carton,2312
Metal,1426
Plastic,1550


Class Distribution in Test Split:


Unnamed: 0_level_0,Count
Class Name,Unnamed: 1_level_1
Plastic,134
Paper-Carton,228
Metal,173


Class Distribution in Valid Split:


Unnamed: 0_level_0,Count
Class Name,Unnamed: 1_level_1
Metal,300
Paper-Carton,551
Plastic,331


Class Density (Average and Median Instances per Image):


Unnamed: 0,Average Instances,Median Instances
Paper-Carton,1.097267,1545.5
Metal,0.674121,949.5
Plastic,0.7153,1007.5


<Figure size 1000x600 with 0 Axes>

### Model Training

In [3]:
model = YOLO("yolov8n-seg.pt")

In [None]:
results = model.train(data="Data/data.yaml", epochs=100, imgsz=640, batch=16, patience=7, device="cpu")

New https://pypi.org/project/ultralytics/8.3.27 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.26  Python-3.10.10 torch-2.0.1+cu118 CPU (AMD Ryzen 9 5900X 12-Core Processor)
[34m[1mengine\trainer: [0mtask=segment, mode=train, model=yolov8n-seg.pt, data=Data/data.yaml, epochs=100, time=None, patience=7, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cpu, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=Fa

[34m[1mtrain: [0mScanning D:\ThesisFinal\Data\train\labels.cache... 2817 images, 82 backgrounds, 103 corrupt: 100%|██████████| 2817/2817 [00:00<?, ?it/s]




[34m[1mval: [0mScanning D:\ThesisFinal\Data\valid\labels.cache... 621 images, 12 backgrounds, 27 corrupt: 100%|██████████| 621/621 [00:00<?, ?it/s]






Plotting labels to runs\segment\train2\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 66 weight(decay=0.0), 77 weight(decay=0.0005), 76 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\segment\train2[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   seg_loss   cls_loss   dfl_loss  Instances       Size


      1/100         0G      1.446       4.59      3.529       1.51         65        640:   1%|          | 2/170 [00:15<21:27,  7.66s/it]


KeyboardInterrupt: 

: 

### Evaluating Model

In [None]:
model = YOLO("Results/weights/best.pt")  

results = model.val(data="DataSet/Yolo/data.yaml", device='cpu')

Ultralytics 8.3.2  Python-3.10.10 torch-2.4.1+cu124 CPU (AMD Ryzen 9 5900X 12-Core Processor)
YOLOv8n-seg summary (fused): 213 layers, 2,937,369 parameters, 0 gradients, 10.7 GFLOPs


[34m[1mval: [0mScanning D:\ThesisFinal\Data\valid\labels.cache... 621 images, 12 backgrounds, 27 corrupt: 100%|██████████| 621/621 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 38/38 [00:34<00:00,  1.09it/s]


                   all        594       1151      0.752      0.639      0.707      0.523      0.741       0.63      0.685      0.463
                 Metal        228        289      0.858      0.799      0.871      0.701      0.858      0.799       0.87      0.666
          Paper-Carton        235        531      0.784      0.597       0.71      0.496       0.75      0.571      0.655      0.403
               Plastic        169        331      0.615      0.521      0.541      0.372      0.615      0.521      0.531      0.322
Speed: 1.0ms preprocess, 47.6ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns\segment\val4[0m


In [None]:
print("Evaluation Results:")
print(f"mAP@0.5: {results.results_dict['metrics/mAP50(B)']}")
print(f"mAP@0.5:0.95: {results.results_dict['metrics/mAP50-95(B)']}")
print(f"Precision: {results.results_dict['metrics/precision(B)']}")
print(f"Recall: {results.results_dict['metrics/recall(B)']}")

Evaluation Results:
mAP@0.5: 0.7072658425595644
mAP@0.5:0.95: 0.5230452962209783
Precision: 0.7523625419188452
Recall: 0.6392053834490808


In [None]:
results = model.val(data="DataSet/Yolo/data.yaml", device="cpu", plots=True)  # 'plots=True' generates the plots

Ultralytics 8.3.2  Python-3.10.10 torch-2.4.1+cu124 CPU (AMD Ryzen 9 5900X 12-Core Processor)


[34m[1mval: [0mScanning D:\ThesisFinal\Data\valid\labels.cache... 621 images, 12 backgrounds, 27 corrupt: 100%|██████████| 621/621 [00:00<?, ?it/s]




                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 38/38 [00:34<00:00,  1.09it/s]


                   all        594       1151      0.752      0.639      0.707      0.523      0.741       0.63      0.685      0.463
                 Metal        228        289      0.858      0.799      0.871      0.701      0.858      0.799       0.87      0.666
          Paper-Carton        235        531      0.784      0.597       0.71      0.496       0.75      0.571      0.655      0.403
               Plastic        169        331      0.615      0.521      0.541      0.372      0.615      0.521      0.531      0.322


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x00000230BF99B490>
Traceback (most recent call last):
  File "c:\Users\Jun\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 1477, in __del__
  File "c:\Users\Jun\AppData\Local\Programs\Python\Python310\lib\site-packages\torch\utils\data\dataloader.py", line 1435, in _shutdown_workers
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'


Speed: 1.0ms preprocess, 47.9ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns\segment\val5[0m
