In [1]:
import torch
from IPython.display import Image  # for displaying images
import os 
import random
import shutil
from sklearn.model_selection import train_test_split
import pandas as pd
from tqdm import tqdm
from PIL import Image, ImageDraw
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
import glob

random.seed(108)

In [2]:
def load_normalize(image_path, normalized=True):
    image = Image.open(image_path).resize((512, 512))
    image= np.array(image).astype(np.float32)
    if len(image.shape) == 3:
        image = image.sum(axis=2)
    if normalized:
        return image / 255.0
    else:
        return image

In [3]:
annotations = pd.read_csv('annotations.csv')
annotations.head()

Unnamed: 0,Patient ID,Start Row,End Row,Start Column,End Column,Start Slice,End Slice,size,start_row_scaled,start_column_scaled,end_row_scaled,end_column_scaled,x_center,y_center,width,height
0,Breast_MRI_001,234,271,308,341,89,112,"(448, 448)",0.52,0.69,0.6,0.76,0.72,0.56,0.07,0.08
1,Breast_MRI_002,251,294,108,136,59,72,"(512, 512)",0.49,0.21,0.57,0.27,0.24,0.53,0.06,0.08
2,Breast_MRI_003,351,412,82,139,96,108,"(512, 512)",0.69,0.16,0.8,0.27,0.22,0.74,0.11,0.11
3,Breast_MRI_004,262,280,193,204,86,95,"(512, 512)",0.51,0.38,0.55,0.4,0.39,0.53,0.02,0.04
4,Breast_MRI_005,188,213,138,178,76,122,"(448, 448)",0.42,0.31,0.48,0.4,0.35,0.45,0.09,0.06


In [4]:
annotations.tail()

Unnamed: 0,Patient ID,Start Row,End Row,Start Column,End Column,Start Slice,End Slice,size,start_row_scaled,start_column_scaled,end_row_scaled,end_column_scaled,x_center,y_center,width,height
299,Breast_MRI_305,223,247,117,137,128,140,"(512, 512)",0.44,0.23,0.48,0.27,0.25,0.46,0.04,0.04
300,Breast_MRI_306,284,319,344,383,87,102,"(512, 512)",0.55,0.67,0.62,0.75,0.71,0.58,0.08,0.07
301,Breast_MRI_307,142,200,286,327,115,142,"(448, 448)",0.32,0.64,0.45,0.73,0.69,0.39,0.09,0.13
302,Breast_MRI_308,272,308,96,152,77,98,"(512, 512)",0.53,0.19,0.6,0.3,0.24,0.56,0.11,0.07
303,Breast_MRI_309,381,397,365,386,101,113,"(512, 512)",0.74,0.71,0.78,0.75,0.73,0.76,0.04,0.04


In [5]:
annotations["ID"] = annotations["Patient ID"].apply(lambda x: int(x.split("_")[-1]))

In [6]:
annotations["ID"][annotations["ID"] == 1].index[0]

0

In [7]:
pos_images = np.zeros((304, 512, 512), dtype=np.float32)
all_pos_images = glob.glob(os.path.join('/workspaces/breast-tumor-detection/Data/Original_Data/pos/*.png'))
for image_path in all_pos_images:
    id = image_path.split('/')[-1].split('.')[0].split('-')[-1][-3:]
    img = load_normalize(image_path,normalized=False)
    ind = annotations["ID"][annotations["ID"] == int(id)].index[0]
    pos_images[ind] = img

In [8]:
len(pos_images)

304

In [9]:
with open("annotations.txt") as file:
    annotation = [line.rstrip() for line in file]

In [10]:
annotation[0]

'1.0\t0.72\t0.56\t0.07\t0.08'

In [19]:
neg_images = np.zeros((304, 512, 512), dtype=np.float32)
all_neg_images = glob.glob(os.path.join('/workspaces/breast-tumor-detection/Data/Original_Data/neg/*.png'))
for i,image_path in enumerate(all_neg_images):
    img = load_normalize(image_path,normalized=False)
    neg_images[i] = img

In [22]:
images = np.concatenate((pos_images, neg_images), axis=0)

In [23]:
annotation+=['0.0\t0.0\t0.0\t0.0\t0.0']*304

In [24]:
# Split the dataset into train-valid-test splits 
train_images, val_images, train_annotations, val_annotations = train_test_split(images, annotation, test_size = 0.2, random_state = 1)
val_images, test_images, val_annotations, test_annotations = train_test_split(val_images, val_annotations, test_size = 0.5, random_state = 1)

In [13]:
# !mkdir datasets/images/train datasets/images/val datasets/images/test datasets/labels/train datasets/labels/val datasets/labels/test
!mkdir datasets/labels/train datasets/labels/val datasets/labels/test

mkdir: cannot create directory ‘datasets/labels/train’: File exists
mkdir: cannot create directory ‘datasets/labels/val’: File exists
mkdir: cannot create directory ‘datasets/labels/test’: File exists


In [32]:
for i in range(0, len(train_images)):
    im = Image.fromarray(train_images[i])
    if im.mode != 'RGB':
        im = im.convert('RGB')
    im.save("/workspaces/breast-tumor-detection/Model/yolov5/datasets/images/train/" + str(i) + ".png")

In [33]:
for i in range(0, len(val_images)):
    im = Image.fromarray(val_images[i])
    if im.mode != 'RGB':
        im = im.convert('RGB')
    im.save("/workspaces/breast-tumor-detection/Model/yolov5/datasets/images/val/" + str(i) + ".png")
for i in range(0, len(test_images)):
    im = Image.fromarray(test_images[i])
    if im.mode != 'RGB':
        im = im.convert('RGB')
    im.save("/workspaces/breast-tumor-detection/Model/yolov5/datasets/images/test/" + str(i) + ".png")

In [34]:
for i in range(0, len(train_annotations)):
    with open("/workspaces/breast-tumor-detection/Model/yolov5/datasets/labels/train/" + str(i) + ".txt", "w") as file:
        file.write(train_annotations[i])

for i in range(0, len(val_annotations)):
    with open("/workspaces/breast-tumor-detection/Model/yolov5/datasets/labels/val/" + str(i) + ".txt", "w") as file:
        file.write(val_annotations[i])

for i in range(0, len(test_annotations)):
    with open("/workspaces/breast-tumor-detection/Model/yolov5/datasets/labels/test/" + str(i) + ".txt", "w") as file:
        file.write(test_annotations[i])