In [3]:
import numpy as np
import pandas as pd

import torch

from transformers import AutoImageProcessor
from datasets import Dataset, DatasetDict, Features, Image, Sequence, Value, concatenate_datasets
import albumentations as A
from albumentations.pytorch import ToTensorV2
import PIL
import cv2

import os

  check_for_updates()


In [1]:
# Root folder of the segmented dataset. Paths elsewhere in the notebook are
# built by plain string concatenation (data_path + "train/..."), so the
# trailing slash is required.
data_path = "dataset_segmented/"

In [4]:
# Load the per-split class tables in one pass; the tuple order below matches
# the unpacking order on the left-hand side.
train_df, valid_df, test_df = [
    pd.read_csv(data_path + relative_csv)
    for relative_csv in ("train/_classes.csv", "valid/_classes.csv", "test/_classes.csv")
]

In [9]:
# Append image path to filename
def add_image_path(df, split):
    """Build the image path for every row of one split and drop rows whose file is missing.

    Each path is ``data_path + split + "/" + split + "_" + filename``, i.e. the
    split name is used both as the sub-directory and as the filename prefix.

    Args:
        df: DataFrame with at least the columns ``filename`` and ``labels``.
            It is not modified.
        split: Name of the split (e.g. ``"train"``).

    Returns:
        A new, independent DataFrame with the columns ``image`` and ``labels``,
        restricted to rows whose image file exists on disk. The original index
        labels of the kept rows are preserved.
    """
    image_paths = data_path + split + "/" + split + "_" + df['filename']
    # assign() creates a new frame, so the caller's raw frame keeps its columns.
    with_paths = df.assign(image=image_paths)
    # astype(bool) keeps the mask boolean even when the frame has no rows.
    file_exists = with_paths['image'].apply(os.path.exists).astype(bool)
    # .copy() detaches the result from the intermediate frame so that adding
    # columns to it later does not trigger SettingWithCopyWarning.
    return with_paths.loc[file_exists, ['image', 'labels']].copy()

# Resolve image paths for each split; pairs are listed in the same order as
# the names being unpacked.
train_dataset_df, valid_dataset_df, test_dataset_df = [
    add_image_path(split_frame, split_name)
    for split_frame, split_name in (
        (train_df, "train"),
        (valid_df, "valid"),
        (test_df, "test"),
    )
]

In [10]:
# Show the prepared training frame (columns: image path, labels) as the cell output.
train_dataset_df

Unnamed: 0,image,labels
0,dataset_segmented/train/train_image_00000.png,"[0.0, 0.0, 1.0]"
1,dataset_segmented/train/train_image_00001.png,"[1.0, 1.0, 1.0]"
2,dataset_segmented/train/train_image_00002.png,"[0.0, 1.0, 0.0]"
3,dataset_segmented/train/train_image_00003.png,"[0.0, 1.0, 0.0]"
4,dataset_segmented/train/train_image_00004.png,"[0.0, 1.0, 1.0]"
...,...,...
614,dataset_segmented/train/train_image_00614.png,"[1.0, 1.0, 1.0]"
615,dataset_segmented/train/train_image_00615.png,"[1.0, 1.0, 0.0]"
616,dataset_segmented/train/train_image_00616.png,"[1.0, 0.0, 1.0]"
617,dataset_segmented/train/train_image_00617.png,"[1.0, 1.0, 1.0]"


In [None]:
# Augmentation pipeline for the training images. Every step below is
# configured with p=0.5.
augmentation_steps = [
    # --- geometric ---
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=15, p=0.5),
    # --- photometric ---
    A.RandomBrightnessContrast(
        brightness_limit=0.15,
        contrast_limit=0.15,
        p=0.5,
    ),
    A.HueSaturationValue(
        hue_shift_limit=15,
        sat_shift_limit=25,
        val_shift_limit=15,
        p=0.5,
    ),
    A.RandomGamma(gamma_limit=(90, 110), p=0.5),
    # --- noise ---
    # NOTE(review): the scale (and even the availability) of `var_limit`
    # depends on the installed albumentations version; on uint8 input a
    # variance of at most 0.01 may be negligible -- confirm against the
    # GaussNoise docs for the version in use.
    A.GaussNoise(var_limit=(0.0, 0.01), p=0.5),
]
transform = A.Compose(augmentation_steps)

In [None]:
# Function to apply augmentations
def augment_images(df, transform):
    """Load every image listed in ``df['image']`` and apply ``transform`` to it once.

    Args:
        df: DataFrame with an ``image`` column of file paths readable by
            ``cv2.imread``. It is not modified.
        transform: Callable invoked as ``transform(image=<RGB array>)`` that
            returns a mapping whose ``'image'`` entry is the augmented image
            (the calling convention of an albumentations pipeline).

    Returns:
        A copy of ``df`` with an extra ``augmented_image`` column holding the
        transform output for each row, in row order.

    Raises:
        OSError: If ``cv2.imread`` cannot load one of the files (it signals
            failure by returning ``None`` rather than raising).
    """
    augmented_images = []
    # Only the path column is needed, so iterate it directly instead of
    # materialising a Series per row with iterrows().
    for image_path in df['image']:
        image = cv2.imread(image_path)
        if image is None:
            # Fail here with the offending path instead of an opaque cvtColor error.
            raise OSError(f"cv2.imread could not load image: {image_path!r}")
        # OpenCV decodes to BGR; convert to RGB before augmenting.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        augmented_images.append(transform(image=image)['image'])

    # Work on a copy so the caller's frame is left untouched and re-running
    # the calling cell never writes into a slice of another frame.
    result = df.copy()
    result['augmented_image'] = augmented_images
    return result

# Produce one augmented version of every training image.
train_dataset_df = augment_images(train_dataset_df, transform)

# Stack the original rows on top of the augmented rows under a shared
# ('image', 'labels') schema, renumbering the index from 0.
# NOTE(review): after this step the 'image' column holds file paths for the
# original rows and the transform's output images for the augmented rows --
# confirm downstream code handles both.
original_rows = train_dataset_df[['image', 'labels']]
augmented_rows = train_dataset_df[['augmented_image', 'labels']].rename(
    columns={'augmented_image': 'image'}
)
combined_dataset_df = pd.concat([original_rows, augmented_rows], ignore_index=True)