<a href="https://colab.research.google.com/github/timuriz/ObjectSegmentationMarsWS25/blob/all_in_one_file_ipynb/mars_semantic_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DATASET PREPARATION

---



In [None]:
import os, cv2 # os - file-system access, cv2 - OpenCV image reading / processing
import numpy as np # arrays
import pandas as pd # tabular data (DataFrame)

# Mount Google Drive at /content/drive (google.colab is only importable inside Colab)
from google.colab import drive
drive.mount('/content/drive')

"""
# links on folders with dataset

"""

# Root folder of the dataset on the mounted Drive.
# NOTE(review): this path has no 'University study' segment, unlike the path
# used to read labels.csv further down — confirm which of the two is current.
data_folder = '/content/drive/MyDrive/Mashine Learning/Project/AI4Mars_dataset'

# All split folders share the same parent folder.
_split_root = '/content/drive/Othercomputers/Ноутбук'

# training split
img_train_folder = f'{_split_root}/train_images'
mask_train_folder = f'{_split_root}/train_labels'

# validation split (NOTE(review): suffix is 'masks' here but 'labels' for the other splits — confirm)
img_valid_folder = f'{_split_root}/valid_images'
mask_valid_folder = f'{_split_root}/valid_masks'

# test split
img_test_folder = f'{_split_root}/test_images'
mask_test_folder = f'{_split_root}/test_labels'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
"""

# get name of classes and their values

"""

# Read the class table and pull out its 'name' and 'gray' columns as plain lists.
labels_csv_path = '/content/drive/MyDrive/University study/Mashine Learning/Project/labels.csv'
class_dataframe = pd.read_csv(labels_csv_path)  # pandas dataframe

class_names, class_gray_values = (
    class_dataframe[column].tolist() for column in ('name', 'gray')
)

# Show what was loaded (same text as a two-argument print: label, space, value).
print(f'Class Names:  {class_names}')
print(f'Class gray values:  {class_gray_values}')

Class Names:  ['soil', 'bedrock', 'sand', 'big rock', 'rest']
Class gray values:  [0, 1, 2, 3, 255]


In [None]:
import os
import numpy as np
import cv2
from torch.utils.data import Dataset

"""

# implementation of the Dataset class

"""

class CreatImageDataset(Dataset):
    """Dataset of (image, mask) pairs read from two folders.

    Files are paired by position after sorting the directory listings of
    ``img_folder`` and ``mask_folder``, so the i-th image is matched with the
    i-th mask.

    Args:
        img_folder: directory that contains the input images.
        mask_folder: directory that contains the masks.
        transforms: callable invoked as ``transforms(image=..., mask=...)``
            that returns a mapping with the keys ``'image'`` and ``'mask'``.
            The returned mask must provide ``.long()`` (e.g. a torch tensor).

    Raises:
        ValueError: if the two folders hold a different number of files, in
            which case pairing by sorted position cannot be trusted.
    """

    def __init__(self, img_folder, mask_folder, transforms):
        self.img_folder = img_folder
        self.mask_folder = mask_folder
        self.transforms = transforms

        # Sorted listings give a deterministic order and define the pairing.
        self.img_paths = sorted(os.listdir(self.img_folder))
        self.mask_paths = sorted(os.listdir(self.mask_folder))

        # Fail early instead of silently pairing an image with the wrong mask
        # (or raising IndexError in the middle of an epoch).
        if len(self.img_paths) != len(self.mask_paths):
            raise ValueError(
                f"Number of images ({len(self.img_paths)}) in '{self.img_folder}' "
                f"does not match number of masks ({len(self.mask_paths)}) "
                f"in '{self.mask_folder}'"
            )

    # return the amount of files
    def __len__(self):
        return len(self.img_paths)

    # form tensors of the input images and masks
    def __getitem__(self, idx):
        """Load, augment and return the ``(image, mask)`` pair at ``idx``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image or the mask.
        """
        img_path = os.path.join(self.img_folder, self.img_paths[idx])
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        # cv2.imread returns None instead of raising when a file is unreadable.
        if img is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB

        mask_path = os.path.join(self.mask_folder, self.mask_paths[idx])
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)  # single-channel array
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_path}")

        # apply an augmentation (the same call transforms image and mask together)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask'].long()  # integer class indices

        return img, mask


AUGMENTATION

In [None]:
"""
# augmentation functions

"""
import albumentations as aug
from albumentations.pytorch import ToTensorV2


def get_train_transforms(image_size=(512, 512)):
    """Build the augmentation pipeline used for training samples.

    The pipeline is called as ``pipeline(image=..., mask=...)``; spatial
    transforms are applied to the image and the mask together.

    Every sample leaves the pipeline with spatial size ``image_size``. The
    optional ``RandomCrop`` shrinks a sample to 90% and ``RandomRotate90``
    swaps height and width for non-square sizes, so a final ``Resize``
    restores ``(h, w)``; otherwise samples of different sizes could not be
    stacked into one batch.

    Args:
        image_size: ``(height, width)`` of the produced samples.

    Returns:
        An ``albumentations.Compose`` pipeline.
    """
    h, w = image_size
    return aug.Compose([     # albumentations transformation pipeline builder
        # bring every input to the working size; interpolation=0 is nearest-neighbor
        aug.Resize(height=h, width=w, interpolation=0),
        # with probability 0.5 cut out a random window of 90% height / width
        aug.RandomCrop(height=int(h * 0.9), width=int(w * 0.9), p=0.5),
        aug.HorizontalFlip(p=0.5),  # flip with probability 50%
        aug.VerticalFlip(p=0.1),
        # rotate by -15..+15 degrees, zoom by a factor in [0.9, 1.1],
        # shift by 6.25% of the image size; border_mode=0 fills uncovered area with a constant
        # NOTE(review): translate_percent=(0.0625, 0.0625) is a zero-width range, i.e. a fixed
        # shift — (-0.0625, 0.0625) may have been intended; confirm.
        # NOTE(review): if the constant fill value for the mask is 0 it coincides with class 0
        # ('soil'); consider filling the mask with the ignore value 255 — confirm the
        # parameter name for the installed albumentations version.
        aug.Affine(rotate=(-15, 15), scale=(0.9, 1.1), translate_percent=(0.0625, 0.0625), border_mode=0, p=0.5),
        aug.RandomRotate90(p=0.25),
        # restore the fixed output size after the crop / 90-degree rotation
        aug.Resize(height=h, width=w, interpolation=0),
        # per-channel (x - mean) / std with the ImageNet statistics
        aug.Normalize(mean=(0.485, 0.456, 0.406),
                      std=(0.229, 0.224, 0.225)),
        ToTensorV2()  # converts NumPy arrays into PyTorch tensors
    ], additional_targets={})  # no additional masks

def get_valid_transforms(image_size=(512, 512)):
    """Build the pipeline for validation / test samples (no random augmentation).

    Args:
        image_size: ``(height, width)`` the samples are resized to.

    Returns:
        An ``albumentations.Compose`` that resizes, normalizes and converts to tensors.
    """
    height, width = image_size

    resize_step = aug.Resize(height=height, width=width, interpolation=0)  # 0 = nearest-neighbor
    normalize_step = aug.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    )
    to_tensor_step = ToTensorV2()  # NumPy arrays -> PyTorch tensors

    return aug.Compose([resize_step, normalize_step, to_tensor_step])


# visualization



In [None]:
"""

visualization function to preview images and masks

"""

import matplotlib.pyplot as plt

def visualize(**images) -> None:
    """Display the given images side by side in one row.

    Each keyword argument becomes one panel; the keyword is used as the panel
    title with underscores turned into spaces and words capitalized
    (``ground_truth=...`` is titled "Ground Truth").
    """
    panel_count = len(images)
    plt.figure(figsize=(16, 5))

    for position, (label, picture) in enumerate(images.items(), start=1):
        plt.subplot(1, panel_count, position)
        # hide tick marks and numbers on both axes
        plt.xticks([])
        plt.yticks([])
        plt.title(label.replace("_", " ").title())
        plt.imshow(picture)

    plt.show()

In [None]:
"""

Class that un-normalizes an image for preliminary visualization

"""

class UnNormalize(object):
    """Undo a per-channel normalization: ``x = x_norm * std + mean``.

    Args:
        mean: per-channel means that were subtracted during normalization.
        std: per-channel standard deviations that were divided by.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """Return a de-normalized copy of ``tensor``.

        The input is left untouched, so calling this more than once on the
        same tensor does not de-normalize it repeatedly.

        Args:
            tensor: torch tensor whose first axis is paired element-wise with
                ``mean`` / ``std``.

        Returns:
            A new tensor of the same shape holding the restored values.
        """
        restored = tensor.clone()  # work on a copy; the in-place ops below must not leak out
        for channel, m, s in zip(restored, self.mean, self.std):
            channel.mul_(s).add_(m)  # inverse of (x - m) / s, written into `restored`

        return restored


unnorm = UnNormalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

In [None]:
# Transform pipelines: random augmentation for training, the plain
# resize / normalize pipeline for the test and validation splits.
train_transform = get_train_transforms()
test_transform = get_valid_transforms()
valid_transform = get_valid_transforms()

# One dataset per split, each reading from its own image / mask folders.
train_dataset = CreatImageDataset(
    img_folder=img_train_folder,
    mask_folder=mask_train_folder,
    transforms=train_transform,
)
test_dataset = CreatImageDataset(
    img_folder=img_test_folder,
    mask_folder=mask_test_folder,
    transforms=test_transform,
)
valid_dataset = CreatImageDataset(
    img_folder=img_valid_folder,
    mask_folder=mask_valid_folder,
    transforms=valid_transform,
)

In [None]:
"""

call visualization

"""

# Pick one random training sample and show the image next to its mask.
idx = np.random.randint(0, len(train_dataset))
image, mask = train_dataset[idx]

# Undo the normalization, then move the first axis last for imshow
# (assumes the image tensor is channels-first — TODO confirm).
preview = unnorm(image).permute(1, 2, 0)
visualize(image=preview, mask=mask)

DATALOADER

In [None]:
from torch.utils.data import DataLoader

batch_size = 32

# os.cpu_count() returns None when the count cannot be determined;
# fall back to 0 (load in the main process) so DataLoader gets a valid integer.
n_workers = os.cpu_count() or 0
print("num_workers = ", n_workers) # show how many CPU cores are available

# Training: reshuffled every epoch, incomplete last batch dropped.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True, # randomly shuffles the order of samples at the beginning of each epoch.
    num_workers=n_workers,
    drop_last=True, # last smaller batch is discarded
)

# Test: fixed order, every sample kept.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=n_workers,
    drop_last=False,
)

# Validation: fixed order, every sample kept.
# Must wrap valid_dataset (it previously wrapped test_dataset, so validation
# metrics were computed on the test split).
valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=n_workers,
    drop_last=False,
)

num_workers =  2


# MODEL

---



# TRAINING

---



# EVALUATION

---



# DEPLOYMENT

---

