## Import necessary libraries

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

In [None]:
# Root folder of all dataset versions; load_dataset and write_dataset join the
# dataset folder name onto this path.
DATA_ROOT = "E:\\Datasets\\Br35H\\"

## Define helper functions

In [None]:
def load_dataset(dataset_folder: str, split_type = 'train', image_size = (128, 128)):
    """Load every image of one dataset split together with integer labels.

    The split is read from ``DATA_ROOT/<dataset_folder>/<split_type>/``, which
    must contain one sub-folder per category.

    Args:
        dataset_folder: Name of the dataset folder inside ``DATA_ROOT``.
        split_type: Name of the split sub-folder (for example ``'train'``).
        image_size: ``(width, height)`` every image is resized to.

    Returns:
        A tuple ``(X, Y)`` of two parallel lists: the resized images and, for
        each image, the index of its category in the sorted category list.
    """
    print(f"Loading {split_type} set of {dataset_folder} dataset")
    split_path = os.path.join(DATA_ROOT, dataset_folder, split_type)

    # os.listdir returns entries in arbitrary order, so sort to make the
    # category -> label mapping stable, and ignore stray non-folder entries.
    categories = sorted(
        entry for entry in os.listdir(split_path)
        if os.path.isdir(os.path.join(split_path, entry))
    )

    X = []
    Y = []
    for category_index, category in enumerate(categories):
        category_path = os.path.join(split_path, category)
        for image_name in sorted(os.listdir(category_path)):
            image_path = os.path.join(category_path, image_name)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable or non-image file by
                # returning None; resizing it would raise an opaque error.
                print(f"W could not read image {image_path}, skipping")
                continue
            X.append(cv2.resize(image, image_size))
            Y.append(category_index)

    print(f"Loading done. Dataset size: {len(X)}\n")

    return X, Y

In [None]:
def write_dataset(X, Y, dataset_folder: str, split_type: str):
    """Write images to ``DATA_ROOT/<dataset_folder>/<split_type>/<label>/``.

    Each image is saved as ``<label>_<index>.jpg`` where ``index`` is the
    position of the image in ``X``. Existing folders are reused, so files
    with the same name from an earlier run are overwritten.

    Args:
        X: Iterable of images accepted by ``cv2.imwrite``.
        Y: Iterable of labels, parallel to ``X``; each distinct label gets
            its own sub-folder.
        dataset_folder: Name of the dataset folder inside ``DATA_ROOT``.
        split_type: Name of the split sub-folder (for example ``'train'``).
    """
    print(f"Writing {split_type} set of {dataset_folder} dataset")
    dataset_path = os.path.join(DATA_ROOT, dataset_folder)
    split_path = os.path.join(dataset_path, split_type)

    for folder in (dataset_path, split_path):
        try:
            os.mkdir(folder)
        except FileExistsError:
            # Only an existing folder is expected here; any other failure
            # (permissions, missing DATA_ROOT) is a real error and propagates.
            print(f'W folder {folder} already exists')

    for category in set(Y):
        # exist_ok keeps re-runs working, like the two folders above.
        os.makedirs(os.path.join(split_path, str(category)), exist_ok=True)

    for i, (image, category) in enumerate(zip(X, Y)):
        image_name = f"{category}_{i}.jpg"
        dst = os.path.join(split_path, str(category), image_name)
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(dst, image):
            print(f"W could not write image {dst}")

    print("Writing done")

In [None]:
from scipy import ndimage

def rotate_dataset_imgs(X, angle: int):
    """Return a list with every image of ``X`` rotated by ``angle`` degrees.

    Each image is rotated with ``ndimage.rotate`` (default settings) and then
    resized back to the width and height of the input image.

    Args:
        X: Iterable of images as arrays whose first two axes are rows and
            columns.
        angle: Rotation angle in degrees, passed to ``ndimage.rotate``.

    Returns:
        A list of rotated images in the same order as ``X``.
    """
    rotated_X = []
    for image in X:
        h, w = image.shape[:2]
        rotated = ndimage.rotate(image, angle)
        # cv2.resize takes dsize as (width, height), i.e. (cols, rows).
        rotated_X.append(cv2.resize(rotated, (w, h)))

    return rotated_X

## Create rotated image datasets

In [None]:
# Load the 'pre_text' split and stack the images into one (N, 128, 128, 3) array
X_pre_text, Y_pre_text = load_dataset(dataset_folder='data_v1.0.0', split_type='pre_text')
X_pre_text = np.array(X_pre_text).reshape((-1, 128, 128, 3))

In [None]:
pre_text_len = len(X_pre_text)

# One rotated copy of the pre-text images per angle: X_train_<d> holds the
# images passed through rotate_dataset_imgs with an angle of -<d> degrees
# (d = 2, 4, 6, 8, 10). There is no unrotated set.
X_train_2, X_train_4, X_train_6, X_train_8, X_train_10 = (
    rotate_dataset_imgs(X_pre_text, -degrees) for degrees in (2, 4, 6, 8, 10)
)

# Pseudo-labels: every image of a rotated set gets that set's class index
# (-2 deg -> 0, -4 deg -> 1, -6 deg -> 2, -8 deg -> 3, -10 deg -> 4).
Y_train_2, Y_train_4, Y_train_6, Y_train_8, Y_train_10 = (
    np.full(pre_text_len, pseudo_label) for pseudo_label in range(5)
)

In [None]:
# Visualize up to the first 20 samples from X_train_2 in a 4x5 grid
plt.figure(figsize=(12,10))
# Subplot positions are 1-based while image indices are 0-based, so enumerate
# from 1 over the first 20 images (fewer images simply leave cells empty).
for i, image in enumerate(X_train_2[:20], start=1):
    plt.subplot(4, 5, i)
    # The images come from cv2.imread (BGR channel order); reverse the channel
    # axis so matplotlib, which expects RGB, shows the right colors.
    plt.imshow(image[..., ::-1])
    plt.axis('off')

In [None]:
# Stack the five rotated image sets, and their pseudo-labels in the same
# order, along the first axis
X_train_unlabeled_full = np.concatenate(
    [X_train_2, X_train_4, X_train_6, X_train_8, X_train_10]
)
Y_train_unlabeled_full = np.concatenate(
    [Y_train_2, Y_train_4, Y_train_6, Y_train_8, Y_train_10]
)

In [None]:
# Split rotated data into train (80%) and validation (20%) parts, stratified
# by pseudo-label. The fixed seed makes the split that is written to disk
# reproducible across runs.
X_rot_train, X_rot_val, Y_rot_train, Y_rot_val = train_test_split(
    X_train_unlabeled_full,
    Y_train_unlabeled_full,
    train_size=0.80,
    stratify=Y_train_unlabeled_full,
    random_state=351,
)

In [None]:
# Save the rotated train and validation parts as the 'data_v1.3.0' dataset
write_dataset(X=X_rot_train, Y=Y_rot_train, dataset_folder='data_v1.3.0', split_type='train')
write_dataset(X=X_rot_val, Y=Y_rot_val, dataset_folder='data_v1.3.0', split_type='val')

## Create a smaller stratified subset of the train set

In [None]:
# Load the full train split and stack it into one (N, 128, 128, 3) array
X_train, Y_train = load_dataset(dataset_folder='data_v1.0.0', split_type='train')
X_train = np.array(X_train).reshape((-1, 128, 128, 3))

# Keep a stratified 12.5% subset (fixed seed) and discard the remainder
X_train_smaller, _, Y_train_smaller, _ = train_test_split(
    X_train,
    Y_train,
    stratify=Y_train,
    train_size=0.125,
    random_state=351,
)

# Save the subset as the train split of the 'data_v1.0.3' dataset
write_dataset(X=X_train_smaller, Y=Y_train_smaller, dataset_folder='data_v1.0.3', split_type='train')