In [3]:
import os
import time
import random
import itertools
import cv2 as cv
import numpy as np
import pandas as pd
import tensorflow as tf

In [4]:
# Inputs: the folder holding the original images and the CSV describing them.
folder = 'Train'
csv = 'train.csv'

# Outputs: the folder that receives the generated images and the CSV written
# for them.
folder_with_new_images = 'new_train'
csv_for_new_images = 'new_train.csv'

In [7]:
class Preprocessing:
    """Augment a folder of images and build the matching label CSV.

    Every source image is cut into four equally sized quadrants and each
    ordering of those quadrants is reassembled into a new image; the same is
    then done for the horizontally mirrored image. The label table is
    expanded in the same order, so row ``k`` of the new CSV belongs to
    ``<new_data_path>/<k>.jpg``.

    NOTE(review): the pairing of images and labels assumes that the rows of
    ``csv_path`` are in the same order as the sorted file names found in
    ``data_path`` - confirm against the dataset.
    """

    # Four quadrants can be arranged in 4! = 24 different orders.
    PERMUTATIONS_PER_IMAGE = 24

    def __init__(self, data_path, csv_path, new_data_path, new_csv_path):
        """Collect the source image paths and load the source label table.

        Args:
            data_path: Folder containing the original images.
            csv_path: CSV file read into ``self.df``.
            new_data_path: Folder the generated images are written to.
            new_csv_path: CSV file the expanded label table is written to.
        """
        self.data_path = data_path
        self.new_data_path = new_data_path
        self.new_csv_path = new_csv_path

        # os.listdir returns entries in arbitrary order; sort them so that
        # the image sequence is the same on every run and every platform.
        filenames = sorted(os.listdir(data_path), key=self._filename_sort_key)
        self.images = [f'{data_path}/{image_path}' for image_path in filenames]
        self.image_nr = 0
        self.df = pd.read_csv(csv_path)

    @staticmethod
    def _filename_sort_key(filename):
        """Order purely numeric file names by value, all others by name."""
        stem = os.path.splitext(filename)[0]
        if stem.isdecimal():
            return (0, int(stem), filename)
        return (1, 0, filename)

    def show_image(self, image):
        """Display ``image`` in a window and block until a key is pressed."""
        cv.imshow('Image', image)
        cv.waitKey(0)
        cv.destroyAllWindows()

    def crop_images(self, image):
        """Split ``image`` into four non-overlapping quadrants of equal shape.

        Height and width are halved independently, so non-square images are
        supported. When a dimension is odd, its last row/column is left out
        so that all four quadrants share one shape and can be recombined in
        any order.

        Returns:
            Tuple ``(topleft, topright, bottomleft, bottomright)``.
        """
        half_height = image.shape[0] // 2
        half_width = image.shape[1] // 2

        top = slice(0, half_height)
        bottom = slice(half_height, 2 * half_height)
        left = slice(0, half_width)
        right = slice(half_width, 2 * half_width)

        return (image[top, left], image[top, right],
                image[bottom, left], image[bottom, right])

    def create_permutations(self, image, output_size=(512, 512)):
        """Write one image per ordering of the four quadrants of ``image``.

        Files are saved as ``<new_data_path>/<image_nr>.jpg`` and
        ``self.image_nr`` is advanced after every file.

        Args:
            image: Image array to cut up and reassemble.
            output_size: Size passed to ``cv.resize`` for every result.

        Raises:
            OSError: If an image could not be written.
        """
        quadrants = self.crop_images(image)
        for first, second, third, fourth in itertools.permutations(quadrants):
            top_row = np.concatenate((first, second), axis=1)
            bottom_row = np.concatenate((third, fourth), axis=1)
            mosaic = np.concatenate((top_row, bottom_row), axis=0)
            mosaic = cv.resize(mosaic, output_size)

            target = f'{self.new_data_path}/{self.image_nr}.jpg'
            # cv.imwrite reports failure through its return value only.
            if not cv.imwrite(target, mosaic):
                raise OSError(f'Could not write image: {target}')
            self.image_nr += 1

    def get_new_images(self):
        """Generate all permuted images, first unmirrored, then mirrored.

        The order (every source image as-is, then every source image flipped
        horizontally) is what ``get_new_train_csv`` mirrors in the label
        table, so it must not be changed independently.

        Raises:
            ValueError: If a file in ``self.images`` cannot be read as an
                image. It is not skipped, because a skipped image would
                shift every following label by one block.
        """
        os.makedirs(self.new_data_path, exist_ok=True)
        # Numbering always starts at 0 because the CSV ids start at 0.
        self.image_nr = 0

        for mirrored in (False, True):
            for image_path in self.images:
                image = cv.imread(image_path)
                if image is None:
                    raise ValueError(f'Could not read image: {image_path}')
                if mirrored:
                    image = cv.flip(image, 1)
                self.create_permutations(image)

    def get_new_train_csv(self):
        """Write the label table that matches the generated images.

        Each row of ``self.df`` is repeated once per quadrant permutation,
        the resulting table is appended to itself (for the mirrored images)
        and the ``House ID`` column is replaced by ``0 .. n-1``.
        """
        positions = np.arange(len(self.df)).repeat(self.PERMUTATIONS_PER_IMAGE)
        repeated = self.df.iloc[positions]

        expanded = pd.concat([repeated, repeated], ignore_index=True)
        expanded['House ID'] = range(len(expanded))
        expanded.to_csv(self.new_csv_path, index=False)

    def shuffle_images(self, seed=None):
        """Give every generated image a new random id and update the CSV.

        Row ``i`` of the new CSV receives a random, unique ``House ID`` ``n``
        and the file ``<i>.jpg`` is renamed to ``a<n>.jpg``. Because targets
        carry the ``a`` prefix, a rename can never overwrite a source file
        that has not been moved yet. Note that the CSV stores ``n`` while the
        file is called ``a<n>.jpg``.

        Args:
            seed: Optional seed for a reproducible shuffle. When ``None`` the
                module-level ``random`` generator is used.

        Raises:
            FileNotFoundError: If any expected ``<i>.jpg`` is missing (for
                example when the method is run a second time). Nothing is
                renamed or written in that case.
        """
        df = pd.read_csv(self.new_csv_path)
        count = len(df)

        sources = [f'{self.new_data_path}/{index}.jpg' for index in range(count)]
        missing = [path for path in sources if not os.path.isfile(path)]
        if missing:
            raise FileNotFoundError(
                f'{len(missing)} of {count} expected images are missing, '
                f'e.g. {missing[0]}'
            )

        numbers = list(range(count))
        if seed is None:
            random.shuffle(numbers)
        else:
            random.Random(seed).shuffle(numbers)

        for source, new_image_name in zip(sources, numbers):
            os.rename(source, f'{self.new_data_path}/a{new_image_name}.jpg')

        # The CSV is only rewritten after every file has been renamed.
        df['House ID'] = numbers
        df = df.sort_values('House ID')
        df.to_csv(self.new_csv_path, index=False)
        
            

In [8]:
# Generate the permuted images first, then the label CSV that lists one row
# per generated image.
pp = Preprocessing(
    data_path=folder,
    csv_path=csv,
    new_data_path=folder_with_new_images,
    new_csv_path=csv_for_new_images,
)
pp.get_new_images()
pp.get_new_train_csv()

In [9]:
# Run this cell only once per generated dataset: the call renames every
# `<n>.jpg` to an `a<m>.jpg` file, so a second run no longer finds the
# files it expects.
pp.shuffle_images()