In [3]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt


def cut_rectangular_image_into_square(image):
    """Center-crop an image array to a square.

    The side of the square is the smaller of the image's height and width;
    the crop window is centered along the longer axis (when the excess is
    odd, the extra row/column is dropped from the bottom/right).

    Args:
        image: Array whose first two axes are (height, width). Any trailing
            axes (e.g. colour channels) are kept untouched, so both 2-D
            grayscale and 3-D colour arrays are accepted.

    Returns:
        The cropped array, obtained by slicing ``image`` (no pixel data is
        copied by this function).
    """
    # Only the first two axes are spatial. Reading them with a slice instead
    # of unpacking the whole shape keeps grayscale (2-D) inputs working.
    height, width = image.shape[:2]
    square_size = min(height, width)

    # Offsets that center the square inside the original frame.
    start_row = (height - square_size) // 2
    start_col = (width - square_size) // 2

    return image[start_row:start_row + square_size,
                 start_col:start_col + square_size]


def resize_square_image(image, target_size=256):
    """Scale an image so that its longer side becomes ``target_size`` pixels.

    Both axes are scaled by the same factor, so the aspect ratio is kept;
    for a square input the result is ``target_size`` x ``target_size``.

    Args:
        image: Array whose first two axes are (height, width).
        target_size: Desired length, in pixels, of the longer side.
            Defaults to 256, the value this function previously hard-coded.

    Returns:
        The resized image produced by ``cv2.resize`` with bilinear
        interpolation.
    """
    # One factor for both axes so the image is not distorted.
    scale_factor = target_size / max(image.shape[:2])

    # dsize=None lets OpenCV derive the output size from fx / fy.
    return cv2.resize(
        image,
        None,
        fx=scale_factor,
        fy=scale_factor,
        interpolation=cv2.INTER_LINEAR,
    )

def show_image(image):
    """Display an image with matplotlib, with the axes hidden.

    Three-channel images are passed through a BGR -> RGB conversion before
    being shown (this assumes the array uses OpenCV's BGR channel order);
    any other array is handed to matplotlib unchanged.
    """
    dims = image.shape
    has_three_channels = len(dims) == 3 and dims[2] == 3

    if has_three_channels:
        # Swap channel order so matplotlib renders the colours correctly.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.imshow(image)
    plt.axis('off')  # hide ticks and frame
    plt.show()




def read_images_into_array(folder_path):
    """Load the JPEG images of a folder as square, resized arrays.

    Files are processed in sorted name order. Each image is center-cropped
    to a square with ``cut_rectangular_image_into_square``; crops smaller
    than 256 pixels per side are skipped (they would have to be upscaled),
    and the rest are resized with ``resize_square_image``.

    Args:
        folder_path: Directory to scan (not recursive). Files ending in
            ``.jpg`` or ``.jpeg`` (any letter case) are considered.

    Returns:
        ``np.array`` built from the list of processed images. When no image
        qualifies (including a folder with no JPEG files) the result is an
        empty array rather than an error.
    """
    min_side = 256  # square crops smaller than this are dropped

    # Sorted so the output order is reproducible regardless of the order in
    # which the OS lists the directory.
    jpeg_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(('.jpeg', '.jpg'))
    )

    images = []
    for file_name in jpeg_files:
        image = cv2.imread(os.path.join(folder_path, file_name))
        if image is None:
            # cv2.imread reports an unreadable or corrupt file by returning
            # None instead of raising; skip it rather than crash downstream.
            continue

        image = cut_rectangular_image_into_square(image)
        if image.shape[0] < min_side:
            continue

        images.append(resize_square_image(image))

    return np.array(images)

# Example usage: build a small mixed train/test set from the two class
# folders and save it to disk. Runs at import/cell-execution time.
folder_path = "../chest_xray/train/NORMAL/"
images = read_images_into_array(folder_path)
# folder_path is rebound here; after this cell it refers to the PNEUMONIA folder.
folder_path = "../chest_xray/train/PNEUMONIA/"
images2 = read_images_into_array(folder_path)


# First 500 usable images of each folder form the training set, the next 100
# of each form the test set (both are taken from the train/ directories).
# Slicing does not fail when a folder yields fewer than 600 images; the
# corresponding split is just silently smaller.
my_images_train = np.vstack((images[:500],images2[:500]))
my_images_test = np.vstack((images[500:600],images2[500:600]))
# In-place shuffle along the first (image) axis. No seed is set in this cell,
# so the order presumably differs between runs.
# NOTE(review): no label array is built or saved alongside the images, so the
# class of each image cannot be recovered from the saved files once shuffled
# -- confirm this is intended.
np.random.shuffle(my_images_train)
np.random.shuffle(my_images_test)

# print(images.shape)
# print(images2.shape)

# np.save appends the ".npy" extension to these file names.
np.save("train_images_small_mix", my_images_train)
np.save("test_images_small_mix", my_images_test)