In [11]:
import h5py
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import os

# Load the per-image class labels (label 1 -> TT list, anything else -> QCD list)
labels = np.load('data/train/labels/labels.npy')

# Integer factor by which each image side is enlarged for display
zoom_factor = 15

# Containers for the upscaled PIL images, split by class
tt_images = []
qcd_images = []

# Read the whole dataset into memory; the slice copies the data, so the HDF5
# handle can be released before the per-image conversion starts.
with h5py.File('data/train/images/jet_images.h5', 'r') as f:
    images = f['images'][:]  # Load all images
print(images.shape)

# Fail early on misaligned inputs instead of hitting an IndexError halfway
# through the loop (or silently pairing images with the wrong labels).
if len(labels) != len(images):
    raise ValueError(
        f"labels/images length mismatch: {len(labels)} labels for {len(images)} images"
    )

for i, img_array in enumerate(images):
    # Images whose maximum is <= 1 are treated as [0, 1]-scaled and stretched
    # to [0, 255]; everything else is taken to be on a 0-255 scale already.
    if img_array.max() <= 1:
        img_array = img_array * 255
    # Clip before casting: astype(np.uint8) wraps out-of-range values
    # (e.g. 300 -> 44, negatives -> large values) instead of saturating.
    img_array = np.clip(img_array, 0, 255).astype(np.uint8)

    img = Image.fromarray(img_array, mode='L')
    # Nearest-neighbour resampling keeps individual pixels as sharp squares.
    img = img.resize(
        (img.width * zoom_factor, img.height * zoom_factor),
        resample=Image.NEAREST
    )

    # Append to the list matching the image's label
    if labels[i] == 1:
        tt_images.append(img)
    else:
        qcd_images.append(img)

# Helper function to save images in fixed-size grids (4x4 by default) to a PDF
def save_images_to_pdf(image_list, output_path, title, *, rows=4, cols=4):
    """Write ``image_list`` to a multi-page PDF, one grid of images per page.

    Images are placed in order, filling each page's grid before a new page is
    started. Unused cells on the last page are left blank. An empty
    ``image_list`` produces no pages.

    Args:
        image_list: Indexable sequence (supports ``len`` and ``[]``) of images
            that ``Axes.imshow`` can draw.
        output_path: Destination path handed to ``PdfPages``.
        title: Figure-level title drawn on every page.
        rows: Number of grid rows per page (keyword-only, default 4).
        cols: Number of grid columns per page (keyword-only, default 4).

    Raises:
        ValueError: If ``rows`` or ``cols`` is smaller than 1.
    """
    if rows < 1 or cols < 1:
        raise ValueError(f"rows and cols must be >= 1, got rows={rows}, cols={cols}")

    per_page = rows * cols
    total = len(image_list)

    with PdfPages(output_path) as pdf:
        for start in range(0, total, per_page):
            # squeeze=False keeps axs two-dimensional even for a 1x1 grid,
            # so axs.flat is always available.
            fig, axs = plt.subplots(rows, cols, figsize=(12, 12), squeeze=False)
            try:
                fig.suptitle(title, fontsize=16)
                for offset, ax in enumerate(axs.flat):
                    index = start + offset
                    if index < total:
                        ax.imshow(image_list[index], cmap='gray')
                    # Hide ticks and frame on every cell, filled or not.
                    ax.axis('off')
                pdf.savefig(fig)
            finally:
                # Always release the figure, even if drawing or saving fails,
                # so figures do not accumulate in pyplot's registry.
                plt.close(fig)

# Save both PDFs. PdfPages does not create missing parent directories, so make
# sure the output folder exists first (a no-op when it is already there).
os.makedirs('data/train/images/output_images', exist_ok=True)
save_images_to_pdf(tt_images, 'data/train/images/output_images/TT_images.pdf', title='TT Events')
save_images_to_pdf(qcd_images, 'data/train/images/output_images/QCD_images.pdf', title='QCD Events')

(3520, 30, 30)
