In [2]:
import os
import numpy as np
import random
from collections import Counter
from itertools import combinations

source_dirs = [f"training_set_{i}" for i in range(5)]
output_dir = "training_set_5"
os.makedirs(output_dir, exist_ok=True)


def parse_coord(fname):
    """Return the ``(x, y)`` pair encoded in an ``x_y_index.npy`` file name.

    Args:
        fname: File name ending in ``.npy`` (no directory component).

    Returns:
        A tuple ``(x, y)`` of ints, or ``None`` when the stem is not exactly
        three underscore-separated integers.
    """
    parts = fname[:-4].split("_")
    if len(parts) != 3:
        return None
    try:
        x, y, _ = (int(part) for part in parts)
    except ValueError:
        return None
    return (x, y)


def load_samples(dirs):
    """Load every ``.npy`` matrix under *dirs*, grouped by ``(x, y)``.

    Missing directories and ``.npy`` files with an unparseable name are
    reported on stdout and skipped, so one stray entry does not abort the
    whole run. Files without the ``.npy`` suffix are ignored silently.

    Args:
        dirs: Iterable of directory paths to scan (non-recursively).

    Returns:
        A dict mapping ``(x, y)`` to the list of float32 matrices found for
        that coordinate.
    """
    grouped = {}
    for d in dirs:
        if not os.path.isdir(d):
            print(f"Skipping missing source directory {d}")
            continue
        for fname in os.listdir(d):
            if not fname.endswith(".npy"):
                continue
            path = os.path.join(d, fname)
            key = parse_coord(fname)
            if key is None:
                print(f"Skipping {path}: name is not of the form x_y_index.npy")
                continue
            grouped.setdefault(key, []).append(np.load(path).astype("float32"))
    return grouped


def interpolate_pair(a, b, alpha):
    """Return ``alpha * a + (1 - alpha) * b`` clipped to ``[0, 1]``.

    NOTE(review): the clip presumes the source matrices are normalised to
    [0, 1] -- confirm against the data; values outside that range are altered.
    """
    return np.clip(alpha * a + (1 - alpha) * b, 0, 1)


def upsample_coord(x, y, matrices, needed, out_dir):
    """Write *needed* synthetic samples for coordinate ``(x, y)``.

    Each synthetic sample is a random blend of two distinct matrices drawn
    from *matrices*; the blend weight is uniform in ``[0.3, 0.7]``. Files are
    named ``{x}_{y}_{i}.npy`` with ``i`` counting from 0, so re-running
    overwrites the previous output for the same coordinate.

    Args:
        x: First coordinate component (used in the file name).
        y: Second coordinate component (used in the file name).
        matrices: Original samples for this coordinate.
        needed: Number of synthetic samples to create.
        out_dir: Existing directory to write into.

    Returns:
        The number of files actually written.
    """
    if needed <= 0:
        return 0
    if len(matrices) < 2:
        # random.sample(matrices, 2) would raise ValueError here: a blend
        # needs two distinct source samples.
        print(f"Cannot upsample ({x},{y}): need at least 2 samples, found {len(matrices)}")
        return 0

    print(f"Upsampling ({x},{y}) by {needed} samples")

    for i in range(needed):
        a, b = random.sample(matrices, 2)
        alpha = random.uniform(0.3, 0.7)
        filename = f"{x}_{y}_{i}.npy"
        np.save(os.path.join(out_dir, filename), interpolate_pair(a, b, alpha))
    return needed


# Gather all samples
samples_by_coord = load_samples(source_dirs)

# Max target count (0 when nothing was found, so the loop below is a no-op)
target_count = max((len(v) for v in samples_by_coord.values()), default=0)

# Interpolate for undersampled coords, counting every file written
new_files_created = 0
for (x, y), matrices in samples_by_coord.items():
    needed = target_count - len(matrices)
    new_files_created += upsample_coord(x, y, matrices, needed, output_dir)


new_files_created

Upsampling (3,2) by 40 samples
Upsampling (2,0) by 80 samples
Upsampling (0,3) by 40 samples
Upsampling (1,1) by 80 samples
Upsampling (3,0) by 40 samples
Upsampling (2,2) by 40 samples
Upsampling (0,1) by 80 samples
Upsampling (1,3) by 40 samples
Upsampling (2,4) by 40 samples
Upsampling (1,5) by 80 samples
Upsampling (3,4) by 40 samples
Upsampling (0,5) by 80 samples
Upsampling (4,5) by 80 samples
Upsampling (5,1) by 40 samples
Upsampling (5,5) by 40 samples
Upsampling (4,1) by 40 samples
Upsampling (3,5) by 40 samples
Upsampling (0,4) by 80 samples
Upsampling (2,5) by 40 samples
Upsampling (1,4) by 40 samples
Upsampling (5,0) by 40 samples
Upsampling (3,1) by 40 samples
Upsampling (1,2) by 40 samples
Upsampling (0,0) by 80 samples
Upsampling (4,4) by 40 samples
Upsampling (2,1) by 80 samples
Upsampling (3,3) by 40 samples
Upsampling (1,0) by 80 samples
Upsampling (0,2) by 80 samples
Upsampling (5,2) by 80 samples
Upsampling (4,2) by 40 samples
Upsampling (4,0) by 40 samples
Upsampli

1800