# Artificial Neural Networks and Deep Learning

---

## Homework 2: Minimal Working Example

To make your first submission, follow these steps:
1. Create a folder named `[2024-2025] AN2DL/Homework 2` in your Google Drive.
2. Upload the `mars_for_students.npz` file to this folder.
3. Upload the Jupyter notebook `Homework 2 - Minimal Working Example.ipynb`.
4. Load and process the data.
5. Implement and train your model.
6. Submit the generated `.csv` file to Kaggle.

## ⚙️ Import Libraries

In [None]:
# Set seed for reproducibility
seed = 42

# Import necessary libraries
import os

# Set environment variables before importing modules
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# Import necessary modules
import logging
import random
import numpy as np
import imagehash
from IPython.display import FileLink

# Set seeds for random number generators in NumPy and Python
np.random.seed(seed)
random.seed(seed)

# Import TensorFlow and Keras
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Set seed for TensorFlow
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

# Reduce TensorFlow verbosity
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# Import other libraries
import os
import math
from PIL import Image
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from skimage.util import random_noise
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
import seaborn as sns
import albumentations as A
from albumentations.core.composition import OneOf
from albumentations import Rotate, RandomScale, Compose
import cv2

# Configure plot display settings shared by every figure in this notebook.
# seaborn is configured first (font scale, then the 'white' style), followed
# by the matplotlib default font size.
sns.set(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline

# Report the framework versions and how many GPUs TensorFlow can see
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tfk.__version__}")
print(f"GPU devices: {len(gpu_devices)}")

## ⏳ Load the Data

In [None]:
# Open the competition archive and report the arrays it contains
data = np.load("/kaggle/input/data-sets/mars_for_students.npz")

print(data.files)
for split_label, archive_key in (("Training", "training_set"), ("Test", "test_set")):
    print(f"{split_label} set shape: {data[archive_key].shape}")

In [None]:
# Separate the labelled archive entry into images (index 0 on axis 1) and
# masks (index 1 on axis 1). The test entry is used as-is because, as shown
# in the previous cell, it carries no labels.
X_test = data["test_set"]
training_set = data["training_set"]
X_data, y_data = training_set[:, 0], training_set[:, 1]

for array_label, array in (
    ("Training X", X_data),
    ("Training y", y_data),
    ("Test X", X_test),
):
    print(f"{array_label} shape: {array.shape}")

In [None]:
# Report the raw intensity range of both image sets before any rescaling
print("Original data range:")
for array_name, pixels in (("X_data", X_data), ("X_test", X_test)):
    print(f"{array_name}: Min = {pixels.min()}, Max = {pixels.max()}")

In [None]:
# Fixed image geometry used by the rest of the notebook
input_shape = (64, 128, 1)
# One class per distinct label value found in the training masks
output_shape = np.unique(y_data).size

print(f"Output shape: {output_shape}")

In [None]:
# Accumulate per-class pixel totals over every mask of the raw training data
class_counts = np.zeros(output_shape, dtype=int)
for mask in y_data:
    labels_in_mask = mask.astype(int).ravel()
    class_counts += np.bincount(labels_in_mask, minlength=output_shape)

# Express the totals as fractions of all labelled pixels
total_pixels = class_counts.sum()
class_distribution = class_counts / total_pixels
percentages = class_distribution * 100

# Report absolute counts next to their percentage share
print("Number of pixels and percentage distribution per class:")
for class_index, count in enumerate(class_counts):
    percentage = percentages[class_index]
    print(f"Class {class_index}: {count} pixels ({percentage:.2f}%)")

# Bar chart of the percentage share of each class
class_labels = [f"Class {i}" for i in range(output_shape)]

plt.figure(figsize=(5, 4))
plt.bar(class_labels, percentages)
plt.title("Class Distribution in Training set", fontsize=12)
plt.xlabel("Classes", fontsize=10)
plt.ylabel("Pixel percentage", fontsize=10)
plt.xticks(fontsize=9)
plt.yticks(fontsize=9)
plt.tight_layout()
plt.show()

In [None]:
# Shared bin edges: 50 evenly spaced edges over [0, 255], i.e. 49 bins
bins = np.linspace(0, 255, 50)
train_hist, _ = np.histogram(X_data.ravel(), bins=bins)
test_hist, _ = np.histogram(X_test.ravel(), bins=bins)

# Two panels side by side; the y-axis is shared so the counts are comparable
fig, axs = plt.subplots(1, 2, figsize=(12, 5), sharey=True)

panels = (
    (axs[0], X_data, 'blue', "Training"),
    (axs[1], X_test, 'orange', "Test"),
)
for axis, pixels, colour, split_name in panels:
    axis.hist(pixels.flatten(), bins=bins, color=colour, alpha=0.7, label=split_name)
    axis.set_title(f"{split_name} Pixel Distribution")
    axis.set_xlabel("Pixel Intensity")
    axis.legend()

# Only the left panel needs a y label because the axis is shared
axs[0].set_ylabel("Count")

plt.tight_layout()
plt.show()

In [None]:
# Human-readable name of each class, keyed by its integer label (0..4)
class_names = dict(enumerate(
    ["Background", "Soil", "Bedrock", "Sand", "Big Rock"]
))

In [None]:
def display_random_samples(images, masks=None, class_names=None, num_samples=None, fontsize=None):
    """Plot randomly chosen images, optionally with their masks and a legend.

    Each row shows one sample drawn with ``random.randint`` (repeats are
    possible). With ``masks`` the figure has three columns: image, mask and a
    class legend. Without masks only the image column is drawn.

    Args:
        images: Indexable collection of images; each one is squeezed and shown
            with the gray colormap.
        masks: Optional collection aligned with ``images`` holding class
            indices per pixel.
        class_names: Mapping from class index ``0..K-1`` to a display label.
            When omitted while ``masks`` is given, generic ``"Class i"``
            labels are generated up to the largest value found in ``masks``.
        num_samples: Number of rows to draw. ``None`` falls back to 5.
        fontsize: Font size for titles and legend (``None`` uses the
            matplotlib default).

    Raises:
        ValueError: If ``images`` is empty.
    """
    if len(images) == 0:
        raise ValueError("Cannot display samples from an empty image collection.")
    if num_samples is None:
        # The previous default (None) crashed on `num_samples * 4`
        num_samples = 5

    show_masks = masks is not None

    # Three columns (image, mask, legend) when masks exist, otherwise one
    cols = 3 if show_masks else 1
    fig = plt.figure(figsize=(12, num_samples * 4))
    gs = gridspec.GridSpec(num_samples, cols, hspace=0.3, wspace=0.4)

    if show_masks:
        if class_names is None:
            highest_label = int(np.max(masks))
            class_names = {c: f"Class {c}" for c in range(highest_label + 1)}

        # The colormap and legend are identical for every row: build them once.
        num_classes = len(class_names)
        # plt.cm.get_cmap was removed from recent Matplotlib; pyplot.get_cmap
        # still accepts the number of discrete colours.
        colormap = plt.get_cmap("viridis", num_classes)
        # Integer lookup returns the c-th discrete colour directly, which is
        # the colour imshow assigns to label c with vmin=0, vmax=num_classes-1,
        # and avoids dividing by zero when there is a single class.
        legend_elements = [
            mpatches.Patch(color=colormap(c), label=class_names[c])
            for c in range(num_classes)
        ]

    for i in range(num_samples):
        # Select a random index
        idx = random.randint(0, len(images) - 1)

        # Plot the image
        ax1 = fig.add_subplot(gs[i, 0])
        ax1.imshow(images[idx].squeeze(), cmap='gray')
        ax1.set_title(f"Image {idx}", fontsize=fontsize)
        ax1.axis('off')

        if not show_masks:
            continue

        # Plot the corresponding mask; values are cast to integer class indices
        ax2 = fig.add_subplot(gs[i, 1])
        mask = masks[idx].squeeze().astype(int)
        ax2.imshow(mask, cmap=colormap, vmin=0, vmax=num_classes - 1)
        ax2.set_title(f"Mask {idx}", fontsize=fontsize)
        ax2.axis('off')

        # The legend gets its own axis-free subplot
        ax3 = fig.add_subplot(gs[i, 2])
        ax3.axis('off')
        ax3.legend(
            handles=legend_elements,
            loc="center",
            fontsize=fontsize,
            frameon=False
        )

    plt.tight_layout(pad=0.5)
    plt.show()

In [None]:
# Show five randomly drawn training images next to their masks
display_random_samples(
    X_data,
    masks=y_data,
    class_names=class_names,
    num_samples=5,
    fontsize=8,
)

In [None]:
# Show five randomly drawn test images; the test set has no masks
display_random_samples(
    X_test,
    num_samples=5,
    fontsize=8,
)

In [None]:
# Hand-picked samples to inspect, one row each: image on the left, mask on the right
sample_indices = [1370, 2161]
n_rows = len(sample_indices)

# Figure height grows with the number of inspected samples
plt.figure(figsize=(10, n_rows * 5))

for row, sample_index in enumerate(sample_indices):
    panels = (
        (X_data, 'gray', f"Image {sample_index} (X_data)"),      # grayscale image
        (y_data, 'viridis', f"Mask {sample_index} (y_data)"),    # label mask
    )
    for col, (source, cmap_name, title) in enumerate(panels, start=1):
        plt.subplot(n_rows, 2, row * 2 + col)
        plt.imshow(source[sample_index].squeeze(), cmap=cmap_name)
        plt.title(title)
        plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Remove every sample whose mask is identical to the mask of the "alien"
# reference image.
# NOTE: the reference is addressed by position in the *unfiltered* dataset.
# Re-running this cell after filtering would pick a different image at
# index 1370, so run it exactly once per data load.
mask_reference = y_data[1370]

# Flag duplicates with an array comparison instead of converting every mask
# to a Python tuple.
is_duplicate = np.array(
    [np.array_equal(mask, mask_reference) for mask in y_data],
    dtype=bool,
)
duplicate_indices = np.flatnonzero(is_duplicate).tolist()

print(f"Found {len(duplicate_indices)} images with identical masks to the reference.")

# Filter the dataset with a boolean mask. This keeps the original order and
# avoids the quadratic `idx not in list` scan.
original_size = len(y_data)
remaining_indices = np.flatnonzero(~is_duplicate).tolist()
X_data = X_data[~is_duplicate]
y_data = y_data[~is_duplicate]

print(f"Original dataset size: {original_size}")
print(f"Filtered dataset size: {len(y_data)}")
print(f"Number of removed images: {len(duplicate_indices)}")

In [None]:
# Per-mask class profile: fraction of the mask's pixels belonging to each class
class_profiles_data = np.array([
    np.bincount(labels.flatten().astype(int), minlength=output_shape) / labels.size
    for labels in y_data
])
dominant_class_data = class_profiles_data.argmax(axis=1)

# First split: hold out 20% as our local test set, stratified on each
# mask's dominant class
X_train_val, X_our_test, y_train_val, y_our_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=seed,
    stratify=dominant_class_data,
)

# Recompute the profiles on what is left so the second split is stratified too
class_profiles_train_val = np.array([
    np.bincount(labels.flatten().astype(int), minlength=output_shape) / labels.size
    for labels in y_train_val
])
dominant_class_train_val = class_profiles_train_val.argmax(axis=1)

# Second split: 25% of the remaining 80% equals 20% of the full dataset
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    random_state=seed,
    stratify=dominant_class_train_val,
)

# Check the resulting split sizes
for split_name, split_images in (
    ("Training", X_train),
    ("Validation", X_val),
    ("Test", X_our_test),
):
    print(f"{split_name} set: {len(split_images)} samples")

In [None]:
# Count how many masks are dominated by each class
def compute_class_distribution(y, output_shape):
    """Return ``{class_index: number_of_masks}`` keyed by dominant class.

    A mask's dominant class is the label with the most pixels in it (ties go
    to the lowest label, as ``argmax`` does). Every class in
    ``range(output_shape)`` appears in the result, with 0 when no mask is
    dominated by it.
    """
    # Dominant label of every mask
    dominant = np.array([
        np.bincount(mask.flatten().astype(int), minlength=output_shape).argmax()
        for mask in y
    ])

    # Tally the masks per dominant label
    labels_seen, occurrences = np.unique(dominant, return_counts=True)

    # Start from an all-zero table so absent classes are still reported
    distribution = dict.fromkeys(range(output_shape), 0)
    distribution.update(zip(labels_seen, occurrences))

    return distribution

# Dominant-class tallies for the three splits
train_distribution, val_distribution, test_distribution = (
    compute_class_distribution(split_masks, output_shape)
    for split_masks in (y_train, y_val, y_our_test)
)

# Turn the tallies into lists ordered by class index
class_ids = range(output_shape)
labels = [f"Class {i}" for i in class_ids]
train_counts = [train_distribution[i] for i in class_ids]
val_counts = [val_distribution[i] for i in class_ids]
test_counts = [test_distribution[i] for i in class_ids]

# Grouped bar chart: one group per class, one bar per split
x = np.arange(output_shape)

plt.figure(figsize=(10, 6))
bar_series = (
    (x - 0.2, train_counts, 'Training', 'darkblue'),
    (x, val_counts, 'Validation', 'orange'),
    (x + 0.2, test_counts, 'Test', 'green'),
)
for positions, counts, split_name, colour in bar_series:
    plt.bar(positions, counts, width=0.2, label=split_name, color=colour)

plt.title("Class Distribution in Training, Validation, and Test Sets", fontsize=14)
plt.xlabel("Classes", fontsize=12)
plt.ylabel("Number of Masks", fontsize=12)
plt.xticks(x, labels, fontsize=10)
plt.legend(fontsize=10)
plt.tight_layout()
plt.show()

In [None]:
# The following functions transfer class-4 portions from images that contain class-4 pixels
# to images that do not contain any

def extract_class_4_portion(image, mask, target_class=4, crop_size=(16, 16)):
    """Crop a window centred on a random pixel of ``target_class``.

    Args:
        image: 2-D image array aligned with ``mask``.
        mask: 2-D label array.
        target_class: Label whose pixels are candidate window centres.
        crop_size: ``(height, width)`` of the window before clipping.

    Returns:
        ``(cropped_image, cropped_mask)``, clipped to the mask bounds (so the
        window can be smaller than ``crop_size`` near a border), or
        ``(None, None)`` when ``mask`` holds no ``target_class`` pixel.
    """
    # Coordinates (row, col) of every pixel carrying the target label
    candidates = np.argwhere(mask == target_class)
    if len(candidates) == 0:
        return None, None

    # Random window centre among the candidates
    row, col = random.choice(candidates)

    # Half extents of the window, then clip the window to the mask bounds
    half_h, half_w = crop_size[0] // 2, crop_size[1] // 2
    rows = slice(max(0, row - half_h), min(mask.shape[0], row + half_h))
    cols = slice(max(0, col - half_w), min(mask.shape[1], col + half_w))

    return image[rows, cols], mask[rows, cols]

def apply_transfer(image, mask, class_4_portion, class_4_mask):
    """Paste a patch and its mask at one random location of an image/mask pair.

    The inputs are left untouched; modified copies are returned. The same
    randomly drawn top-left corner is used for the image and the mask, and the
    whole rectangular patch overwrites the destination.

    Returns:
        ``(new_image, new_mask)`` with the patch pasted in.
    """
    # Patch and destination sizes (only the first two axes matter)
    patch_h, patch_w = class_4_portion.shape[:2]
    img_h, img_w = image.shape[:2]

    # Random top-left corner such that the patch fits entirely inside
    top = random.randint(0, img_h - patch_h)
    left = random.randint(0, img_w - patch_w)
    rows = slice(top, top + patch_h)
    cols = slice(left, left + patch_w)

    # Work on copies so the caller's arrays are never modified
    new_image, new_mask = image.copy(), mask.copy()
    new_image[rows, cols] = class_4_portion
    new_mask[rows, cols] = class_4_mask

    return new_image, new_mask

# Light geometric jitter applied to every extracted class-4 patch
transform = Compose([
    Rotate(limit=15, p=0.5),  # Randomly rotate within ±15 degrees
    RandomScale(scale_limit=0.1, p=0.5),  # Randomly scale by ±10%
])

# Number of images to modify, expressed as a share of the whole training set
transfer_percentage = 0.8
num_images = len(X_train)
num_to_transfer = int(transfer_percentage * num_images)

# Split the training indices once by whether the mask contains class 4.
# This was previously recomputed for every modified image.
has_class_4 = [bool(np.any(mask == 4)) for mask in y_train]
indices_without_class_4 = [i for i, present in enumerate(has_class_4) if not present]
indices_with_class_4 = [i for i, present in enumerate(has_class_4) if present]

# random.sample raises ValueError when asked for more items than the
# population holds, so never request more images than actually lack class 4.
num_to_transfer = min(num_to_transfer, len(indices_without_class_4))

# Randomly select the images to modify; a set gives O(1) membership tests
indices_to_modify = set(random.sample(indices_without_class_4, num_to_transfer))

# Initialize lists to store transformed images and masks
transformed_X_train = []
transformed_y_train = []

for i, (image, mask) in enumerate(zip(X_train, y_train)):
    # Default: keep the sample unchanged
    new_image, new_mask = image, mask

    # Only modify selected images, and only if a class-4 donor image exists
    if i in indices_to_modify and indices_with_class_4:
        # Select a random donor image that contains class 4
        class_4_index = random.choice(indices_with_class_4)

        # Extract a random window centred on a class-4 pixel of the donor
        portion, portion_mask = extract_class_4_portion(
            X_train[class_4_index], y_train[class_4_index], target_class=4
        )
        if portion is not None:
            # Apply light transformations to the extracted portion
            transformed = transform(image=portion, mask=portion_mask)

            # Paste the transformed portion onto a copy of the current sample
            new_image, new_mask = apply_transfer(
                image, mask, transformed['image'], transformed['mask']
            )

    transformed_X_train.append(new_image)
    transformed_y_train.append(new_mask)

# Convert the transformed data back to NumPy arrays
X_train_new = np.array(transformed_X_train)
y_train_new = np.array(transformed_y_train)


In [None]:
# Show five random samples of the training set after the class-4 transfer
display_random_samples(
    X_train_new,
    masks=y_train_new,
    class_names=class_names,
    num_samples=5,
    fontsize=8,
)

In [None]:
# Accumulate per-class pixel totals over the masks produced by the class-4 transfer
class_counts = np.zeros(output_shape, dtype=int)
for mask in y_train_new:
    labels_in_mask = mask.astype(int).ravel()
    class_counts += np.bincount(labels_in_mask, minlength=output_shape)

# Express the totals as fractions of all labelled pixels
total_pixels = class_counts.sum()
class_distribution = class_counts / total_pixels
percentages = class_distribution * 100

# Report absolute counts next to their percentage share
print("Number of pixels and percentage distribution per class:")
for class_index, count in enumerate(class_counts):
    percentage = percentages[class_index]
    print(f"Class {class_index}: {count} pixels ({percentage:.2f}%)")

# Bar chart of the percentage share of each class
class_labels = [f"Class {i}" for i in range(output_shape)]

plt.figure(figsize=(5, 4))
plt.bar(class_labels, percentages)
plt.title("Class Distribution in Training set", fontsize=12)
plt.xlabel("Classes", fontsize=10)
plt.ylabel("Pixel percentage", fontsize=10)
plt.xticks(fontsize=9)
plt.yticks(fontsize=9)
plt.tight_layout()
plt.show()

In [None]:
# General transformations: the pipeline applied to every training sample by
# the oversampling loop below. Image and mask are passed together so both
# receive the same transform.
# NOTE(review): `alpha_affine` and the explicit `additional_targets` entry
# for 'mask' may be deprecated or redundant in newer Albumentations
# releases; confirm against the installed version.
general_augmentations = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=30, p=0.5),
    
    A.ElasticTransform(alpha=1, sigma=10, alpha_affine=10, p=0.5), 
    
], p=1,additional_targets={'mask': 'mask'})

# Transformations for class 4: the pipeline applied by the oversampling loop
# only to the zoomed version of samples whose mask contains class 4.
# Some steps (RandomScale, the 48x96 crop) can change the output size, which
# is why that loop resizes every result.
focused_augmentations = A.Compose([
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=10, p=0.5),
    A.RandomScale(scale_limit=0.2, p=0.5), 
    A.ElasticTransform(alpha=2, sigma=20, alpha_affine=20, p=0.5),
    A.CropNonEmptyMaskIfExists(height=48, width=96, p=0.5),
], p=1,additional_targets={'mask': 'mask'})


In [None]:
# Zoom for images with class 4

def zoom_on_class_4(image, mask, zoom_factor=1.1, seed=None):
    """Crop around the class-4 pixels and resize the crop to the original size.

    The crop is the bounding box of every pixel equal to 4 in ``mask``,
    enlarged on each side by ``extent * (zoom_factor - 1) / 2`` and clipped
    to the image. A larger ``zoom_factor`` therefore keeps more context
    around the region, i.e. magnifies it less.

    Args:
        image: 2-D image array aligned with ``mask``.
        mask: 2-D label array.
        zoom_factor: Ratio between the crop extent and the bounding-box extent.
        seed: Optional seed for NumPy's global RNG. The function draws no
            random numbers itself; the parameter is kept for backward
            compatibility. When ``None`` the global RNG is left untouched
            (previously it was reseeded from OS entropy on every call, which
            silently broke the notebook's reproducibility).

    Returns:
        ``(zoomed_image, zoomed_mask)`` with the same height/width as
        ``mask``, or the unchanged ``(image, mask)`` when no class-4 pixel
        exists.
    """
    if seed is not None:
        np.random.seed(seed)
    height, width = mask.shape

    class_4_pixels = np.argwhere(mask == 4)

    # Return the original image and mask if class 4 pixels are not present
    if class_4_pixels.size == 0:
        return image, mask

    # Bounding box around the class 4 pixels. The maximum coordinates are
    # inclusive, so add 1 to turn them into exclusive slice bounds; without
    # it the last row/column of the region is cut off and a one-pixel-thick
    # region gives an empty crop.
    top, left = class_4_pixels.min(axis=0)
    bottom, right = class_4_pixels.max(axis=0) + 1

    # Add margin to the bounding box based on the zoom factor
    margin_h = int((bottom - top) * (zoom_factor - 1) / 2)
    margin_w = int((right - left) * (zoom_factor - 1) / 2)

    # Limit the coordinates to stay within the image boundaries
    top = max(0, top - margin_h)
    bottom = min(height, bottom + margin_h)
    left = max(0, left - margin_w)
    right = min(width, right + margin_w)

    # Perform the crop operation
    cropped_image = image[top:bottom, left:right]
    cropped_mask = mask[top:bottom, left:right]

    # Safety check: if the crop is empty (e.g. zoom_factor < 1 giving a
    # negative margin), return the original image and mask
    if cropped_image.size == 0 or cropped_mask.size == 0:
        return image, mask

    # Resize the crop back to the original dimensions. Nearest-neighbour
    # interpolation keeps the mask values valid class labels.
    zoomed_image = cv2.resize(cropped_image, (width, height), interpolation=cv2.INTER_LINEAR)
    zoomed_mask = cv2.resize(cropped_mask, (width, height), interpolation=cv2.INTER_NEAREST)

    return zoomed_image, zoomed_mask


In [None]:
# Oversampling: empty accumulators filled by the augmentation loop below
augmented_X_train, augmented_y_train = [], []

In [None]:
# Oversample the training set: two general augmentations per sample, plus
# four focused augmentations of a zoomed view when the mask contains class 4
for image, mask in zip(X_train_new, y_train_new):
    # General transformations, drawn first so the RNG order is unchanged
    samples = [general_augmentations(image=image, mask=mask) for _ in range(2)]

    # Extra samples for images containing class 4
    if 4 in np.unique(mask):
        zoomed_image, zoomed_mask = zoom_on_class_4(image, mask, zoom_factor=9.5)
        samples += [
            focused_augmentations(image=zoomed_image, mask=zoomed_mask)
            for _ in range(4)
        ]

    # Resize to guarantee uniform dimensions (cv2 expects (width, height));
    # masks use nearest-neighbour so their values stay valid labels
    for sample in samples:
        augmented_X_train.append(cv2.resize(sample['image'], (128, 64)))
        augmented_y_train.append(
            cv2.resize(sample['mask'], (128, 64), interpolation=cv2.INTER_NEAREST)
        )

augmented_X_train = np.array(augmented_X_train)
augmented_y_train = np.array(augmented_y_train)


In [None]:
# Accumulate per-class pixel totals over the augmented (oversampled) masks only
class_counts = np.zeros(output_shape, dtype=int)
for mask in augmented_y_train:
    labels_in_mask = mask.astype(int).ravel()
    class_counts += np.bincount(labels_in_mask, minlength=output_shape)

# Express the totals as fractions of all labelled pixels
total_pixels = class_counts.sum()
class_distribution = class_counts / total_pixels
percentages = class_distribution * 100

# Report absolute counts next to their percentage share
print("Number of pixels and percentage distribution per class:")
for class_index, count in enumerate(class_counts):
    percentage = percentages[class_index]
    print(f"Class {class_index}: {count} pixels ({percentage:.2f}%)")

# Bar chart of the percentage share of each class
class_labels = [f"Class {i}" for i in range(output_shape)]

plt.figure(figsize=(5, 4))
plt.bar(class_labels, percentages)
plt.title("Class Distribution in Training set", fontsize=12)
plt.xlabel("Classes", fontsize=10)
plt.ylabel("Pixel percentage", fontsize=10)
plt.xticks(fontsize=9)
plt.yticks(fontsize=9)
plt.tight_layout()
plt.show()

In [None]:
# Final training set = transfer-augmented samples followed by the oversampled ones
X_train_new2 = np.concatenate((X_train_new, augmented_X_train))
y_train_new2 = np.concatenate((y_train_new, augmented_y_train))

# Debug: report how much each stage contributes
for stage_label, stage_images in (
    ("Original", X_train),
    ("Augmented", augmented_X_train),
    ("Final", X_train_new2),
):
    print(f"{stage_label} training set size: {len(stage_images)}")


In [None]:
# Accumulate per-class pixel totals over the final (combined) training masks
class_counts = np.zeros(output_shape, dtype=int)
for mask in y_train_new2:
    labels_in_mask = mask.astype(int).ravel()
    class_counts += np.bincount(labels_in_mask, minlength=output_shape)

# Express the totals as fractions of all labelled pixels
total_pixels = class_counts.sum()
class_distribution = class_counts / total_pixels
percentages = class_distribution * 100

# Report absolute counts next to their percentage share
print("Number of pixels and percentage distribution per class:")
for class_index, count in enumerate(class_counts):
    percentage = percentages[class_index]
    print(f"Class {class_index}: {count} pixels ({percentage:.2f}%)")

# Bar chart of the percentage share of each class
class_labels = [f"Class {i}" for i in range(output_shape)]

plt.figure(figsize=(5, 4))
plt.bar(class_labels, percentages)
plt.title("Class Distribution in Training set", fontsize=12)
plt.xlabel("Classes", fontsize=10)
plt.ylabel("Pixel percentage", fontsize=10)
plt.xticks(fontsize=9)
plt.yticks(fontsize=9)
plt.tight_layout()
plt.show()

In [None]:
# Show five random samples drawn from the oversampled (augmented) data only
display_random_samples(
    augmented_X_train,
    masks=augmented_y_train,
    class_names=class_names,
    num_samples=5,
    fontsize=8,
)

In [None]:
# Intensity extrema of every split, taken before rescaling
train_min, train_max = X_train_new2.min(), X_train_new2.max()
val_min, val_max = X_val.min(), X_val.max()
test_min, test_max = X_test.min(), X_test.max()
our_test_min, our_test_max = X_our_test.min(), X_our_test.max()

# Rescale pixels to [0, 1] and append a trailing channel axis.
# NOTE(review): each split is scaled with its OWN min/max, so the same raw
# intensity maps to different values in different splits whenever the
# extrema differ. Consider reusing train_min/train_max for all splits;
# left unchanged here so the exported values stay the same.
# NOTE: this cell is not idempotent; re-running it rescales again and adds
# a second channel axis.
X_train_new2 = ((X_train_new2 - train_min) / (train_max - train_min))[..., np.newaxis]
X_val = ((X_val - val_min) / (val_max - val_min))[..., np.newaxis]
X_our_test = ((X_our_test - our_test_min) / (our_test_max - our_test_min))[..., np.newaxis]
X_test = ((X_test - test_min) / (test_max - test_min))[..., np.newaxis]

# Take the shape from the array that is actually exported. X_train is the
# pre-augmentation split and never receives the channel axis, so using it
# dropped the channel dimension from input_shape.
input_shape = X_train_new2.shape[1:]
num_classes = len(np.unique(y_data))

print(f"Input shape: {input_shape}")
print(f"Number of classes: {num_classes}")

In [None]:
# Print the shape of every array that will be exported
shape_report = (
    ("X_train", X_train_new2),
    ("y_train", y_train_new2),
    ("X_val", X_val),
    ("y_val", y_val),
    ("X_our_test", X_our_test),
    ("y_our_test", y_our_test),
    ("X_test", X_test),
)
for array_name, array in shape_report:
    print(f"{array_name} shape: {array.shape}")

In [None]:
# Check the intensity range of every image split after rescaling
range_report = (
    ("Training set", X_train_new2),
    ("Validation set", X_val),
    ("Our Test set", X_our_test),
    ("Test set", X_test),
)
for split_name, pixels in range_report:
    print(f"{split_name}: Min = {pixels.min()}, Max = {pixels.max()}")

In [None]:
# Persist every prepared split into one compressed archive
export_arrays = {
    "X_train": X_train_new2,
    "y_train": y_train_new2,
    "X_val": X_val,
    "y_val": y_val,
    "X_our_test": X_our_test,
    "y_our_test": y_our_test,
    "X_test": X_test,
}
np.savez_compressed("mars_datasets_superaug.npz", **export_arrays)

# Last expression of the cell, so the notebook shows a download link
FileLink("mars_datasets_superaug.npz")