# 01 - Exploratory Data Analysis of RAW input data

In [1]:
# Import necessary libraries
import rasterio
from rasterio.enums import Resampling
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from skimage.feature import graycomatrix, graycoprops
from rasterio.warp import reproject, calculate_default_transform
from typing import Tuple, Dict, Any

# Set plotting style: apply seaborn's "whitegrid" style to the plots in this notebook
sns.set(style="whitegrid")

In [None]:
# Utility functions

def plot_image(image: np.ndarray, title: str, cmap: str = 'viridis') -> None:
    """Show one image band in a square figure with a colorbar.

    Args:
        image (np.ndarray): Array passed to ``plt.imshow``.
        title (str): Text placed above the figure.
        cmap (str): Matplotlib colormap name used to render the values.
    """
    figure_size = (10, 10)
    plt.figure(figsize=figure_size)

    # Draw the band first so the colorbar picks up the image just rendered.
    plt.imshow(image, cmap=cmap)
    plt.colorbar()

    plt.title(title)
    # Axis ticks and frame are switched off for the image display.
    plt.axis('off')
    plt.show()

def plot_histogram(image: np.ndarray, title: str) -> None:
    """Draw a 50-bin histogram of all pixel values of an image.

    Args:
        image (np.ndarray): Image array; it is flattened before binning.
        title (str): Text placed above the histogram.
    """
    bar_style = {'color': 'c', 'edgecolor': 'k', 'alpha': 0.7}

    plt.figure(figsize=(10, 6))
    # Collapse the array to 1-D so every pixel goes into one distribution.
    pixel_values = image.flatten()
    plt.hist(pixel_values, bins=50, **bar_style)

    plt.title(title)
    plt.xlabel('Pixel Values')
    plt.ylabel('Frequency')
    plt.show()

def summarize_image(image: np.ndarray, name: str) -> None:
    """Print shape, min, max, mean and standard deviation of an image.

    Args:
        image (np.ndarray): The image array to describe.
        name (str): Label used in the header line of the report.
    """
    print(f"Summary statistics for {name}:")
    print(f" - Shape: {image.shape}")

    # (label, reducer, suffix): each statistic is computed right before its
    # line is printed; the last entry carries the trailing blank line.
    report_rows = (
        ("Min value", np.min, ""),
        ("Max value", np.max, ""),
        ("Mean value", np.mean, ""),
        ("Standard deviation", np.std, "\n"),
    )
    for label, reducer, suffix in report_rows:
        print(f" - {label}: {reducer(image)}{suffix}")

def resample_image(src: rasterio.io.DatasetReader, target_transform: rasterio.Affine,
                   target_shape: Tuple[int, int]) -> np.ndarray:
    """Resample all bands of an open dataset to a target shape.

    The bands are read with ``out_shape`` set to the requested size and
    bilinear resampling.

    Args:
        src (rasterio.io.DatasetReader): Source dataset reader.
        target_transform (rasterio.Affine): Target affine transform. It is
            accepted for interface compatibility but is not used: only
            ``target_shape`` drives the read.
        target_shape (Tuple[int, int]): Target shape; element 0 and element 1
            are passed as the last two entries of ``out_shape``.

    Returns:
        np.ndarray: Resampled image. For a single-band source the band axis
        is dropped, giving ``(target_shape[0], target_shape[1])``; otherwise
        the array is ``(src.count, target_shape[0], target_shape[1])``.
    """
    data = src.read(
        out_shape=(src.count, target_shape[0], target_shape[1]),
        resampling=Resampling.bilinear,
    )
    # Drop only the band axis. A bare ``squeeze()`` would also collapse a
    # spatial dimension of size 1 and return an array of the wrong rank.
    return data[0] if data.shape[0] == 1 else data


def reproject_resample(src_path: str, target_crs: str,
                       target_transform: rasterio.Affine, target_shape: Tuple[int, int]) -> Tuple[np.ndarray, Dict[str, Any]]:
    """Reproject and resample an image to match a target CRS and resolution.

    When the source CRS differs from ``target_crs`` every band is warped onto
    the grid described by ``target_transform`` / ``target_shape`` with
    bilinear resampling. When the CRS already matches, the bands are read
    with ``out_shape`` set to the target size (bilinear).

    NOTE(review): in the matching-CRS branch ``target_transform`` is not
    applied; the array covers the full source extent. If callers pass sources
    whose extent differs from the target grid, that branch should probably
    also go through ``reproject`` - confirm against the callers.

    Args:
        src_path (str): Path to the source image.
        target_crs (str): Target coordinate reference system.
        target_transform (rasterio.Affine): Target affine transform.
        target_shape (Tuple[int, int]): Target shape; element 0 is used as
            the height and element 1 as the width of the output.

    Returns:
        Tuple[np.ndarray, Dict[str, Any]]: Resampled image and metadata.
        The band axis is dropped for single-band sources. The metadata is a
        copy of the source metadata whose ``width``, ``height``, ``transform``
        and ``crs`` are updated to describe the returned array.
    """
    height, width = target_shape[0], target_shape[1]

    with rasterio.open(src_path) as src:
        out_shape = (src.count, height, width)

        # Start from the source metadata and overwrite the grid-related keys
        # so the metadata always describes the array that is returned.
        meta = src.meta.copy()
        meta.update({'width': width, 'height': height})

        if src.crs != target_crs:
            data = np.empty(out_shape, dtype=src.dtypes[0])
            reproject(
                # Warp every band; warping only band 1 would leave the other
                # planes of the np.empty buffer uninitialised.
                source=rasterio.band(src, list(src.indexes)),
                destination=data,
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=target_transform,
                dst_crs=target_crs,
                resampling=Resampling.bilinear,
            )
            meta.update({'crs': target_crs, 'transform': target_transform})
        else:
            data = src.read(out_shape=out_shape, resampling=Resampling.bilinear)
            # The read keeps the source extent, so the pixel size changes by
            # the ratio between the source and the output dimensions.
            meta['transform'] = src.transform * src.transform.scale(
                src.width / width,
                src.height / height,
            )

    # Drop only the band axis. A bare ``squeeze()`` would also collapse a
    # spatial dimension of size 1.
    if data.shape[0] == 1:
        data = data[0]
    return data, meta


