
## 1. Importing Libraries and Defining Paths
This cell imports essential libraries for the project and sets up configuration paths.

### Libraries Imported:
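- `os`, `pathlib`: For interacting with the operating system and building file paths.
- `json`, `math`, `argparse`: Standard-library helpers for reading JSON files, arithmetic, and building the argument namespace passed to `update_config`.
- `numpy`: For numerical operations.
- `pandas`: For data manipulation.
- `rasterio`: For reading and writing geospatial raster data.
- `matplotlib`, `earthpy`, `cv2`: For plotting and image handling.
- `subprocess`: For running subprocesses.
- `pyperclip`: Clipboard access.
- `config`, `dataset`: Project-local modules providing the configuration dictionary and `read_img`.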

### Paths:
- Sets paths for training, validation, and testing datasets.
- Sets paths for storing outputs and logging.


In [11]:
import os
import cv2
import json
import pathlib
import math
import rasterio
from rasterio.windows import Window
import numpy as np
import pandas as pd
import earthpy.plot as ep
import earthpy.spatial as es
from dataset import read_img
from matplotlib import pyplot as plt
import subprocess
import pyperclip

import rasterio as rio
from rasterio.plot import show
import matplotlib.pyplot as plt
import numpy as np

# Load configuration
from config import config, update_config
import argparse

# Create a dummy namespace for arguments
args = argparse.Namespace()

# Update config with dummy arguments
update_config(args)

train_df = pd.read_csv(config['train_dir'])
test_df = pd.read_csv(config['test_dir'])
valid_df = pd.read_csv(config['valid_dir'])
p_train_json = config['p_train_dir']
p_test_json = config['p_test_dir']
p_valid_json = config['p_valid_dir']

## 2. Counting Images in Datasets
This cell prints the total number of images in the training, testing, and validation datasets.

### Outputs:
- Total number of training images.
- Total number of test images.
- Total number of validation images.

In [12]:
# Report how many rows (one row per image) each split table holds.
for split_label, split_df in (
    ("training", train_df),
    ("test", test_df),
    ("validation", valid_df),
):
    print(f"Total number of {split_label} images = {len(split_df)}")

Total number of training images = 46
Total number of test images = 6
Total number of validation images = 6


## 3. Checking Class Balance
This cell defines a function to check the class percentage in the full dataset.

### Function: `class_balance_check(patchify, data_dir)`
- **Parameters**:
  - `patchify` (bool): TRUE if class balance is to be checked for patchify experiments.
  - `data_dir` (str): Directory where data files are saved.
- **Returns**: Class percentage.
- **Prints**:
  - Class pixel percentage.
  - Unique values in the mask.

In [15]:
def class_balance_check(patchify, data_dir):
    """
    Summary:
        Compute and print the percentage of pixels that belong to each class
        over every mask listed in a dataset file.
    Arguments:
        patchify (bool): True to read a patch-level JSON file (keys "masks" and
            "patch_idx") and count only the pixels inside each patch; False to
            read a CSV file with a `masks` column and count whole masks.
        data_dir (str): path to that JSON / CSV dataset file.
    Return:
        dict mapping each class value found in the masks to its pixel
        percentage (the same numbers that are printed).
    """
    # Honour the arguments: the dataset file and the patch mode come from the
    # caller, not from the global config, so "before patch" and "after patch"
    # calls really inspect different data.
    if patchify:
        with open(data_dir, "r") as j:
            dataset = json.load(j)
        labels = dataset["masks"]
        patch_idx = dataset["patch_idx"]
    else:
        labels = pd.read_csv(data_dir).masks.values
        patch_idx = None

    total = 0
    pixel_counts = {}

    for i, mask_path in enumerate(labels):
        with rasterio.open(mask_path) as msk:
            mask = msk.read(1)  # band 1 only -> 2-D array

        if patch_idx is not None:
            # patch indices are stored as [row_start, row_stop, col_start, col_stop]
            idx = patch_idx[i]
            mask = mask[idx[0] : idx[1], idx[2] : idx[3]]

        total += mask.shape[0] * mask.shape[1]

        # One pass over the mask gives every class value and its pixel count.
        values, counts = np.unique(mask, return_counts=True)
        for value, count in zip(values, counts):
            pixel_counts[value] = pixel_counts.get(value, 0) + int(count)

    class_name = {key: (val / total) * 100 for key, val in pixel_counts.items()}

    print("Class percentage:")
    for key, val in class_name.items():
        print("class pixel: {} = {}".format(key, val))
    print(f"unique value in the mask {class_name.keys()}")

    return class_name


## 4. Running Class Balance Check
This cell runs the `class_balance_check` function on the dataset.

### Outputs:
- Class percentage for each class in the dataset.


In [17]:
# First call: whole-image mode (patchify=False) with the training CSV path.
print("Class percentage of training data before patch")
class_balance_check(patchify=False, data_dir=config['train_dir'])
print(".........................................................................................")
# Second call: patch mode (patchify=True) with the patch-level training file path.
print("Class percentage of training data after patch")
class_balance_check(patchify=True, data_dir=config['p_train_dir'])


Class percentage of training data before patch
Class percentage:
class pixel: 1.0 = 43.71723761925331
class pixel: 2.0 = 56.282762380746696
unique value in the mask dict_keys([1.0, 2.0])
.........................................................................................
Class percentage of training data after patch
Class percentage:
class pixel: 1.0 = 43.71723761925331
class pixel: 2.0 = 56.282762380746696
unique value in the mask dict_keys([1.0, 2.0])


## 5. Checking Unique Height and Width of Images
This cell defines a function `check_height_width` to check and print unique heights and widths of images and masks in a dataset.

### Function: `check_height_width(data_dir)`
- **Parameters**: 
  - `data_dir` (str): Path to the CSV file.
- **Process**:
  - Reads the CSV file.
  - Extracts image and mask paths.
  - Iterates through the images and masks to find unique shapes.
  - Prints the shapes of the dataset, input images, and masks.


In [24]:
def check_height_width(data_dir):
    """
    Summary:
        Print the shape of every image / mask pair listed in a dataset CSV and
        the lists of distinct shapes that occur.
    Arguments:
        data_dir (str): path to a CSV file with `feature_ids` (image paths)
            and `masks` (mask paths) columns
    Return:
        None; shapes are printed as (bands, height, width)
    """

    data = pd.read_csv(data_dir)

    print("Dataset:  ", data.shape)

    input_img = data.feature_ids.values
    input_mask = data.masks.values

    input_img_shape = []
    input_mask_shape = []

    for i, (img_path, mask_path) in enumerate(zip(input_img, input_mask)):
        # The shape comes from the raster header, so no pixel data has to be
        # loaded; (count, height, width) is the shape a full read() would have.
        with rasterio.open(img_path) as im:
            img_shape = (im.count, im.height, im.width)
        with rasterio.open(mask_path) as msk:
            mask_shape = (msk.count, msk.height, msk.width)
        print(f"Shape for:{i} image Shape:{img_shape}    mask shape:{mask_shape}")

        # Keep first-seen order of the distinct shapes.
        if img_shape not in input_img_shape:
            input_img_shape.append(img_shape)

        if mask_shape not in input_mask_shape:
            input_mask_shape.append(mask_shape)

    print("Input image shapes: ", input_img_shape)
    print("Input mask shapes: ", input_mask_shape)

## 6. Checking Image Dimensions in Different Datasets
This cell prints the unique heights and widths of images and masks for training, testing, and validation datasets by calling the `check_height_width` function.

### Actions:
- Checks and prints unique image and mask dimensions for the training dataset.
- Checks and prints unique image and mask dimensions for the testing dataset.
- Checks and prints unique image and mask dimensions for the validation dataset.


In [25]:
# Run the shape report once per split CSV; each call prints one line per
# image/mask pair followed by the distinct shapes found.
print("Unique height and width of training dataset")
check_height_width(config['train_dir'])
print(".........................................................................................")
print("Unique height and width of testing dataset")
check_height_width(config['test_dir'])
print(".........................................................................................")
print("Unique height and width of validation dataset")
check_height_width(config['valid_dir'])


Unique height and width of training dataset
Dataset:   (46, 2)
Shape for:0 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:1 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:2 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:3 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:4 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:5 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:6 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:7 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:8 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:9 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:10 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:11 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:12 image Shape:(3, 2048, 2048)    mask shape:(1, 2048, 2048)
Shape for:13 image Shape

## 7. Plotting Metrics from CSV Files
This cell defines functions to handle CSV files and plot metrics against epochs.

### Functions:
- `return_csv_from_path`: Returns a list of CSV file paths from a directory.
- `_plot_from_csv`: Plots specified columns from a CSV file against epochs.
- `plot_metrics_vs_epochs`: Plots metrics from a CSV file against epochs using `_plot_from_csv`.
- `plot_metric_vs_epochs_vs_models`: Plots a specific metric against epochs for different models and saves the combined results.


In [31]:
def return_csv_from_path(csv_path=config['csv_logger_path']):
    """
    Collect the CSV log files stored one level below `csv_path`.

    Arguments:
        csv_path (str or pathlib.Path): directory whose sub-directories hold
            the CSV logs. Files placed directly in `csv_path` are ignored.
    Return:
        list of pathlib.Path, sorted by sub-directory and then file name so the
        order is the same on every run.
    """
    root = pathlib.Path(csv_path)  # also accepts a plain string path
    csv_list = []
    for folder in sorted(root.iterdir()):
        if not folder.is_dir():
            continue
        for file in sorted(folder.iterdir()):
            # Only real CSV files: anything else would later fail in pd.read_csv.
            if file.is_file() and file.suffix.lower() == ".csv":
                csv_list.append(file)
    return csv_list

def _plot_from_csv(csv_path, name, x_axis_name, y_axis_name, columns_to_plot=None):
    """
    Draw columns of a CSV file against its `epoch` column, save and show the figure.

    Arguments:
        csv_path: CSV file to read; it must contain an `epoch` column.
        name: file name of the saved figure inside <root_dir>/logs/plots/metrics_plots.
        x_axis_name (str): label of the x axis.
        y_axis_name (str): label of the y axis.
        columns_to_plot (list, optional): columns to draw; when None every
            column except the first one is drawn.
    """
    plots_dir = config['root_dir'] / "logs" / "plots" / "metrics_plots"
    pathlib.Path(plots_dir).mkdir(parents=True, exist_ok=True)

    frame = pd.read_csv(csv_path)
    epochs = frame['epoch']
    if columns_to_plot is None:
        # default: skip the first column and plot all the others
        columns_to_plot = frame.columns.to_list()[1:]

    plt.figure(figsize=(12, 8))
    for column in columns_to_plot:
        plt.plot(
            epochs,
            frame[column],
            label=column,
            linewidth=3.0,
            marker="o",
            markersize=5,
        )

    plt.title(f"{y_axis_name}_over_{x_axis_name}")
    plt.xlabel(x_axis_name)
    plt.ylabel(y_axis_name)
    plt.xticks(epochs.astype(int))  # one tick per logged epoch
    plt.legend()
    plt.savefig(plots_dir / name)
    plt.show()

def plot_metrics_vs_epochs(csv_path, name, x_axis_name="Epochs", y_axis_name="Metrics_score", columns_to_plot=None):
    """
    Plot the metrics stored in one CSV log against the epoch number.

    Thin wrapper around `_plot_from_csv` that only supplies default axis labels.
    """
    _plot_from_csv(csv_path, name, x_axis_name, y_axis_name, columns_to_plot)

def plot_metric_vs_epochs_vs_models(metric_name="my_mean_iou"):
    """
    Compare one metric across all logged runs.

    Reads `metric_name` from every file returned by `return_csv_from_path()`,
    writes the combined table to
    <root_dir>/logs/plots/csv_for_plotting/<metric_name>_vs_epoch.csv
    (one column per log file, row index saved as `epoch`) and plots it.

    Arguments:
        metric_name (str): column to extract from every CSV log.
    Raises:
        KeyError: if a log file has no `metric_name` column.
    """
    out_dir = config['root_dir'] / "logs" / "plots" / "csv_for_plotting"
    pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

    # Column label = log file name, values = that run's metric per row.
    series_by_run = {
        os.path.basename(csv_path): pd.read_csv(csv_path)[metric_name]
        for csv_path in return_csv_from_path()
    }

    # concat(axis=1) aligns on the union of the row indices, so a run that
    # trained longer than the first one keeps all its rows (shorter runs are
    # padded with NaN). Assigning column-by-column to an empty frame would cut
    # every later run down to the length of the first.
    if series_by_run:
        result_df = pd.concat(series_by_run, axis=1)
    else:
        result_df = pd.DataFrame()
    result_df.index.name = "epoch"

    combined_csv = out_dir / f"{metric_name}_vs_epoch.csv"
    result_df.to_csv(combined_csv, encoding='utf-8', index=True, header=True)
    _plot_from_csv(combined_csv, x_axis_name="Epochs", y_axis_name=metric_name, name=metric_name)


## 8. Plotting Specific Metrics from CSV Files
This cell plots metrics against epochs using previously defined functions.

### Actions:
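- Plots all metric columns from a specified CSV log file.
- Plots only the `my_mean_iou` column from the same CSV log file.
- Plots `my_mean_iou` for the different models using the default `metric_name`.
- Plots `my_mean_iou` for the different models again, this time passing `metric_name` explicitly.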


In [32]:
# Every metric column of one training log, saved under the name 'metrics'.
plot_metrics_vs_epochs(config['csv_logger_path'] / "planet-2" / "planet-2_ex_2024-07-13_e_4000_p_2048_s_1024_nsr-1_dtype_nsr-1.csv", name='metrics')
# Only the my_mean_iou column of the same log.
# NOTE(review): reuses name='metrics', so this figure is saved under the same file name as the one above.
plot_metrics_vs_epochs(config['csv_logger_path'] / "planet-2" / "planet-2_ex_2024-07-13_e_4000_p_2048_s_1024_nsr-1_dtype_nsr-1.csv", name='metrics', columns_to_plot=["my_mean_iou"])
# my_mean_iou (the default metric_name) across all log files.
plot_metric_vs_epochs_vs_models()
# Same request with the default metric spelled out explicitly.
plot_metric_vs_epochs_vs_models(metric_name="my_mean_iou")

## 9. Percentage Clipping Function
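Defines `pct_clip`, which rescales an array so that the lower percentile maps to 0 and the upper percentile maps to 1 (2.5 and 97.5 by default), then clips the result to the range [0, 1].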


In [33]:
def pct_clip(array, pct=(2.5, 97.5)):
    """
    Percentile stretch: rescale `array` so that the lower percentile maps to 0
    and the upper percentile maps to 1, then clip the result to [0, 1].

    Arguments:
        array (array-like): input values; NaNs are ignored when the percentiles
            are computed and stay NaN in the output.
        pct (sequence of two numbers): lower and upper percentile (0-100).
    Return:
        numpy array with the same shape as `array`; the input is not modified.
        When both percentiles are equal (e.g. a constant band) the result is
        all zeros instead of the NaN/inf a division by zero would produce.
    """
    array = np.asanyarray(array)
    # One call computes both percentiles.
    array_min, array_max = np.nanpercentile(array, [pct[0], pct[1]])
    value_range = array_max - array_min
    if value_range == 0:
        # Nothing to stretch; keep NaN positions, everything else becomes 0.
        return np.where(np.isnan(array), np.nan, 0.0)
    return np.clip((array - array_min) / value_range, 0, 1)

In [None]:
# with rio.open("/mnt/hdd2/mdsamiul/project/rice_crop_segmentation/data/dataset/input/tile_0_0_vVvHdF_Area1_17-18_Beg.tif") as src:
#     with rio.open(
#             'RGB_Temp.tif', 'w+',
#             driver='GTiff',
#             dtype= rio.float32,
#             count=3,
#             crs = src.crs,
#             width=src.width,
#             height=src.height,
#             transform=src.transform,
#         ) as dst:
#         V = pct_clip(src.read(1))
#         dst.write(V.astype(rio.float32),1)
#         V = pct_clip(src.read(2))
#         dst.write(V.astype(rio.float32),2)
#         V = pct_clip(src.read(3))
#         dst.write(V.astype(rio.float32),3)

In [None]:
# with rio.open("/mnt/hdd2/mdsamiul/project/rice_crop_segmentation/data/dataset/input/tile_0_0_vVvHdF_Area1_17-18_Beg.tif") as src:
#     # img= np.zeros((3,512,512))
#     img= pct_clip(src.read())

## 10. False Color Image Reading Function
Defines `false_colour_read` to read an image and apply percentage clipping to each channel.


In [34]:
def false_colour_read(path):
    """
    Read the first three bands of a raster and percentile-stretch each band.

    Arguments:
        path (str): raster file to read.
    Return:
        tuple (img, src): `img` is a float array of shape (3, height, width)
        with every band passed through `pct_clip`; `src` is the rasterio
        dataset object, already closed when the function returns.
    """
    with rasterio.open(path) as src:
        # Size the buffer from the raster itself rather than assuming 512x512,
        # so rasters of any size (e.g. 2048x2048 tiles) can be read.
        img = np.zeros((3, src.height, src.width))
        for i in range(3):
            img[i, :, :] = pct_clip(src.read(i + 1))  # rasterio bands are 1-based

    return img, src

In [38]:
# NOTE: at module (cell) level a `global` statement has no effect and does not create
# h or w; they only come into existence when false_colour_read_bt assigns them.
global h,w

## 11. False Color Image Reading Function with Dynamic Shape
Defines `false_colour_read_bt` to read an image with dynamic shape and apply percentage clipping to each channel.


In [39]:
def false_colour_read_bt(path):
    """
    Read the first three bands of a raster of any size and stretch each band
    with `pct_clip`.

    Side effect: stores the raster's height and width in the module-level
    variables `h` and `w`.

    Return:
        tuple (img, src): float array of shape (3, h, w) and the rasterio
        dataset object (closed once the function returns).
    """
    global h, w
    with rasterio.open(path) as src:
        h, w = src.shape
        img = np.zeros((3, h, w))
        for band in (1, 2, 3):  # rasterio band numbers start at 1
            img[band - 1, :, :] = pct_clip(src.read(band))

    return img, src
# print("displaying training images and masks")
# display_all(data=train_df,name="train")

In [40]:
# Relies on the module-level h and w that false_colour_read_bt sets when it is called;
# evaluating this cell before any such call raises NameError (as in the recorded output).
np.zeros((h,w,3)).shape

NameError: name 'h' is not defined

In [None]:
# fig,ax=plt.subplots()
# with rio.open("RGB_Temp.tif") as src2:
#     show(src2.read(),transform=src2.transform,ax=ax)
#     ax.grid(False)  # Turn off gridlines along the borders
#     ax.set_xticks([])  # Remove x-axis ticks
#     ax.set_yticks([])  # Remove y-axis ticks
#     ax.set_xlabel('')  # Remove x-axis label
#     ax.set_ylabel('')  # Remove y-axis label
# plt.show()
# plt.savefig("false_color")

In [42]:
# Base directory for the figures written by display_all
# (it saves into visualization_dir / "display" / <name>).
visualization_dir = config["root_dir"] / "data/nsr-1"

## 12. Displaying and Saving All Images and Masks
Defines `display_all` to save images and their corresponding masks into a single figure for visualization.

### Function: `display_all(data, name)`
- **Parameters**:
  - `data`: Data file holding image paths.
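  - `name` (str): Name of the sub-folder (`"train"`, `"test"` or `"valid"`) inside `<visualization_dir>/display` where the figures are saved.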
- **Process**:
  - Reads and processes each image and mask.
  - Displays images and masks in a figure.
  - Saves the figure to the specified directory.


In [43]:
def display_all(data, name):
    """
    Summary:
        save one figure per sample showing the false-colour image next to its mask
    Arguments:
        data : table with `feature_ids` (image paths) and `masks` (mask paths) columns
        name (str) : sub-folder of <visualization_dir>/display to save into,
            e.g. "train", "test" or "valid"
    Return:
        None; each figure is saved as img_id_<image file name>.png
    """

    # Create the standard split folders plus the requested one, so saving also
    # works when `name` is not one of train/test/valid.
    display_root = visualization_dir / "display"
    for split in ("train", "test", "valid", name):
        pathlib.Path(display_root / split).mkdir(parents=True, exist_ok=True)

    for sample_idx in range(len(data)):
        feature_path = data.feature_ids.values[sample_idx]
        image, src = false_colour_read_bt(feature_path)
        print(image.shape)
        print(np.mean(image), np.std(image))
        mask = read_img(data.masks.values[sample_idx], label=True)
        print("................................")
        print(f"image_shape: {image.shape}")
        print(f"mask_shape: {mask.shape}")
        print("................................")
        image_id = feature_path.split("/")[-1]  # file name of the image
        panels = {"image": image, "label": mask}

        plt.figure(figsize=(12, 8))

        # A separate index name keeps the panel loop from overwriting the
        # sample index of the outer loop.
        for panel_idx, (title, panel) in enumerate(panels.items()):
            plt.subplot(1, len(panels), panel_idx + 1)
            plt.title(title)
            if title == "image":
                # rasterio's show() handles the (bands, rows, cols) layout
                show(panel, transform=src.transform, ax=plt.gca())
            else:
                plt.imshow(panel, cmap="gray")
            plt.axis("off")

        prediction_name = "img_id_{}.png".format(image_id)  # create file name to save
        plt.savefig(
            os.path.join((display_root / name), prediction_name),
            bbox_inches="tight",
            dpi=800,
        )
        # Release the figure so memory does not grow with the number of samples.
        plt.clf()
        plt.cla()
        plt.close()

## 13. Loading Dataset CSV Files
Loads training, testing, and validation datasets from CSV files.

### Actions:
- Reads the training dataset CSV file.
- Reads the testing dataset CSV file.
- Reads the validation dataset CSV file.


In [44]:
# Re-load the split tables from hard-coded absolute paths; this replaces the
# train_df / test_df / valid_df that the first code cell read from the config paths.
# NOTE(review): machine-specific paths — confirm they match config['train_dir'] etc.
train_df = pd.read_csv("/mnt/hdd2/mdsamiul/project/rice_crop_segmentation/data/nsr-1/data/csv/train.csv")
test_df =  pd.read_csv("/mnt/hdd2/mdsamiul/project/rice_crop_segmentation/data/nsr-1/data/csv/test.csv")
valid_df = pd.read_csv("/mnt/hdd2/mdsamiul/project/rice_crop_segmentation/data/nsr-1/data/csv/valid.csv")
# p_train_json = config.p_train_dir
# p_test_json = config.p_test_dir
# p_valid_json = config.p_valid_dir

In [45]:
import math
from config import *
import config
import tensorflow as tf
# from logging import config
from einops import rearrange
from tensorflow import keras
import segmentation_models as sm
from tensorflow.keras import layers
from tensorflow.keras.models import Model
import keras_unet_collection.models as kuc
from tensorflow.keras import backend as K
from tensorflow.keras.layers import LeakyReLU, add, Conv2D, PReLU, ReLU, Concatenate, Activation, MaxPool2D, Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose, BatchNormalization, Dropout, Lambda


Segmentation Models: using `keras` framework.


## 14. Reading and Normalizing Images and Masks
Defines `read_img` to read and normalize images and masks using `rasterio`.

### Function: `read_img(directory, in_channels=None, label=False, patch_idx=None, height=256, width=256)`
- **Parameters**:
  - `directory` (str): Path to the image.
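  - `in_channels` (int, optional): Intended number of channels to read. Currently unused: all bands are always read.
  - `label` (bool): True if reading a mask, otherwise False.
  - `patch_idx` (list, optional): Patch indices `[row_start, row_stop, col_start, col_stop]` to crop from the image or mask.
  - `height` (int, optional): Height of the image. Currently unused.
  - `width` (int, optional): Width of the image. Currently unused.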
- **Returns**: Numpy array of the image or mask.


In [46]:
def read_img(directory, in_channels=None, label=False, patch_idx=None, height=256, width=256):
    """
    Summary:
        read a raster with rasterio; masks are remapped to class ids, feature
        images are standardised with config["mean"] / config["std"]
    Arguments:
        directory (str): path of the raster file to read
        in_channels (int): kept for interface compatibility; not used, all bands are read
        label (bool): True if `directory` is a mask file, otherwise False
        patch_idx (list): [row_start, row_stop, col_start, col_stop] of the patch
            to cut out; the full raster is returned when it is None/empty
        height (int): kept for interface compatibility; not used
        width (int): kept for interface compatibility; not used
    Return:
        numpy.array: int32 mask of shape (rows, cols, 1) when label is True,
        otherwise the standardised features of shape (rows, cols, bands)
    """

    # mask images
    if label:
        with rasterio.open(directory) as fmask:
            mask = fmask.read(1)  # bands are 1-based; a single index gives a 2-D array

        # Remap raw mask values to class ids. Order matters: value 2 is folded
        # into class 0 before value 170 is assigned to class 2.
        mask[mask == 2.0] = 0
        mask[mask == 170] = 2
        mask = mask[..., np.newaxis].astype("int32")  # add the channel axis

        if patch_idx:
            # extract patch from original mask
            return mask[patch_idx[0]:patch_idx[1], patch_idx[2]:patch_idx[3]]
        return mask

    # feature images: read every band -> (bands, rows, cols)
    with rasterio.open(directory) as inp:
        X = inp.read()
    # Move the band axis last while keeping rows before cols. swapaxes(0, 2)
    # would give (cols, rows, bands), i.e. an image transposed relative to the
    # (rows, cols, 1) mask and to the row/col order of patch_idx.
    X = np.transpose(X, (1, 2, 0))
    X = (X - config["mean"]) / config["std"]
    if patch_idx:
        # extract patch from original features
        return X[patch_idx[0]:patch_idx[1], patch_idx[2]:patch_idx[3], :]
    return X

## 15. Displaying Training Images and Masks
Displays and saves training images and masks using the `display_all` function.


In [47]:
# Save one image/mask figure per training sample into the "train" display folder.
print("displaying training images and masks")
display_all(data=train_df,name="train")

displaying training images and masks
(3, 2048, 2048)
0.45905334162016237 0.26210658493878514
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.4231295877359089 0.30436063065924696
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.49032868802117285 0.2732168338553924
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.43081603305717414 0.2726879906249073
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.44685053335421526 0.27884300356909714
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.45334690877176853 0.26362800146950405
...............

## 16. Displaying Testing Images and Masks
Displays and saves testing images and masks using the `display_all` function.

In [52]:
# Save one image/mask figure per test sample into the "test" display folder.
print("displaying testing images and masks")
display_all(data=test_df, name = "test")

displaying testing images and masks
(3, 2048, 2048)
0.4614242265964597 0.25907621930535063
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.5096356904234135 0.27964891278188814
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.43287478030391396 0.264148527395466
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.44688856108519 0.2918150079265977
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.41630318034187214 0.3078911617133381
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.406943987636486 0.30234890239838397
........................

## 17. Displaying Validation Images and Masks
Displays and saves validation images and masks using the `display_all` function.


In [53]:
# Save one image/mask figure per validation sample into the "valid" display folder.
print("displaying validation images and masks")
display_all(data=valid_df, name= "valid")

displaying validation images and masks
(3, 2048, 2048)
0.4810193831846115 0.2649052017633632
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.404305705893939 0.28803888926824484
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.4863573461569 0.26044502706106587
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.43049160725798563 0.2655540340256653
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.421098497233139 0.26795243400117935
................................
image_shape: (3, 2048, 2048)
mask_shape: (2048, 2048, 1)
................................
(3, 2048, 2048)
0.4073432169947597 0.3067610144743087
.......................

## 18. Calculating Statistics for Image Bands
Loads training dataset CSV and defines a function to calculate mean and standard deviation for each band of the images.

### Actions:
- Loads training dataset CSV.
- Defines `calculate_stats` to:
  - Read and clip the first three bands of each image.
  - Calculate and print the mean and standard deviation for each band.
- Calls `calculate_stats` with the list of feature image paths.


In [54]:
train_df = pd.read_csv("/mnt/hdd2/mdsamiul/project/rice_crop_segmentation2/data/nsr-1/data/csv/train.csv")
features_path = train_df["feature_ids"].to_list()


def calculate_stats(file_paths):
    """
    Print the mean and standard deviation of each of the first three bands,
    computed over all pixels of all files after `pct_clip` has been applied to
    every band of every file separately.

    Arguments:
        file_paths (list): raster files to read; all must have the same
            height and width because the clipped bands are stacked.
    Return:
        None; the six statistics are printed.
    """
    band_labels = ("1st", "2nd", "3rd")
    clipped_per_band = [[] for _ in band_labels]

    # Open every file once and collect its three clipped bands.
    for file_path in file_paths:
        with rasterio.open(file_path) as src:
            for band_idx, clipped in enumerate(clipped_per_band):
                clipped.append(pct_clip(src.read(band_idx + 1)))  # bands are 1-based

    # Stack and summarise one band at a time so only a single stacked copy
    # exists in memory at any moment.
    for label, clipped in zip(band_labels, clipped_per_band):
        stacked = np.stack(clipped)
        print(f"Average mean across {label} band:", np.mean(stacked))
        print(f"Standard deviation across {label} band:", np.std(stacked))


# Example list of file paths
calculate_stats(features_path)




Average mean across 1st band: 0.4691332033859703
Standard deviation across 1st band: 0.28278651995773896
Average mean across 2bd band: 0.46229958486139205
Standard deviation across 2nd band: 0.26957449339570383
Average mean across 3rd band: 0.37691639220534906
Standard deviation across 3rd band: 0.27380976043702177


## 19. Calculating Overall Statistics for All Image Bands
Loads training dataset CSV and defines a function to calculate mean and standard deviation for all bands of the images.

### Actions:
- Loads training dataset CSV.
- Defines `calculate_stats` to:
  - Read and clip all bands of each image.
  - Calculate and return the mean and standard deviation for all bands combined.
- Calls `calculate_stats` with the list of feature image paths.
- Prints the average mean and standard deviation across all files.


In [55]:
# /mnt/hdd2/mdsamiul/project/rice_crop_segmentation/data/dataset-nsr-3/data/csv
train_df = pd.read_csv("/mnt/hdd2/mdsamiul/project/rice_crop_segmentation2/data/nsr-1/data/csv/train.csv")
features_path = train_df["feature_ids"].to_list()


def calculate_stats(file_paths):
    """
    Mean and standard deviation over all bands of all files.

    Each file is read in full and passed through `pct_clip` as one array (the
    percentiles are taken over all of its bands together); the clipped arrays
    are stacked and summarised as a whole.

    Return:
        tuple (mean, std_dev)
    """
    clipped_images = []
    for raster_path in file_paths:
        with rasterio.open(raster_path) as dataset:
            clipped_images.append(pct_clip(dataset.read()))

    # one array holding every file: (files, bands, rows, cols)
    volume = np.stack(clipped_images)
    return np.mean(volume), np.std(volume)


# Statistics over the training feature images
mean, std_dev = calculate_stats(features_path)

print("Average mean across all files:", mean)
print("Standard deviation across all files:", std_dev)

Average mean across all files: 0.409134915248531
Standard deviation across all files: 0.2813810486906851


In [56]:
# Project root used by the tiling cells below. `pathlib` is imported at the top
# of the notebook, so this does not depend on a bare `Path` name being in scope.
root_dir = pathlib.Path("/mnt/hdd2/mdsamiul/project/rice_crop_segmentation2")

## 20. Saving Image Tiles
Defines `save_tiles` to split large images into smaller tiles and save them.

### Function: `save_tiles(path, out_path, tiles_size=2048, stride=1024)`
- **Parameters**:
  - `path`: Directory with original images.
  - `out_path`: Directory to save the tiles.
  - `tiles_size`: Size of each tile.
  - `stride`: Stride for tiling.
- **Process**: Iterates through images, splits them into tiles, and saves the tiles.


In [57]:
def save_tiles(path, out_path, tiles_size=2048, stride=1024):
    """
    Split every raster in `path` into square tiles and write them to `out_path`.

    Tiles are `tiles_size` x `tiles_size` pixels and start every `stride`
    pixels. A tile that would run past the image border is shifted back so it
    ends exactly at the border; it is only smaller than `tiles_size` when the
    image itself is smaller. Each tile is written as
    `tile_<row>_<col>_<source name>.tif` with the source metadata, its own
    height/width and the geotransform of its window.

    Arguments:
        path (str): directory containing the source rasters.
        out_path (str): directory for the tiles (created if missing).
        tiles_size (int): tile edge length in pixels.
        stride (int): step between neighbouring tile origins in pixels.
    """
    os.makedirs(out_path, exist_ok=True)

    # The listing is taken once up front, so tiles written during this run are
    # not picked up as new inputs even when out_path is the same folder as path.
    for filename in sorted(os.listdir(path)):
        file_path = os.path.join(path, filename)
        if not os.path.isfile(file_path):
            continue  # sub-directories cannot be opened as rasters
        with rasterio.open(file_path) as src:
            height, width = src.shape
            # At least one tile per axis, also for images smaller than a tile.
            num_rows = max(1, math.ceil((height - tiles_size) / stride + 1))
            num_cols = max(1, math.ceil((width - tiles_size) / stride + 1))
            total_tiles = num_rows * num_cols
            print(f"shape of the image before tiles : {src.shape}")
            print(f"number of tiles={total_tiles}")
            print("..................................................")

            for row in range(num_rows):
                for col in range(num_cols):
                    # End of the tile, limited to the image border ...
                    row_stop = min(row * stride + tiles_size, height)
                    col_stop = min(col * stride + tiles_size, width)
                    # ... and its start a full tile (not one stride) before that.
                    row_start = max(row_stop - tiles_size, 0)
                    col_start = max(col_stop - tiles_size, 0)

                    window = Window.from_slices((row_start, row_stop), (col_start, col_stop))
                    tile_data = src.read(window=window)

                    # Per-tile metadata: real tile size and the geotransform of
                    # this window, so every tile stays correctly georeferenced.
                    meta = src.meta.copy()
                    meta.update(
                        height=tile_data.shape[1],
                        width=tile_data.shape[2],
                        transform=src.window_transform(window),
                    )

                    out_filename = f"tile_{row}_{col}_{os.path.splitext(filename)[0]}.tif"
                    out_file_path = os.path.join(out_path, out_filename)
                    with rasterio.open(out_file_path, 'w', **meta) as dst:
                        dst.write(tile_data)

## 21. Data Path
Sets the data path for the input images.


In [58]:
# Source directory of the rasters to tile (first argument of the save_tiles call below).
data = "/mnt/hdd2/mdsamiul/project/rice_crop_segmentation/data/nsr-1/input/"

## 22. Output Path
Sets the output path for saving the image tiles.


In [59]:
# Destination directory for the tiles.
# NOTE(review): this is the same folder as `data`, so tiles are written next to the
# source rasters — confirm this is intended.
out="/mnt/hdd2/mdsamiul/project/rice_crop_segmentation/data/nsr-1/input/"

## 23. Execute Image Tiling
Calls the `save_tiles` function to split images into tiles and save them to the specified output directory.


In [60]:
# Tile every raster in `data` into `out` using the default tiles_size=2048 and stride=1024.
save_tiles(data,out)

shape of the image before tiles : (2048, 2048)
number of tiles=1
..................................................
shape of the image before tiles : (2048, 2048)
number of tiles=1
..................................................
shape of the image before tiles : (2048, 2048)
number of tiles=1
..................................................
shape of the image before tiles : (2048, 2048)
number of tiles=1
..................................................
shape of the image before tiles : (2048, 2048)
number of tiles=1
..................................................
shape of the image before tiles : (2048, 2048)
number of tiles=1
..................................................
shape of the image before tiles : (2048, 2048)
number of tiles=1
..................................................
shape of the image before tiles : (2048, 2048)
number of tiles=1
..................................................
shape of the image before tiles : (2048, 2048)
number of tiles=1
.......

## 24. Renaming Files
Defines `rename_files` to rename files in the specified directory based on predefined patterns.

### Function: `rename_files(datapath)`
- **Parameters**:
  - `datapath`: Directory containing the files to rename.
- **Process**:
  - Lists all files in the directory.
  - Renames files based on specific prefixes (`DEM_`, `VV_`, `VH_`, `GT_`).
  - Constructs new file paths and renames the files.


In [None]:
def rename_files(datapath):
    """
    Rename the files in `datapath` from the `<PREFIX>_<id>.tif` scheme to the
    `<id><tag>.tif` scheme:

        DEM_<id>.tif -> <id>_nasadem.tif
        VV_<id>.tif  -> <id>_vv.tif
        VH_<id>.tif  -> <id>_vh.tif
        GT_<id>.tif  -> <id>.tif

    Only the leading prefix is removed and the tag is inserted only in front
    of a .tif/.tiff extension, so an id that itself contains e.g. "VV_" is
    left intact. All new names are worked out before anything is renamed, so
    an error leaves the directory untouched.

    Arguments:
        datapath (str): directory containing the files to rename.
    Raises:
        ValueError: if a file name starts with none of the known prefixes.
        FileExistsError: if a new name already exists or two files map to the
            same new name.
    """
    # prefix -> tag inserted before the extension
    prefix_tags = {"DEM_": "_nasadem", "VV_": "_vv", "VH_": "_vh", "GT_": ""}

    # Pass 1: plan every rename and validate it.
    planned = []
    targets = set()
    for filename in os.listdir(datapath):
        for prefix, tag in prefix_tags.items():
            if filename.startswith(prefix):
                stem, ext = os.path.splitext(filename[len(prefix):])
                if ext.lower() not in (".tif", ".tiff"):
                    tag = ""  # non-TIFF files only lose their prefix
                new_filename = f"{stem}{tag}{ext}"
                break
        else:
            # Unknown naming scheme: stop before touching any file.
            raise ValueError("files_name_mismatch")

        if new_filename in targets or os.path.exists(os.path.join(datapath, new_filename)):
            raise FileExistsError(f"rename target already exists: {new_filename}")
        targets.add(new_filename)
        planned.append((filename, new_filename))

    # Pass 2: perform the renames.
    for filename, new_filename in planned:
        os.rename(os.path.join(datapath, filename), os.path.join(datapath, new_filename))
        print(f"Renamed {filename} to {new_filename}")


## 25. Execute File Renaming
Calls the `rename_files` function to rename files in the specified dataset directory.


In [None]:
# NOTE(review): attribute access (config.dataset_dir) differs from the config['...']
# lookups used in earlier cells; it only works if `config` is the module bound by
# `import config`, not the settings dict — confirm before running.
datapath = config.dataset_dir
rename_files(datapath)

## 26. Running Visualization Script and Saving Output
Runs a visualization script and saves the terminal output to an RTF file.

### Actions:
- Executes the `visualization.py` script using a terminal command.
- Captures the terminal output.
- Saves the output to `data_statistics.rtf`. The captured text is written as-is (plain text); no RTF markup is added.


In [None]:
import sys

# Run the visualization script with the same interpreter (and therefore the
# same environment) as this kernel, without going through a shell.
command = [sys.executable, "visualization.py"]
result = subprocess.run(command, capture_output=True, text=True)

# Surface failures instead of silently saving a partial or empty report.
if result.returncode != 0:
    print(f"visualization.py exited with status {result.returncode}")
    print(result.stderr)

# Get the terminal output
terminal_output = result.stdout

# Save the output to an RTF-named file (the text is written as-is, without RTF markup)
rtf_filename = "data_statistics.rtf"
with open(rtf_filename, "w") as rtf_file:
    # rtf_file.write("{\\rtf1\\ansi\n")
    rtf_file.write(terminal_output)
    # rtf_file.write("}")

print(f"Terminal output saved to {rtf_filename}")

Terminal output saved to data_statistics.rtf
