In [1]:
import numpy as np
import netCDF4
import matplotlib.pyplot as plt
import os
import pandas as pd
from datetime import timedelta
from skimage.transform import resize
from scipy.interpolate import interp2d

# Data Extraction
Used for inspection, training image selection, and annotation.
To make this notebook work, replace the absolute paths to the .nc files with your own paths and set `save_path` to the folder where the images should be saved; it is passed to the `extract_part` and `extract_all` functions.

Depending on the flight that you want to inspect, set `flight` to either 9 or 16 in the next code section; the section thereafter then loads the matching .nc file.

- nc_flight9 = 'unet_melt_pond_detection/nc_data/flight9/IRdata_ATWAICE_processed_220718_142920.nc'
- nc_flight16 = 'unet_melt_pond_detection/nc_data/flight16/IRdata_ATWAICE_processed_220730_111439.nc'

In [None]:
# Flight to process: either 9 or 16 (the only values the loading cell and extract_time handle).
flight = 9

# Folder where the extracted images are saved, e.g. 'E:/polar/flight9/'.
# Must be replaced with a real path before running the extraction cells:
# the save path is built with os.path.join(save_path, ...), which fails while this is None.
save_path = None

In [2]:
# Load the netCDF file that belongs to the selected flight.
# The absolute paths below are machine-specific; replace them with your own.
if flight == 9:
    nc_flight9 = 'E:/polar/code/data/flight9/IRdata_ATWAICE_processed_220718_142920.nc'
    ds = netCDF4.Dataset(nc_flight9)

elif flight == 16:
    nc_flight16 = 'E:/polar/code/data/flight16/IRdata_ATWAICE_processed_220730_111439.nc'
    ds = netCDF4.Dataset(nc_flight16)

else:
    # Fail here with a clear message instead of leaving `ds` undefined,
    # which would only surface as a NameError in a later cell.
    raise ValueError('Unsupported flight {!r}: expected 9 or 16.'.format(flight))

In [3]:
# Number of entries along the dataset's 't' dimension.
ds_size = len(ds.dimensions['t'])

# Arrays stored under 'xd' and 'yd' (their meaning is not visible in this notebook;
# presumably coordinate grids -- TODO confirm) and the per-image time values.
xd = ds.variables['xd'][:]
yd = ds.variables['yd'][:]
timestamps = ds.variables['time'][:]

# Image stack stored under 'Ts'; the recorded output shows a masked array
# of shape (4989, 480, 640).
imgs = ds.variables['Ts'][:]

print(imgs.shape, type(imgs), sep='\n')

(4989, 480, 640)
<class 'numpy.ma.core.MaskedArray'>


In [6]:
def extract_time(img_idx, timestamps, flight_nr):
    """
    Convert the timestamp of one image into an absolute date-time string.

    The value at `timestamps[img_idx]` is interpreted as a number of seconds
    and added to midnight of the flight day. Fractions of a second are
    truncated.

    Parameters:
    ----------
        img_idx : int
            Position of the image within `timestamps`.
        timestamps : numpy.ma.core.MaskedArray
            Per-image time values in seconds, counted from midnight of the flight day.
        flight_nr : int
            Flight number; 9 and 16 are supported.

    Returns:
    --------
        str
            Date and time formatted as 'YYYY-MM-DD HH:MM:SS'.

    Raises:
    -------
        ValueError
            If `flight_nr` is not a supported flight, or if the selected
            timestamp is masked.
    """
    # Midnight of the day on which each supported flight took place.
    reference_dates = {
        9: '2022-07-18 00:00:00',
        16: '2022-07-30 00:00:00',
    }

    if flight_nr not in reference_dates:
        raise ValueError(
            'Unsupported flight number {!r}: expected one of {}.'.format(
                flight_nr, sorted(reference_dates)
            )
        )

    # Index the array directly; copying the whole array into a pandas Series
    # on every call made the extraction loops needlessly quadratic.
    seconds = timestamps[img_idx]
    if np.ma.is_masked(seconds):
        raise ValueError('Timestamp at index {} is masked.'.format(img_idx))

    date = pd.Timestamp(reference_dates[flight_nr])
    return str(date + timedelta(seconds=int(seconds)))

In [None]:
def extract_part(dataset, dataset_size, time, flight_nr, save_path):
    """
    Save every fourth image of a flight as a PNG file.

    According to the original author, taking only every fourth image yields
    non-overlapping images and saves memory.

    Files are named '<timestamp>_<n>.png', where <timestamp> is the result of
    `extract_time` with spaces replaced by '_' and ':' / '-' removed, and <n>
    counts the saved images starting at 0.

    Parameters:
    -----------
        dataset : numpy.ma.core.MaskedArray
            Image stack, indexed by image number along the first axis.
        dataset_size : int
            Number of images in `dataset`.
        time : numpy.ma.core.MaskedArray
            Per-image time values, passed on to `extract_time`.
        flight_nr : int
            Flight number, passed on to `extract_time`.
        save_path : str
            Folder in which the images are written; created if it does not exist.

    Raises:
    -------
        ValueError
            If `save_path` is None.
    """
    if save_path is None:
        raise ValueError('save_path is not set: specify the folder where images should be saved.')
    os.makedirs(save_path, exist_ok=True)

    # Stepping by 4 visits exactly the indices with i % 4 == 0;
    # enumerate supplies the running number used in the file name.
    for idx, i in enumerate(range(0, dataset_size, 4)):
        timestamp = extract_time(i, time, flight_nr).replace(' ', '_').replace(':', '').replace('-', '')

        # clip for better visibility (bounds presumably in Kelvin -- TODO confirm)
        img = np.clip(dataset[i], 273, 276)

        plt.imsave(os.path.join(save_path, '{}_{}.png'.format(timestamp, idx)), img, cmap='cividis')

In [None]:
def extract_all(dataset, dataset_size, time, flight_nr, save_path):
    """
    Save all images of the specified flight as PNG files.

    Files are named '<timestamp>_<i>.png', where <timestamp> is the result of
    `extract_time` with spaces replaced by '_' and ':' / '-' removed, and <i>
    is the index of the image in `dataset`.

    Parameters:
    -----------
        dataset : numpy.ma.core.MaskedArray
            Image stack, indexed by image number along the first axis.
        dataset_size : int
            Number of images in `dataset`.
        time : numpy.ma.core.MaskedArray
            Per-image time values, passed on to `extract_time`.
        flight_nr : int
            Flight number, passed on to `extract_time`.
        save_path : str
            Folder in which the images are written; created if it does not exist.

    Raises:
    -------
        ValueError
            If `save_path` is None.
    """
    if save_path is None:
        raise ValueError('save_path is not set: specify the folder where images should be saved.')
    os.makedirs(save_path, exist_ok=True)

    # The running number in the file name equals the image index, so no
    # separate counter is needed.
    for i in range(dataset_size):
        timestamp = extract_time(i, time, flight_nr).replace(' ', '_').replace(':', '').replace('-', '')

        # clip for better visibility (bounds presumably in Kelvin -- TODO confirm)
        img = np.clip(dataset[i], 273, 276)

        plt.imsave(os.path.join(save_path, '{}_{}.png'.format(timestamp, i)), img, cmap='cividis')

In [None]:
# Save every fourth image of the selected flight as PNG files in save_path.
extract_part(imgs, ds_size, timestamps, flight, save_path)

In [None]:
# Save every image of the selected flight as PNG files in save_path.
extract_all(imgs, ds_size, timestamps, flight, save_path)