# Opening Chlorophyll-a NC File from NASA Ocean Color L2 repo


Step 1: Go to https://oceancolor.gsfc.nasa.gov/cgi/browse.pl?sen=amod

Step 2: Select a region from top right scroll menu and click "Find swaths"

Step 3: Click "Order Data" button on the new page that loads after Step 2

Step 4: Login with creds if you haven't already

In [2]:
from google.colab import drive

drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [3]:
pip install netCDF4

Note: you may need to restart the kernel to use updated packages.


In [1]:
import netCDF4 as nc
import os
import shutil
from PIL import Image

import matplotlib.image
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [5]:
%matplotlib inline
# set the font globally
plt.rcParams["axes.edgecolor"] = "black"
plt.rcParams["axes.linewidth"] = 2
# update the overall font size
plt.rcParams.update({'font.size':11})
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42


In [3]:
# txt = 'AQUA_MODIS.20020712.L3m.DAY.CHL.x_chlor_a.nc'
# print(txt.split('.')[1])

## Generation of RGB Images from NC files

In [37]:
directory = 'data/algae_dataset'

for filename in os.listdir(directory):
    fn = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(fn):
      ds = nc.Dataset(fn)
      data = ds.variables['chlor_a'][:].data
      croppedtop = data[18:, :]
      isExist = os.path.exists('data/RGB_figures/')
      if not isExist:
        os.makedirs('data/RGB_figures/')
      
      fileExtension = filename.split('.')[1]
      matplotlib.image.imsave(f'data/RGB_figures/{fileExtension}.png', croppedtop)
      

## Generation of Grayscale Images from RGB Images

In [41]:
directory = 'data/RGB_figures'

for filename in os.listdir(directory):
    fn = os.path.join(directory, filename)
    img = Image.open(fn).convert('L')
    isExist = os.path.exists('data/Grayscale_figures/')
    if not isExist:
        os.makedirs('data/Grayscale_figures/')
    img.save('data/Grayscale_figures/'+filename)

## Split the dataset for Training, Validation and Testing

In [17]:
curr_len = 0
t = sorted(os.listdir('data/algae_dataset'))
totalLen = len(t)
for filename in t:
    fn = os.path.join('data/algae_dataset', filename)
    if curr_len <= totalLen *0.7:
        isExist = os.path.exists('data/trainnc_data/')
        if not isExist:
            os.makedirs('data/trainnc_data/')
        shutil.move(fn, 'data/trainnc_data/')
        curr_len += 1
    elif curr_len <= totalLen*0.85:
        isExist = os.path.exists('data/validationnc_data/')
        if not isExist:
            os.makedirs('data/validationnc_data/')
        shutil.move(fn, 'data/validationnc_data/')
        curr_len += 1
    else:
        isExist = os.path.exists('data/testnc_data/')
        if not isExist:
            os.makedirs('data/testnc_data/')
        shutil.move(fn, 'data/testnc_data/')
        curr_len += 1

In [18]:
print(len(os.listdir('data/trainnc_data')))
print(len(os.listdir('data/validationnc_data')))
print(len(os.listdir('data/testnc_data')))

5136
1101
1100
