# MS263 Final Project data processing

## Chlorophyll data

The following notebook:

- Puts each year of data values and dates into separate arrays
- Combines those yearly arrays into two (one for values, one for dates), then saves each as a CSV file
- Combines those two arrays into a DataFrame, then saves that as a CSV file as well

In [2]:
import os
import numpy as np
import netCDF4 as nc4
from SJ_tools import final_proj_functions as fpf
import pandas as pd
import datetime as dt

In [2]:
# Directory holding the AQUA chlorophyll-a files; the extraction functions below read from it.
chlor_folder = '/Volumes/TOSHIBAEXT/Thesis_data/Chlor-a'

#### Define the functions for extracting Ocean Color data.

In [3]:
# function to list the chl data file names for one year and return them as a sorted list

def chl_files_ext(year, chlor_folder):
    """Return the sorted AQUA chlorophyll file names in a folder for one year.

    Parameters
    ----------
    year : int
        Four-digit year to select.
    chlor_folder : str
        Directory to scan.

    Returns
    -------
    list of str
        File names (not full paths) that start with 'AQUA' and whose
        characters 11-14 spell `year`, in sorted order.
    """
    chl_data_files = []

    for file_name in os.listdir(chlor_folder):
        if file_name[:4] != 'AQUA':
            continue

        # Characters 11-14 carry the year, e.g. 'AQUA_MODIS.20030101...'.
        year_text = file_name[11:15]

        # Skip AQUA-prefixed names that do not follow the dated pattern
        # instead of letting int() raise on them.
        if not year_text.isdecimal():
            continue

        # Compare against the `year` argument. The parsed value must not be
        # stored back into `year`, and no module-level variable is consulted.
        if int(year_text) == year:
            chl_data_files.append(file_name)

    chl_data_files.sort()

    return chl_data_files
    

# function to extract the sample date from each chl file name and return them as an array of datetime64 values
# input: result from the chl_files_ext function

def chl_dates_ext(chl_data_files, file_year):
    """Parse the sample date out of each chlorophyll file name.

    Parameters
    ----------
    chl_data_files : list of str
        File names shaped like 'AQUA_MODIS.YYYYMMDD....nc'.
    file_year : int
        Only file names whose year equals this value contribute a date.

    Returns
    -------
    numpy.ndarray
        datetime64 array with one entry per matching file name, in input order.
        File names from another year are skipped, so the result can be shorter
        than `chl_data_files` if the list mixes years.
    """
    chl_date_list = []

    for file_name in chl_data_files:
        year = int(file_name[11:15])
        if year != file_year:
            # Skipping here keeps a non-matching file from reusing the month/day
            # of the previous file (or failing when it is the first file).
            continue

        # Second dot-separated field is the 'YYYYMMDD' stamp.
        stamp = file_name.split('.')[1]
        chl_date_list.append('-'.join((str(year), stamp[4:6], stamp[6:8])))

    chl_date_array = np.array(chl_date_list, dtype='datetime64')

    return chl_date_array


# function to extract chl values from the data files and return one mean value per file in a list
# input: result from the chl_files_ext function

def chl_values_ext(chl_data_files):
    """Compute one mean chlorophyll value per data file.

    Each file is read from the module-level `chlor_folder` with
    `fpf.read_chl_from_file`, passed through `fpf.remove_nans`, and reduced
    with `np.mean`. The file name is printed as each file is processed.

    Parameters
    ----------
    chl_data_files : list of str
        File names (not full paths) inside `chlor_folder`.

    Returns
    -------
    list
        One mean per file, in input order; empty when no files are given.
    """
    # NOTE(review): an earlier version assigned a local variable named
    # HDF5_USE_FILE_LOCKING inside this loop. A local variable has no effect on
    # HDF5, so it was removed. If file locking has to be disabled, that is
    # presumably done through the HDF5_USE_FILE_LOCKING environment variable
    # before the files are opened -- confirm.
    chl_values = []

    for file_name in chl_data_files:

        print(file_name)

        file_path = os.path.join(chlor_folder, file_name)

        chlor = fpf.read_chl_from_file(file_path)
        chlor = fpf.remove_nans(chlor)
        chl_mean = np.mean(chlor)

        chl_values.append(chl_mean)

    return chl_values

#### 2002

In [6]:
# Year whose chlorophyll files the following cells process.
file_year = 2002

In [None]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [None]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

In [10]:
# Keep the 2002 results under year-specific names; they are concatenated after the last year.
chl_values_02 = chl_values
chl_dates_02 = chl_date_array

# double check arrays look correct (prints both in full)

print(chl_values_02)
print(chl_dates_02)

[0.0, 0.20818305, 1.6162916, 0.48244414, 0.37556806, 0.0, 0.0, 0.0, 0.0, 0.0, 1.3924875, 0.022956561, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06405716, 0.11162441, 1.0110518, 0.9057756, 0.0, 0.0, 0.0, 0.22310542, 0.0, 0.0, 0.7984889, 1.1450915, 0.69332737, 0.02051283, 0.0051951706, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14394362, 0.0, 0.0, 1.0655992, 0.0, 0.0, 0.017673383, 0.043980606, 0.01770042, 0.3910505, 1.1261919, 0.0, 0.0, 0.0, 0.0, 0.4583851, 1.1377075, 0.0, 0.28072456, 0.26478037, 1.671641, 1.1393468, 1.18718, 2.5287676, 1.492463, 0.0, 0.0, 0.7497717, 0.15959752, 1.830432, 2.8347836, 1.5769762, 2.8307483, 0.0, 0.3930688, 0.0, 0.25326738, 0.38821864, 0.24439506, 0.0, 0.0, 0.0, 0.010125163, 1.2561688, 0.48902133, 0.5878964, 1.2658587, 0.72366565, 0.9344383, 0.0, 1.9048891, 1.9181875, 1.8058197, 0.0, 0.0, 1.9088637, 0.0, 0.6562919, 0.007698517, 0.024682155, 0.0017857664, 0.16826084, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.59739083, 0.0, 0.8771918, 0.79460186, 1.0769609, 0.7324966, 1.042192, 1.1162978, 0.0246

#### 2003

In [11]:
# Year whose chlorophyll files the following cells process.
file_year = 2003

In [12]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [13]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20030101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20030121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [14]:
# Keep the 2003 results under year-specific names; they are concatenated after the last year.
chl_values_03 = chl_values
chl_dates_03 = chl_date_array

#### 2004

In [15]:
# Year whose chlorophyll files the following cells process.
file_year = 2004

In [16]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [17]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20040101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20040121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [18]:
# Keep the 2004 results under year-specific names; they are concatenated after the last year.
chl_values_04 = chl_values
chl_dates_04 = chl_date_array

#### 2005

In [19]:
# Year whose chlorophyll files the following cells process.
file_year = 2005

In [20]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [21]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20050101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20050121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [22]:
# Keep the 2005 results under year-specific names; they are concatenated after the last year.
chl_values_05 = chl_values
chl_dates_05 = chl_date_array

#### 2006

In [23]:
# Year whose chlorophyll files the following cells process.
file_year = 2006

In [24]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [25]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20060101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20060121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [26]:
# Keep the 2006 results under year-specific names; they are concatenated after the last year.
chl_values_06 = chl_values
chl_dates_06 = chl_date_array

#### 2007

In [27]:
# Year whose chlorophyll files the following cells process.
file_year = 2007

In [28]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [29]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20070101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20070121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [30]:
# Keep the 2007 results under year-specific names; they are concatenated after the last year.
chl_values_07 = chl_values
chl_dates_07 = chl_date_array

#### 2008

In [31]:
# Year whose chlorophyll files the following cells process.
file_year = 2008

In [32]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [33]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20080101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20080121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [34]:
# Keep the 2008 results under year-specific names; they are concatenated after the last year.
chl_values_08 = chl_values
chl_dates_08 = chl_date_array

#### 2009

In [35]:
# Year whose chlorophyll files the following cells process.
file_year = 2009

In [36]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [37]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20090101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20090121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [38]:
# Keep the 2009 results under year-specific names; they are concatenated after the last year.
chl_values_09 = chl_values
chl_dates_09 = chl_date_array

#### 2010

In [39]:
# Year whose chlorophyll files the following cells process.
file_year = 2010

In [40]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [41]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20100101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20100121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [42]:
# Keep the 2010 results under year-specific names; they are concatenated after the last year.
chl_values_10 = chl_values
chl_dates_10 = chl_date_array

#### 2011

In [43]:
# Year whose chlorophyll files the following cells process.
file_year = 2011

In [44]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [45]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20110101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20110121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [46]:
# Keep the 2011 results under year-specific names; they are concatenated after the last year.
chl_values_11 = chl_values
chl_dates_11 = chl_date_array

#### 2012

In [47]:
# Year whose chlorophyll files the following cells process.
file_year = 2012

In [48]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [49]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20120101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20120121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [50]:
# Keep the 2012 results under year-specific names; they are concatenated after the last year.
chl_values_12 = chl_values
chl_dates_12 = chl_date_array

#### 2013

In [51]:
# Year whose chlorophyll files the following cells process.
file_year = 2013

In [52]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [53]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20130101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20130121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [54]:
# Keep the 2013 results under year-specific names; they are concatenated after the last year.
chl_values_13 = chl_values
chl_dates_13 = chl_date_array

#### 2014

In [55]:
# Year whose chlorophyll files the following cells process.
file_year = 2014

In [56]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [57]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20140101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20140121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [58]:
# Keep the 2014 results under year-specific names; they are concatenated after the last year.
chl_values_14 = chl_values
chl_dates_14 = chl_date_array

#### 2015

In [59]:
# Year whose chlorophyll files the following cells process.
file_year = 2015

In [60]:
# File names for `file_year`, then the sample date parsed from each name.
chl_data_files = chl_files_ext(file_year, chlor_folder)
chl_date_array = chl_dates_ext(chl_data_files, file_year)

In [61]:
# One mean chlorophyll value per file; each file name is printed as it is read.
chl_values = chl_values_ext(chl_data_files)

AQUA_MODIS.20150101.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150102.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150103.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150104.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150105.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150106.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150107.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150108.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150109.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150110.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150111.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150112.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150113.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150114.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150115.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150116.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150117.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150118.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150119.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150120.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20150121.L3m.DAY.CHL.chlor_a.4km.nc
AQUA_MODIS.20

In [62]:
# Keep the 2015 results under year-specific names; they are concatenated in the next section.
chl_values_15 = chl_values
chl_dates_15 = chl_date_array

#### End

In [63]:
# Stitch the per-year results together, oldest year first:
# one array holding the values and a matching array holding the dates.

# values

chl_all_values = np.concatenate([
    chl_values_02, chl_values_03, chl_values_04, chl_values_05,
    chl_values_06, chl_values_07, chl_values_08, chl_values_09,
    chl_values_10, chl_values_11, chl_values_12, chl_values_13,
    chl_values_14, chl_values_15,
])

# dates

chl_all_dates = np.concatenate([
    chl_dates_02, chl_dates_03, chl_dates_04, chl_dates_05,
    chl_dates_06, chl_dates_07, chl_dates_08, chl_dates_09,
    chl_dates_10, chl_dates_11, chl_dates_12, chl_dates_13,
    chl_dates_14, chl_dates_15,
])

# string copy of the dates, used when writing them to csv

chl_date_strings = chl_all_dates.astype(str)
chl_date_strings

array(['2002-07-04', '2002-07-05', '2002-07-06', ..., '2015-12-29',
       '2015-12-30', '2015-12-31'], dtype='<U28')

#### Save the value and date arrays into csv files

In [64]:
# values
# One value per line; no fmt is passed, so np.savetxt uses its default number format.
output_file = '/Volumes/TOSHIBAEXT/Thesis_data/Chlor-a_csvs/Chlor_values_all.csv'
np.savetxt(output_file, chl_all_values, delimiter=",")

In [65]:
# dates
# The dates are written from their string form, hence fmt='%s'.
output_file = '/Volumes/TOSHIBAEXT/Thesis_data/Chlor-a_csvs/Chlor_dates_all.csv'
np.savetxt(output_file, chl_date_strings, delimiter=",", fmt='%s')

Combine the two arrays into one Pandas DataFrame

In [68]:
# Pair each date with its value in a two-column DataFrame and display it.
# Both arrays must have the same length for the constructor to accept them.
values_and_dates = {'Sample dates':chl_all_dates,'Chlorophyll values':chl_all_values}
chl_df = pd.DataFrame(values_and_dates)
chl_df

Unnamed: 0,Sample dates,Chlorophyll values
0,2002-07-04,0.000000
1,2002-07-05,0.208183
2,2002-07-06,1.616292
3,2002-07-07,0.482444
4,2002-07-08,0.375568
...,...,...
4917,2015-12-27,0.121091
4918,2015-12-28,0.000000
4919,2015-12-29,0.438608
4920,2015-12-30,0.346211


Save the DataFrame into a csv file

In [69]:
# Write the DataFrame rows to CSV. No `header` argument is passed to np.savetxt,
# so the column names are not written -- the file holds data rows only.
# NOTE(review): the cell that reloads this path uses header=1 and a 'Date' column,
# which this call does not produce; presumably the file was edited or rewritten
# outside the notebook -- confirm before re-running.
output_file = '/Volumes/TOSHIBAEXT/Thesis_data/Chlor-a_csvs/Chl_data.csv'
np.savetxt(output_file, chl_df, delimiter=",",fmt='%s')

In [7]:
# Reload the daily chlorophyll table from disk, replacing the in-memory chl_df.
# header=1 takes the file's second line as the column names; parse_dates
# converts the 'Date' column to datetimes.
# NOTE(review): this expects 'Date' and 'chl-a' columns, whereas the np.savetxt
# call that writes this path passes no header -- confirm how the file on disk
# got its header row.
file_path = '/Volumes/TOSHIBAEXT/Thesis_data/Chlor-a_csvs/Chl_data.csv'
chl_df = pd.read_csv(file_path, header=1, parse_dates=['Date'])
chl_df

  chl_df = pd.read_csv(file_path, header=1, parse_dates=['Date'])


Unnamed: 0,Date,chl-a
0,2002-07-04,0.000000
1,2002-07-05,0.208183
2,2002-07-06,1.616292
3,2002-07-07,0.482444
4,2002-07-08,0.375568
...,...,...
4917,2015-12-27,0.121091
4918,2015-12-28,0.000000
4919,2015-12-29,0.438608
4920,2015-12-30,0.346211


Compute yearly averages of chlorophyll-a data since jellyfish data is yearly.

In [8]:
# Mean chl-a per calendar year, returned as a two-column table.
chl_yearly = (
    chl_df['chl-a']
    .groupby(chl_df['Date'].dt.year)
    .mean()
    .reset_index()
)
chl_yearly.columns = ['year', 'yearly ave']
chl_yearly

Unnamed: 0,year,yearly ave
0,2002,0.386018
1,2003,0.562396
2,2004,0.740899
3,2005,0.864926
4,2006,0.64786
5,2007,0.689585
6,2008,0.518115
7,2009,0.467241
8,2010,0.503023
9,2011,0.51237


Save the DataFrame into a CSV file.

In [9]:
# Write the yearly table to CSV; no header is passed, so only the data rows are written.
output_file = '/Volumes/TOSHIBAEXT/Thesis_data/Chlor-a_csvs/Chl_yearly_data.csv'
np.savetxt(output_file, chl_yearly, delimiter=",",fmt='%s')

## SST

In [2]:
# Directory holding the SST netCDF files; the cells and function below read from it.
sst_folder = '/Volumes/TOSHIBAEXT/Thesis_data/SST'

#### Get a list of SST data files

In [3]:
# Sorted names of the entries in sst_folder that end in 'nc' and mention 'MUR'.
sst_data_files = sorted(
    entry
    for entry in os.listdir(sst_folder)
    if entry.endswith('nc') and 'MUR' in entry
)

print(len(sst_data_files))

7561


#### Get a list of dates corresponding to the SST values

In [4]:
# Build a 'YYYY-MM-DD' string from the first eight characters of each file name,
# keeping only files dated 2002 through 2015, then convert to datetime64.
sst_date_list = []

for file_name in sst_data_files:
    year = int(file_name[:4])
    if year < 2002 or year > 2015:
        continue
    sst_date_list.append('-'.join([str(year), file_name[4:6], file_name[6:8]]))

sst_date_array = np.array(sst_date_list, dtype='datetime64')

print(sst_date_array)

['2002-09-01' '2002-09-02' '2002-09-03' ... '2015-12-29' '2015-12-30'
 '2015-12-31']


#### Define function

In [6]:
# function to extract SST values from the MUR SST products
def sst_values_ext(sst_data_files):
    """Compute one mean SST value per data file dated 2002 through 2015.

    Each qualifying file is read from the module-level `sst_folder` with
    `fpf.read_sst_from_file` and passed through `fpf.remove_nans_sst`. The mean
    is taken over the non-zero cells only; a grid with no non-zero cell yields
    NaN. The file name is printed as each file is processed.

    Parameters
    ----------
    sst_data_files : list of str
        File names (not full paths) whose first four characters are the year.

    Returns
    -------
    numpy.ndarray
        Float array with one mean per qualifying file, in input order; empty
        when no file falls in 2002-2015.
    """
    # NOTE(review): zeros are treated as missing data here -- presumably
    # fpf.remove_nans_sst replaces NaNs with 0; confirm against that helper.
    sst_means = []

    for file_name in sst_data_files:
        year = int(file_name[:4])
        if not 2002 <= year <= 2015:
            continue

        print(file_name)

        file_path = sst_folder + '/' + file_name

        sst = fpf.read_sst_from_file(file_path)
        sst = fpf.remove_nans_sst(sst)

        # Reduce each grid as soon as it is read instead of rebuilding the
        # array of all grids (and all means) on every iteration.
        daily = np.asarray(sst)
        non_zeros = daily[daily != 0]

        if non_zeros.size > 0:
            total = np.sum(non_zeros)
            count = np.count_nonzero(daily)
            sst_means.append(total / count)
        else:
            sst_means.append(np.nan)

    return np.array(sst_means, dtype=float)

In [7]:
# Daily SST means for the 2002-2015 files; each file name is printed as it is processed.
sst_values = sst_values_ext(sst_data_files)

20020901090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020902090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020903090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020904090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020905090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020906090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020907090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020908090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020909090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020910090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020911090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020912090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020913090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020914090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020915090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc
20020916090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv

#### Save the SST values and dates into CSV files

In [10]:
# values
# One SST mean per line; no fmt is passed, so np.savetxt uses its default number format.
output_file = '/Volumes/TOSHIBAEXT/Thesis_data/SST_csvs/SST_dailyvalues.csv'
np.savetxt(output_file, sst_values, delimiter=",")

In [11]:
# dates
# The datetime64 array is written directly; fmt='%s' writes each date as text.
output_file = '/Volumes/TOSHIBAEXT/Thesis_data/SST_csvs/SST_dates.csv'
np.savetxt(output_file, sst_date_array, delimiter=",", fmt='%s')

In [12]:
# Pair each date with its SST mean in a two-column DataFrame and display it.
# Both arrays must have the same length for the constructor to accept them.
values_and_dates = {'Sample dates':sst_date_array,'SST values':sst_values}
sst_df = pd.DataFrame(values_and_dates)
sst_df

Unnamed: 0,Sample dates,SST values
0,2002-09-01,288.575636
1,2002-09-02,288.469455
2,2002-09-03,288.063182
3,2002-09-04,287.471182
4,2002-09-05,287.279636
...,...,...
4865,2015-12-27,286.823000
4866,2015-12-28,286.687000
4867,2015-12-29,286.512636
4868,2015-12-30,286.451636


In [13]:
# Write the DataFrame rows to CSV. No `header` argument is passed to np.savetxt,
# so the column names are not written -- the file holds data rows only.
# NOTE(review): the cell that reloads this path uses header=1 and a 'Date' column,
# which this call does not produce; presumably the file was edited or rewritten
# outside the notebook -- confirm before re-running.
output_file = '/Volumes/TOSHIBAEXT/Thesis_data/SST_csvs/SST_dailydata.csv'
np.savetxt(output_file, sst_df, delimiter=",",fmt='%s')

In [10]:
# Reload the daily SST table from disk, replacing the in-memory sst_df.
# header=1 takes the file's second line as the column names; parse_dates
# converts the 'Date' column to datetimes.
# NOTE(review): this expects 'Date' and 'SST' columns, whereas the np.savetxt
# call that writes this path passes no header -- confirm how the file on disk
# got its header row.
file_path = '/Volumes/TOSHIBAEXT/Thesis_data/SST_csvs/SST_dailydata.csv'
sst_df = pd.read_csv(file_path, header=1, parse_dates=['Date'])
sst_df

  sst_df = pd.read_csv(file_path, header=1, parse_dates=['Date'])


Unnamed: 0,Date,SST
0,2002-09-01,288.575636
1,2002-09-02,288.469455
2,2002-09-03,288.063182
3,2002-09-04,287.471182
4,2002-09-05,287.279636
...,...,...
4865,2015-12-27,286.823000
4866,2015-12-28,286.687000
4867,2015-12-29,286.512636
4868,2015-12-30,286.451636


Compute yearly averages for SST data.

In [11]:
# Mean SST per calendar year, returned as a two-column table.
sst_yearly = (
    sst_df['SST']
    .groupby(sst_df['Date'].dt.year)
    .mean()
    .reset_index()
)
sst_yearly.columns = ['year', 'yearly ave']
sst_yearly

Unnamed: 0,year,yearly ave
0,2002,286.727818
1,2003,286.613408
2,2004,286.376202
3,2005,286.518108
4,2006,286.82446
5,2007,286.114195
6,2008,285.928083
7,2009,286.196718
8,2010,286.268148
9,2011,286.251617


Convert from Kelvin to degrees Celsius

In [14]:
# Convert the yearly means from Kelvin to degrees Celsius.
# The Kelvin means are recomputed from sst_df instead of being read back from
# sst_yearly, so running this cell more than once does not subtract the
# offset a second time.
sst_yearly['yearly ave'] = (
    sst_df.groupby(sst_df['Date'].dt.year)['SST'].mean().to_numpy() - 273.15
)
sst_yearly

Unnamed: 0,year,yearly ave
0,2002,13.577818
1,2003,13.463408
2,2004,13.226202
3,2005,13.368108
4,2006,13.67446
5,2007,12.964195
6,2008,12.778083
7,2009,13.046718
8,2010,13.118148
9,2011,13.101617


Save into a CSV file

In [15]:
# Write the yearly table to CSV; no header is passed, so only the data rows are written.
output_file = '/Volumes/TOSHIBAEXT/Thesis_data/SST_csvs/SST_yearlydata.csv'
np.savetxt(output_file, sst_yearly, delimiter=",",fmt='%s')

Now I have CSV files for yearly averages of chlorophyll-a data and SST data that I'll use for my analysis.