### Time series plotting: plankton (master)

#### Import modules

In [5]:
import netCDF4
from netCDF4 import Dataset
import numpy as np
import pandas as pd
from pathlib import Path
import os
import seaborn as sns
import matplotlib.pyplot as plt
import xarray as xr

#### Define lists called in the forthcoming for loops

In [8]:
sector = ['NB', 'CB', 'SB']
date_2013 = ['January_4750','February_4780','March_4810','April_4840','May_4870','June_4900',
             'July_4930','August_4960','September_4990','October_5020','November_5050', 'December_5080']
date_2014 = ['January_5115','February_5145','March_5175','April_5205','May_5235','June_5265',
             'July_5295','August_5325','September_5355','October_5385','November_5415', 'December_5445']
day_2013 = ['4750','4780','4810','4840','4870','4900','4930','4960','4990','5020','5050', '5080']
day_2014 = ['5115','5145','5175','5205','5235','5265','5295','5325','5355','5385','5415', '5445']

## Loop through all months for each sector: 2013

In [97]:
# Reset the working directory before running the loops
%pwd
%cd /home/lindsay/hioekg-2013/

# Add master list for the files collected by this first nested loop and group lists
file_list = []
nsmz=[]
nmdz=[]
nlgz=[]
nsm=[]
nlg=[]

for i in range(0,12):
    for j in range(0,3):
        folder = '/home/lindsay/hioekg-2013/' + str(date_2013[i]) + '/'
        os.chdir(folder)
        file = xr.open_dataset(str(sector[j]) + '_all_var_surface_0' + str(day_2013[i]) + '.nc')
        file_list.append(file)      

for item in file_list: 
# nsmz: Small zooplankton
    dat = item.nsmz
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nsmz.append(dat_numpy)
# nmdz: Medium zooplankton
    dat = item.nmdz
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nmdz.append(dat_numpy)
# nlgz: Large zooplankton
    dat = item.nlgz
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nlgz.append(dat_numpy)
# nsm: Small phytoplankton
    dat = item.nsm
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nsm.append(dat_numpy)
# nlg: Large phytoplankton
    dat = item.nlg
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nlg.append(dat_numpy)

/home/lindsay/hioekg-2013


Inspecting the elements of file_list indicates that the order of the arrays in my variable lists are as such:

- Jan_NB, Jan_CB, Jan_SB, Feb_NB, Feb_CB, Feb_SB...and so on.

My path forward:
- Loop through each array element in each list
- For elements 0-2, must join array with a Date list of '01-01-2013', 3-5 '02-01-2013'...
- All elements joined with alternating Sector lists of NB, CB, SB...

My approach at this point is to extract elements in groups of 3 and tailor each for loop accordingly.

Then, run this loop for 2014, and for the other variables, and append those lists to this master df. This will make plotting and further manipulation pretty easy.

In [110]:
### Change the selection object each time you change the list...
selection = nmdz

# Creating 12-unit date list, 3-unit sector list, and empty list to store organized values:
date = ['01-01-2013','02-01-2013','03-01-2013','04-01-2013','05-01-2013','06-01-2013','07-01-2013','08-01-2013',
        '09-01-2013','10-01-2013','11-01-2013','12-01-2013']
sector=['NB','CB','SB']
list_for_dataframe=[]

# Iterating sequentially (i) through each array (element) in the list of arrays (selection)
for element in selection:
    for i in range(0,36):
        this_month = date[i//3] # integer division: 0,1,2//3 returns same val (0); 3,4,5 returns same val (1)...
        this_sector = sector[i%3] # this moves faster- every other element, sequence of 3
        for sub_element in element:
            list_for_dataframe.append(
                {'month': this_month, 'sector': this_sector, 'concentration': sub_element})
        
# The loop above produces a list of dicts

Pandas magic: the list of dicts has been organized into a df, with each value entry in a row with its associated keys as column heads.

#### Change group label each time:

In [111]:
df = pd.DataFrame(list_for_dataframe)

# Add group identifier and year column
df['group']='nmdz'
df['year']=2013
print(len(df))
df.head()

2801088


Unnamed: 0,month,sector,concentration,group,year
0,01-01-2013,NB,8.901746e-08,nmdz,2013
1,01-01-2013,NB,8.895691e-08,nmdz,2013
2,01-01-2013,NB,8.855688e-08,nmdz,2013
3,01-01-2013,NB,8.85987e-08,nmdz,2013
4,01-01-2013,NB,8.841269e-08,nmdz,2013


#### Change the name of the file written each time to correspond with the group:

In [112]:
nmdz_df_2013 = df

#### Concatenate all dataframes

In [115]:
all_frames = [nsmz_df_2013,nmdz_df_2013,nlgz_df_2013,nsm_df_2013,nlg_df_2013]

df_2013 = pd.concat(all_frames, axis=0)
df_2013.tail()

Unnamed: 0,month,sector,concentration,group,year
2801083,12-01-2013,SB,7.168995e-08,nlg,2013
2801084,12-01-2013,SB,7.165477e-08,nlg,2013
2801085,12-01-2013,SB,7.163389e-08,nlg,2013
2801086,12-01-2013,SB,7.162012e-08,nlg,2013
2801087,12-01-2013,SB,7.161326e-08,nlg,2013


## Loop through each sector: 2014

In [75]:
# Reset the working directory before running the loops
%pwd
%cd /home/lindsay/hioekg-2014/

# Add master list for the files collected by this first nested loop and group lists
file_list = []
nsmz=[]
nmdz=[]
nlgz=[]
nsm=[]
nlg=[]

for i in range(0,12):
    for j in range(0,3):
        folder = '/home/lindsay/hioekg-2014/' + str(date_2014[i]) + '/'
        os.chdir(folder)
        file = xr.open_dataset(str(sector[j]) + '_all_var_surface_0' + str(day_2014[i]) + '.nc')
        file_list.append(file)      

for item in file_list: 
# nsmz: Small zooplankton
    dat = item.nsmz
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nsmz.append(dat_numpy)
# nmdz: Medium zooplankton
    dat = item.nmdz
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nmdz.append(dat_numpy)
# nlgz: Large zooplankton
    dat = item.nlgz
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nlgz.append(dat_numpy)
# nsm: Small phytoplankton
    dat = item.nsm
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nsm.append(dat_numpy)
# nlg: Large phytoplankton
    dat = item.nlg
    dat_numpy = dat.values
    dat_numpy=dat_numpy[~np.isnan(dat_numpy)]
    nlg.append(dat_numpy)

/home/lindsay/hioekg-2014


In [88]:
### Change the selection object each time you change the list...
selection = nsm

# Creating 12-unit date list, 3-unit sector list, and empty list to store organized values:
date = ['01-01-2014','02-01-2014','03-01-2014','04-01-2014','05-01-2014','06-01-2014','07-01-2014','08-01-2014',
        '09-01-2014','10-01-2014','11-01-2014','12-01-2014']
sector=['NB','CB','SB']
list_for_dataframe=[]

# Iterating sequentially (i) through each array (element) in the list of arrays (selection)
for element in selection:
    for i in range(0,36):
        this_month = date[i//3] # integer division: 0,1,2//3 returns same val (0); 3,4,5 returns same val (1)...
        this_sector = sector[i%3] # this moves faster- every other element, sequence of 3
        for sub_element in element:
            list_for_dataframe.append(
                {'month': this_month, 'sector': this_sector, 'concentration': sub_element})
        
# The loop above produces a list of dicts

#### Change the group identifier each time

In [89]:
df = pd.DataFrame(list_for_dataframe)

# Add group identifier and year column
df['group']='nsm'
df['year']=2014
print(len(df))
df.head()

2801088


Unnamed: 0,month,sector,concentration,group,year
0,01-01-2014,NB,4.616109e-08,nsm,2014
1,01-01-2014,NB,4.620185e-08,nsm,2014
2,01-01-2014,NB,4.733561e-08,nsm,2014
3,01-01-2014,NB,4.70659e-08,nsm,2014
4,01-01-2014,NB,4.72158e-08,nsm,2014


#### Change the name each time to match the group

In [90]:
nsm_df_2014 = df

#### Concatenate all 2014 dataframes

In [93]:
all_frames = [nsmz_df_2014,nmdz_df_2014,nlgz_df_2014,nsm_df_2014,nlg_df_2014]

df_2014 = pd.concat(all_frames, axis=0)

In [117]:
df_2014.head()

Unnamed: 0,month,sector,concentration,group,year
0,01-01-2014,NB,4.837016e-07,nsmz,2014
1,01-01-2014,NB,4.910902e-07,nsmz,2014
2,01-01-2014,NB,4.626904e-07,nsmz,2014
3,01-01-2014,NB,4.782062e-07,nsmz,2014
4,01-01-2014,NB,5.002344e-07,nsmz,2014


#### Concatenate the large year-distinct dfs

In [120]:
yrs = [df_2013,df_2014]
df_all = pd.concat(yrs, axis=0)

#### Write dfs to file

In [123]:
%cd /home/lindsay/hioekg-compare-years/

pd.DataFrame.to_csv(df_all, 'df_all.csv')

/home/lindsay/hioekg-compare-years
