# Notebook for exploring the EEL data files


In [29]:
import glob

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Render floating-point values with 4 digits after the decimal point in pandas output.
pd.options.display.precision = 4

## 1. Load data into DataFrames

In [30]:
# Directory holding the raw EEL CSV files, relative to this notebook.
# Kept as a plain string because later cells build paths with `pathdir+'/'+...`.
pathdir = '../data/raw'

# List available data files.
# glob returns matches in a filesystem-dependent order, so the result is sorted
# to make the listing reproducible across machines and re-runs.
listfiles = sorted(glob.glob(pathdir+'/'+'*.csv'))
print(*listfiles, sep = "\n")


../data/raw/EELCTDandLADCP_refpos_origCTD.csv
../data/raw/EELCTDandLADCP_refdate.csv
../data/raw/EELCTDandLADCP_2Dfield.csv
../data/raw/EELCTDandLADCP_refpos_gvel.csv
../data/raw/EELCTDandLADCP_3Dfield.csv


#### Import 3D Fields

In [31]:
# Load the 3D-field table; the first row of the CSV supplies the column names
# and no column is promoted to the index.
# NOTE(review): the printed output suggests one row per CruiseID / Staname / Depth
# sample -- confirm against the data description.
file1 = '/'.join([pathdir, 'EELCTDandLADCP_3Dfield.csv'])
df3D = pd.read_csv(
    file1,
    sep=',',
    header=0,
    index_col=None,
)
print(df3D)


      CruiseID Staname    Refdist  Depth       CT       SA   Sigma0  \
0       d22396    13G+  1186.1527     25  12.9802  35.3276  26.5186   
1       d22396    13G+  1186.1527     35  12.9572  35.3273  26.5229   
2       d22396    13G+  1186.1527     45  12.7418  35.3160  26.5563   
3       d22396    13G+  1186.1527     55  12.3321  35.3116  26.6325   
4       d22396    13G+  1186.1527     65  11.9225  35.3072  26.7087   
...        ...     ...        ...    ...      ...      ...      ...   
63894    dy078  IB22S+     6.7210     65   7.8572  35.2077  27.3261   
63895    dy078  IB22S+     6.7210     75   7.7614  35.2106  27.3424   
63896    dy078  IB22S+     6.7210     85   7.7387  35.2176  27.3512   
63897    dy078  IB22S+     6.7210     95   7.7221  35.2191  27.3548   
63898    dy078  IB22S+     6.7210    105   7.7093  35.2224  27.3593   

             Vrel  Vladcp    Vabs  Vladcpalong  
0      7.1146e-02 -0.0274  0.0317      -0.1860  
1      7.1017e-02 -0.0061  0.0316      -0.1952  


#### Import Metadata

In [32]:
# Cruise dates: read the reference-date table (one header row, default integer index)
file2 = '/'.join([pathdir, 'EELCTDandLADCP_refdate.csv'])
dfdate = pd.read_csv(
    file2,
    sep=',',
    header=0,
    index_col=None,
)
print(dfdate)

   CruiseID  Year  Month
0    d22396  1996     10
1    d23097  1997      9
2    d23398  1998      5
3    d24299  1999      9
4    d24500  2000      2
5    d25301  2001     12
6   cd17605  2005     10
7    d31206  2006     10
8    d32107  2007      8
9    d34009  2009      6
10   d35110  2010      5
11   d36511  2011      5
12    jc086  2013      5
13    jr302  2014      7
14    dy031  2015      6
15    dy052  2016      6
16    dy078  2017      5


In [33]:
# EEL station locations: read the reference-position table
# (one header row, default integer index)
file3 = '/'.join([pathdir, 'EELCTDandLADCP_refpos_gvel.csv'])
dfloc = pd.read_csv(
    file3,
    sep=',',
    header=0,
    index_col=None,
)
print(dfloc)

   Staname    Refdist  RefLonGV  RefLatGV  RefDepthGV
0      1G+  1302.8994   -6.2080   56.6750       110.0
1      2G+  1295.3205   -6.3250   56.6955        50.0
2      3G+  1289.5191   -6.4085   56.7205        92.5
3      4G+  1282.0609   -6.5250   56.7330        97.5
4      5G+  1272.9178   -6.6750   56.7330        60.0
..     ...        ...       ...       ...         ...
63  IB18S+   103.0651  -19.7510   62.5000      1730.0
64  IB19S+    69.8213  -19.6095   62.7920      1535.0
65  IB20S+    40.4960  -19.7335   63.0250      1215.0
66  IB21S+    19.4086  -19.9915   63.1745       850.0
67  IB22S+     6.7210  -20.1410   63.2665       395.0

[68 rows x 5 columns]


## 2. Create Pivot Tables

In [27]:
# Mean of the Vabs column for every (Staname, Depth) pair, taken over all rows
# that share that pair; the result is a Series with a two-level index.
(
    df3D
    .groupby(['Staname', 'Depth'])["Vabs"]
    .mean()
)

Staname  Depth
13G+     15      -0.0422
         25      -0.0056
         35      -0.0059
         45      -0.0054
         55      -0.0055
                   ...  
T+       65      -0.0187
         75      -0.0221
         85      -0.0605
         95      -0.0624
         105     -0.0635
Name: Vabs, Length: 6517, dtype: float64

In [28]:
# Pivot Vabs into a Depth (rows) x Staname (columns) table. pivot_table's default
# aggregation (mean) combines rows that share the same station and depth;
# combinations with no data show up as NaN.
df_MVabs = df3D.pivot_table(
    index="Depth",
    columns="Staname",
    values="Vabs",
)
print(df_MVabs)

Staname    13G+    14G+    15G+     8G+     9G+      A+      B+      C+  \
Depth                                                                     
5           NaN     NaN     NaN     NaN     NaN -0.0604  0.0985  0.0729   
15      -0.0422 -0.0383 -0.0582  0.0468 -0.0220 -0.0255 -0.0010  0.0246   
25      -0.0056  0.0195  0.0156  0.0489 -0.0169 -0.0283 -0.0042  0.0409   
35      -0.0059  0.0191  0.0125  0.0472 -0.0085 -0.0282 -0.0049  0.0399   
45      -0.0054  0.0194  0.0111  0.0418 -0.0221 -0.0289 -0.0115  0.0441   
...         ...     ...     ...     ...     ...     ...     ...     ...   
2645        NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
2655        NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
2665        NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
2675        NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   
2685        NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   

Staname      D+      E+ 

In [34]:
# Goal: reorder the pivot-table columns by each station's distance along the
# EEL section, using dfloc as the reference for the ordering.
# This cell only inspects the current column order of the pivot table.
df_MVabs.columns.tolist()

['13G+',
 '14G+',
 '15G+',
 '8G+',
 '9G+',
 'A+',
 'B+',
 'C+',
 'D+',
 'E+',
 'F+',
 'G+',
 'H+',
 'I+',
 'IB1+',
 'IB10+',
 'IB11+',
 'IB11A+',
 'IB12+',
 'IB12A+',
 'IB13+',
 'IB13A+',
 'IB14+',
 'IB15+',
 'IB16+',
 'IB16A+',
 'IB17+',
 'IB18S+',
 'IB19S+',
 'IB1A+',
 'IB2+',
 'IB20S+',
 'IB21S+',
 'IB22S+',
 'IB2A+',
 'IB3+',
 'IB3A+',
 'IB4+',
 'IB4A+',
 'IB4B+',
 'IB4C+',
 'IB5+',
 'IB6+',
 'IB7+',
 'IB8+',
 'IB9+',
 'J+',
 'K+',
 'L+',
 'M+',
 'N+',
 'O+',
 'P+',
 'Q+',
 'Q1+',
 'R+',
 'S+',
 'T+']