# Notebook for exploring the EEL data at specific depth levels


In [171]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Display floating-point values in dataframes with a precision of 4
pd.options.display.precision = 4

## 1. Load data into Dataframe

In [172]:
# Folder that contains the raw data files
pathdir = '../data/raw'

# Show every CSV file found in that folder, one path per line
import glob
listfiles = glob.glob(f"{pathdir}/*.csv")
print("\n".join(listfiles))


../data/raw/EELCTDandLADCP_refpos_origCTD.csv
../data/raw/EELCTDandLADCP_refdate.csv
../data/raw/EELCTDandLADCP_2Dfield.csv
../data/raw/EELCTDandLADCP_refpos_gvel.csv
../data/raw/EELCTDandLADCP_3Dfield.csv


#### Import 3D Fields

In [173]:
# Read the 3D-field CSV (comma separated, first row holds the column names)
# into a dataframe with a default integer index, then display it.
file1 = f"{pathdir}/EELCTDandLADCP_3Dfield.csv"
df3D = pd.read_csv(file1, header=0, index_col=None, sep=',')
df3D


Unnamed: 0,CruiseID,Staname,Refdist,Depth,CT,SA,Sigma0,Vrel,Vladcp,Vabs,Vladcpalong
0,d22396,13G+,1186.1527,25,12.9802,35.3276,26.5186,7.1146e-02,-0.0274,0.0317,-0.1860
1,d22396,13G+,1186.1527,35,12.9572,35.3273,26.5229,7.1017e-02,-0.0061,0.0316,-0.1952
2,d22396,13G+,1186.1527,45,12.7418,35.3160,26.5563,7.1887e-02,-0.0021,0.0324,-0.1987
3,d22396,13G+,1186.1527,55,12.3321,35.3116,26.6325,7.0745e-02,0.0071,0.0313,-0.2105
4,d22396,13G+,1186.1527,65,11.9225,35.3072,26.7087,6.4738e-02,0.0279,0.0253,-0.2432
...,...,...,...,...,...,...,...,...,...,...,...
63894,dy078,IB22S+,6.7210,65,7.8572,35.2077,27.3261,-6.5029e-06,0.0393,0.0065,-0.0091
63895,dy078,IB22S+,6.7210,75,7.7614,35.2106,27.3424,2.5855e-04,0.0483,0.0067,-0.0345
63896,dy078,IB22S+,6.7210,85,7.7387,35.2176,27.3512,-3.9223e-05,0.0504,0.0064,-0.0517
63897,dy078,IB22S+,6.7210,95,7.7221,35.2191,27.3548,-1.2993e-04,0.0437,0.0063,-0.0619


#### Import Metadata

In [174]:
# Year and month of each cruise, keyed by CruiseID
file2 = f"{pathdir}/EELCTDandLADCP_refdate.csv"
dfdate = pd.read_csv(file2, header=0, index_col=None, sep=',')
print(dfdate)

   CruiseID  Year  Month
0    d22396  1996     10
1    d23097  1997      9
2    d23398  1998      5
3    d24299  1999      9
4    d24500  2000      2
5    d25301  2001     12
6   cd17605  2005     10
7    d31206  2006     10
8    d32107  2007      8
9    d34009  2009      6
10   d35110  2010      5
11   d36511  2011      5
12    jc086  2013      5
13    jr302  2014      7
14    dy031  2015      6
15    dy052  2016      6
16    dy078  2017      5


In [253]:
# Positions of the EEL stations
file3 = f"{pathdir}/EELCTDandLADCP_refpos_gvel.csv"
dfloc = pd.read_csv(file3, header=0, index_col=None, sep=',')

# Order the stations by increasing distance along the section (Refdist);
# sort_values returns a new dataframe, dfloc itself is left untouched.
sdfloc = dfloc.sort_values(by='Refdist')

# Preview only the first two columns of the sorted table
station_preview = sdfloc.iloc[:, :2]
print(f"\n {station_preview}")


    Staname    Refdist
67  IB22S+     6.7210
66  IB21S+    19.4086
65  IB20S+    40.4960
64  IB19S+    69.8213
63  IB18S+   103.0651
..     ...        ...
4      5G+  1272.9178
3      4G+  1282.0609
2      3G+  1289.5191
1      2G+  1295.3205
0      1G+  1302.8994

[68 rows x 2 columns]


<br><br>
## 2. Create pivot tables of absolute geostrophic velocities at specific depths

#### Create several pivot tables for different depths and store them in a dictionary

In [254]:
# Depth levels (m) for which a velocity table is built;
# each value is compared against the 'Depth' column of df3D.
zlist = [105, 505, 805, 2505]

In [264]:
# Build one pivot table of Vabs (rows: CruiseID, columns: Staname) for every
# depth level in zlist and collect them in a list of dictionaries of the form
# {'Depth': <depth level as a string>, 'Data': <pivot table>}.
# The list is rebuilt from scratch each time this cell runs.
Vabsbyz = []
for k in zlist:
    # Filter on the loop variable `k`. The previous code compared against
    # `zlvl`, a name that is not defined anywhere in the visible notebook: on a
    # fresh kernel that raises NameError, and with a stale `zlvl` left in the
    # kernel every entry would hold the same depth under different labels.
    df = df3D[df3D['Depth'] == k].pivot(values="Vabs", index="CruiseID", columns="Staname")
    Vabsbyz.append({'Depth': str(k), 'Data': df})

# Show the table of the first depth level as a check
print(f" Dataframe for {Vabsbyz[0]['Depth']}m depth:")
Vabsbyz[0]['Data']

 Dataframe for 105m depth:


Staname,13G+,14G+,15G+,8G+,9G+,A+,B+,C+,D+,E+,...,L+,M+,N+,O+,P+,Q+,Q1+,R+,S+,T+
CruiseID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
cd17605,-0.0377,-0.0339,,0.054,,,,-0.046378,-0.0216,0.0881,...,0.1245,0.1066,0.1078,-0.112,0.0438,0.2073,0.0864,0.2576,,
d22396,,,,,,,,,0.0957,0.0548,...,-0.0689,-0.1537,-0.0254,0.0872,0.063,0.186,0.124,,,
d23097,,,,,,,,,,0.0249,...,,,-0.0672,-0.0173,0.0366,0.0253,-0.0141,,,
d23398,,,,,,,,,,-0.1134,...,,,-0.0589,-0.0826,0.0383,0.2685,0.2053,,,
d24299,,,,,,-0.0271,-0.0026,0.038209,0.271,0.1033,...,0.0848,-0.0522,-0.2234,-0.0325,0.0696,0.1674,0.1733,,,
d24500,,,,,,,,,,,...,0.0129,-0.0005,-0.2257,-0.1162,0.1562,0.104,0.1612,,,
d31206,,,-0.1695,,,-0.0763,0.076,-0.0091013,-0.1178,-0.0694,...,0.0647,0.0223,0.0588,0.0177,0.0282,0.1458,0.1092,0.0464,-0.1203,-0.0726
d34009,,,-0.2231,,,,,0.0039657,-0.0851,-0.3875,...,-0.1386,-0.1703,-0.0967,-0.0838,0.034,0.1762,0.0887,0.0947,-0.1349,-0.1587
d35110,,,-0.0828,,0.0863,,,0.17294,0.0832,-0.1175,...,-0.3672,-0.357,-0.1981,0.2296,0.126,0.1067,0.0941,0.0445,-0.0598,-0.0637
d36511,,,,,,,,,-0.3064,,...,-0.2986,-0.3662,0.0932,0.3095,0.1732,0.1287,0.0704,0.0991,-0.0239,


\
##### Add the table *df_DA* calculated from the depth average to the list:

In [256]:
# Pivot table of Vabs over the whole dataframe: rows sharing a
# (CruiseID, Staname) pair are combined with pivot_table's default
# aggregation (the mean), which gives one value per cruise and station.
df_DA = df3D.pivot_table(index="CruiseID", columns="Staname", values="Vabs")

# Store it next to the per-depth tables under the label 'Mean'
mean_entry = {'Depth': 'Mean', 'Data': df_DA}
Vabsbyz.append(mean_entry)
print(len(Vabsbyz))

5


<br><br>  
## 3. Sort the dataframe 
### 3.a. Sort the rows according to year of the cruise
##### Merge each dataframe with the dataframe *dfdate*, which links each cruise ID to a year and a month

In [257]:
# Left-join every velocity table onto dfdate (key: CruiseID). The result keeps
# one row per cruise listed in dfdate, in dfdate's row order, with the date
# columns placed before the station columns.
# NOTE: the tables stored in Vabsbyz are replaced in place, so this cell is
# meant to run once after Vabsbyz has been (re)built.
for entry in Vabsbyz:
    entry['Data'] = pd.merge(dfdate, entry['Data'], on='CruiseID', how='left')

<br><br>
### 3.b. Sort the columns according to the location of the stations along the reference section (station order stored in *list2*)

In [259]:
# "copy" provides deepcopy, used below to duplicate the list of tables
import copy

# Station names ordered by increasing distance along the reference section
list2 = list(sdfloc.Staname)

# Deep copy: plain assignment would only create a second reference, and the
# reordering below would then also modify the tables held in Vabsbyz.
Vabsbyz_sort = copy.deepcopy(Vabsbyz)

# Number of leading non-station columns in each table (CruiseID, Year, Month)
n_meta = 3

# Reorder the station columns of every table so that they follow list2
for entry in Vabsbyz_sort:
    frame = entry['Data']

    # Station columns as they are currently ordered in the table
    station_cols = list(frame.columns[n_meta:])

    # Walk through the reference order and record the position of each
    # matching station column; positions are shifted by n_meta because the
    # station columns start after the leading non-station columns.
    station_positions = []
    for ref_name in list2:
        for offset, col_name in enumerate(station_cols):
            if col_name == ref_name:
                station_positions.append(offset + n_meta)

    # Leading columns stay first, followed by the stations in reference order
    entry['Data'] = frame.iloc[:, list(range(n_meta)) + station_positions]

\
Display first columns of original and new list:

In [260]:
# First five columns of the first table, before and after the column sort
preview_unsorted = Vabsbyz[0]['Data'].iloc[:, :5]
preview_sorted = Vabsbyz_sort[0]['Data'].iloc[:, :5]
print(preview_unsorted, "\n", preview_sorted, sep="\n")

   CruiseID  Year  Month    13G+    14G+
0    d22396  1996     10     NaN     NaN
1    d23097  1997      9     NaN     NaN
2    d23398  1998      5     NaN     NaN
3    d24299  1999      9     NaN     NaN
4    d24500  2000      2     NaN     NaN
5    d25301  2001     12     NaN     NaN
6   cd17605  2005     10 -0.0377 -0.0339
7    d31206  2006     10     NaN     NaN
8    d32107  2007      8     NaN     NaN
9    d34009  2009      6     NaN     NaN
10   d35110  2010      5     NaN     NaN
11   d36511  2011      5     NaN     NaN
12    jc086  2013      5     NaN     NaN
13    jr302  2014      7     NaN     NaN
14    dy031  2015      6     NaN     NaN
15    dy052  2016      6     NaN     NaN
16    dy078  2017      5     NaN     NaN


   CruiseID  Year  Month  IB22S+  IB21S+
0    d22396  1996     10     NaN     NaN
1    d23097  1997      9     NaN     NaN
2    d23398  1998      5     NaN     NaN
3    d24299  1999      9     NaN     NaN
4    d24500  2000      2     NaN     NaN
5    d25301  2

\
### Save temporary data

In [262]:
import pickle

# Serialise the column-sorted list of tables into the interim data folder
pathdata = "../data/interim/"
filename = f"{pathdata}Zlvl_Vabs"
with open(filename, mode='wb') as outfile:
    pickle.dump(Vabsbyz_sort, outfile)