In [1]:
import pandas as pd

## Metzyme Data Cleaning & Merging

In [24]:
# Import CSV file as Pandas Dataframes
HgD = pd.read_csv('../Data/Metzyme/met_Hg.csv')
CTD = pd.read_csv('../Data/Metzyme/met_CTD.csv')

In [25]:
HgD.head()

Unnamed: 0,Station,long_E,lat_N,Depth_m,Temp_C,SALINITY_PSS78,Oxygen_umol_kg,PO4_umol_L,Silicate_umol_L,nitrate_umol_L,nitrite_umol_L,HgT_pM,Hg0_pM,DMHg_fM,MMHg_fM
0,1,206,17,20,25.72,34.96,217.0,0.23,1.09,0.05,0.03,0.45,0.1,,
1,1,206,17,40,25.71,34.97,218.03,0.24,1.28,0.07,0.01,0.43,0.09,19.31,
2,1,206,17,60,25.02,34.9,223.91,0.23,1.08,0.01,0.01,0.81,0.08,21.85,
3,1,206,17,85,22.81,34.9,229.49,0.19,1.07,0.06,0.02,0.28,0.12,22.05,
4,1,206,17,120,20.96,34.98,214.31,0.25,3.42,1.47,0.08,1.0,0.17,38.04,


In [26]:
HgD.dtypes

Station              int64
long_E               int64
lat_N                int64
Depth_m              int64
Temp_C             float64
SALINITY_PSS78     float64
Oxygen_umol_kg     float64
PO4_umol_L         float64
Silicate_umol_L    float64
nitrate_umol_L     float64
nitrite_umol_L     float64
HgT_pM             float64
Hg0_pM             float64
DMHg_fM            float64
MMHg_fM            float64
dtype: object

In [27]:
CTD.head()

Unnamed: 0,Station,Latitude,Longitude,DEPTH,FluoroCh,Xmiss
0,1,17,206,1,nd,nd
1,1,17,206,2,0.02,nd
2,1,17,206,3,0.00,nd
3,1,17,206,4,0.01,nd
4,1,17,206,5,0.01,98.48


In [28]:
CTD.dtypes

Station       int64
Latitude      int64
Longitude     int64
DEPTH         int64
FluoroCh     object
Xmiss        object
dtype: object

In [29]:
# Update column names for merge
CTD.rename(columns = {'DEPTH':'Depth_m'}, inplace = True)
CTD.head()

Unnamed: 0,Station,Latitude,Longitude,Depth_m,FluoroCh,Xmiss
0,1,17,206,1,nd,nd
1,1,17,206,2,0.02,nd
2,1,17,206,3,0.00,nd
3,1,17,206,4,0.01,nd
4,1,17,206,5,0.01,98.48


In [30]:
HgD.shape

(233, 15)

In [31]:
# Merge HgD with CTD
Met_HgD = HgD.merge(CTD, how='left', on=['Station', 'Depth_m'])
Met_HgD.head()

Unnamed: 0,Station,long_E,lat_N,Depth_m,Temp_C,SALINITY_PSS78,Oxygen_umol_kg,PO4_umol_L,Silicate_umol_L,nitrate_umol_L,nitrite_umol_L,HgT_pM,Hg0_pM,DMHg_fM,MMHg_fM,Latitude,Longitude,FluoroCh,Xmiss
0,1,206,17,20,25.72,34.96,217.0,0.23,1.09,0.05,0.03,0.45,0.1,,,17.0,206.0,0.03,98.57
1,1,206,17,40,25.71,34.97,218.03,0.24,1.28,0.07,0.01,0.43,0.09,19.31,,17.0,206.0,0.02,98.57
2,1,206,17,60,25.02,34.9,223.91,0.23,1.08,0.01,0.01,0.81,0.08,21.85,,17.0,206.0,0.06,98.37
3,1,206,17,85,22.81,34.9,229.49,0.19,1.07,0.06,0.02,0.28,0.12,22.05,,17.0,206.0,0.1,98.48
4,1,206,17,120,20.96,34.98,214.31,0.25,3.42,1.47,0.08,1.0,0.17,38.04,,17.0,206.0,0.77,98.44


In [32]:
Met_HgD.shape

(233, 19)

In [33]:
# Remove duplicate depths in GT16_HgD dataframe
Met_HgD = Met_HgD.groupby(by=['Station', 'Depth_m']).mean()
Met_HgD.shape

(233, 15)

In [34]:
Met_HgD.columns

Index(['long_E', 'lat_N', 'Temp_C', 'SALINITY_PSS78', 'Oxygen_umol_kg',
       'PO4_umol_L', 'Silicate_umol_L', 'nitrate_umol_L', 'nitrite_umol_L',
       'HgT_pM', 'Hg0_pM', 'DMHg_fM', 'MMHg_fM', 'Latitude', 'Longitude'],
      dtype='object')

In [35]:
# Remove unneeded columns

Met_HgD.drop(columns=['Latitude', 'Longitude'], inplace=True)

Met_HgD.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,long_E,lat_N,Temp_C,SALINITY_PSS78,Oxygen_umol_kg,PO4_umol_L,Silicate_umol_L,nitrate_umol_L,nitrite_umol_L,HgT_pM,Hg0_pM,DMHg_fM,MMHg_fM
Station,Depth_m,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,20,206,17,25.72,34.96,217.0,0.23,1.09,0.05,0.03,0.45,0.1,,
1,40,206,17,25.71,34.97,218.03,0.24,1.28,0.07,0.01,0.43,0.09,19.31,
1,60,206,17,25.02,34.9,223.91,0.23,1.08,0.01,0.01,0.81,0.08,21.85,
1,85,206,17,22.81,34.9,229.49,0.19,1.07,0.06,0.02,0.28,0.12,22.05,
1,120,206,17,20.96,34.98,214.31,0.25,3.42,1.47,0.08,1.0,0.17,38.04,


In [36]:
# Export 
Met_HgD.to_csv('../Data/Metzyme/Met_HgD_cleaned.csv')