# Getting model "cast" files into one dataframe with depth matched to observations

In [31]:
import xarray as xr
import numpy as np
import pandas as pd
import gsw
from pathlib import Path

import warnings
# Silence every warning for the whole session. NOTE(review): this also hides
# pandas' chained-assignment / SettingWithCopy warnings, so silent no-op
# assignments will not be reported while this filter is active.
warnings.filterwarnings("ignore")

In [32]:
def nearest(array, value):
    """Return the position of the entry of ``array`` that is closest to ``value``.

    ``array`` is converted with ``np.asarray``; the result is the index into
    the flattened array of the smallest absolute difference (the first such
    index when several entries are equally close).
    """
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()

In [33]:
# Root directory that the read_* functions glob for per-cast model extraction
# files, laid out as "<source>_<type>_<year>/<cid>.nc".
path = Path("/data1/bbeutel/LO_output/extract_cast/")

In [34]:
def read_dfo_ctd(year):
    """Pair DFO CTD observations with model cast output and write them to CSV.

    Reads the pickled DFO CTD observations for ``year``, and for every cast id
    (``cid``) that has exactly one matching ``dfo1_ctd_<year>/<cid>.nc`` file
    under the module-level ``path``, fills in model values taken from the
    model layer whose approximate depth (``h * s_rho``) is nearest to each
    observation's ``z``.

    Added columns:
        h         -- ``h`` from the cast file
        z_model   -- *index* of the nearest model layer (not a depth)
        PT_model  -- cast ``temp`` at that layer
        SA_model  -- cast ``salt`` at that layer
        DO_model  -- cast ``oxygen`` at that layer
        CT_model  -- ``gsw.CT_from_pt(SA_model, PT_model)``

    Rows whose cast has no matching file keep NaN in all added columns.

    Parameters
    ----------
    year : int or str
        Year used to build both the observation pickle path and the cast
        directory name.

    Returns
    -------
    None
        The result is written to ``dfo_ctd_<year>.csv`` in the current
        working directory.

    Notes
    -----
    Assumes ``h`` is a single value per cast file and that the profile
    variables are 1-D along ``s_rho`` -- TODO confirm against the extraction
    code.
    """
    dfo = pd.read_pickle('/data1/parker/LO_output/obs/dfo1/ctd/'+str(year)+'.p')

    # Output column -> variable name inside the cast file.
    model_vars = {'PT_model': 'temp', 'SA_model': 'salt', 'DO_model': 'oxygen'}

    # Create the columns up front (in this order) so unmatched casts stay NaN.
    for col in ('h', 'z_model', 'PT_model', 'SA_model', 'CT_model', 'DO_model'):
        dfo[col] = np.nan

    # Only look for casts that actually occur in the observations; this also
    # copes with an empty table, where max(cid) would be NaN.
    cast_ids = sorted({int(c) for c in dfo['cid'].dropna().unique()})

    for cid in cast_ids:
        matches = sorted(path.glob("dfo1_ctd_{}/{}.nc".format(year, cid)))
        if len(matches) != 1:
            continue

        rows = dfo['cid'] == cid

        # Context manager so the netCDF file handle is released after each cast.
        with xr.open_dataset(matches[0]) as cast:
            # Approximate model layer depths, computed once per cast.
            z_grid = np.asarray(cast.h * cast.s_rho)
            layer = np.array([nearest(z_grid, z) for z in dfo.loc[rows, 'z']],
                             dtype=int)

            # Use .loc rather than chained assignment (dfo.col[mask] = ...):
            # the chained form writes to a temporary under pandas
            # copy-on-write and would leave the frame unchanged.
            dfo.loc[rows, 'h'] = cast['h'].values.item()
            dfo.loc[rows, 'z_model'] = layer
            for col, var in model_vars.items():
                dfo.loc[rows, col] = cast[var].values[layer]

    dfo['CT_model'] = gsw.CT_from_pt(dfo['SA_model'], dfo['PT_model'])

    name = "dfo_ctd_"+str(year)+".csv"
    dfo.to_csv(name)

In [35]:
# read_dfo_ctd(2018)

In [36]:
def read_dfo_bottle(year):
    """Pair DFO bottle observations with model cast output and write them to CSV.

    Reads the pickled DFO bottle observations for ``year`` (dropping the
    ``name`` and ``cruise`` columns and then every row containing a NaN), and
    for every cast id (``cid``) that has exactly one matching
    ``dfo1_bottle_<year>/<cid>.nc`` file under the module-level ``path``,
    fills in model values taken from the model layer whose approximate depth
    (``h * s_rho``) is nearest to each observation's ``z``.

    Added columns:
        h          -- ``h`` from the cast file
        z_model    -- *index* of the nearest model layer (not a depth)
        PT_model   -- cast ``temp`` at that layer
        SA_model   -- cast ``salt`` at that layer
        DO_model   -- cast ``oxygen`` at that layer
        NO3_model  -- cast ``NO3`` at that layer
        CT_model   -- ``gsw.CT_from_pt(SA_model, PT_model)``

    Rows whose cast has no matching file keep NaN in all added columns.

    Parameters
    ----------
    year : int or str
        Year used to build both the observation pickle path and the cast
        directory name.

    Returns
    -------
    None
        The result is written to ``dfo_botte_<year>.csv`` in the current
        working directory.

    Notes
    -----
    Assumes ``h`` is a single value per cast file and that the profile
    variables are 1-D along ``s_rho`` -- TODO confirm against the extraction
    code.
    """
    dfo = pd.read_pickle('/data1/parker/LO_output/obs/dfo1/bottle/'+str(year)+'.p').drop(axis=1,labels=['name','cruise']).dropna()

    # Output column -> variable name inside the cast file.
    # TODO: chlorophyll is not extracted yet.
    model_vars = {
        'PT_model': 'temp',
        'SA_model': 'salt',
        'DO_model': 'oxygen',
        'NO3_model': 'NO3',
    }

    # Create the columns up front (in this order) so unmatched casts stay NaN.
    for col in ('h', 'z_model', 'PT_model', 'SA_model', 'CT_model',
                'DO_model', 'NO3_model'):
        dfo[col] = np.nan

    # Only look for casts that actually occur in the observations; this also
    # copes with an empty table (e.g. after dropna), where max(cid) is NaN.
    cast_ids = sorted({int(c) for c in dfo['cid'].dropna().unique()})

    for cid in cast_ids:
        matches = sorted(path.glob("dfo1_bottle_{}/{}.nc".format(year, cid)))
        if len(matches) != 1:
            continue

        rows = dfo['cid'] == cid

        # Context manager so the netCDF file handle is released after each cast.
        with xr.open_dataset(matches[0]) as cast:
            # Approximate model layer depths, computed once per cast.
            z_grid = np.asarray(cast.h * cast.s_rho)
            layer = np.array([nearest(z_grid, z) for z in dfo.loc[rows, 'z']],
                             dtype=int)

            # Use .loc rather than chained assignment (dfo.col[mask] = ...):
            # the chained form writes to a temporary under pandas
            # copy-on-write and would leave the frame unchanged.
            dfo.loc[rows, 'h'] = cast['h'].values.item()
            dfo.loc[rows, 'z_model'] = layer
            for col, var in model_vars.items():
                dfo.loc[rows, col] = cast[var].values[layer]

    dfo['CT_model'] = gsw.CT_from_pt(dfo['SA_model'], dfo['PT_model'])

    # NOTE(review): "botte" looks like a typo for "bottle"; the name is kept
    # unchanged because other code may already read files with this name.
    name = "dfo_botte_"+str(year)+".csv"
    dfo.to_csv(name)

In [41]:
def read_ncei(CoastalSalish, year):
    """Pair NCEI bottle observations with model cast output and write them to CSV.

    Reads the pickled observations from the ``ncei<CoastalSalish>`` source for
    ``year`` (dropping the ``name`` and ``cruise`` columns), and for every
    cast id (``cid``) that has exactly one matching
    ``ncei<CoastalSalish>_bottle_<year>/<cid>.nc`` file under the module-level
    ``path``, fills in model values taken from the model layer whose
    approximate depth (``h * s_rho``) is nearest to each observation's ``z``.

    Added columns:
        h          -- ``h`` from the cast file
        z_model    -- *index* of the nearest model layer (not a depth)
        PT_model   -- cast ``temp`` at that layer
        SA_model   -- cast ``salt`` at that layer
        DO_model   -- cast ``oxygen`` at that layer
        NO3_model  -- cast ``NO3`` at that layer
        TIC_model  -- cast ``TIC`` at that layer
        TA_model   -- cast ``alkalinity`` at that layer
        CT_model   -- ``gsw.CT_from_pt(SA_model, PT_model)``

    Rows whose cast has no matching file keep NaN in all added columns.

    Parameters
    ----------
    CoastalSalish : str
        Suffix appended to ``ncei`` to select the observation source, e.g.
        ``'Salish'``.
    year : int or str
        Year used to build both the observation pickle path and the cast
        directory name.

    Returns
    -------
    None
        The result is written to ``ncei<CoastalSalish>_<year>.csv`` in the
        current working directory.

    Notes
    -----
    Assumes ``h`` is a single value per cast file and that the profile
    variables are 1-D along ``s_rho`` -- TODO confirm against the extraction
    code.
    """
    d = pd.read_pickle('/data1/parker/LO_output/obs/ncei'+CoastalSalish+'/bottle/'+str(year)+'.p').drop(axis=1,labels=['name','cruise'])

    # Output column -> variable name inside the cast file.
    model_vars = {
        'PT_model': 'temp',
        'SA_model': 'salt',
        'DO_model': 'oxygen',
        'NO3_model': 'NO3',
        'TIC_model': 'TIC',
        'TA_model': 'alkalinity',
    }

    # Create the columns up front (in this order) so unmatched casts stay NaN.
    for col in ('h', 'z_model', 'PT_model', 'SA_model', 'CT_model',
                'DO_model', 'NO3_model', 'TIC_model', 'TA_model'):
        d[col] = np.nan

    # Only look for casts that actually occur in the observations; this also
    # copes with an empty table, where max(cid) would be NaN.
    cast_ids = sorted({int(c) for c in d['cid'].dropna().unique()})

    for cid in cast_ids:
        matches = sorted(path.glob("ncei"+CoastalSalish+"_bottle_{}/{}.nc".format(year, cid)))
        if len(matches) != 1:
            continue

        rows = d['cid'] == cid

        # Context manager so the netCDF file handle is released after each cast.
        with xr.open_dataset(matches[0]) as cast:
            # Approximate model layer depths, computed once per cast.
            z_grid = np.asarray(cast.h * cast.s_rho)
            layer = np.array([nearest(z_grid, z) for z in d.loc[rows, 'z']],
                             dtype=int)

            # Use .loc rather than chained assignment (d.col[mask] = ...):
            # the chained form writes to a temporary under pandas
            # copy-on-write and would leave the frame unchanged.
            d.loc[rows, 'h'] = cast['h'].values.item()
            d.loc[rows, 'z_model'] = layer
            for col, var in model_vars.items():
                d.loc[rows, col] = cast[var].values[layer]

    d['CT_model'] = gsw.CT_from_pt(d['SA_model'], d['PT_model'])

    name = "ncei"+CoastalSalish+"_"+str(year)+".csv"
    d.to_csv(name)

In [44]:
# Build the matched observation/model table for the 'Salish' NCEI source, 2017
# (writes nceiSalish_2017.csv to the current working directory).
read_ncei('Salish', 2017)


In [45]:
# Build the matched observation/model table for the 'Salish' NCEI source, 2018
# (writes nceiSalish_2018.csv to the current working directory).
read_ncei('Salish', 2018)
