# Loading and accessing data

In [None]:
import sys

# Entry placed at the front of the module search path for the openghg environment.
# NOTE(review): this points at the environment's `activate` shell script, not a
# directory. sys.path entries are expected to be directories (or zip archives),
# so this entry probably has no effect on imports -- confirm whether the
# environment's site-packages directory was intended instead.
OPENGHG_ENV_ENTRY = "/work/rt17603/environments/openghg/bin/activate"

# Guard the insert so that re-running this cell does not stack duplicate
# entries on sys.path.
if OPENGHG_ENV_ENTRY not in sys.path:
    sys.path.insert(0, OPENGHG_ENV_ENTRY)

from openghg.modules import ObsSurface

In [None]:
import os

# Set OPENGHG_PATH for this kernel session (overriding any existing value).
# Presumably openghg reads this variable to locate the object store -- confirm.
os.environ.update(OPENGHG_PATH="/work/chxmr/objectStore")

### Mace Head data

Start by loading Mace Head ("MHD" or "macehead") data from each of the available sources. These are:
 - data_type="GCWERKS", network="AGAGE" (instrument is "GCMD")
 - data_type="GCWERKS", network="AGAGE" (instrument is "GCMS")
 - data_type="GCWERKS", network="AGAGE" (instrument is "medusa")
 - data_type="ICOS", network="ICOS"

In [None]:
import os
import glob
from file_search import find_gc_files

# Collect every MHD data file for the GCMD instrument (GC). The result is
# handed to ObsSurface.read_file unchanged as its `filepath` argument.
mhd_gcmd_tuples = find_gc_files("MHD", "GCMD")

# Read the GCMD files as GCWERKS-format data on the AGAGE network.
mhd_results_1 = ObsSurface.read_file(
    filepath=mhd_gcmd_tuples,
    data_type="GCWERKS",
    site="MHD",
    network="AGAGE",
)

In [None]:
# Collect every MHD data file for the GCMS instrument (GC).
# Relies on find_gc_files having been imported in an earlier cell.
mhd_gcms_tuples = find_gc_files("MHD", "GCMS")

# Read the GCMS files as GCWERKS-format data on the AGAGE network.
mhd_results_2 = ObsSurface.read_file(
    filepath=mhd_gcms_tuples,
    data_type="GCWERKS",
    site="MHD",
    network="AGAGE",
)

In [None]:
# Collect every MHD data file for the medusa instrument (GC).
# Relies on find_gc_files having been imported in an earlier cell.
mhd_medusa_tuples = find_gc_files("MHD", "medusa")

# Read the medusa files as GCWERKS-format data on the AGAGE network.
mhd_results_3 = ObsSurface.read_file(
    filepath=mhd_medusa_tuples,
    data_type="GCWERKS",
    site="MHD",
    network="AGAGE",
)

In [None]:
# Collect every MHD data file from ICOS.
## Known problem: at the moment this data doesn't appear within the
## object store after loading - Issue #57
from file_search import find_icos_files

icos_files = find_icos_files("MHD")

# Read the ICOS files as ICOS-format data on the ICOS network.
mhd_results_4 = ObsSurface.read_file(
    filepath=icos_files,
    data_type="ICOS",
    site="MHD",
    network="ICOS",
)

In [None]:
from openghg.objectstore import visualise_store
# Inspect the object store after the MHD loads above. The call is the cell's
# last expression, so whatever it returns is shown as the cell output.
# NOTE(review): the form of the visualisation is defined by openghg and is
# not visible from this notebook.
visualise_store()

In [None]:
#from openghg.localclient import get_obs_surface
## TODO: this should return the ICOS CO2 data loaded above, but the lookup currently fails.
#data = get_obs_surface(site="mhd", species="co2", network="ICOS")
#data

In [None]:
from openghg.localclient import get_obs_surface

# Retrieve a single gas ("sf6") for MHD; the network is left unspecified for now.
data = get_obs_surface(
    species="sf6",
    site="mhd",
)
data

In [None]:
# Request ch4 for an explicit window (2005-03-01 to 2008-02-01).
# Observed issue: when start and end dates are included, the data extracted
# runs from the nearest year start to the nearest year end rather than the
# requested dates. The first and last timestamps are printed to show this.
data = get_obs_surface(
    site="mhd",
    species="ch4",
    start_date="2005-03-01",
    end_date="2008-02-01",
)
timestamps = data.data["time"].values
print(timestamps[0], timestamps[-1], sep="\n")

Next, try searching for data that we know is present (it was loaded and accessed above).

In [None]:
from openghg.processing import search

In [None]:
# The search output includes a dictionary containing the uuid.
# An instrument filter (instrument="gcmd") is deliberately left out for now.
search_output = search(
    species="ch4",
    site="mhd",
    inlet="10m",
)
search_output

In [None]:
## Extracting from code base 
#from openghg.modules import Datasource
#obs = ObsSurface.load()
#datasource_uuids = obs.datasources()
#datasources = (Datasource.load(uuid=uuid, shallow=True) for uuid in datasource_uuids)

In [None]:
#d = next(datasources)
#d.metadata()

### Heathfield data

Try loading the different data types available for Heathfield ("HFD"):

- data_type = "GCWERKS", network = "DECC" (instrument is "GCMD")
- data_type = "CRDS", network = "DECC"

In [None]:
# Site code shared by the Heathfield cells below.
sitecode = "HFD"

# Collect every HFD data file for the GCMD instrument (GC), network="DECC".
# Relies on find_gc_files having been imported in an earlier cell.
hfd_gcmd_tuples = find_gc_files(sitecode, "GCMD")

# Read the GCMD files as GCWERKS-format data on the DECC network.
hfd_results_1 = ObsSurface.read_file(
    filepath=hfd_gcmd_tuples,
    data_type="GCWERKS",
    site=sitecode,
    network="DECC",
)

In [None]:
# Collect every HFD file for the CRDS instrument, network="DECC".
# `sitecode` is defined in the previous Heathfield cell.
from file_search import find_crds_files

crds_files = find_crds_files(sitecode)

# Read the CRDS files as CRDS-format data on the DECC network.
hfd_results_2 = ObsSurface.read_file(
    filepath=crds_files,
    data_type="CRDS",
    site=sitecode,
    network="DECC",
)

In [None]:
# Inspect the object store again now that the HFD files have been read.
# Uses visualise_store imported in an earlier cell.
visualise_store()

In [None]:
# HFD data has been loaded for two instruments on the same network (DECC).
# With no ranking explicitly set and no inlet height specified, this call
# returns the data from the "100m" inlet.
data = get_obs_surface(
    species="ch4",
    site="hfd",
)
data

In [None]:
# Data for an alternative inlet height is correctly extracted when the
# height is specified: request each height in turn and print the inlet
# recorded in the returned metadata.
for inlet_height in ("50m", "100m"):
    data = get_obs_surface(site="hfd", species="ch4", inlet=inlet_height)
    print(data.metadata["inlet"])

In [None]:
# Data for a different species (co2) can also be extracted correctly;
# print the species and inlet recorded in the returned metadata.
data = get_obs_surface(
    site="hfd",
    species="co2",
    inlet="50m",
)
metadata = data.metadata
print(metadata["species"], metadata["inlet"])

In [None]:
# Same date issue as seen for the MHD data: request an explicit window
# (2014-02-01 to 2016-02-01) for the picarro instrument and print the first
# and last timestamps actually returned.
data = get_obs_surface(
    site="hfd",
    species="ch4",
    instrument="picarro",
    start_date="2014-02-01",
    end_date="2016-02-01",
)
timestamps = data.data["time"].values
print(timestamps[0], timestamps[-1], sep="\n")