In [None]:
import os
from pathlib import Path
import sys
from datetime import datetime
sys.path.insert(0, "../../..")
sys.path.insert(0, "../../../../acquire")

from HUGS.Processing import search
from HUGS.Client import Process, Search, Retrieve

from Acquire.ObjectStore import datetime_to_string
from Acquire.Client import User, Drive, Service, PAR, Authorisation, StorageCreds

from bqplot import pyplot as plt
from bqplot import DateScale, LinearScale, LogScale, Axis, Lines, Figure, Scatter
from random import randint    

from numpy import random as np_random
import matplotlib.pyplot as plt
import numpy as np
from pandas import read_json as pd_read_json

from ipywidgets import HBox, VBox

In [None]:
%matplotlib notebook

In [None]:
# Autoreload modules before executing code, useful during development
%load_ext autoreload
%autoreload 2

In [None]:
# Root URL of the HUGS service; the identity URL is this root plus "/identity".
base_url = "https://hugs.acquire-aaai.com/t"

# Create the client-side user and start a login request for it.
user = User(
    username="gareth",
    identity_url=base_url + "/identity",
)
response = user.request_login()

In [None]:
user.wait_for_login()

In [None]:
def get_path(filename, data_type="CRDS"):
    """Build the path of a processing test-data file.

    The path is anchored at the absolute form of the current working
    directory and points into
    ``../../../test/data/proc_test_data/<data_type>``. It is joined but
    not normalised, so the ``..`` segments are kept as written.

    Args:
        filename: Name of the file inside the data-type folder.
        data_type: Sub-folder of ``proc_test_data`` to look in.

    Returns:
        str: The joined path.
    """
    relative_dir = "../../../test/data/proc_test_data/{}".format(data_type)
    return os.path.join(os.path.abspath(""), relative_dir, filename)

processing = Process(service_url=base_url)

In [None]:
# Resolve the two bundled CRDS test files and upload each one under a source
# name equal to its file name without the ".dat" extension.
bsd_file = get_path("bsd.picarro.1minute.248m.dat", data_type="CRDS")
hfd_file = get_path("hfd.picarro.1minute.100m_min.dat", data_type="CRDS")

result_bsd = processing.process_files(user=user, files=bsd_file, data_type="CRDS", source_name="bsd.picarro.1minute.248m")
result_hfd = processing.process_files(user=user, files=hfd_file, data_type="CRDS", source_name="hfd.picarro.1minute.100m_min")

In [None]:
print(result_bsd, "\n\n", result_hfd)

In [None]:
fourmeg = "/Users/wm19361/Documents/Devel/hugs/raw_data/bilsdale-picarro/results-gcwerks/bsd.picarro.hourly.108m.dat"

In [None]:
result_fourmeg = processing.process_files(user=user, files=fourmeg, data_type="CRDS", source_name="bsd.picarro.hourly.108m")

In [None]:
eight_meg = 8*1024*1024

In [None]:
raw_data  = "/Users/wm19361/Documents/Devel/hugs/raw_data"

In [None]:
# Recursively collect every .dat file below raw_data that is smaller than eight_meg bytes.
# NOTE(review): the list is in whatever order the glob yields, yet later cells
# slice it by position — confirm the order is stable on the target machine.
filepaths = [
    dat_file
    for dat_file in Path(raw_data).rglob("*.dat")
    if dat_file.stat().st_size < eight_meg
]

In [None]:
filepaths

In [None]:
# Pair each selected file's name with its size in units of 1024*1024 bytes.
size_in_meg = [
    (dat_file.name, dat_file.stat().st_size / (1024 * 1024))
    for dat_file in filepaths
]

In [None]:
size_in_meg

In [None]:
bsd_picarro5310 = filepaths[4:8]
bsd_picarro5310

In [None]:
# Upload each file in bsd_picarro5310, naming the source after the file name
# minus its final extension.
for f in bsd_picarro5310:
    source_name = f.stem
    res = processing.process_files(
        user=user,
        files=str(f.resolve()),
        data_type="CRDS",
        source_name=source_name,
    )
    print(res)
    

In [None]:
# The neighbouring cells upload filepaths[4:8] and filepaths[9:11], so index 8
# is the only entry between them that is never processed.
# NOTE(review): the original index was lost (`filepaths[.name` did not parse) —
# confirm that 8 is the file this cell was meant to upload.
f = filepaths[8]
source_name = os.path.splitext(f.name)[0]
res = processing.process_files(user=user, files=str(f.resolve()), data_type="CRDS", source_name=source_name)
print(res)

In [None]:
rpb = filepaths[9:11]

In [None]:
rpb

In [None]:
# Upload each file in rpb, naming the source after the file name minus its
# final extension.
for f in rpb:
    source_name = f.stem
    res = processing.process_files(
        user=user,
        files=str(f.resolve()),
        data_type="CRDS",
        source_name=source_name,
    )
    print(res)

In [None]:
hfd = filepaths[11:15]

In [None]:
hfd

In [None]:
# Upload each file in hfd, naming the source after the file name minus its
# final extension.
for f in hfd:
    source_name = f.stem
    res = processing.process_files(
        user=user,
        files=str(f.resolve()),
        data_type="CRDS",
        source_name=source_name,
    )
    print(res)

In [None]:
# Upload only the last entry of hfd, using the same naming rule as the loops:
# source name = file name minus its final extension.
f = hfd[-1]
source_name = f.stem
res = processing.process_files(
    user=user,
    files=str(f.resolve()),
    data_type="CRDS",
    source_name=source_name,
)
print(res)

In [None]:
tac1 = filepaths[15:18]
tac2 = filepaths[18:21]

In [None]:
tac1

In [None]:
tac2

In [None]:
# Upload each file in tac1, naming the source after the file name minus its
# final extension.
for f in tac1:
    source_name = f.stem
    res = processing.process_files(
        user=user,
        files=str(f.resolve()),
        data_type="CRDS",
        source_name=source_name,
    )
    print(res)

In [None]:
# Upload each file in tac2, naming the source after the file name minus its
# final extension.
for f in tac2:
    source_name = f.stem
    res = processing.process_files(
        user=user,
        files=str(f.resolve()),
        data_type="CRDS",
        source_name=source_name,
    )
    print(res)

In [None]:
bsd = filepaths[21:24]

In [None]:
bsd2 = filepaths[24:27]

In [None]:
bsd

In [None]:
bsd2

In [None]:
# Upload each file in bsd, naming the source after the file name minus its
# final extension.
for f in bsd:
    source_name = f.stem
    res = processing.process_files(
        user=user,
        files=str(f.resolve()),
        data_type="CRDS",
        source_name=source_name,
    )
    print(res)

In [None]:
# Upload each file in bsd2, naming the source after the file name minus its
# final extension.
for f in bsd2:
    source_name = f.stem
    res = processing.process_files(
        user=user,
        files=str(f.resolve()),
        data_type="CRDS",
        source_name=source_name,
    )
    print(res)

## GC data

As with the CRDS files above, uploads are size-limited: only GC data files smaller than 10 MB are used.

In [None]:
import pathlib

In [None]:
gc_data = "/Users/wm19361/Documents/Devel/hugs/raw_data/GC/data"

In [None]:
# Recursively find the .C files below gc_data whose name contains "precisions"
# and does not contain a hyphen.
precisions = [
    c_file
    for c_file in pathlib.Path(gc_data).rglob("*.C")
    if "precisions" in c_file.name and "-" not in c_file.name
]

In [None]:
precisions

In [None]:
# Pair every precisions file with its data file: same folder, same name with
# the ".precisions.C" ending replaced by ".C". Only data files that exist and
# are smaller than ten_meg bytes are kept, as (data path, precisions path)
# string tuples.
ten_meg = 10 * 1024 * 1024
data_precision = []

for p in precisions:
    suffix = ".precisions.C"
    if not p.name.endswith(suffix):
        print(f"{p} does not end with {suffix}, skipped")
        continue

    # str.rstrip(".precisions.C") strips any trailing run of those *characters*,
    # not the suffix, so it can eat into the real file name. Slice instead.
    stem = p.name[:-len(suffix)]
    data_file = p.parent / (stem + ".C")

    if not data_file.is_file():
        print(f"{data_file} not found")
    elif os.path.getsize(data_file) >= ten_meg:
        # Previously reported as "not found", which was misleading.
        print(f"{data_file} skipped: size is {ten_meg} bytes or more")
    else:
        data_precision.append((str(data_file), str(p)))

In [None]:
len(data_precision)

In [None]:
print(data_precision)

In [None]:
# For each (data path, precisions path) pair, print the part of the data file's
# name that precedes its first dot.
for f in data_precision:
    data_path = f[0]
    filename = data_path.rsplit("/", 1)[-1]
    source_name = filename.partition(".")[0]
    print(source_name)

In [None]:
second = data_precision[1]
print(second)

In [None]:
# Pass the whole data_precision list to one process_files call and let %time report how long that call takes.
%time res = processing.process_files(user=user, files=data_precision, data_type="GC", instrument="GCMD")

In [None]:
# Split data_precision into consecutive batches of at most `step` pairs.
step = 5
chunks = [
    data_precision[start:start + step]
    for start in range(0, len(data_precision), step)
]

In [None]:
len(chunks)

In [None]:
# NOTE(review): `time` is imported here but nothing in this cell uses it.
import time

# Upload the GC data one batch at a time, keeping every response in `results`.
results = []
for c in chunks:
    res = processing.process_files(
        user=user,
        files=c,
        data_type="GC",
        instrument="GCMD",
    )
    results.append(res)

In [None]:
# NOTE(review): nothing visible in this notebook imports GC, so on a fresh
# kernel GC.load() raises NameError. HUGS.Modules is presumed to be where the
# class lives — confirm the import path before relying on it.
from HUGS.Modules import GC

# Load the GC object and show the datasources it reports.
gc = GC.load()
print(gc.datasources())


In [None]:
gc.clear_datasources()
gc.save()

In [None]:
gc.datasources()

In [None]:
del gc

In [None]:
# gc2 is never assigned anywhere in the visible notebook, so a bare `del gc2`
# raises NameError on a fresh kernel. Drop the name only if it exists.
globals().pop("gc2", None)

In [None]:
gc = GC.load()

In [None]:
gc.datasources()

Read all the GC data and upload

In [None]:
# Rebuild the (data path, precisions path) list from scratch. The list is
# reset under its real name: the original assigned to the misspelt
# `data_precison`, so re-running appended duplicates to the existing list.
ten_meg = 10 * 1024 * 1024
data_precision = []

for p in precisions:
    suffix = ".precisions.C"
    if not p.name.endswith(suffix):
        print(f"{p} does not end with {suffix}, skipped")
        continue

    # str.rstrip(".precisions.C") strips any trailing run of those *characters*,
    # not the suffix, so it can eat into the real file name. Slice instead.
    stem = p.name[:-len(suffix)]
    data_file = p.parent / (stem + ".C")

    if not data_file.is_file():
        print(f"{data_file} not found")
    elif os.path.getsize(data_file) >= ten_meg:
        # Previously reported as "not found", which was misleading.
        print(f"{data_file} skipped: size is {ten_meg} bytes or more")
    else:
        data_precision.append((str(data_file), str(p)))