In [None]:
import pathlib
import lzma
import re
import datetime

import numpy as np
import pandas as pd

In [None]:
# Network share holding the iCom logs; the .xz archives sit in its `compressed` sub-folder.
root = pathlib.Path(r'\\physics-server\iComLogFiles')

# sorted() consumes the glob generator directly and returns a list.
compressed_files = sorted(root.glob('compressed/*.xz'))
compressed_files

In [None]:
# Decompress every archive (in sorted path order) and concatenate the payloads
# into one bytes object. The pieces are collected first and joined once:
# repeated `bytes +=` copies the whole accumulated buffer on every iteration,
# which is quadratic in the total size.
chunks = []

for path in compressed_files:
    with lzma.open(path, 'r') as f:
        chunks.append(f.read())

data = b"".join(chunks)

In [None]:
# Locate every timestamp of the form YYYY-MM-DDHH:MM:SS (no separator between
# date and time) plus the single byte that follows it.
#
# * The literal is a raw bytes string so that `\d` reaches the regex engine
#   untouched instead of being an invalid escape sequence in the literal.
# * re.DOTALL is required because the trailing byte is arbitrary binary data:
#   without it `.` refuses to match 0x0A (newline) and any record whose
#   trailing byte has that value is silently skipped.
pattern = re.compile(rb'\d\d\d\d-\d\d-\d\d\d\d:\d\d:\d\d.', re.DOTALL)

# (start, end) byte offsets of each match within `data`.
date_index = [m.span() for m in pattern.finditer(data)]

In [None]:
# Show the raw bytes covered by the first timestamp match.
first_start, first_stop = date_index[0]
data[first_start:first_stop]

In [None]:
# Raw bytes of every timestamp match (timestamp text + one trailing byte).
results = [data[start:stop] for start, stop in date_index]

# Indexing a bytes object yields an int, so this is the value of the trailing byte.
counter = [record[-1] for record in results]

# Step between consecutive trailing-byte values, and which step sizes occur.
counter_diff = np.diff(counter)
np.unique(counter_diff)

In [None]:
# Trailing-byte values that are immediately followed by a jump of 2.
# counter_diff[i] compares counter[i + 1] with counter[i], so the mask lines
# up with every counter entry except the last.
jump_of_two = counter_diff == 2
np.array(counter)[:-1][jump_of_two]

In [None]:
# Scratch cell: displays the single byte with value 9 (rendered by Python as b'\t').
# NOTE(review): presumably kept to compare against the trailing-byte values
# inspected in the cell above — confirm or delete.
b'\x09'

In [None]:
# Drop the trailing byte from each match, decode the remaining timestamp text
# and parse it (the text has no separator between the date and the time).
parsed_timestamps = [
    datetime.datetime.strptime(result[:-1].decode(), '%Y-%m-%d%H:%M:%S')
    for result in results
]

times = pd.to_datetime(parsed_timestamps).array

In [None]:
# Gaps between consecutive timestamps, expressed in whole seconds,
# reduced to the distinct values that occur.
time_steps = np.diff(times).astype('timedelta64[s]')
np.unique(time_steps)

In [None]:
# NOTE(review): `control_points` is not assigned anywhere in the visible part
# of this notebook, so this cell raises NameError on a fresh kernel unless the
# name is defined elsewhere — TODO confirm where it comes from or remove the cell.
len(control_points)

In [None]:
# Number of timestamps parsed into `times` (one per regex match).
len(times)

In [None]:
# Twice the number of parsed timestamps.
# NOTE(review): presumably compared by eye against len(control_points) — confirm.
len(times)*2

In [None]:
# Eyeball the first 50 bytes of the concatenated stream.
data[:50]

In [None]:
# Cut the stream into one chunk per timestamp match. Each chunk begins 8 bytes
# ahead of its timestamp — presumably a fixed-size prefix that precedes the
# timestamp in every record; confirm against the file format.
start_points = [match_start - 8 for match_start, _ in date_index]

# A chunk ends where the next one begins; the final chunk runs to the end of
# the stream (a slice end of None).
end_points = [*start_points[1:], None]

data_points = [data[lo:hi] for lo, hi in zip(start_points, end_points)]

In [None]:
# Size in bytes of the longest chunk.
chunk_lengths = list(map(len, data_points))
np.max(chunk_lengths)

In [None]:
def initial_results_parse(data_point):
    """Extract the printable tokens from one binary record.

    A token is a run of letters, digits, spaces, dots or hyphens that directly
    follows three NUL bytes.

    Parameters
    ----------
    data_point : bytes
        Raw bytes of a single record.

    Returns
    -------
    numpy.ndarray
        1-D array of ``str`` tokens in the order they occur in ``data_point``
        (empty when nothing matches).
    """
    # Raw literal: the regex engine interprets `\x00` itself, and `.` needs no
    # backslash inside a character class. The previous non-raw literal
    # contained `\.`, which is an invalid escape sequence in a bytes literal.
    pattern = re.compile(rb'\x00\x00\x00([a-zA-Z0-9 .-]+)')

    results = pattern.findall(data_point)
    # findall yields bytes; converting the array to str decodes them.
    results = np.array(results).astype(str)

    return results


def pull_header(tag, length, results_dict, results_scrape):
    """Pull every ``tag`` entry and the numbers that follow it out of a token array.

    For each occurrence of ``tag`` the next ``length`` tokens are converted to
    floats, the sentinel values -32767 and 32767 are replaced by NaN, and the
    result is stored in ``results_dict`` under ``"<tag>-<i>"`` where ``i``
    counts the occurrences from 0. The tag and its values are then removed
    from the token array.

    Parameters
    ----------
    tag : str
        Token that introduces a run of numeric values.
    length : int
        Number of value tokens expected after each tag.
    results_dict : dict
        Dictionary that receives the extracted arrays; it is updated in place
        and also returned.
    results_scrape : array-like of str
        Token array to search. The input object itself is not modified.

    Returns
    -------
    tuple
        ``(results_dict, remaining)`` where ``remaining`` is a new array
        holding the tokens that were not consumed.

    Raises
    ------
    ValueError
        If a token following ``tag`` cannot be converted to ``float``.
    """
    results_scrape = np.asarray(results_scrape)
    index = np.where(results_scrape == tag)[0]

    # One keep-mask for all occurrences instead of repeated np.delete calls.
    # Slice assignment clamps at the end of the array, so a run that is cut
    # short by the end of the record is tolerated; np.delete raises IndexError
    # for out-of-bounds indices on NumPy >= 1.19.
    keep = np.ones(len(results_scrape), dtype=bool)

    for i, ref in enumerate(index):
        values = results_scrape[ref + 1:ref + length + 1].astype(float)
        values[(values == -32767) | (values == 32767)] = np.nan
        results_dict["{}-{}".format(tag, i)] = values
        keep[ref:ref + length + 1] = False

    return results_dict, results_scrape[keep]


def organise_by_tags(results):
    """Sort the tokens of one record into a dictionary.

    The numeric runs introduced by ``ASYMX`` (2 values), ``ASYMY`` (2 values)
    and ``MLCX`` (160 values) are pulled out first via ``pull_header``. Of the
    remaining tokens, those that start with a letter and are at least two
    characters long go to ``"Text-tags"``; everything else goes to
    ``"Left-overs"``.

    Parameters
    ----------
    results : numpy.ndarray
        1-D array of ``str`` tokens, as returned by ``initial_results_parse``.
        The array is copied, not modified.

    Returns
    -------
    dict
        The entries created by ``pull_header`` plus ``"Text-tags"`` (list of
        str) and ``"Left-overs"`` (list of str, with the sentinel tokens
        ``'-32767'`` / ``'32767'`` replaced by ``None``).
    """
    results_dict = dict()
    results_scrape = results.copy()

    for tag, length in (('ASYMX', 2), ('ASYMY', 2), ('MLCX', 160)):
        results_dict, results_scrape = pull_header(tag, length, results_dict, results_scrape)

    pattern = re.compile('[a-zA-Z][a-zA-Z0-9 -]+')

    # Explicit booleans: relying on the truthiness of an object array of
    # Match/None values is fragile, and an empty input must still give a
    # boolean mask.
    is_text = np.array(
        [pattern.match(value) is not None for value in results_scrape],
        dtype=bool,
    )
    results_dict["Text-tags"] = results_scrape[is_text].tolist()

    # Assigning None into a NumPy string array stores the text 'None', so the
    # sentinel replacement is done on the Python list to yield a real None.
    sentinels = ('-32767', '32767')
    results_dict["Left-overs"] = [
        None if value in sentinels else value
        for value in results_scrape[~is_text].astype(str).tolist()
    ]

    return results_dict


def convert(data_point):
    """Turn the raw bytes of one record into a dictionary of its contents.

    The bytes are tokenised with ``initial_results_parse`` and the tokens are
    then grouped by ``organise_by_tags``, whose dictionary is returned.
    """
    return organise_by_tags(initial_results_parse(data_point))

In [None]:
# Spot-check the parser on a single record; index 10000 requires that at least
# 10001 records were found in the stream.
sample_record = data_points[10000]
convert(sample_record)