-
Notifications
You must be signed in to change notification settings - Fork 23
/
timberdata.py
178 lines (155 loc) · 4.21 KB
/
timberdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""
Wrapper around the CERN logging database command line tool
Usage:
open: Parse and return the output of a query from a file name
load: Parse and return the output of a query from a file object
"""
import os
import time
import gzip
import numpy as np
from .localdate import parsedate, parsedate_myl
def load(fh,sep=',',t1=None,t2=None,debug=False,nmax=1e99,types=(float,float)):
"""Parse the output of the CERN logging database command line tool
Usage:
fh: file handler
sep: separator ',' or '\\t'
t1,t2: time interval read
nmax: maximum number of records per variable
Returns:
A dictionary for which for each variable found data is stored in a tuple
of timestamps list and record list. Data is accesible by the variable name
and a numeric index.
"""
if type(t1) is str:
t1=parsedate(t1)
if type(t2) is str:
t2=parsedate(t2)
data={}
dataon=False
header=True
for l in fh:
if l.startswith('VARIABLE'):
vname=l.split()[1]
count=0
if debug is True:
print('Found var %s' % vname)
if vname in data:
t,v=data[vname]
else:
t,v=[],[]
data[vname]=[t,v]
dataon=False
header=False
elif l.startswith('Timestamp'):
dataon=True
tformat='string'
if 'UNIX Format' in l:
tformat='unix'
elif 'LOCAL_TIME' in l:
tformat='local'
elif 'UTC_TIME' in l:
tformat='utc'
elif l=='\n':
dataon=False
elif dataon:
ll=l.strip().split(sep)
if tformat=='unix':
trec=float(ll[0])/1000.
elif tformat=='utc':
trec=parsedate_myl(ll[0]+' UTC')
else:
trec=parsedate_myl(ll[0])
if (t1 is None or trec>=t1) and (t2 is None or trec<=t2) and count<nmax:
vrec=ll[1:]
t.append(trec)
v.append(vrec)
count+=1
elif header:
if debug is True:
print(l)
log.append(l)
if types is not None:
ttype,vtype=types
data=combine_data(data,vtype=vtype,ttype=ttype)
return data
def combine_data(data,vtype=float,ttype=float):
"""Combine and change data type"""
for k in data.keys():
t,v=data[k]
outv=[]
outt=[]
for tt,vv in zip(t,v):
try:
vvv=np.array(vv,dtype=vtype)
outv.append(vvv)
outt.append(tt)
except ValueError:
print("Warning %s at %s not converted"%(repr(vv),tt))
pass
t=np.array(outt,dtype=ttype)
v=np.array(outv)
if v.shape[-1]==1:
v=v.reshape(v.shape[0])
data[k]=[t,v]
return data
def open(fn,sep=None,t1=None,t2=None,nmax=1e99,debug=False,
types=(float,float)):
"""Load output of the CERN measurement database query from a filename
Usage: open("test.tsv")
fn: filename
sep: separator type
Separator is inferred from the file extension as well
"""
if sep is None:
if fn.endswith('tsv') or fn.endswith('tsv.gz'):
sep='\t'
else:
sep=','
if fn.endswith('.gz'):
fh=gzip.open(fn)
else:
fh=file(fn)
data=load(fh,sep=sep,debug=debug,t1=t1,t2=t2,nmax=nmax,types=types)
fh.close()
return data
def openfnames(fnames,sep=None,t1=None,t2=None,nmax=1e99,debug=False):
"""Open a set of filenames"""
mask=fnames[0]
if sep is None:
if mask.endswith('tsv') or mask.endswith('tsv.gz'):
sep='\t'
else:
sep=','
data=load(_icat(fnames),sep=sep,debug=debug,t1=t1,t2=t2,nmax=nmax)
return data
def _icat(fnames):
for fn in fnames:
if fn.endswith('.gz'):
for l in gzip.open(fn):
yield l
else:
for l in file(fn):
yield l
def pprint(data):
"""Pretty print data, last dimension is from the first record"""
print("CERN DB data:")
for k in data.keys():
t,v=data[k]
recl=set()
for ii,vv in enumerate(v):
recl.add(len(vv))
recl=' or '.join([str(i) for i in recl])
print(" ['%s'][1] => v[%d][%s]" %( k,len(t),recl))
def merge_out(fnames):
data_final={}
for vname in data_final.keys():
data_final[vname]=([],[])
for fn in fnames:
data=load(fn)
for vname in data.keys():
t,v=data[vname]
nt=data_final[vname][0].extend(t)
nv=data_final[vname][1].extend(v)
data_final['log'].extend(data['log'])
return data_final