-
Notifications
You must be signed in to change notification settings - Fork 12
/
mseedpull.py
273 lines (234 loc) · 9.45 KB
/
mseedpull.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# OOI conversion script for continuous streaming audio.
# The OOI website does not post files live, so this script continually
# looks for older files (based on a configured delay) and converts them
# to WAV files, which can then be posted to the Orcasound website
# via FFmpeg and other scripts.
#
# The code below is based on some code from Val Veirs,
# Elijah Blaisdell, Scott Veirs and Valentina Staneva from Democracy Lab
# Hackathon, Jan 9, 2021.
#
#
#
"""
!wget 'https://rawdata.oceanobservatories.org/files/RS01SBPS/PC01A/08-HYDBBA103/2021/01/09/OO-HYVM2--YDH-2021-01-09T00:15:00.000015.mseed'
"""
from obspy import read
import requests
from html.parser import HTMLParser
import time
from datetime import datetime, timedelta
import os
import shutil
import dateutil.parser
import logging
import logging.handlers
import sys
# Module configuration. Every os.environ[...] lookup below raises KeyError
# when the script starts if the variable is not set.

# Playback delay behind real time, read from STREAM_DELAY. The functions in
# this file use it as a number of hours (timedelta(hours=DELAY)). int()
# rejects fractional strings, so a value such as "6.5" raises ValueError.
DELAY = int(os.environ["STREAM_DELAY"])
# DELAY = 6.5
# Width of the conversion window, read from DELAY_SEGMENT. fetchAndConvert
# uses it as a number of minutes (timedelta(minutes=SEGMENT)).
SEGMENT = int(os.environ["DELAY_SEGMENT"]) # maybe change to "buffer"
# SEGMENT = 1
# TODO Should put this in env variable
#BASE_URL = os.environ["BASE_URL"]
# Root of the directory listing; a 'YYYY/MM/DD' path is appended per request.
BASE_URL = 'https://rawdata.oceanobservatories.org/files/RS01SBPS/PC01A/08-HYDBBA103/'
# Format of date in filename is ISO 8601 extended format
# To parse the start time of the file
# import dateutil.parser
# >>> dateutil.parser.isoparse('2021-01-09T00:15:00.000015')
# datetime.datetime(2021, 1, 9, 0, 15, 0, 15)
# Text that getFileTime strips from a file name so that only the ISO 8601
# timestamp remains.
TIME_PREFIX = os.environ['TIME_PREFIX']
TIME_POSTFIX = os.environ['TIME_POSTFIX']
# Module logger: DEBUG level, written to stdout as "module.function: message".
LOGLEVEL = logging.DEBUG
log = logging.getLogger(__name__)
log.setLevel(LOGLEVEL)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(module)s.%(funcName)s: %(message)s')
handler.setFormatter(formatter)
log.addHandler(handler)
# Shared mutable state: fetchAndConvert appends to it and queueFiles removes
# from it.
filesdone = [] # files that have already been converted
def getFileTime(filestring, prefix=TIME_PREFIX, postfix=TIME_POSTFIX):
    """Return the start time encoded in a hydrophone file name.

    Every occurrence of ``prefix`` and then of ``postfix`` is removed from
    ``filestring``; what is left is parsed as an ISO 8601 timestamp with
    ``dateutil.parser.isoparse``, which raises ``ValueError`` when the
    remainder is not a valid timestamp.
    """
    timestamp = filestring.replace(prefix, '').replace(postfix, '')
    parsed = dateutil.parser.isoparse(timestamp)
    return parsed
def getFileUrls():
    """List the hydrophone files published for the delayed date and for today.

    Fetches the directory index under BASE_URL for the UTC date DELAY hours
    ago and, when that is a different day, for the current UTC date. Every
    text node containing 'HYVM2' is treated as a file name.

    Returns:
        A list of dicts with the keys 'datetime' (start time parsed from the
        file name by getFileTime), 'url' (the directory URL the file was
        listed under) and 'filepath' (the file name as listed). The list is
        empty when nothing could be fetched.
    """
    filelist = []

    class MyHTMLParser(HTMLParser):
        """Collects hydrophone file names from one directory index page."""

        def __init__(self, url):
            # HTMLParser keeps internal state that its own __init__ sets up.
            super().__init__()
            self.url = url

        def handle_data(self, data):
            if 'HYVM2' not in data:
                return
            try:
                filetime = getFileTime(data)
            except ValueError:
                # A name without a parseable timestamp must not abort the
                # whole listing; skip just that entry.
                log.debug("skipping unparseable file name: " + data)
                return
            filelist.append({'datetime': filetime, 'url': self.url, 'filepath': data})

    now = datetime.utcnow()
    # TODO This only deals with a delay of 24 hours. To generalize we need to
    # divide delta by 24 to figure how the maximum number of days.
    datestr = (now - timedelta(hours=DELAY)).strftime('%Y/%m/%d')
    log.debug("now-delay: " + datestr)
    datenowstr = now.strftime('%Y/%m/%d')
    log.debug("now: " + datenowstr)
    dates = [datestr]
    if datestr != datenowstr:
        dates.append(datenowstr)

    for datestr in dates:
        url = BASE_URL + '{}'.format(datestr)
        log.debug("fetching: " + url)
        try:
            # Without a timeout a stalled connection would block the polling
            # loop indefinitely.
            r = requests.get(url, timeout=30)
        except OSError:
            # requests' own exceptions derive from OSError as well.
            print('OS error. Please check Internet connection.')
            time.sleep(10)
            continue
        except Exception:
            log.exception("Unexpected error to get url.")
            time.sleep(10)
            continue

        if not r.ok:
            # Compare the status, not the Response object: an error page
            # contains no file listing worth parsing.
            log.debug("HTTP status " + str(r.status_code) + " for " + url)
            print("website not responding or file not posted")
            continue

        parser = MyHTMLParser(url)
        # Feed decoded text; str(bytes) would hand the parser a b'...' repr.
        parser.feed(r.text)
    return filelist
def fetchAndConvert(files):
    """Download and convert every listed file inside the current delay window.

    A file qualifies when it is not already in ``filesdone`` and its age
    (UTC now minus its start time) lies strictly between
    ``DELAY hours - SEGMENT minutes`` and ``DELAY hours``. Each qualifying
    file is read with obspy, its samples are multiplied by 1e4, written to a
    temporary WAV file and transcoded by ffmpeg into an MPEG-TS file in the
    current directory; the WAV file is then removed.

    Args:
        files: list of dicts as produced by getFileUrls ('datetime', 'url',
            'filepath'). Converted entries gain the keys 'duration',
            'samplerate' and 'tsfilename'.

    Returns:
        The list of entries converted by this call (possibly empty).

    Raises:
        Whatever reading or writing a file raises. In that case the files
        converted earlier in the same call are taken out of ``filesdone``
        again, so a retry converts them instead of silently dropping them.
    """
    # Local import keeps this block self-contained.
    import subprocess

    convertedfiles = []
    now = datetime.utcnow()
    maxdelay = timedelta(hours=DELAY)
    # mindelay = maxdelay - timedelta(hours=SEGMENT)
    mindelay = maxdelay - timedelta(minutes=SEGMENT)

    pending = [
        entry for entry in files
        if entry['filepath'] not in filesdone
        and mindelay < now - entry['datetime'] < maxdelay
    ]
    toconvert = len(pending)
    log.debug(f'files to convert: {toconvert}')

    # If no file to convert, report the next available file time:
    if toconvert == 0:
        nextAvailable = None
        for entry in files:
            if now - entry['datetime'] < maxdelay:
                nextAvailable = entry['datetime']
                break
        if nextAvailable:
            time_to_wait_for_next = nextAvailable - (now - mindelay)
            print('The next available filetime: ' + str(nextAvailable) + '; Time delta: ' + str(time_to_wait_for_next))
        else:
            print('The next available file is currently unavailable.')
        return convertedfiles

    try:
        for file in pending:
            filepath = file['filepath']
            if filepath in filesdone:
                # The same name listed twice is converted only once.
                continue
            full_url = f"{file['url']}/{filepath}"
            # reading from url
            hydro = read(full_url)  # load file into obspy object
            log.debug('read url')
            file['duration'] = hydro[0].meta['endtime'] - hydro[0].meta['starttime']
            # increasing amplitude
            hydro[0].data = hydro[0].data * 1e4
            sampling_rate = hydro[0].meta['sampling_rate']
            # writing to wav file
            wavfilename = (filepath[:-12] + 'wav').replace(':', '-')  # TODO Could be tmp filename
            tsname = (filepath[:-12] + 'ts').replace(':', '-')
            hydro.write(wavfilename, framerate=sampling_rate, format='WAV')
            log.debug('converted wav')
            # Remove a stale output first so ffmpeg does not stop to ask
            # whether it may overwrite it.
            if os.path.exists(tsname):
                os.remove(tsname)
            # TODO fix this -ar to actually use sampling_rate
            # The file names come from a scraped web page, so pass them as
            # separate arguments instead of splicing them into a shell string.
            command = ['ffmpeg', '-i', wavfilename, '-f', 'mpegts', '-ar', '64000',
                       '-acodec', 'aac', '-ac', '1', tsname]
            try:
                returncode = subprocess.run(command).returncode
            except OSError as err:
                # e.g. ffmpeg not installed; keep going as before.
                log.error(f'could not run ffmpeg: {err}')
            else:
                if returncode != 0:
                    log.error(f'ffmpeg exited with status {returncode} for {wavfilename}')
            log.debug('made mpegts')
            if os.path.exists(wavfilename):
                os.remove(wavfilename)
            file['samplerate'] = sampling_rate
            file['tsfilename'] = tsname
            filesdone.append(filepath)
            convertedfiles.append(file)
            toconvert -= 1
            log.debug(f'files to convert: {toconvert}')
    except Exception:
        # The caller never receives convertedfiles when we raise, so un-mark
        # what this call finished; otherwise those files would be neither
        # queued nor retried.
        for done in convertedfiles:
            if done['filepath'] in filesdone:
                filesdone.remove(done['filepath'])
        raise
    return convertedfiles
def queueFiles(files):
    """Hand due segments to the player and discard segments that are too old.

    For every entry, ``age`` is UTC now minus the entry's start time:

    * ``age > DELAY hours + duration``: the segment is past; its .ts file is
      deleted if present.
    * ``DELAY hours < age <= DELAY hours + duration``: the segment is due;
      its .ts file, if present, is moved to '/root/data/dummy.ts'.
    * otherwise the entry is not due yet and stays in the list.

    Entries in the first two groups are removed from ``files`` and their
    'filepath' is removed from ``filesdone``.

    Args:
        files: list of entries returned by fetchAndConvert (needs the keys
            'duration' in seconds, 'datetime', 'tsfilename', 'filepath').
            The list is modified in place.

    Returns:
        Tuple ``(played, deleted, files)``: the number of due entries, the
        number of entries removed from the list (due plus past), and the
        same list object holding the entries that remain.
    """
    delay = timedelta(hours=DELAY)
    now = datetime.utcnow()
    played = 0
    deleted = 0
    remaining = []
    # Build the list of survivors instead of deleting during iteration:
    # removing by index while enumerating skips the entry after each removal.
    for entry in files:
        duration = timedelta(seconds=entry['duration'])
        age = now - entry['datetime']
        tsfilename = entry['tsfilename']
        filepath = entry['filepath']
        if delay + duration < age:  # in the past
            log.debug('deleting old entry: ' + tsfilename)
            if os.path.exists(tsfilename):
                os.remove(tsfilename)
        elif age > delay:
            # should be playing next
            log.debug('playing : ' + tsfilename)
            if os.path.exists(tsfilename):
                shutil.move(tsfilename, '/root/data/dummy.ts')
            played += 1
        else:
            remaining.append(entry)
            continue
        # Tolerate a path that is no longer recorded as converted.
        if filepath in filesdone:
            filesdone.remove(filepath)
        deleted += 1
    files[:] = remaining
    return (played, deleted, files)
def main_loop():
    """Poll for new files forever, converting and queueing them each cycle.

    Each cycle tries up to five times to obtain a non-empty listing from
    getFileUrls (sleeping 10 s after an empty one). With a listing in hand
    it tries up to three times to run fetchAndConvert, stopping at the first
    attempt that does not raise, then passes all converted entries collected
    so far to queueFiles. The cycle ends by sleeping until the next multiple
    of 150 seconds measured from when the loop started. Never returns.
    """
    starttime = time.time()
    convertedfiles = []
    while True:
        # TODO (from the original author): make sure each timestamped file
        # is converted once at most.
        for attempt in range(1, 6):
            print("Call getFileUrls(). Attempt #" + str(attempt) + " out of 5.")
            files = getFileUrls()
            if len(files) == 0:
                print("Unable to get file urls.")
                time.sleep(10)
                continue

            log.debug(f'number of URLS: {len(files)}')
            for fetch_attempt in range(1, 4):
                print("Call fetchAndConvert(files). Attempt " + str(fetch_attempt) + " out of 3")
                try:
                    convertedfiles.extend(fetchAndConvert(files))
                except OSError:
                    print('OS error. Please check Internet connection.')
                    time.sleep(10)
                except Exception:
                    # Use the module logger: logging.exception() would go to
                    # the root logger and implicitly install a root handler.
                    log.exception("Unexpected error to convert files.")
                    time.sleep(10)
                else:
                    # Success: the remaining attempts are only for retries.
                    break
            log.debug(f'number of converted files: {len(convertedfiles)}')
            played, deleted, convertedfiles = queueFiles(convertedfiles)
            log.debug(f'played: {played}, deleted: {deleted}')
            break
        print("The next call will start in 2.5 minutes.")
        time.sleep(150.0 - ((time.time() - starttime) % 150.0))
# Script entry point: start the polling loop as soon as this file is executed.
# main_loop() contains a `while True` loop with no exit, so nothing after
# this call runs unless an exception escapes it.
main_loop()
# todo - try encoding first to aac .ts and then stream looping
# ffmpeg -i dummy.wav -f mpegts -acodec aac dummy.ts
# splitting into 10 second .ts files
# os.system('ffmpeg -i {filename} -f segment -segment_list "live.m3u8" -segment_time 10 -segment_format mpegts -ar 48000 -ac 2 -acodec aac "live/live%03d.ts"'.format(filename=wavfilename))
# To encode hls forever
#
# ffmpeg -re -stream_loop -1 -i list.txt -f segment -segment_list \
# "./tmp/live.m3u8" -segment_list_flags +live -segment_time 10 \
# -segment_format mpegts -ar 64000 -ac 2 -threads 3 -acodec aac \
# "./tmp/live%03d.ts"
# list.txt contents below
#
# ffconcat version 1.0
# file 'dummy.wav'
# file 'list.txt'
#
#