In [2]:
import mpl_toolkits
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import numpy as np
import pickle
import os
import logging
from obspy import read
import time


from obspy.clients.fdsn.mass_downloader import RectangularDomain, Restrictions, MassDownloader
from scipy import signal
from obspy import UTCDateTime
from obspy.clients.fdsn import Client as FDSN_Client
from obspy import read_inventory
import asyncio
from itertools import islice
from itertools import tee

In [12]:
#events_df = pd.read_pickle('data/events_processed.pkl')
#events_df = pd.read_pickle('data/events_deep_processed.pkl')
events_df = pd.read_pickle('data/events_shallow_processed.pkl')
stations_df = pd.read_pickle('data/stations_processed.pkl')

selected_stations = ['BFZ', 'BKZ', 'DCZ', 'DSZ', 'HIZ', 'JCZ', 'KHZ', 'KUZ', 'LBZ',
                        'MSZ', 'MWZ', 'MXZ', 'NNZ', 'ODZ', 'OPRZ', 'OUZ', 'PUZ', 'PXZ', 'QRZ', 'RPZ',
                         'SYZ', 'THZ', 'TOZ', 'URZ', 'VRZ', 'WHZ', 'WIZ', 'WKZ', 'WVZ']

#events_full = events_df[(events_df.time > '2016-01-01') & (events_df.time < '2017-01-01')]
events_full = events_df
print(events_full.shape)
folder = "active" # normal/active
single = False # should download only closest?
station_to_get = None#selected_stations[0]
all_station_string = ",".join([station.station_code for j, station in stations_df.iterrows()])
existing_event_files = os.listdir(f'datasets/{folder}/waveforms/smi_nz.org.geonet/')
#e_df['event_id'] = events_df['event_id'].apply(lambda x: x.split("/")[1])
events = events_full

(13097, 7)


In [4]:
events = events_full#[0:3000]
stations_df.shape

(28, 4)

In [5]:
mdl = MassDownloader(providers=['GEONET'])

[2022-06-13 01:11:25,284] - obspy.clients.fdsn.mass_downloader - INFO: Initializing FDSN client(s) for GEONET.
[2022-06-13 01:11:25,938] - obspy.clients.fdsn.mass_downloader - INFO: Successfully initialized 1 client(s): GEONET.


In [6]:
def mass_data_downloader(start, stop, event_id, Station,
                         Network='NZ', 
                         Channel='HHZ', 
                         Location=10,
                         folder='normal'
                         ):
    '''
    This function uses the FDSN mass data downloader to automatically download
    data from the XH network deployed on the RIS from Nov 2014 - Nov 2016.
    More information on the Obspy mass downloader available at:
    https://docs.obspy.org/packages/autogen/obspy.clients.fdsn.mass_downloader.html
    Inputs:
    start: "YYYYMMDD"
    stop:  "YYYYMMDD"
    Network: 2-character FDSN network code
    Station: 2-character station code
    Channel: 3-character channel code
    Location: 10.
    '''
    #print("=" * 65)
    #print("Initiating mass download request.")
    #print('downloading: ',event_id)

    domain = RectangularDomain(
        minlatitude=-47.749,
        maxlatitude=-33.779,
        minlongitude=166.104,
        maxlongitude=178.990
    )

    restrictions = Restrictions(
        starttime = start,
        endtime = stop,
        chunklength_in_sec = None,
        network = Network,
        station = Station,
        location = Location,
        channel = Channel,
        reject_channels_with_gaps = False,
        minimum_length = 0.0,
        minimum_interstation_distance_in_m = 100.0
    )

    #mdl = MassDownloader(providers=['GEONET'])
    ev_str = str(event_id).replace(":", "_")
    try:
        mdl.download(
            domain,
            restrictions,
            mseed_storage=f"datasets/{folder}/waveforms/{ev_str}",
            stationxml_storage=f"datasets/{folder}/stations",
        )
    except:
        pass
    #print('done: ',event_id)

logger = logging.getLogger("obspy.clients.fdsn.mass_downloader")
logger.setLevel(logging.WARNING)

In [6]:
#async def final_download():
#    for i, event in events.iterrows():
#        event_id = event.event_id
#        event_time = event['time']  
#        start=event_time - T_event
#        end=event_time + H_event
#        
#        print("=" * 65)
#        print("Initiating mass download request.")
#        print(event_id)
#        #asyncio.run(main())
#        #tasks = [download_st(start, end, event_id, station) for j, station in stations_df.iterrows()]
#        
#        #tasks = [asyncio.to_thread(mass_data_downloader, start, end, event_id, station.station_code) for j, station in stations_df.iterrows()]
#        #print(len(tasks))
#        #await asyncio.gather(*tasks)
#        stations = ",".join([station.station_code for j, station in stations_df.iterrows()])
#        mass_data_downloader(start, end, event_id, stations)
#

In [13]:
async def final_download_threaded(events):
    time1 = time.perf_counter()
    tasks = []
    #print("Initiating mass download request.")
    for i, event in events.iterrows():
        event_id = event.event_id
        if event_id.split("/")[1] in existing_event_files:
            continue
            
        event_time = event['time']
        start=event_time - T_event - H_event
        end=event_time - H_event
        if single: 
            stations = event['closest_station']
        else:
            stations = all_station_string
        
        tasks.append(asyncio.to_thread(mass_data_downloader, start, end, event_id, stations, folder=folder))
        
    await asyncio.gather(*tasks)
    time2 = time.perf_counter()
    print(f"{threads_at_once} threads took {time2-time1:0.1f} seconds")
    
#VARIABLES FOR DOWNLOAD

T_event = 30
if folder == "normal": H_event = 1000
else: H_event = 10 #10
threads_at_once = 100
print(folder, H_event)

events_filtered = events
if station_to_get is not None and single:
    events_filtered[events_filtered['closest_station'] == station_to_get]
events_filtered = events_filtered[0:] #30000 $23000

for event_sublist in [events_filtered[x:x+threads_at_once] for x in range(0, len(events_filtered), threads_at_once)]:
    await final_download_threaded(event_sublist)
print("Done!")

active 10
100 threads took 0.0 seconds




100 threads took 33.3 seconds




100 threads took 50.3 seconds




100 threads took 37.1 seconds




100 threads took 49.2 seconds




100 threads took 52.4 seconds




100 threads took 56.1 seconds




100 threads took 51.7 seconds




100 threads took 54.3 seconds




100 threads took 44.9 seconds




100 threads took 42.4 seconds




100 threads took 42.9 seconds




100 threads took 48.0 seconds




100 threads took 49.7 seconds




100 threads took 35.9 seconds




100 threads took 41.4 seconds




100 threads took 46.3 seconds
100 threads took 43.7 seconds




100 threads took 41.9 seconds




100 threads took 44.2 seconds




100 threads took 44.6 seconds




100 threads took 39.4 seconds




100 threads took 36.5 seconds




100 threads took 37.8 seconds




100 threads took 36.3 seconds




100 threads took 27.1 seconds


Will be discarded.


100 threads took 36.7 seconds




100 threads took 39.3 seconds




100 threads took 28.9 seconds




100 threads took 35.3 seconds




100 threads took 35.8 seconds




100 threads took 38.1 seconds




100 threads took 38.5 seconds




100 threads took 30.4 seconds
100 threads took 37.4 seconds




100 threads took 34.0 seconds




100 threads took 35.5 seconds




100 threads took 29.7 seconds




100 threads took 35.2 seconds




100 threads took 35.9 seconds




100 threads took 37.5 seconds




100 threads took 30.1 seconds




100 threads took 35.7 seconds




100 threads took 36.2 seconds




100 threads took 37.4 seconds




100 threads took 38.1 seconds




100 threads took 37.0 seconds




100 threads took 32.4 seconds




100 threads took 33.9 seconds




100 threads took 38.8 seconds




100 threads took 42.3 seconds




100 threads took 41.4 seconds




100 threads took 33.4 seconds




100 threads took 38.3 seconds




100 threads took 40.7 seconds




100 threads took 40.8 seconds




100 threads took 41.1 seconds




100 threads took 37.4 seconds




100 threads took 30.6 seconds




100 threads took 29.8 seconds




100 threads took 43.7 seconds




100 threads took 42.5 seconds




100 threads took 31.8 seconds




100 threads took 38.5 seconds




100 threads took 37.4 seconds




100 threads took 45.1 seconds




100 threads took 36.7 seconds




100 threads took 40.3 seconds




100 threads took 42.6 seconds




100 threads took 45.2 seconds




100 threads took 42.5 seconds




100 threads took 45.2 seconds




100 threads took 49.2 seconds




100 threads took 46.2 seconds




100 threads took 42.0 seconds




100 threads took 36.1 seconds




100 threads took 44.7 seconds




100 threads took 45.9 seconds




100 threads took 49.9 seconds




100 threads took 41.9 seconds




100 threads took 36.2 seconds




100 threads took 40.8 seconds




100 threads took 43.3 seconds




100 threads took 35.0 seconds




100 threads took 38.8 seconds




100 threads took 30.7 seconds




100 threads took 48.3 seconds




100 threads took 37.7 seconds




100 threads took 45.2 seconds




100 threads took 40.3 seconds




100 threads took 39.4 seconds




100 threads took 28.5 seconds




100 threads took 34.6 seconds




100 threads took 33.1 seconds




100 threads took 41.4 seconds




100 threads took 38.6 seconds




100 threads took 34.9 seconds




100 threads took 43.0 seconds




100 threads took 42.9 seconds




100 threads took 38.5 seconds




100 threads took 48.3 seconds




100 threads took 38.1 seconds




100 threads took 43.8 seconds




100 threads took 34.3 seconds




100 threads took 43.7 seconds




100 threads took 42.9 seconds




100 threads took 32.3 seconds




100 threads took 38.7 seconds




100 threads took 27.3 seconds




100 threads took 31.6 seconds




100 threads took 39.0 seconds




100 threads took 27.2 seconds




100 threads took 39.4 seconds




100 threads took 40.6 seconds




100 threads took 25.2 seconds




100 threads took 36.6 seconds




100 threads took 29.4 seconds




100 threads took 37.2 seconds




100 threads took 41.7 seconds




100 threads took 38.3 seconds




100 threads took 38.5 seconds




100 threads took 36.0 seconds




100 threads took 36.3 seconds




100 threads took 34.8 seconds




100 threads took 36.3 seconds




100 threads took 27.9 seconds




100 threads took 35.4 seconds




100 threads took 41.0 seconds




100 threads took 35.1 seconds




100 threads took 36.5 seconds




100 threads took 32.8 seconds
Done!


In [None]:
for i in pd.interval_range(start=100, end=501, freq=30, closed='right').to_tuples():
    print(i)

In [38]:
def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)
min([abs(x-y) for x, y in pairwise(events['time'])])

210.958631