# Query EarthScope MT Data Holdings

In [1]:
# --- Run configuration -------------------------------------------------
# Stage switches: each flag gates one of the later `if do...:` cells
# (query the web services, build the summary table, draw the charts).
doQuery, doSummary, doCharts = True, True, True

# Stem shared by the files this notebook writes: the combined hours table
# (`outfile`) and the per-network availability dumps (`<filebase>_<net>.txt`).
filebase = 'mt_availability'
outfile = filebase + '.txt'


In [2]:
import pandas as pd
import time
import os

# Channel patterns to request; joined into the comma-separated list that the
# `cha=` query parameter of both web services takes.
channels=['?FE', '?FN', '?FZ','?F1','?F2', '?QE', '?QN', '?QZ', '?Q1', '?Q2']
channels = ','.join(channels)


if doQuery:
    # Start a fresh results file. Every later record is written with a leading
    # newline, so the header deliberately has no trailing one.
    with open(outfile, 'w') as f:
        f.write("net.sta,chan,hours")

    # Step 1: ask the station service which networks have matching channels.
    sta_URL=f"http://service.iris.edu/fdsnws/station/1/query?cha={channels}&level=channel&format=text&includecomments=true&nodata=204"
    print(sta_URL)

    try:
        network_df=pd.read_csv(sta_URL, sep='|')
    except Exception as e:
        print(f"ERROR with station service {sta_URL}")
        print(f"ERROR: {e}")
        # Re-raise instead of calling quit(): inside Jupyter quit() shuts the
        # kernel down and throws away all notebook state, whereas an exception
        # just stops this cell (and a "Run All") with a visible traceback.
        raise

    networks=network_df['#Network '].unique()

    # Step 2: per network, fetch the availability extents, save them to
    # `<filebase>_<network>.txt`, and append per-station/channel hour totals
    # to `outfile`.
    for network in networks:
        print(network)
        netfile = f'{filebase}_{network}.txt'
        av_URL = f"http://service.iris.edu/fdsnws/availability/1/query?format=text&net={network}&cha={channels}&orderby=nslc_time_quality_samplerate&includerestricted=false&nodata=204" 

        print(av_URL)
        try:
            avail = pd.read_csv(av_URL, sep=" ")
        except Exception as e:
            # Best effort: record the failure in the results file as a
            # '#'-prefixed marker line and move on to the next network.
            print(f"ERROR with availability service {av_URL} ")
            print(f"ERROR: {e}")
            with open(outfile, 'a') as f:
                f.write(f"\n#ERROR with {network}")
            time.sleep(2)
            continue

        avail.columns = avail.columns.str.strip()
        avail['Latest'] = pd.to_datetime(avail['Latest'], format="%Y-%m-%dT%H:%M:%S.%f") 
        avail['Earliest'] = pd.to_datetime(avail['Earliest'],  format="%Y-%m-%dT%H:%M:%S.%f") 
        avail['Span'] = avail.Latest - avail.Earliest

        avail.to_csv(netfile, index=False)

        # Total covered time per (station, channel), expressed in hours.
        hours_by_chan = (
            avail.groupby(by=['Station','Channel'])['Span'].sum()
            / pd.Timedelta(hours=1)
        )

        # Open the results file once per network rather than once per row.
        with open(outfile, 'a') as f:
            for (station, channel), hours in hours_by_chan.items():
                f.write(f"\n{network}.{station},{channel},{'%.2f' % hours}")

        # Pause between requests so the service is not hit back-to-back.
        time.sleep(5)

http://service.iris.edu/fdsnws/station/1/query?cha=?FE,?FN,?FZ,?F1,?F2,?QE,?QN,?QZ,?Q1,?Q2&level=channel&format=text&includecomments=true&nodata=204
1H
http://service.iris.edu/fdsnws/availability/1/query?format=text&net=1H&cha=?FE,?FN,?FZ,?F1,?F2,?QE,?QN,?QZ,?Q1,?Q2&orderby=nslc_time_quality_samplerate&includerestricted=false&nodata=204
4P
http://service.iris.edu/fdsnws/availability/1/query?format=text&net=4P&cha=?FE,?FN,?FZ,?F1,?F2,?QE,?QN,?QZ,?Q1,?Q2&orderby=nslc_time_quality_samplerate&includerestricted=false&nodata=204
7I
http://service.iris.edu/fdsnws/availability/1/query?format=text&net=7I&cha=?FE,?FN,?FZ,?F1,?F2,?QE,?QN,?QZ,?Q1,?Q2&orderby=nslc_time_quality_samplerate&includerestricted=false&nodata=204
8J
http://service.iris.edu/fdsnws/availability/1/query?format=text&net=8J&cha=?FE,?FN,?FZ,?F1,?F2,?QE,?QN,?QZ,?Q1,?Q2&orderby=nslc_time_quality_samplerate&includerestricted=false&nodata=204
8P
http://service.iris.edu/fdsnws/availability/1/query?format=text&net=8P&cha=?FE,?FN,?FZ,?

In [3]:
if doSummary:
    # Load the per-station/channel hour totals written by the query cell.
    # Lines starting with '#' are the "#ERROR with <net>" markers, not data.
    sumDF = pd.read_csv(outfile, comment='#')

    # Split "NET.STA" into separate network and station columns.
    net_sta = sumDF['net.sta'].str.split('.', n=1)
    sumDF['net'] = net_sta.str[0]
    sumDF['sta'] = net_sta.str[1]

    # Station counts and time statistics are taken from channels ending in
    # 'Z' only; the channel list reported per network uses every channel.
    zDF = sumDF[sumDF.chan.str.endswith('Z', na=False)]

    # Group by the bare column name (not a one-element list): with a list,
    # pandas >= 2.0 yields 1-tuples such as ('IU',) as the group key, which
    # would break the `== name` lookup below and the Network column.
    records = []
    for name, group in zDF.groupby('net'):
        records.append({
            'Network': name,
            # Plain list (not ndarray) so DataFrame construction never tries
            # to coerce a ragged array.
            'Channels': list(sumDF.loc[sumDF['net'] == name, 'chan'].unique()),
            '# Stations': group['sta'].nunique(),
            'Average Time': group['hours'].mean(),
            'Total Time': group['hours'].sum(),
        })

    # Build the table in one go instead of appending row by row.
    summaryDF = pd.DataFrame(
        records,
        columns=['Network', 'Channels', '# Stations', 'Average Time', 'Total Time'],
    )

    # Hours (float) -> whole-hour Timedelta for readable "N days HH:MM:SS"
    # output. Lowercase 'h' is the non-deprecated unit alias.
    for col in ('Average Time', 'Total Time'):
        summaryDF[col] = pd.to_timedelta(summaryDF[col].astype(float).round(), unit='h')

    display(summaryDF)
    

  element = np.asarray(element)


Unnamed: 0,Network,Channels,# Stations,Average Time,Total Time
0,1H,"[LFE, LFN, LFZ, LQE, LQN]",117,19 days 09:00:00,2267 days 01:00:00
1,4P,"[LFE, LFN, LFZ, LQE, LQN]",978,22 days 21:00:00,22382 days 20:00:00
2,8J,"[LFE, LFN, LFZ, LQE, LQN]",25,79 days 04:00:00,1979 days 01:00:00
3,8P,"[LFE, LFN, LFZ, LQE, LQN]",119,25 days 03:00:00,2989 days 07:00:00
4,AV,"[LFE, LFN, LFZ]",1,157 days 11:00:00,157 days 11:00:00
5,EM,"[LFE, LFN, LFZ, LQE, LQN, VFE, VFN, VFZ, VQE, ...",1728,24 days 05:00:00,41841 days 02:00:00
6,II,"[LFE, LFN, LFZ, BF1, BF2, BFE, BFN, BFZ, LF1, ...",2,4288 days 12:00:00,12865 days 12:00:00
7,IU,"[LF1, LF2, LFZ, UFZ, VFZ, LFE, LFN]",12,2973 days 21:00:00,41634 days 12:00:00
8,N4,"[LF1, LF2, LFZ]",2,972 days 05:00:00,1944 days 10:00:00
9,NV,"[MF1, MF2, MFZ, MFE, MFN]",2,257 days 13:00:00,515 days 03:00:00


In [4]:
if doCharts:
    from bokeh.plotting import figure, output_file, show, save
    from bokeh.layouts import column, gridplot
    from bokeh.models import ColumnDataSource, HoverTool
    from bokeh.models import Range1d
    from bokeh.io import output_notebook

    import os
    import fnmatch

    # Per-network availability files are named `<filebase>_<network>.txt`.
    # Sort them so the panel order (and which panel anchors the shared
    # x-axis) is the same on every run; os.listdir order is arbitrary.
    prefix = f'{filebase}_'
    suffix = '.txt'
    files = sorted(fnmatch.filter(os.listdir('.'), f'{prefix}*{suffix}'))

    # Number of sample dates across each network's time span.
    nbins = 100

    # Render inline; configuring this once is enough for all figures.
    output_notebook()

    s = list()
    for file in files:
        # Recover the network code by stripping the known prefix/suffix, so
        # this keeps working whatever underscores `filebase` contains.
        network = file[len(prefix):-len(suffix)]
        print(network)

        thisAvail = pd.read_csv(file,parse_dates=['Earliest','Latest'])
        earliest = thisAvail['Earliest'].min()
        latest = thisAvail['Latest'].max()
        print(earliest, latest)

        # Evenly spaced sample dates; at each one, count the distinct
        # stations that have an extent covering that date.
        datelist = pd.date_range(earliest, latest, periods=nbins)
        counts = [
            len(thisAvail.loc[
                (thisAvail['Earliest'] <= date) & (thisAvail['Latest'] >= date),
                'Station',
            ].unique())
            for date in datelist
        ]

        # Upper/Lower are +count/-count so the filled band is symmetric about
        # zero and its thickness shows the number of stations.
        numDF = pd.DataFrame({
            'Date': datelist,
            'Lower': [-c for c in counts],
            'Upper': counts,
        })
        source = ColumnDataSource(numDF)

        # Every panel after the first shares the first panel's x-range so
        # panning/zooming keeps all networks on the same time axis.
        fig_kwargs = dict(width=800, height=100, x_axis_type="datetime", title=network)
        if s:
            fig_kwargs['x_range'] = s[0].x_range
        panel = figure(**fig_kwargs)

        panel.varea(x='Date', y1='Upper', y2='Lower', source=source)
        # NOTE(review): fixed +/-30 range clips any network with more than 30
        # concurrent stations -- confirm this is intended.
        panel.y_range = Range1d(-30, 30)
        panel.add_tools(HoverTool(
            tooltips=[
                ( 'count', '@Upper'),
                # Format as a date; without a formatter the hover shows the
                # raw epoch value of the datetime column.
                ( 'time', '@Date{%F %T}'),
            ],
            formatters={'@Date': 'datetime'},
        ))
        s.append(panel)

    if s:
        p = gridplot(s, ncols=1)
        show(p)
        output_file('mt_numbers.html')
        save(p)
    else:
        print(f"No availability files matching {prefix}*{suffix} found; nothing to plot")
    


        
        
    
        
        # Histogram of length of each station
        # Histogram of length of each segment? Do I have that info?



1H
2013-04-02 21:24:16+00:00 2018-09-30 18:12:19+00:00


7I
2018-03-07 00:05:05.939000+00:00 2018-03-11 23:59:55.939000+00:00


NV
2018-08-11 01:39:51.600000+00:00 2023-04-07 23:59:59.800000+00:00


EM
1996-09-27 04:45:00+00:00 2018-11-12 20:29:04+00:00


II
2008-05-16 18:35:41.069500+00:00 2023-04-08 00:02:00.999996+00:00


XC
2015-01-08 19:49:15+00:00 2015-02-24 14:10:13.875000+00:00


US
2019-08-02 00:00:00.000012+00:00 2023-04-07 23:59:59.999998+00:00


BK
2004-06-15 17:57:24.972266+00:00 2011-05-05 20:02:45.112597+00:00


YB
2018-04-24 18:44:01+00:00 2020-06-13 00:01:42.600000+00:00


4P
2006-08-03 19:39:27+00:00 2017-11-05 18:35:35+00:00


8J
2015-08-26 00:06:34+00:00 2015-12-11 22:56:42+00:00


AV
2022-10-24 17:35:06+00:00 2023-04-07 23:59:59+00:00


Z7
2017-10-21 17:59:05+00:00 2017-11-15 21:49:13.875000+00:00


8P
2019-05-03 22:29:40+00:00 2021-06-26 15:10:19+00:00


IU
1990-02-02 15:01:21.934000+00:00 2023-04-07 23:59:59.069539+00:00


SF
2008-09-29 15:53:01.363000+00:00 2008-10-04 03:01:11.633750+00:00


ZU
2020-06-23 21:56:01+00:00 2023-01-13 18:56:57+00:00


N4
2019-09-19 21:03:20.069644+00:00 2023-04-07 23:59:59+00:00


SN
2012-07-24 13:43:14.828000+00:00 2019-06-27 20:37:19.654000+00:00
