In [1]:
"""
This notebook directly sonifies time series data and contains lots 
of modified content from Ben Holtzman's sonification notebooks:
https://github.com/benholtzman/datamovies_y21

It also generates an animation to accompany the audio so you
can see the data in time and frequency space while listening. 
"""

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# # # # # # # # # # # # # # # # # # # # Import and trim data # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

import obspy
from obspy import read, Stream
from obspy.core.utcdatetime import UTCDateTime
import numpy as np
import pickle

# Data-source switches for this cell: either rebuild the stream from the raw
# miniSEED files (and pickle it), or load a previously pickled stream.
FILENAME = "rr10"              # pickle file stem; ".pkl" is appended on use
GENERATE_DATA = False          # True -> rebuild the stream from miniSEED files
SAVE_DATA = GENERATE_DATA      # freshly generated data is always pickled
LOAD_DATA = not GENERATE_DATA  # otherwise fall back to the existing pickle

# helper function to wrangle mseed files
def trim_Daily_Waveforms(project_Name: str, start_Time, end_Time,
                         channels: list, write_File=False):
    '''Load daily miniSEED files into one ObsPy Stream trimmed to a window.

    One file per (station, channel) pair is read for the calendar day of
    start_Time, merged, and appended to a single Stream. The Stream is then
    trimmed to [start_Time, end_Time]. If any trace has a sampling rate
    different from the first trace, every trace is instead interpolated
    (Lanczos) onto the first trace's sampling rate, starting at start_Time.

    As a "distance hack" for picking, each trace's network code is
    overwritten with the two-digit position of its station in the station
    table below.

    Parameters
    ----------
    project_Name : str
        Project to load. Only "Rattlesnake Ridge" is supported.
    start_Time, end_Time : UTCDateTime
        Bounds of the returned window. Only the date of start_Time is used
        to pick the daily files, so the window is expected to lie within
        that day's files.
    channels : list
        Channel codes to load, e.g. ['DP1', 'EHN'].
    write_File : bool
        If True, also write the resulting Stream to a miniSEED file named
        "<alias>_<HH.MM.SS start>_<HH.MM.SS end>.ms".

    Returns
    -------
    obspy Stream
        The loaded (and trimmed or interpolated) traces.

    Raises
    ------
    ValueError
        If project_Name is not a supported project.

    Example
    -------
    start_Time = UTCDateTime("2018-03-13T01:33:00.0Z")
    end_Time =   UTCDateTime("2018-03-13T01:34:00.0Z")
    project_Name = "Rattlesnake Ridge"
    channels = ['DP1', 'EHN']
    trim_Daily_Waveforms(project_Name, start_Time, end_Time, channels,
                         write_File=False)
    '''
    project_Aliases = {"Rattlesnake Ridge": "RR"}

    # Fail early with a clear message instead of hitting an unbound
    # obspyStream further down when the project is not recognized.
    if project_Name != "Rattlesnake Ridge":
        raise ValueError(f"Unsupported project_Name {project_Name!r}; "
                         f"expected one of {sorted(project_Aliases)}")

    # build filepath list based on dates, station type, and channels
    node = ['DP1', 'DP2', 'DPZ']  # nodal seismometer channels
    ugap = ['EHN', 'EHE', 'EHZ']  # channels of the UGAP stations
    # order matters for distance hack
    stations_channels = {'1': node, '2': node, '3': node, '5': node,
                         '4': node, '6': node, '7': node, '8': node,
                         '13': node, '9': node, '10': node, '12': node,
                         '15': node, 'UGAP3': ugap, '16': node, '17': node,
                         '18': node, '20': node, '21': node, '22': node,
                         '23': node, '25': node, '26': node, '27': node,
                         'UGAP5': ugap, 'UGAP6': ugap, '28': node,
                         '30': node, '31': node, '32': node, '33': node,
                         '34': node, '35': node, '36': node, '37': node,
                         '38': node, '39': node, '40': node, '41': node,
                         '42': node}

    # to view stations in distance along scarp for picking in Snuffler
    station_distance_hack = {station: index for index, station in
                             enumerate(stations_channels)}

    # LOCAL LOCATION: one daily file per requested (station, channel) pair
    day_Stamp = (f"{start_Time.year}-{start_Time.month:02}-"
                 f"{start_Time.day:02}")
    filepaths = [
        f"/Users/tyler/Desktop/MSEED/5A.{station}..{channel}."
        f"{day_Stamp}T00.00.00.ms"
        for station, station_Channels in stations_channels.items()
        for channel in station_Channels
        if channel in channels  # is this a channel we specified?
    ]

    obspyStream = Stream()
    for filepath in filepaths:
        file_Stream = read(filepath).merge(method=1, fill_value=0)
        # station-distance hack for picking, assign number to network.
        # Tag the traces of this file before appending so the result does
        # not depend on every file contributing exactly one trace.
        for trace in file_Stream:
            hack = station_distance_hack[trace.stats.station]
            trace.stats.network = f'{hack:02}'
        obspyStream += file_Stream

    # make sure all traces have the same sampling rate (and number of
    # samples and length) to avoid bugs
    interpolate = False
    for index, trace in enumerate(obspyStream):
        # the sampling rate of the first trace is assumed to be correct
        if trace.stats.sampling_rate != obspyStream[0].stats.sampling_rate:
            print(f"Trace {index} has a different sampling rate. ")
            print(f"Station {trace.stats.station}, Channel "
                  f"{trace.stats.channel}, Start: "
                  f"{trace.stats.starttime}, End: {trace.stats.endtime}")
            # raise the flag
            interpolate = True

    if interpolate:
        print("Interpolating...")
        # interpolate to correct sampling rate and trim to correct time period
        sampling_Rate = obspyStream[0].stats.sampling_rate
        # NOTE: UTCDateTime arithmetic is in seconds, so this pads the
        # window by `sampling_Rate` *seconds* on each side (250 s at 250 Hz)
        # to give the interpolation kernel data beyond the window edges.
        obspyStream = obspyStream.trim(start_Time - sampling_Rate,
                                       end_Time + sampling_Rate)
        npts = int((end_Time - start_Time) * sampling_Rate)
        obspyStream.interpolate(sampling_Rate, method="lanczos",
                                starttime=start_Time, npts=npts, a=30)
    else:
        # trim to specified time period
        obspyStream = obspyStream.trim(start_Time, end_Time)

    if write_File:
        # format filename and save Stream as miniseed file; characters
        # 11:19 of the timestamp string are HH:MM:SS (use [:19] for date
        # and time)
        start_Time_Stamp = str(obspyStream[0].stats.starttime)[
                           11:19].replace(":", ".")
        end_Time_Stamp = str(obspyStream[0].stats.endtime)[
                         11:19].replace(":", ".")
        # writes to specified path
        obspyStream.write(f"/Users/tyler/Desktop/Programs/sonification/"
                          f"{project_Aliases[project_Name]}_{start_Time_Stamp}_"
                          f"{end_Time_Stamp}.ms", format="MSEED")

    return obspyStream

# Either rebuild the stream from the raw miniSEED files (optionally pickling
# it for later runs) or restore a previously pickled stream from disk.
if GENERATE_DATA:
    project_Name = "Rattlesnake Ridge"
    channels = ['DP1', 'EHN']
    start_Time = UTCDateTime("2018-03-13T22:00:00.0Z")
    end_Time = UTCDateTime("2018-03-13T23:00:00.0Z")
    stream = trim_Daily_Waveforms(project_Name, start_Time, end_Time,
                                  channels, write_File=False)

    print("\nFinished loading data.")
    print(f"Data length: {len(stream[0].data)}")

    if SAVE_DATA:
        # cache the stream so later runs can skip the slow miniSEED read
        with open(f"{FILENAME}.pkl", 'wb') as f:
            pickle.dump(stream, f)
        print("Finished pickling.")

elif LOAD_DATA:
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # pickles that were produced locally by this notebook.
    with open(f"{FILENAME}.pkl", 'rb') as f:
        stream = pickle.load(f)

    print("\nFinished loading data.")
    print(f"Data stats: \n{stream[0].stats}\n")

    # duration of the first trace in seconds, reported in three units
    _duration = stream[0].stats.endtime - stream[0].stats.starttime
    print(f"Data duration: {_duration} seconds")
    print(f"Data duration: {_duration / 60} minutes")
    print(f"Data duration: {_duration / 60 / 60} hours")
    
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # pickle file log # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# FILENAME  |  DURATION  |       START TIME - - - - - - - - END TIME 
# rr0.1.pkl | 6 seconds  | 2018-03-13T06:00:00.0Z - 2018-03-13T06:00:06.0Z
# rr1.pkl   | 1 minute   | 2018-03-13T06:00:00.0Z - 2018-03-13T06:01:00.0Z
# rr10.pkl  | 10 minutes | 2018-03-13T06:00:00.0Z - 2018-03-13T06:10:00.0Z
# rr.pkl    | 6 hours    | 2018-03-13T06:00:00.0Z - 2018-03-13T12:00:00.0Z
# rr60.pkl  | 1 hour     | 2018-03-13T22:00:00.0Z - 2018-03-13T23:00:00.0Z


Finished loading data.
Data stats: 
         network: 00
         station: 1
        location: 
         channel: DP1
       starttime: 2018-03-13T22:00:00.000000Z
         endtime: 2018-03-13T22:59:59.996000Z
   sampling_rate: 250.0
           delta: 0.004
            npts: 900000
           calib: 1.0
         _format: MSEED
           mseed: AttribDict({'dataquality': 'D', 'number_of_records': 8993, 'encoding': 'STEIM2', 'byteorder': '>', 'record_length': 4096, 'filesize': 36835328})
      processing: ['ObsPy 1.2.2: trim(endtime=UTCDateTime(2018, 3, 13, 23, 4, 10)::fill_value=None::nearest_sample=True::pad=False::starttime=UTCDateTime(2018, 3, 13, 21, 55, 50))', "ObsPy 1.2.2: interpolate(a=30::args=()::method='lanczos'::npts=900000::sampling_rate=250.0::starttime=UTCDateTime(2018, 3, 13, 22, 0)::time_shift=0.0)"]

Data duration: 3599.996 seconds
Data duration: 59.99993333333334 minutes
Data duration: 0.999998888888889 hours
