In [1]:
import h5py, os
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle, random
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [2]:
from bokeh.plotting import figure, show, output_notebook
output_notebook()

In [3]:
### seed_everythin
seed = 1987
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [3]:
waveforms_path = "./STEAD_DATA/waveforms_11_13_19.hdf5"
metadata_path = "./STEAD_DATA/metadata_11_13_19.csv"

In [4]:
metadata_df = pd.read_csv(metadata_path)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [5]:
metadata_df.shape

(1137793, 35)

In [6]:
metadata_df.columns

Index(['network_code', 'receiver_code', 'receiver_type', 'receiver_latitude',
       'receiver_longitude', 'receiver_elevation_m', 'p_arrival_sample',
       'p_status', 'p_weight', 'p_travel_sec', 's_arrival_sample', 's_status',
       's_weight', 'source_id', 'source_origin_time',
       'source_origin_uncertainty_sec', 'source_latitude', 'source_longitude',
       'source_error_sec', 'source_gap_deg',
       'source_horizontal_uncertainty_km', 'source_depth_km',
       'source_depth_uncertainty_km', 'source_magnitude',
       'source_magnitude_type', 'source_magnitude_author',
       'source_mechanism_strike_dip_rake', 'source_distance_deg',
       'source_distance_km', 'back_azimuth_deg', 'snr_db', 'coda_end_sample',
       'trace_start_time', 'trace_category', 'trace_name'],
      dtype='object')

In [7]:
metadata_df.head()

Unnamed: 0,network_code,receiver_code,receiver_type,receiver_latitude,receiver_longitude,receiver_elevation_m,p_arrival_sample,p_status,p_weight,p_travel_sec,...,source_magnitude_author,source_mechanism_strike_dip_rake,source_distance_deg,source_distance_km,back_azimuth_deg,snr_db,coda_end_sample,trace_start_time,trace_category,trace_name
0,TA,109C,BH,32.8889,-117.1051,150.0,700.0,manual,0.5,17.08,...,,,0.92,102.09,159.3,[56.79999924 55.40000153 47.40000153],2896.0,2006-07-23 15:59:00.960000,earthquake_local,109C.TA_20060723155859_EV
1,TA,109C,BH,32.8889,-117.1051,150.0,600.0,manual,0.5,16.879999,...,,,0.91,101.34,281.7,[65. 65.5 61.40000153],5508.0,2006-11-03 15:56:53.610000,earthquake_local,109C.TA_20061103155652_EV
2,TA,109C,BH,32.8889,-117.1051,150.0,500.0,manual,0.5,17.26,...,,,0.92,101.87,280.5,[37.20000076 42. 38.59999847],3114.0,2006-11-03 16:12:24.700000,earthquake_local,109C.TA_20061103161223_EV
3,TA,109C,BH,32.8889,-117.1051,150.0,900.0,manual,0.5,17.280001,...,,,0.93,103.26,281.6,[54.09999847 54.90000153 45.5 ],3152.0,2006-11-14 13:32:22.540000,earthquake_local,109C.TA_20061114133221_EV
4,TA,109C,BH,32.8889,-117.1051,150.0,700.0,manual,0.5,18.139999,...,,,0.92,102.48,4.7,[58.20000076 56.20000076 53.79999924],3134.0,2006-11-27 10:46:41.060000,earthquake_local,109C.TA_20061127104640_EV


In [8]:
data = metadata_df['source_magnitude'].dropna().values
hist, edges = np.histogram(data, density=True, bins=50)

In [9]:
p = figure(title='Histogram', 
           plot_height = 300, plot_width = 500, toolbar_location = 'right')
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
       fill_color="navy", line_color="white", alpha=0.5)

p.y_range.start = 0
p.legend.location = "center_right"
p.legend.background_fill_color = "#fefefe"
p.xaxis.axis_label = 'x'
p.yaxis.axis_label = 'Pr(x)'
p.grid.grid_line_color="white"

show(p)

You are attempting to set `plot.legend.location` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.

You are attempting to set `plot.legend.background_fill_color` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.



### Read Waveforms

In [10]:
### Get the desired filtered data
df = metadata_df[(metadata_df.trace_category == 'earthquake_local') & 
                 (metadata_df.source_distance_km <= 100) &
                 (metadata_df.source_magnitude >=2.0)
                ]

# making a list of trace names for the selected data
ev_list = df['trace_name'].to_list()

In [11]:
print(f'Number of unique seismogram: {len(set(ev_list))}')

Number of unique seismogram: 182330


In [12]:
# retrieving selected waveforms from the hdf5 file: 
dtfl = h5py.File(waveforms_path, 'r')
half_samples = 25
data = []
lables = []
for evi in tqdm(ev_list):
    dataset = dtfl.get('earthquake/local/'+str(evi))
    p_arrival = int(dataset.attrs['p_arrival_sample'])
    s_arrival = int(dataset.attrs['s_arrival_sample'])
    s_arrival_time = np.abs(s_arrival - p_arrival)/100 ## 100 HZ
    start = p_wave_arrival - half_samples
    end = p_wave_arrival + half_samples
    
    # waveforms, 3 channels: first row: E channle, second row: N channel, third row: Z channel
    data.append(np.array(dataset)[start:end, :])
    lables.append(s_arrival_time)

In [36]:
len(data), len(lables)

(182330, 182330)

### Save data as pickle

In [46]:
filename = 'selected_data.pkl'
outfile = open(filename,'wb')
pickle.dump({'data': data, 'labels': lables}, outfile)
outfile.close()