# read, format and classify data from original mseed files

Ong, Giani, Nielsen

**mount Google Drive as a disk to access files and data**



In [1]:
# Colab-only: attach the user's Google Drive to the runtime file system at
# /content/drive so the data folder can be reached with ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# IPython `cd` magic: make the project folder the working directory, so the
# relative "./Data/..." paths used further down resolve inside it.
cd  "/content/drive/MyDrive/DISS_FOLDER/Veda_Data"

/content/drive/MyDrive/DISS_FOLDER/Veda_Data


In [3]:
# Normalisation scheme applied to the windows further down; the code there
# recognises 'new', 'new2' and 'old'. The value is also embedded in the names
# of the output pickle files (normpre_<choice>.p / normnoise_<choice>.p).
choice = 'new2'

**load libraries for importing and formatting the input data:**
---



In [4]:
!pip install obspy
from obspy import read as readobs
from obspy import read_inventory
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import pickle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


**Import the "original" data from mseed files, format them:**

In [5]:
# Load the recordings TEST1.mseed ... TEST89.mseed into "events"
# (the running index is printed as a simple progress indicator):
events = []
for i in range(1, 90):
    print(i)
    events.append(readobs(f"./Data/seeddata32/TEST{i}.mseed"))

# Positions within "events" of the recordings that are kept:
eventslist = [
    0, 3, 4, 6, 7, 8, 9, 10, 11, 12,
    13, 14, 15, 17, 18, 20, 23, 26, 30, 32,
    35, 36, 37, 39, 41, 43, 46, 50, 51, 52,
    54, 57, 58, 59, 63, 65, 66, 69, 81, 87,
]

# Pick out the listed recordings and store them in "selectedevents":
selectedevents = [events[idx] for idx in eventslist]

# From selectedevents, take the sample arrays of channels 0, 1 and 2
# and store them, one [ch0, ch1, ch2] list per event, in "eventdata":
eventdata = [
    [record[0].data, record[1].data, record[2].data]
    for record in selectedevents
]

# Keep at most the first 1440000 samples of every channel and put the three
# channels side by side as columns, giving one 2-D array per event:
eventdataconcat = [
    np.concatenate(
        [channel[0:1440000, np.newaxis] for channel in channels],
        axis=1,
    )
    for channels in eventdata
]

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89


The structure of each event in "eventdataconcat" is: <br>
array([  <br>
[c1, c2, c3], # time 1 <br>
[c1, c2, c3], # time 2 <br>
..... <br>
[c1, c2, c3]  # time n <br>
]) <br>
where c1, c2, c3 are the data for the channels 1, 2, 3 resp. and n is the total number of time samples.

### Extract the first and last 40k samples and label them as noise and precursors, respectively

In [6]:
def create_precursors(X1, last = 40000):
    """Return the trailing ``last`` samples of ``X1``.

    Parameters
    ----------
    X1 : sequence or array
        Anything that supports slicing along its first axis.
    last : int, default 40000
        Number of samples to keep from the end. If ``X1`` holds fewer than
        ``last`` samples, all of them are returned.

    Returns
    -------
    The slice of ``X1`` holding its final ``last`` samples (same type as a
    plain slice of ``X1``).

    Raises
    ------
    ValueError
        If ``last`` is negative.
    """
    if last < 0:
        raise ValueError("last must be non-negative, got %r" % (last,))
    if last == 0:
        # X1[-0:] is X1[0:], i.e. the whole input; asking for zero samples
        # must give an empty slice instead.
        return X1[:0]
    return X1[-last:]
    
def create_noise(X1, first = 0, second = 40000):
    """Return the samples of ``X1`` from index ``first`` up to, but not
    including, index ``second`` (a plain slice along the first axis)."""
    span = slice(first, second)
    return X1[span]

# 'precursors': the final 40000 time steps of each event (all 3 channels).
# 'noise':      the first 40000 time steps of each event (all 3 channels).
precursors = [create_precursors(record, 40000) for record in eventdataconcat]
noise = [create_noise(record, 0, 40000) for record in eventdataconcat]

### Make windows of length "window_length" from the selected 'noise' and 'precursor' data 
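Consecutive windows are offset by a stride of 650 timesteps (so neighbouring windows overlap by 16384 - 650 = 15734 timesteps).
With this stride, each precursor segment of 40000 samples produces 37 "normpre" windows of 16384 samples. 
The same holds for the noise segments.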

In [7]:
window_length=16384

def make_windows(X1, sample_stride = 650, length = None):
    """Cut ``X1`` into fixed-length windows taken every ``sample_stride`` samples.

    Windows start at indices 0, ``sample_stride``, 2*``sample_stride``, ...
    and only complete windows are produced. The last admissible start index
    is ``len(X1) - length``, so an input of exactly ``length`` samples yields
    one window.

    Parameters
    ----------
    X1 : sequence or array
        Data to be windowed along its first axis.
    sample_stride : int, default 650
        Offset between the starts of consecutive windows; must be positive.
    length : int, optional
        Window size in samples. Defaults to the module-level
        ``window_length``.

    Returns
    -------
    list
        The windows, each one a slice ``X1[start:start + length]``.

    Raises
    ------
    ValueError
        If ``sample_stride`` or ``length`` is not positive.
    """
    if length is None:
        length = window_length
    if sample_stride <= 0:
        raise ValueError("sample_stride must be positive, got %r" % (sample_stride,))
    if length <= 0:
        raise ValueError("length must be positive, got %r" % (length,))

    # +1 so that the final complete window (start == len(X1) - length) is kept;
    # the range is empty when X1 is shorter than one window.
    last_start = len(X1) - length
    return [X1[start:start + length]
            for start in range(0, last_start + 1, sample_stride)]
   
# Slice every event's precursor segment and noise segment into windows,
# using a stride of 650 samples; one list of windows per event.
precursor_windows = [make_windows(segment, 650) for segment in precursors]
noise_windows = [make_windows(segment, 650) for segment in noise]

### Normalisation of signal
Three different options for the normalisation function: 
- new 
- new2
- old

In previous work (Ong et al.), normalise_old was used. 

In [8]:
def normalise_new(X1):
    """Mean-strip and peak-normalise every channel of every window separately.

    For each 2-D window in ``X1`` (rows = time samples, columns = channels)
    the per-column mean is subtracted and each column is then divided by its
    own maximum absolute value, so every channel ends up within [-1, 1].

    A channel that is constant over the window has zero amplitude after the
    mean is removed; it is returned as all zeros rather than as NaNs from a
    0/0 division.

    Parameters
    ----------
    X1 : iterable of 2-D arrays
        The windows to normalise.

    Returns
    -------
    list of arrays
        One normalised array per input window, in the same order.
    """
    X2 = []
    for data in X1:
        centred = data - np.mean(data, axis=0)
        # Column-wise infinity norm == max |value| per channel.
        # (np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0.)
        peak = np.linalg.norm(centred, axis=0, ord=np.inf)
        # Avoid 0/0 on flat channels; dividing zeros by 1 leaves them zero.
        peak = np.where(peak == 0, 1.0, peak)
        X2.append(centred / peak)
    return X2

def normalise_new2(X1): # not normalise, not strip mean
    """Pass-through "normalisation": the windows are neither mean-stripped
    nor rescaled. Returns a new list holding the very same window objects,
    in their original order."""
    return list(X1)

def normalise_old(X1):
    """Mean-strip and normalise the channels of each window jointly.

    For each window in ``X1`` a single mean, taken over all samples of all
    channels together, is subtracted, and the result is divided by its
    overall norm (``np.linalg.norm`` with default arguments). The original
    author notes that the results look strange but work for the CNN.

    A window that is constant everywhere has zero norm after the mean is
    removed; it is returned as all zeros rather than as NaNs from a 0/0
    division.

    Parameters
    ----------
    X1 : iterable of arrays
        The windows to normalise.

    Returns
    -------
    list of arrays
        One normalised array per input window, in the same order.
    """
    X2 = []
    for data in X1:
        centred = data - np.mean(data)
        scale = np.linalg.norm(centred)
        if scale == 0:
            # Flat window: avoid 0/0, keep the zeros as they are.
            scale = 1.0
        X2.append(centred / scale)
    return X2

# "choice" is set in an earlier cell and selects the normalisation scheme.
_normalisers = {
    'new2': normalise_new2,
    'old': normalise_old,
    'new': normalise_new,
}

# Fail loudly on an unrecognised choice: silently skipping normalisation would
# leave normpre / normnoise empty and those empty lists would then be saved.
if choice not in _normalisers:
    raise ValueError(
        "unknown normalisation choice %r; expected one of %s"
        % (choice, sorted(_normalisers))
    )
_normalise = _normalisers[choice]

# One list of normalised windows per event, for precursors and for noise.
normpre = [_normalise(event) for event in precursor_windows]
normnoise = [_normalise(event) for event in noise_windows]



### Select events for train and test datasets. These events were randomly selected.

In [9]:
# Event indices assigned to the training set and to the test set; both lists
# are written to pickle files below for the CNN stage.
# NOTE(review): the values are presumably positions in the 40-entry per-event
# lists built above (normpre / normnoise) -- confirm against the consumer.
# Not every index in 0..39 appears in either list.
trainlist = [7,8, 3, 4, 5, 6, 9, 10, 11, 16, 17, 20, 21, 22, 24, 25, 27, 28,  
             31,  33, 34, 35, 37, 38]
testlist = [12,  13, 14, 15, 29, 30, 39]    

## output data to files for further use by the CNN

In [10]:
# Each file is opened in a "with" block so its handle is flushed and closed
# as soon as the dump has finished.

# pickle file with all precursor windows
with open("./Data/normpre_"+choice+".p", "wb") as fh:
    pickle.dump(normpre, fh)

# pickle file with all noise windows
with open("./Data/normnoise_"+choice+".p", "wb") as fh:
    pickle.dump(normnoise, fh)

# pickle file with the labels of events in train data
with open("./Data/trainlist.p", "wb") as fh:
    pickle.dump(trainlist, fh)

# pickle file with the labels of events in test data
with open("./Data/testlist.p", "wb") as fh:
    pickle.dump(testlist, fh)
