# Extract features

In this notebook, we extract features from the seismic signals stored in the database.

In [None]:
from tqdm.notebook import tqdm

from obspy import ObsPyException

import analysis
import catalog
import energy

# Register tqdm's pandas integration so that progress-bar variants of the
# pandas apply methods (e.g. `progress_apply`) become available.
tqdm.pandas()
# NOTE(review): project-local helper; presumably configures display/plot
# settings for this notebook — confirm in the `catalog` module.
catalog.display_parameters()

## Load the database

In [None]:
# Path of the pickled dataframe, relative to the notebook's working directory.
DATAFRAME_PATH = "data/new_dataframe_with_inventory_cleaned.pickle"

# Read the database through the project loader.
dataframe = catalog.load(DATAFRAME_PATH)

# Overview plot of the loaded database.
# NOTE(review): the meaning of `pos_number` and `xlim` is defined in
# `catalog.open_plot` — confirm there before tuning them.
plot_options = {"pos_number": 0.04, "xlim": 15000}
catalog.open_plot(dataframe, **plot_options)

## Extract features

For each event, the seismic signal is first filtered and then isolated with the detection method. Features are then extracted from the spectrogram and from the energy envelope of the detected signal.

In [None]:
## Loop over all events
# Events whose processing failed, stored as (event_index, error description)
# pairs so they can be reviewed after the loop instead of being lost in the
# printed output. The description is a string (not the exception object) so
# no traceback, and the data it references, is kept alive.
failed_events = []

for event_index in tqdm(dataframe["numero"], total=len(dataframe)):

    ## To inspect one specific event, uncomment the two lines below
    # if event_index != 0:
    #     continue

    print("-------------------------")
    print("Event number", event_index)
    print("-------------------------")

    try:
        ## Filter the stream
        # NOTE(review): freq_HP=9 is larger than freq_LP=0.5 — confirm how
        # analysis.filter_stream interprets these two corner frequencies.
        _, trace = analysis.filter_stream(
            dataframe, event_index, 0, freq_HP=9, freq_LP=0.5, max_percentage=0.3
        )

        ## The detection method
        # Only `trimmed_data` is used below; the other names are kept because
        # they stay defined at notebook level and later cells may read them.
        (
            time_start_detection,
            data_start_detection,
            trimmed_time,
            trimmed_data,
            time_raw,
            data_raw,
            upper_threshold,
            lower_threshold,
        ) = analysis.detection_on_one_trace(trace, dataframe, event_index)

        ## Extract features from the spectrogram and save them in a dataframe for each event
        analysis.plot_spectre(trace, dataframe, trimmed_data, 0, event_index, conserv_result=True)

        ## Extract features from the energy envelope and save them in a dataframe for each event
        energy.compute(dataframe, trace, event_index)

    except (IndexError, ObsPyException, ValueError) as error:
        # Best effort: a failing event must not stop the whole run, but the
        # cause is reported and recorded so the failure can be diagnosed.
        print("An error occurred in event", event_index)
        print("  ->", type(error).__name__ + ":", error)
        failed_events.append((event_index, repr(error)))

In [4]:
# Count the events by source type.
# The boolean mask is summed directly instead of indexing `value_counts()`
# with the integers 1 and 0: the result of `value_counts()` is labelled
# True/False and sorted by frequency, so integer keys are ambiguous (label vs.
# position), can swap the two counts when earthquakes are the majority class,
# and fail when one of the two classes is absent.
is_earthquake = dataframe["type"] == "earthquake"
earthquake_count = int(is_earthquake.sum())
other_count = int((~is_earthquake).sum())

print("In the database, there are", earthquake_count, "seismic signals generated by earthquakes and", other_count, "generated by other sources")

In the database, there are 4618 seismic signals generated by earthquakes and 8064 generated by other sources


  print("In the database, there are", (dataframe["type"] == "earthquake").value_counts()[1], "seismic signals generated by earthquakes and", (dataframe["type"] == "earthquake").value_counts()[0], "generated by other sources")
