<a href="https://colab.research.google.com/github/shufan6011/GW-Event-Detection/blob/main/Step_2_Basic_GW_Data_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Update:
# Time-series data and spectrogram data are critical to the subsequent steps

# Data Preprocessing

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests, os
from scipy.signal import butter, filtfilt, spectrogram
from sklearn.preprocessing import StandardScaler

import warnings
# Suppress every warning for the rest of the session. This keeps cell output
# clean but also hides warnings raised through the `warnings` module by the
# numerical libraries, so comment this out when debugging.
warnings.filterwarnings('ignore')


In [None]:
# Go to https://gwosc.org
# Look up the GPS time and detector for the data you want, then enter them in the next cell


In [None]:
# GPS time window to fetch.
# For a single event the window collapses to one instant, so t_end simply
# mirrors t_start; set t_end separately only when a longer stretch is wanted.
t_start = 1126259462.4
t_end = t_start

# Detector whose data is read: 'H1', 'L1', or 'V1'
detector = 'H1'


In [None]:
%config InlineBackend.figure_format = 'retina'

try:
    from gwpy.timeseries import TimeSeries
except:
    ! pip install -q "gwpy==3.0.8"
    ! pip install -q "matplotlib==3.9.0"
    ! pip install -q "astropy==6.1.0"
    from gwpy.timeseries import TimeSeries


In [None]:
from gwosc.locate import get_urls

# Take the last URL returned for the chosen detector and GPS window
url = get_urls(detector, t_start, t_end)[-1]

# If an event is chosen, then its info will be shown in url
print('Downloading: ' , url)
fn = os.path.basename(url)

# Download first and check the HTTP status, and only then create the local
# file. Opening the file before the request would leave an empty or
# error-page file on disk if the download failed, and the later read step
# would then fail with a confusing HDF5 error.
straindata = requests.get(url, timeout=60)  # timeout avoids hanging forever on a stalled connection
straindata.raise_for_status()

with open(fn,'wb') as strainfile:
    strainfile.write(straindata.content)


In [None]:
# Load the downloaded GWOSC HDF5 file as a gwpy TimeSeries
strain = TimeSeries.read(fn, format='hdf5.gwosc')

# Optional: zoom in on a short window around the start time
# center = int(t_start)
# strain = strain.crop(center-0.2, center+0.1)

# Pull the plain arrays out of the TimeSeries: sample times and strain samples
timestamps, strain_values = strain.times.value, strain.value

# Two-column table ('time', 'strain') used by every later cell
data = pd.DataFrame(dict(time=timestamps, strain=strain_values))


## Handling Missing Values

In [None]:
# Remove every row in which at least one column is missing
data = data.dropna(axis=0, how='any')

# Report the per-column count of nulls that remain after the cleanup
remaining_nulls = data.isna().sum()
print("\nMissing vals after cleaning:")
print(remaining_nulls)


## Data Noise Filtering

In [None]:
# Band-pass filter function
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Both cutoffs are divided by the Nyquist frequency (``0.5 * fs``) to get the
    normalized band edges that ``scipy.signal.butter`` expects.

    Parameters:
        lowcut: lower band edge, in the same units as ``fs``.
        highcut: upper band edge, in the same units as ``fs``.
        fs: sampling frequency.
        order: filter order passed to ``butter`` (default 5).

    Returns:
        The ``(b, a)`` coefficient pair returned by ``butter``.
    """
    nyquist = 0.5 * fs
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band')

def bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter a 1-D signal.

    The filter is designed by ``butter_bandpass`` and applied with
    ``scipy.signal.filtfilt``.

    Parameters:
        data: samples to filter.
        lowcut, highcut: band edges, in the same units as ``fs``.
        fs: sampling frequency.
        order: filter order forwarded to ``butter_bandpass`` (default 5).

    Returns:
        The filtered samples as returned by ``filtfilt``.
    """
    coefficients = butter_bandpass(lowcut, highcut, fs, order=order)
    return filtfilt(*coefficients, data)

# Filter params
lowcut = 20  # Low cutoff frequency (Hz)
highcut = 500  # High cutoff frequency (Hz)

# Take the sampling rate from the loaded TimeSeries instead of hard-coding
# 4096, so the normalized cutoffs stay correct for files sampled at another rate.
fs = float(strain.sample_rate.value)

# Band-pass filter strain data
# NOTE(review): rows containing NaN were dropped before this point; if any
# were actually removed, the remaining samples are no longer contiguous in
# time -- confirm the segment has no gaps before trusting the filtered output.
data['strain'] = bandpass_filter(data['strain'], lowcut, highcut, fs)


## Data Normalization

In [None]:
# Standardize the strain column with scikit-learn's StandardScaler.
# The scaler is fitted on the strain column and then applied to that same
# column; its single-column 2-D result is flattened before being stored back.
scaler = StandardScaler()
scaler.fit(data[['strain']])
data['strain'] = scaler.transform(data[['strain']]).ravel()


## Data Inspection

In [None]:
# Inspect first few rows
print("First few rows of data:")
print(data.head())

# Inspect col headers
print("\nCol headers:")
print(data.columns)

# Summary stats
print("\nSummary stats:")
print(data.describe())

# Check for missing vals
print("\nMissing vals in each col:")
print(data.isnull().sum())

# Check sampling frequency.
# `strain.sample_rate` is a quantity that already carries its unit, so
# formatting it directly and appending " Hz" printed the unit twice. Use the
# bare number, and reuse it as `fs` rather than a hand-maintained constant.
fs = float(strain.sample_rate.value)
print(f"Sampling frequency: {fs} Hz")


# Time Series Plot

In [None]:
def plot_strain_data(time, strain, label, ylabel, title, color='blue', figsize=(12, 6)):
    """Draw a single labelled line plot of ``strain`` against ``time``.

    Parameters:
        time: x values.
        strain: y values.
        label: legend entry for the line.
        ylabel: y-axis label.
        title: figure title.
        color: line colour (default 'blue').
        figsize: figure size passed to matplotlib (default (12, 6)).

    The x-axis is always labelled 'Time (s)'. The figure is shown immediately
    and nothing is returned.
    """
    _, axes = plt.subplots(figsize=figsize)
    axes.plot(time, strain, label=label, color=color)
    axes.set_xlabel('Time (s)')
    axes.set_ylabel(ylabel)
    axes.set_title(title)
    axes.legend()
    plt.show()

# Plot strain data.
# By this point the 'strain' column has been band-pass filtered and
# standardized, so the values are unitless; label the axis and legend
# accordingly instead of presenting them as raw strain.
plot_strain_data(data['time'], data['strain'],
                 label='Normalized strain', ylabel='Normalized strain',
                 title='Gravitational Wave Strain Data')


# Spectrogram

In [None]:
def compute_and_plot_spectrogram(data, fs, title):
    """Compute a spectrogram of ``data`` and show it as a dB colour map.

    Parameters:
        data: 1-D signal passed to ``scipy.signal.spectrogram``.
        fs: sampling frequency passed to ``spectrogram``.
        title: figure title.

    The figure is shown immediately and nothing is returned.
    """
    frequencies, times, Sxx = spectrogram(data, fs)

    # Clamp the power to the smallest positive float before taking the log.
    # A bin with exactly zero power would otherwise become -inf (the warning is
    # easy to miss), and non-finite values distort the colour scale. Positive
    # bins are unaffected by the clamp.
    floor = np.finfo(Sxx.dtype).tiny
    power_db = 10 * np.log10(np.maximum(Sxx, floor))

    plt.figure(figsize=(12, 6))
    plt.pcolormesh(times, frequencies, power_db, shading='gouraud')
    plt.colorbar(label='Intensity (dB)')
    plt.ylabel('Frequency (Hz)')
    plt.xlabel('Time (s)')
    plt.title(title)
    plt.show()

# Show the time-frequency view of the processed strain column
compute_and_plot_spectrogram(
    data=data['strain'],
    fs=fs,
    title='Spectrogram of Strain Data',
)


# Histogram

In [None]:
def plot_histogram(data, bins, xlabel, title, label, color='blue', figsize=(12, 6)):
    """Draw and show a histogram of ``data``.

    Parameters:
        data: values to bin.
        bins: bin specification passed to ``plt.hist``.
        xlabel: x-axis label.
        title: figure title.
        label: legend entry for the histogram.
        color: bar colour (default 'blue').
        figsize: figure size passed to matplotlib (default (12, 6)).

    Returns:
        None on success. If plotting raises, the error is printed and an
        empty list is returned (best-effort behaviour kept for existing callers).
    """
    fig = None
    try:
        fig = plt.figure(figsize=figsize)
        plt.hist(data, bins=bins, alpha=0.7, color=color, label=label)
        plt.xlabel(xlabel)
        plt.ylabel('Frequency')
        plt.title(title)
        plt.legend()
        plt.show()
    except Exception as e:
        # Close the half-built figure so it is not left open in pyplot's
        # figure registry after the failure.
        if fig is not None:
            plt.close(fig)
        print(f"An error occurred: {e}")
        return []

# Plot histogram of strain data.
# The 'strain' column was standardized earlier in the notebook, so the x-axis
# shows unitless normalized values rather than raw strain.
plot_histogram(data['strain'], bins=50, xlabel='Normalized strain',
               title='Histogram of Strain Data', label='Normalized strain')
