## <center>G2Net competition</center>

#### <center>Data transformation and storage</center>

The data is provided as time series stored in NumPy files. The readings are simulated measurements from 3 gravitational wave interferometers (LIGO Hanford, LIGO Livingston, and Virgo). 

In this notebook I explore 3 different data transformations for time series data - 
1. Recurrence plot
2. Gramian Angular Field plots - summation 
3. Gramian Angular Field plots - difference

More complex pre-trained models can be used on these transformations.

References - 
* https://www.kaggle.com/ihelon/g2net-eda-and-modeling/output

Read more at -
[Imaging time series](https://pyts.readthedocs.io/en/stable/modules/image.html)

#### If you find something useful or gain some insights, please upvote.



In [None]:
pip install pyts

### Dependencies

In [None]:
import os
import json
import random
import collections

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
sns.set_theme(style="darkgrid")

from scipy.spatial.distance import pdist, squareform #scipy spatial distance
import sklearn as sk
import sklearn.metrics.pairwise
from skimage.transform import resize

from pyts.image import RecurrencePlot, GramianAngularField, MarkovTransitionField

In [None]:
class Config:
    """Namespace of file-path templates and plotting constants for the notebook."""

    # --- Competition input -------------------------------------------------
    # Raw reading template; placeholders are filled as
    # (folder, id[0], id[1], id[2], id) by the path helper.
    data_dir = "../input/g2net-gravitational-wave-detection/{}/{}/{}/{}/{}.npy"
    # CSV file with the training labels.
    train_file = "../input/g2net-gravitational-wave-detection/training_labels.csv"

    # --- Generated output --------------------------------------------------
    # Stored-image template; placeholders are filled as
    # (plot_type, folder, id[0], id[1], id[2], id).
    store_dir = "{}-plots/{}/{}/{}/{}/{}.npy"

    # Left empty in this notebook.
    test_file = ""
    submission_file = ""

    # --- Plot styling ------------------------------------------------------
    # Detector names and line colours; both are indexed by signal row.
    signal_names = (
        "LIGO Hanford",
        "LIGO Livingston",
        "Virgo",
    )
    colors = (
        "black",
        "red",
        "green",
    )
    

### Utilities

In [None]:
def convert_image_id_2_path(image_id: str, is_train: bool = True) -> str:
    """Build the path of the raw ``.npy`` reading that belongs to ``image_id``.

    Args:
        image_id: Reading identifier; its first three characters are used as
            nested directory names.
        is_train: Selects the ``train`` folder when truthy, ``test`` otherwise.

    Returns:
        ``Config.data_dir`` with all placeholders filled in.
    """
    if is_train:
        split = "train"
    else:
        split = "test"
    # One directory level per leading character of the id.
    return Config.data_dir.format(
        split, *(image_id[pos] for pos in range(3)), image_id
    )

In [None]:
def convert_image_id_2_store_path(image_id: str, is_train: bool = True, plot_type = 'rec') -> str:
    """Build the path where the transformed images of ``image_id`` are stored.

    As a side effect the parent directory of that path is created if it does
    not exist yet.

    Args:
        image_id: Reading identifier; its first three characters are used as
            nested directory names.
        is_train: Selects the ``train`` folder when truthy, ``test`` otherwise.
        plot_type: Prefix of the output root directory (``<plot_type>-plots``).

    Returns:
        ``Config.store_dir`` with all placeholders filled in.
    """
    if is_train:
        split = "train"
    else:
        split = "test"
    target = Config.store_dir.format(
        plot_type, split, *(image_id[pos] for pos in range(3)), image_id
    )
    parent_dir = os.path.dirname(target)
    os.makedirs(parent_dir, exist_ok=True)
    return target

In [None]:
def get_random_sample_for_both_targets(df=None):
    """Pick one random reading id for each target class.

    Args:
        df: DataFrame with ``id`` and ``target`` columns. Defaults to the
            module-level ``train_df``.

    Returns:
        A tuple ``(id0, id1)``: a random id with ``target == 0`` and a random
        id with ``target == 1``.

    Raises:
        IndexError: If one of the two classes has no rows.
    """
    if df is None:
        df = train_df

    # Select the ids through a boolean mask so the result does not depend on
    # the index being a default RangeIndex (index labels are not positions).
    ids_target_0 = df.loc[df['target'] == 0, 'id'].tolist()
    ids_target_1 = df.loc[df['target'] == 1, 'id'].tolist()

    id0 = random.choice(ids_target_0)
    id1 = random.choice(ids_target_1)

    return id0, id1

In [None]:
# Load the training labels; the cells below read its "id" and "target" columns.
train_df = pd.read_csv(Config.train_file)
# Bare expression so the notebook displays the table as the cell output.
train_df

### Helper functions for EDA visualisation of the different transformations

In [None]:
def visualize_line_plot(
    id0,
    id1,
):
    """Draw the raw signals of two readings side by side as line plots.

    The figure has three rows (one per entry of ``Config.signal_names``) and
    two columns: ``id0`` on the left, titled as target 0, and ``id1`` on the
    right, titled as target 1.

    Args:
        id0: Id of the reading shown in the left column.
        id1: Id of the reading shown in the right column.
    """
    data0 = np.load(convert_image_id_2_path(id0))
    data1 = np.load(convert_image_id_2_path(id1))

    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(20, 12))

    for row, (left_ax, right_ax) in enumerate(axes):
        panels = (
            (left_ax, data0[row]),
            (right_ax, data1[row]),
        )

        # Only the top row carries the column titles.
        if row == 0:
            left_ax.set_title(id0 + ' Target = 0', fontsize=20)
            right_ax.set_title(id1 + ' Target = 1', fontsize=20)

        for panel, series in panels:
            panel.plot(range(len(series)), series, color=Config.colors[row])
            panel.set_xlabel(Config.signal_names[row], fontsize=14)

    # Leave room between the subplots so labels and titles do not overlap.
    plt.subplots_adjust(
        left=0.1,
        bottom=0.1,
        right=0.9,
        top=0.9,
        wspace=0.2,
        hspace=0.5,
    )

In [None]:
def visualize_recurrence_plot(
    id0,
    target,
):
    """Show the recurrence plot of each signal of one training reading.

    Args:
        id0: Id of the reading to load and transform.
        target: Label of the reading; only used in the figure title.
    """
    signals = np.load(convert_image_id_2_path(id0))

    # One recurrence image per row of the loaded array.
    transformer = RecurrencePlot(threshold='distance', percentage=20)
    images = transformer.fit_transform(signals)

    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 7))

    for channel, panel in enumerate(axes):
        panel.imshow(images[channel], cmap='binary', origin='lower')
        panel.set_xlabel(Config.signal_names[channel], fontsize=16)

    plt.suptitle(f"id: {id0} target: {target}", fontsize=16)
    plt.show()


In [None]:
def visualize_gramian_angular_fields_plot(
    id0,
    target,
    method = 'summation'
):
    """Show the Gramian Angular Field of each signal of one training reading.

    Args:
        id0: Id of the reading to load and transform.
        target: Label of the reading; only used in the figure title.
        method: Forwarded to ``GramianAngularField``; the notebook uses
            ``'summation'`` and ``'difference'``.
    """
    signals = np.load(convert_image_id_2_path(id0))

    # Each signal is turned into a 24x24 image.
    transformer = GramianAngularField(image_size=24, method=method)
    images = transformer.fit_transform(signals)

    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 7))

    for channel, panel in enumerate(axes):
        panel.imshow(images[channel], cmap='rainbow', origin='lower')
        panel.set_xlabel(Config.signal_names[channel], fontsize=16)

    plt.suptitle(f"id: {id0} target: {target}", fontsize=16)
    plt.show()

In [None]:
def visualize_mft_plot(
    id0,
    target,
):
    """Show the Markov Transition Field of each signal of one training reading.

    Args:
        id0: Id of the reading to load and transform.
        target: Label of the reading; only used in the figure title.
    """
    signals = np.load(convert_image_id_2_path(id0))

    # NOTE(review): a float image_size is presumably a fraction of the series
    # length -- confirm against the pyts documentation.
    transformer = MarkovTransitionField(image_size = 0.3, n_bins=3)
    images = transformer.fit_transform(signals)

    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 7))

    for channel, panel in enumerate(axes):
        panel.imshow(images[channel], cmap='inferno', origin='lower')
        panel.set_xlabel(Config.signal_names[channel], fontsize=16)

    plt.suptitle(f"id: {id0} target: {target}", fontsize=16)
    plt.show()

#### Different type of plots demonstrated - 
1. Line plot
2. Recurrence plot
3. Gramian Angular Field plots - summation and difference type
4. Markov Transition Field

Below I demonstrate the above plots for a sine wave.

In [None]:
# Demonstrate every transformation on a plain sine wave.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 12))
x = np.linspace(-4*np.pi, 4*np.pi, 1000)
sine = np.sin(x)
# The transformers take a 2-D (n_samples, n_timestamps) input: row 0 is the
# x ramp, row 1 is the sine wave that is actually displayed below.
data = np.vstack((x, sine))
rp = RecurrencePlot(threshold='point', percentage=20)
X_rp = rp.fit_transform(data)
gaf = GramianAngularField(image_size=24)
X_gaf = gaf.fit_transform(data)
mft = MarkovTransitionField(image_size = 0.3, n_bins=3)
X_mft = mft.fit_transform(data)

# Flatten the 2x2 grid so the panels can be addressed with a single index.
axes = axes.flatten()

axes[0].plot(x, sine)
axes[0].set_xlabel('Line Plot', fontsize=14)
# Index 1 selects the sine row of each transformed output.
axes[1].imshow(X_rp[1], cmap='rainbow', origin='lower')
axes[1].set_xlabel('Recurrence Plot', fontsize=14)
axes[2].imshow(X_gaf[1], cmap='rainbow', origin='lower')
axes[2].set_xlabel('Gramian Angular fields Plot', fontsize=14)
axes[3].imshow(X_mft[1], cmap='inferno', origin='lower')
axes[3].set_xlabel('Markov Transition Field Plot', fontsize=14)
plt.suptitle("Sine Wave example", fontsize=16)
plt.show()

#### Line plots comparison for a random sample of both the targets

In [None]:
# Draw one random reading per target class and plot their raw signals side by side.
id0, id1 = get_random_sample_for_both_targets()
visualize_line_plot(id0, id1)

#### Recurrence plots comparison for a random sample of both the targets

In [None]:
# Draw one random reading per target class; the second argument is the label
# shown in the figure title.
id0, id1 = get_random_sample_for_both_targets()
visualize_recurrence_plot(id0, 0)
visualize_recurrence_plot(id1, 1)

#### Gramian Angular Fields - summation plots comparison for a random sample of both the targets

In [None]:
# Draw one random reading per target class; the default method ('summation')
# of the helper is used here.
id0, id1 = get_random_sample_for_both_targets()
visualize_gramian_angular_fields_plot(id0, 0)
visualize_gramian_angular_fields_plot(id1, 1)

#### Gramian Angular Fields - difference plots comparison for a random sample of both the targets

In [None]:
# Same comparison as for the summation fields, with the 'difference' method.
id0, id1 = get_random_sample_for_both_targets()
visualize_gramian_angular_fields_plot(id0, 0, method='difference')
visualize_gramian_angular_fields_plot(id1, 1, method='difference')

#### Markov Transition field plots comparison for a random sample of both the targets

In [None]:
# Draw one random reading per target class and show their Markov Transition Fields.
id0, id1 = get_random_sample_for_both_targets()
visualize_mft_plot(id0, 0)
visualize_mft_plot(id1, 1)

### Helper functions for transforming time-series to plots, storing and loading them

In [None]:
def store_plot(reading_id, is_train = True, plot_type = 'rec', transformer = RecurrencePlot(threshold='point', percentage=20)):
    """Transform one reading into images and save them as a ``.npy`` file.

    Args:
        reading_id: Id of the reading to load.
        is_train: Selects the ``train`` or ``test`` folder, both for the raw
            input and for the stored output.
        plot_type: Output directory prefix. For ``'rec'`` every image is
            additionally downscaled to 256x256 before it is saved.
        transformer: Object whose ``fit_transform`` turns the loaded array
            into one image per signal. The default instance is created once,
            when the function is defined.
    """
    # Read from the same split the result is stored under.
    path = convert_image_id_2_path(reading_id, is_train)
    data = np.load(path)
    X = transformer.fit_transform(data)
    if plot_type == 'rec':
        # Resize every image on its own so the (channel, height, width)
        # layout is kept. Reshaping between channel-first and channel-last
        # would only reinterpret the memory and mix the signals together.
        X = np.stack([
            cv2.resize(image, dsize=(256, 256), interpolation=cv2.INTER_CUBIC)
            for image in X
        ])
    store_path = convert_image_id_2_store_path(reading_id, is_train, plot_type)
    np.save(store_path, X)

In [None]:
def load_plot(reading_id, is_train = True, plot_type = 'rec'):
    """Load the images previously stored for ``reading_id``.

    Args:
        reading_id: Id of the reading whose stored images are wanted.
        is_train: Selects the ``train`` or ``test`` output folder.
        plot_type: Output directory prefix the images were stored under.

    Returns:
        The array read from the stored ``.npy`` file.
    """
    return np.load(convert_image_id_2_store_path(reading_id, is_train, plot_type))

In [None]:
def visualize_stored_plot(reading_id, is_train = True, plot_type='rec', cmap='binary'):
    """Display the three stored images of one reading in a single row.

    Args:
        reading_id: Id of the reading whose stored images are shown.
        is_train: Selects the ``train`` or ``test`` output folder.
        plot_type: Output directory prefix the images were stored under.
        cmap: Matplotlib colour map used for every panel.
    """
    images = load_plot(reading_id, is_train, plot_type)
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 7))

    for channel, panel in enumerate(axes):
        panel.imshow(images[channel], cmap=cmap, origin='lower')
        panel.set_xlabel(Config.signal_names[channel], fontsize=16)

    plt.suptitle(f"id: {reading_id}", fontsize=16)
    plt.show()

In [None]:
# Store the recurrence plots of two random training readings, then display them.
rp = RecurrencePlot(threshold='distance', percentage=20)
id_list = random.sample(train_df.index.tolist(), 2)
for i in id_list:
    # id_list holds index labels, so look the id up by label.
    reading_id = train_df.loc[i, "id"]
    # Pass the transformer by keyword: the second positional parameter of
    # store_plot is is_train, so a positional rp would never be used.
    store_plot(reading_id, transformer=rp)
for i in id_list:
    reading_id = train_df.loc[i, "id"]
    visualize_stored_plot(reading_id)

In [None]:
# Store the summation Gramian Angular Fields of two random training readings,
# then display them.
gaf = GramianAngularField(image_size=24, method='summation')
id_list = random.sample(train_df.index.tolist(), 2)
for i in id_list:
    # id_list holds index labels, so look the id up by label.
    reading_id = train_df.loc[i, "id"]
    store_plot(reading_id, transformer = gaf, plot_type = 'gaf')

for i in id_list:
    reading_id = train_df.loc[i, "id"]
    visualize_stored_plot(reading_id, plot_type = 'gaf', cmap = 'rainbow')

## <center>Work in progress </center>
#### <center>Baseline models coming up</center>