In [None]:
# Install the third-party packages imported further down (efficientnet_pytorch, timm, gwpy).
# NOTE(review): versions are not pinned, so a fresh run may pull different releases.
from IPython.display import clear_output
!pip install efficientnet_pytorch
!pip install timm
!python -m pip install gwpy
# Remove the installer output from the cell once the installs have finished.
clear_output()

In [None]:
import os
import sys 
import json
from glob import glob
import random
import collections
import time
import re
import warnings
from IPython.display import clear_output

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from tqdm import tqdm_notebook as tqdm

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional
import torch.nn.functional as F
from torchvision import transforms

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

from efficientnet_pytorch import EfficientNet
import timm

from gwpy.timeseries import TimeSeries
from gwpy.plot import Plot
from scipy import signal
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt
import joblib
from tqdm import tqdm
import shutil
import librosa

def set_seed(seed):
    """Seed Python, NumPy and PyTorch so repeated runs draw the same random numbers.

    Args:
        seed: Value handed to every generator; its string form is also
            exported as the ``PYTHONHASHSEED`` environment variable.

    When CUDA is available, all CUDA generators are seeded as well and cuDNN
    is switched to deterministic mode.
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (np.random.seed, torch.manual_seed):
        seeder(seed)

    # Nothing GPU-specific to do on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

# Seed every RNG once, before any sampling or model construction happens.
set_seed(1)
# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore") 
# NOTE(review): the blanket filter above already covers DeprecationWarning, so this line is redundant.
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [None]:
# Passed as `qrange` to every q_transform call in create_rgb.
Q_RANGE = (16,32)
# Passed as `frange` to every q_transform call in create_rgb.
F_RANGE = (30,400)
# Side length (pixels) of the square image produced by create_rgb.
# NOTE(review): a later cell rebinds SIZE to a tuple; create_rgb expects an integer here.
SIZE = 128

def read_file(fname):
    """Load a ``.npy`` sample and wrap its first three rows as gwpy ``TimeSeries``.

    Args:
        fname: Path handed to ``np.load``.

    Returns:
        A 3-tuple of ``TimeSeries``, one per row 0, 1 and 2 of the loaded
        array, each built with ``sample_rate=2048``.
    """
    raw = np.load(fname)
    return tuple(TimeSeries(raw[row, :], sample_rate=2048) for row in range(3))

def plot_time_data(d1, d2, d3):
    """Show the three series as separate panels that share one x axis.

    The axes returned by ``gca()`` get an x-range of 0 to 2 and the label
    'Time [s]' before the figure is shown.
    """
    figure = Plot(d1, d2, d3, separate=True, sharex=True, figsize=[12, 8])
    current_axes = figure.gca()
    current_axes.set_xlim(0, 2)
    current_axes.set_xlabel('Time [s]')
    figure.show()
    
def preprocess(d1, d2, d3, bandpass=False, lf=35, hf=350):
    """Whiten three series and optionally band-pass the whitened result.

    Args:
        d1, d2, d3: Objects exposing ``whiten(window=...)`` and
            ``bandpass(low, high)`` (gwpy ``TimeSeries`` in this notebook).
        bandpass: When true, each whitened series is additionally filtered
            with ``bandpass(lf, hf)``.
        lf: Lower corner passed to ``bandpass``.
        hf: Upper corner passed to ``bandpass``.

    Returns:
        A 3-tuple in the same order as the inputs: the whitened series, or
        the whitened-then-band-passed series when ``bandpass`` is true.
    """
    # Every series is whitened with the same Tukey window.
    whitened = tuple(
        series.whiten(window=("tukey", 0.2)) for series in (d1, d2, d3)
    )
    if not bandpass:
        return whitened
    return tuple(series.bandpass(lf, hf) for series in whitened)
    
def create_rgb(fname, size=None):
    """Turn one sample file into a 3-channel uint8 Q-transform image.

    Each of the three series returned by ``read_file`` is whitened via
    ``preprocess`` and converted with ``q_transform``; the three results
    become channels 0, 1 and 2 of one image. The image is rescaled jointly
    (one minimum and one maximum across all channels) to 0..255 and resized.

    Args:
        fname: Path of the ``.npy`` sample.
        size: Output size, either a single integer (square image) or a
            ``(width, height)`` pair. Defaults to the module-level ``SIZE``.

    Returns:
        The array returned by ``cv2.resize`` for the uint8 image.
    """
    whitened = preprocess(*read_file(fname))
    q_maps = [
        series.q_transform(qrange=Q_RANGE, frange=F_RANGE, logf=True, whiten=False)
        for series in whitened
    ]

    img = np.zeros([q_maps[0].shape[0], q_maps[0].shape[1], 3])
    for channel, q_map in enumerate(q_maps):
        img[:, :, channel] = q_map

    # Shift to a zero minimum, then scale by the remaining peak. A constant
    # image has a peak of 0; skip the division so it stays all zeros instead
    # of turning into NaNs before the uint8 cast.
    img = img - np.min(img)
    peak = np.max(img)
    if peak > 0:
        img = img / peak
    img = (img * 255).astype(np.uint8)

    # SIZE is an integer in this cell but is rebound to a tuple later in the
    # notebook; accept both so the function keeps working after that cell ran.
    target = SIZE if size is None else size
    if np.isscalar(target):
        target = (target, target)
    target = tuple(int(side) for side in target)

    return cv2.resize(img, target, interpolation=cv2.INTER_LANCZOS4)

def id2path(img_id, is_test):
    """Build the path of a sample's ``.npy`` file inside the competition input folder.

    The first three characters of ``img_id`` name three nested directories
    below either ``test`` or ``train``.

    Args:
        img_id: Sample identifier; must have at least three characters.
        is_test: Truthy for the ``test`` tree, falsy for the ``train`` tree.

    Returns:
        The relative path as a string.
    """
    a, b, c = (img_id[position] for position in range(3))
    if not is_test:
        return f'../input/g2net-gravitational-wave-detection/train/{a}/{b}/{c}/{img_id}.npy'
    return f'../input/g2net-gravitational-wave-detection/test/{a}/{b}/{c}/{img_id}.npy'
    
def save_train_img(_id):
    """Render one training sample and write it to ``./train_images/<id>.png``.

    Args:
        _id: Sample identifier; resolved to a path in the train tree via
            ``id2path`` and rendered with ``create_rgb``.
    """
    rgb_image = create_rgb(id2path(_id, False))
    cv2.imwrite(f'./train_images/{_id}.png', rgb_image)
    
# Work on a fixed 500-row sample of the training table (random_state makes it repeatable).
df = pd.read_csv('../input/train-files/train.csv').sample(n=500, random_state=1)

# Render only the images that are not on disk yet. Checking per file rather
# than per directory means an interrupted run is completed on the next
# execution instead of being skipped because the folder already exists.
os.makedirs('train_images', exist_ok=True)
missing_ids = [
    _id for _id in df['id']
    if not os.path.exists(f'./train_images/{_id}.png')
]
if missing_ids:
    _ = joblib.Parallel(n_jobs=-1)(
        joblib.delayed(save_train_img)(_id) for _id in tqdm(missing_ids)
    )

In [None]:
# Root folder of the competition data.
# NOTE(review): not referenced by any of the visible cells.
data_directory = '../input/g2net-gravitational-wave-detection'
# Target size passed to cv2.resize inside Dataset.__getitem__.
# NOTE(review): this rebinds SIZE, which an earlier cell defined as the integer 128 used by
# create_rgb; calling create_rgb after this cell would see the tuple instead.
SIZE = (32,32)
# Batch size used for both DataLoaders in train_type.
batch_size = 128
# NOTE(review): not referenced by any of the visible cells.
in_channels = 3
# Learning rate handed to the Adam optimizer in train_type.
lr = 0.00002

In [None]:
def get_transforms():
    """Build the training-time augmentation pipeline.

    Returns:
        A ``transforms.Compose`` that converts the input to a PIL image, runs
        the augmentations below wrapped in ``transforms.RandomOrder``, and
        converts the result to a tensor.
    """
    augmentations = [
        transforms.RandomAffine(degrees=15),
        transforms.RandomAutocontrast(p=0.05),
        transforms.RandomEqualize(p=0.05),
        transforms.RandomHorizontalFlip(p=0.05),
        transforms.RandomInvert(p=0.05),
        transforms.RandomSolarize(threshold=15, p=0.05),
    ]
    pipeline = [
        transforms.ToPILImage(),
        transforms.RandomOrder(augmentations),
        transforms.ToTensor(),
    ]
    return transforms.Compose(pipeline)

def get_valid_transforms():
    """Build the augmentation-free pipeline used for validation and inference.

    Returns:
        A ``transforms.Compose`` that converts the input to a PIL image and
        then to a tensor.
    """
    to_pil = transforms.ToPILImage()
    to_tensor = transforms.ToTensor()
    return transforms.Compose([to_pil, to_tensor])

In [None]:
def onehot(size, target):
    """Return a float64 vector of zeros with the ``target`` position(s) set to one.

    Args:
        size: Shape passed to ``torch.zeros``.
        target: Index expression selecting the entries to set to 1.

    Returns:
        The resulting ``torch.float64`` tensor.
    """
    encoded = torch.zeros(size, dtype=torch.float64)
    encoded[target] = 1.0
    return encoded

class Dataset(torch_data.Dataset):
    """Serve pre-rendered PNG images (and optionally smoothed labels) by index.

    Args:
        id: Indexable collection of sample identifiers; each maps to the file
            ``<split>/<identifier>.png``.
        targets: Optional indexable collection of labels aligned with ``id``.
            When omitted, items carry the identifier instead of a label.
        split: Directory that holds the PNG files.
        augment: When true (and targets are given), the training
            augmentations from ``get_transforms`` are applied; otherwise
            ``get_valid_transforms`` is used.
    """

    def __init__(self, id, targets=None, split="train_images", augment=False):
        self.targets = targets
        self.id = id
        self.split = split
        self.augment = augment

    def __len__(self):
        """Return the number of samples."""
        return len(self.id)

    def __getitem__(self, index):
        """Load, resize and transform the image at ``index``.

        Returns:
            ``{"X": tensor, "id": identifier}`` when no targets were given,
            otherwise ``{"X": tensor, "y": label}`` where the label is
            ``abs(target - 0.01)`` as a float tensor (0 becomes 0.01 and
            1 becomes 0.99).

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        sample_id = self.id[index]
        path = f"{self.split}/{sample_id}.png"

        image = cv2.imread(path)
        # cv2.imread signals failure by returning None; fail here with the
        # offending path instead of with an opaque error inside cv2.resize.
        if image is None:
            raise FileNotFoundError(f"Could not read image '{path}'")
        image = cv2.resize(image, SIZE)

        # Augmentation is only ever applied to labelled samples.
        if self.targets is not None and self.augment:
            transform = get_transforms()
        else:
            transform = get_valid_transforms()
        data = transform(image)

        if self.targets is None:
            return {"X": data, "id": sample_id}

        y = torch.tensor(abs(self.targets[index] - 0.01), dtype=torch.float)
        return {"X": data, "y": y}

def Model():
    """Create the pretrained ``resnetv2_50x3_bitm_in21k`` network with a single-logit head.

    The dropout rate is passed to ``timm.create_model`` rather than being
    assigned to ``net.drop_rate`` afterwards. NOTE(review): the ResNetV2
    implementation appears to consume the rate while building its classifier
    head, in which case a later attribute assignment would not enable
    dropout; confirm against the installed timm version.

    Returns:
        The model returned by ``timm.create_model``.
    """
    return timm.create_model(
        'resnetv2_50x3_bitm_in21k',
        pretrained=True,
        num_classes=1,
        drop_rate=0.2,
    )

class Trainer:
    """Run the train/validate loop with AUC-based checkpointing and early stopping.

    Args:
        model: Network to optimise; called as ``model(X)``.
        device: Device that each batch is moved to.
        optimizer: Optimizer stepped once per training batch.
        criterion: Callable ``criterion(outputs, targets)`` returning the loss.
        scheduler: Scheduler stepped once per epoch with the validation AUC.
    """

    def __init__(
        self,
        model,
        device,
        optimizer,
        criterion,
        scheduler
    ):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.criterion = criterion
        self.scheduler = scheduler

        self.best_valid_auc = 0   # highest validation AUC seen so far
        self.n_patience = 0       # consecutive epochs without improvement
        self.lastmodel = None     # path of the most recently written checkpoint

    def fit(self, epochs, train_loader, valid_loader, save_path, patience):
        """Train for up to ``epochs`` epochs, stopping early on stagnation.

        A checkpoint is written whenever the validation AUC exceeds the best
        value so far. Training stops once ``patience`` consecutive epochs
        bring no improvement.

        Args:
            epochs: Maximum number of epochs.
            train_loader: Iterable of training batches (dicts with "X" and "y").
            valid_loader: Iterable of validation batches (dicts with "X" and "y").
            save_path: Prefix of the checkpoint file names.
            patience: Number of non-improving epochs tolerated before stopping.

        Returns:
            A list with one dict per completed epoch holding ``epoch``,
            ``train_loss``, ``valid_loss`` and ``valid_auc``.
        """
        history = []

        for n_epoch in range(1, epochs + 1):
            self.info_message("EPOCH: {}, \nLR: {}", n_epoch, self.optimizer.param_groups[0]['lr'])

            train_loss, train_time = self.train_epoch(train_loader)

            valid_loss, valid_auc, valid_time = self.valid_epoch(valid_loader)

            self.scheduler.step(valid_auc)

            self.info_message(
                "[Epoch Train: {}] loss: {:.4f}, time: {:.2f} s            ",
                n_epoch, train_loss, train_time
            )

            self.info_message(
                "[Epoch Valid: {}] loss: {:.4f}, auc: {:.4f}, time: {:.2f} s",
                n_epoch, valid_loss, valid_auc, valid_time
            )

            history.append(
                {
                    "epoch": n_epoch,
                    "train_loss": train_loss,
                    "valid_loss": valid_loss,
                    "valid_auc": valid_auc,
                }
            )

            if self.best_valid_auc < valid_auc:
                self.save_model(n_epoch, save_path, valid_loss, valid_auc)
                # best_valid_auc still holds the previous best here, which is
                # what the "improved from" part of the message reports.
                self.info_message(
                     "auc improved from {:.4f} to {:.4f}. Saved model to '{}'",
                    self.best_valid_auc, valid_auc, self.lastmodel
                )
                self.best_valid_auc = valid_auc
                self.n_patience = 0
            else:
                self.n_patience += 1

            if self.n_patience >= patience:
                self.info_message("\nValid auc didn't improve last {} epochs.", patience)
                break

        return history

    def train_epoch(self, train_loader):
        """Run one optimisation pass over ``train_loader``.

        Returns:
            ``(mean batch loss, elapsed whole seconds)``.
        """
        self.model.train()
        t = time.time()
        sum_loss = 0

        for batch in tqdm(train_loader):
            X = batch["X"].to(self.device)
            targets = batch["y"].to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(X).squeeze(1)

            loss = self.criterion(outputs, targets)
            loss.backward()

            sum_loss += loss.detach().item()

            self.optimizer.step()

        return sum_loss/len(train_loader), int(time.time() - t)

    def valid_epoch(self, valid_loader):
        """Evaluate the model on ``valid_loader`` without gradient tracking.

        Labels are binarised at 0.5 before scoring, and predictions are the
        sigmoid of the model outputs.

        Returns:
            ``(mean batch loss, ROC AUC, elapsed whole seconds)``.
        """
        self.model.eval()
        t = time.time()
        sum_loss = 0
        y_all = []
        outputs_all = []

        with torch.no_grad():
            for batch in tqdm(valid_loader):
                X = batch["X"].to(self.device)
                targets = batch["y"].to(self.device)
                outputs = self.model(X).squeeze(1)

                loss = self.criterion(outputs, targets)
                sum_loss += loss.detach().item()

                y_all.extend(batch["y"].tolist())
                outputs_all.extend(torch.sigmoid(outputs).tolist())

        # Targets arrive as soft labels; map them back to hard 0/1 classes.
        y_all = [1 if x > 0.5 else 0 for x in y_all]
        auc = roc_auc_score(y_all, outputs_all)

        return sum_loss/len(valid_loader), auc, int(time.time() - t)

    def save_model(self, n_epoch, save_path, loss, auc):
        """Write a checkpoint and remember its path in ``self.lastmodel``.

        The file name is ``<save_path>-e<epoch>-loss<loss>-auc<auc>.pth``.
        The checkpoint stores the model and optimizer state, the epoch
        number, and ``auc`` as ``best_valid_score``. The score comes from the
        ``auc`` argument because ``fit`` updates ``self.best_valid_auc`` only
        after this method returns.
        """
        self.lastmodel = f"{save_path}-e{n_epoch}-loss{loss:.3f}-auc{auc:.3f}.pth"
        torch.save(
            {
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.optimizer.state_dict(),
                "best_valid_score": auc,
                "n_epoch": n_epoch,
            },
            self.lastmodel,
        )

    @staticmethod
    def info_message(message, *args, end="\n"):
        """Print ``message`` formatted with ``args``."""
        print(message.format(*args), end=end)

In [None]:
class RocAucMeter(object):
    """Accumulate labels and class-1 probabilities and track the running ROC AUC."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Restore the initial state.

        The buffers start with one entry per class (labels 0 and 1, both
        scored 0.5) and the score starts at 0.
        """
        self.y_true = np.array([0,1])
        self.y_pred = np.array([0.5,0.5])
        self.score = 0

    def update(self, y_true, y_pred):
        """Append a batch and recompute the AUC over everything seen so far.

        Args:
            y_true: Tensor whose argmax along axis 1 gives the label.
            y_pred: Tensor of scores; softmax along dim 1 is applied and
                column 1 is taken as the prediction.
        """
        label_matrix = y_true.cpu().numpy()
        batch_labels = label_matrix.argmax(axis=1).clip(min=0, max=1).astype(int)

        class_probs = nn.functional.softmax(y_pred, dim=1).data.cpu().numpy()
        batch_scores = class_probs[:,1]

        self.y_true = np.hstack((self.y_true, batch_labels))
        self.y_pred = np.hstack((self.y_pred, batch_scores))
        self.score = roc_auc_score(self.y_true, self.y_pred)

    @property
    def avg(self):
        """The most recently computed AUC."""
        return self.score

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_type(df_train, df_valid, epochs=5, patience=2, save_path='model'):
    """Train a fresh model on ``df_train`` and validate it on ``df_valid``.

    Args:
        df_train: Frame with "id" and "target" columns; served with augmentation.
        df_valid: Frame with "id" and "target" columns; served without augmentation.
        epochs: Maximum number of epochs passed to ``Trainer.fit``.
        patience: Early-stopping patience passed to ``Trainer.fit``.
        save_path: Checkpoint file-name prefix passed to ``Trainer.fit``.

    Returns:
        ``trainer.lastmodel`` after training, i.e. the path of the last
        checkpoint written (``None`` if none was written).
    """
    train_data_retriever = Dataset(
        df_train["id"].values,
        df_train["target"].values,
        augment=True
    )

    valid_data_retriever = Dataset(
        df_valid["id"].values,
        df_valid["target"].values,
    )

    train_loader = torch_data.DataLoader(
        train_data_retriever,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
        pin_memory=False
    )

    valid_loader = torch_data.DataLoader(
        valid_data_retriever,
        batch_size=batch_size,
        shuffle=False,
        num_workers=2,
        pin_memory=False
    )

    model = Model()
    model.to(device)

    # Parameters whose name contains one of these fragments get no weight decay.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
    optimizer = torch.optim.Adam(optimizer_grouped_parameters, lr=lr)
    criterion = torch_functional.binary_cross_entropy_with_logits
    # The scheduler's `verbose` argument is deprecated in recent PyTorch
    # releases and is therefore no longer passed.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=2,
        threshold=0.0001, threshold_mode='abs', cooldown=0,
        min_lr=0, eps=1e-08
    )

    trainer = Trainer(
        model,
        device,
        optimizer,
        criterion,
        scheduler
    )

    trainer.fit(
        epochs,
        train_loader,
        valid_loader,
        save_path,
        patience,
    )

    return trainer.lastmodel


In [None]:
# Train on the first 450 sampled rows and validate on the remaining rows.
# NOTE(review): the split is positional, not stratified by target.
train_type(df.iloc[0:450], df.iloc[450::])