## What is this?
An attempt to create a baseline model with data from [here](https://www.kaggle.com/thedrcat/g2net-train-images-with-gpwy-sample/)
and [here](https://www.kaggle.com/thedrcat/g2net-test-imgs-with-gwpy/) 

In [None]:
# Notebook setup: load the autoreload extension, set it to mode 2, and
# render matplotlib figures inline in the notebook output.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Install spaCy pinned to 3.1.1.
# NOTE(review): presumably pinned for compatibility with the fastai 2.4.1 install — confirm.
!pip install spacy==3.1.1

In [None]:
# Install fastai pinned to 2.4.1 with pip; the conda-based install is kept
# commented out as an alternative.
#!yes Y|conda install -c fastai fastai=2.4.1
!pip install fastai==2.4.1

In [None]:
from pandas.core.frame import DataFrame
from fastai.vision.all import *

## Get the files

In [None]:
# Root directory under which the image and label datasets are looked up.
path = Path("../input")

## getfiles(path: Path, ext)

In [None]:
def getfiles(path: Path, ext):
    """Return all paths under `path`, searched recursively, that end in `.{ext}`.

    `ext` is the file extension and may be given with or without a leading
    dot (`'png'` and `'.png'` are equivalent). The matches from `Path.glob`
    are wrapped in an `L`.
    """
    # Strip any leading dot so that '.png' does not produce the pattern '*..png',
    # which would silently match nothing.
    suffix = ext.lstrip('.')
    return L(path.glob(f'**/*.{suffix}'))

In [None]:
%%time
# Recursively collect every PNG under the training-images dataset folder.
files = getfiles(path/'g2net-train-images-with-gpwy-sample', 'png')

In [None]:
# Number of PNG files found.
len(files)

In [None]:
# Peek at the last path in the list.
files[-1]

In [None]:
# File name with ".png" removed — the same transformation is used later to
# match files against the `id` column of the labels.
files[-1].name.replace(".png", "")

## Get labels

In [None]:
# Load the training labels CSV from the g2net-gravitational-wave-detection dataset.
df = pd.read_csv(path/'g2net-gravitational-wave-detection/training_labels.csv')

In [None]:
# Show the first row of the labels table.
df.head(1)

In [None]:
# Keep only the label rows whose `id` has a matching PNG among `files`.
png_ids = [png.name.replace(".png", "") for png in files]
filtered_df = df.loc[df.id.isin(png_ids)]

In [None]:
# Show the first row of the filtered labels.
filtered_df.head(1)

## Creating the dataloader

In [None]:
# (rows, columns) of the filtered labels.
filtered_df.shape

In [None]:
# First 20,000 rows of the filtered labels.
# NOTE(review): `sample` is not referenced by the later cells shown here, which
# use `filtered_df` directly — confirm whether it was meant to be used.
sample = filtered_df[:20000]

In [None]:
# Folder that holds the training PNGs inside the sample dataset.
train_path = path/'g2net-train-images-with-gpwy-sample/kaggle/tmp/train'

# DataBlock mapping a labels row to (image, category):
#   x -> image file built from the `id` column (train_path prefix, ".png" suffix)
#   y -> the `target` column
ds = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_x=ColReader('id', pref=train_path, suff='.png'),
    get_y=ColReader('target'),
    splitter=TrainTestSplitter(),
    item_tfms=Resize(460),
    batch_tfms=aug_transforms(
        size=224,
        flip_vert=True,
        max_lighting=0.1,
        max_zoom=1.05,
        max_warp=0.,
    ),
)

In [None]:
# Run the DataBlock's summary on the filtered labels to check the pipeline.
ds.summary(filtered_df)

In [None]:
# Build the dataloaders from the filtered labels with a batch size of 256.
dls = ds.dataloaders(filtered_df, bs=256)

In [None]:
# Display a batch to visually check the inputs and labels.
dls.show_batch()

## How would a simple model like ResNet perform?

## The Learner

In [None]:
# Learner built with `cnn_learner` on a resnet34 architecture, cross-entropy
# loss, converted to mixed precision with `to_fp16()`.
# The ROC AUC metric uses its default axis (-1) so that the softmax is taken
# over the class dimension; passing `axis=0` would normalise scores over the
# batch dimension instead and distort the reported AUC.
learn = cnn_learner(dls, resnet34,
                    loss_func=CrossEntropyLossFlat(), metrics=[RocAucBinary()]).to_fp16()

## Train

In [None]:
def show_me_lrs(learn):
    """Run `learn.lr_find` with four suggestion functions, print and return the results.

    The finder is called with `suggest_funcs=(minimum, steep, valley, slide)`;
    the four returned learning rates are printed in scientific notation, one
    per line, and returned as a `Suggestions` namedtuple with the fields
    `min`, `steep`, `valley` and `slide`.
    """
    Suggestions = namedtuple('Suggestions', ["min", "steep", "valley", "slide"])
    lr_min, lr_steep, lr_valley, lr_slide = learn.lr_find(
        suggest_funcs=(minimum, steep, valley, slide))
    suggested_lrs = Suggestions(lr_min, lr_steep, lr_valley, lr_slide)

    # Join separate lines instead of using backslash continuations inside one
    # string literal: those embedded the source indentation in the output.
    report = [
        f"Minimum/10:\t{lr_min:.2e}",
        f"Steepest point:\t{lr_steep:.2e}",
        f"Longest valley:\t{lr_valley:.2e}",
        f"Slide interval:\t{lr_slide:.2e}",
    ]
    print("\n".join(report))

    return suggested_lrs

In [None]:
# First training run: 2 epochs of one-cycle training with 3e-3 as the second argument.
# NOTE(review): `learn.unfreeze()` is only called in a later cell, so presumably
# just the head is trained here — confirm.
learn.fit_one_cycle(2, 3e-3)

In [None]:
# Run the learning-rate finder and keep its suggestions for the next training run.
suggested_lrs = show_me_lrs(learn)

In [None]:
# Unfreeze the model and train for 5 more epochs. The learning-rate range runs
# from a tenth of the "slide" suggestion up to the "slide" suggestion itself.
# NOTE(review): `suggested_lrs` was computed before `unfreeze()` was called —
# confirm that the suggestion is still appropriate for the unfrozen model.
learn.unfreeze()
learn.fit_one_cycle(5, lr_max=slice(suggested_lrs.slide/10, suggested_lrs.slide))

In [None]:
# Plot the losses recorded during training.
learn.recorder.plot_loss()

## Save trained model

In [None]:
# Export the trained learner under the name "./final".
# NOTE(review): the name has no file extension — confirm that whatever loads
# the model uses the same name.
learn.export("./final")