# fastai training with resnet34
fastai is a great tool for creating a strong baseline quickly. I'm still learning about signal processing, so there may be big errors in my approach :)

In [None]:
import pandas as pd
import numpy as np
from fastai.vision.all import *
import pickle
import os
import torch
import librosa

In [None]:
def sample_to_mel(_id, is_test):
    """Load one sample and convert its first three channels to mel images.

    Args:
        _id: Sample identifier; resolved to a ``.npy`` path via ``id2path``.
        is_test: Forwarded to ``id2path`` to pick the test or train folder.

    Returns:
        A ``float32`` array of shape ``(3, 64, 64)``: one dB-scaled mel
        spectrogram per channel, each reshaped from ``(16, 256)`` to
        ``(64, 64)``.

    Raises:
        ValueError: If a channel yields fewer than 256 spectrogram frames,
            because the ``(64, 64)`` reshape needs exactly 16 * 256 values.
    """
    x = np.load(id2path(_id, is_test))
    spectrogram = []
    for i in range(3):
        # The signal is scaled by the channel's (signed) maximum value.
        # `y` is passed by keyword: librosa >= 0.10 made the audio argument
        # keyword-only, and the keyword form also works on older releases.
        mel = librosa.feature.melspectrogram(y=x[i] / x[i].max(),
                                             sr=2048,
                                             n_mels=16,
                                             hop_length=16)
        # Keep the first 256 frames so 16 mel bands x 256 frames = 4096
        # values, which is what the 64x64 reshape below needs.
        mel = mel[:, :256]
        mel = librosa.power_to_db(mel).astype(np.float32)
        mel = mel.reshape(64, 64)
        spectrogram.append(mel)
    # Stack the per-channel images along a new leading axis.
    return np.stack(spectrogram)

In [None]:
# Training labels; later cells read its 'id' and 'target' columns.
train = pd.read_csv('../input/g2net-gravitational-wave-detection/training_labels.csv')
# Sample submission; used as the list of test ids and later filled with predictions.
test_df = pd.read_csv('../input/g2net-gravitational-wave-detection/sample_submission.csv')

In [None]:
def id2path(id, is_test):
    """Return the relative path of the ``.npy`` file for a sample id.

    The first three characters of ``id`` name three nested sub-directories
    under either the ``test`` or the ``train`` folder.

    Args:
        id: Sample identifier; must have at least three characters.
        is_test: Truthy to build a path under ``test``, falsy for ``train``.

    Returns:
        The path string ``<root>/<a>/<b>/<c>/<id>.npy``.
    """
    a = id[0]
    b = id[1]
    c = id[2]
    if not is_test:
        return f'../input/g2net-gravitational-wave-detection/train/{a}/{b}/{c}/{id}.npy'
    return f'../input/g2net-gravitational-wave-detection/test/{a}/{b}/{c}/{id}.npy'

In [None]:
# Keep a random 30% of the labelled rows (presumably to shorten training).
# The fixed seed makes the subsample, and the positional train/validation
# split later derived from it, identical on every run.
train = train.sample(frac=0.3, random_state=42).reset_index(drop=True)

In [None]:
class NumpyDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(mel image tensor, target tensor)`` pairs.

    Args:
        df: DataFrame with an ``'id'`` column and, unless ``is_test`` is
            true, a ``'target'`` column.
        is_test: When true, samples are loaded from the test folder and a
            placeholder target is returned instead of a label.
    """

    def __init__(self, df, is_test=False):
        self.df, self.is_test = df, is_test

    def __getitem__(self, i):
        """Return the ``i``-th sample as ``(float tensor, long tensor)``."""
        # `i` is a position in 0..len-1, so look rows up positionally.
        # Label-based `.loc` only works when the frame's index happens to
        # be exactly 0..n-1.
        image_id = self.df['id'].iloc[i]
        img = sample_to_mel(image_id, self.is_test)
        if self.is_test:
            # Test rows have no label; a placeholder is returned (0 for the
            # first ten positions, 1 afterwards) — presumably so that both
            # classes are present; TODO confirm.
            tgt = 0 if i < 10 else 1
        else:
            tgt = self.df['target'].iloc[i]
        return (torch.tensor(img, dtype=torch.float), torch.tensor(tgt, dtype=torch.long))

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

In [None]:
# Positional 80/20 split: the leading rows train the model and the trailing
# rows validate it. Both halves get a fresh 0..n-1 index.
cut = int(len(train) * 0.8)
train_df = train.iloc[:cut].reset_index(drop=True)
valid_df = train.iloc[cut:].reset_index(drop=True)
len(train_df), len(valid_df)

In [None]:
# Labelled datasets for training and validation.
train_ds = NumpyDataset(train_df, is_test=False)
valid_ds = NumpyDataset(valid_df, is_test=False)
# Test dataset: reads from the test folder and yields placeholder targets.
test_ds = NumpyDataset(test_df, is_test=True)

In [None]:
# Wrap the training and validation datasets in DataLoaders with batch size 16.
dls = DataLoaders.from_dsets(train_ds, valid_ds, bs=16)
# Set the output count by hand to 1 (presumably read by the learner to size
# the model head — confirm against the fastai version in use).
dls.c = 1

In [None]:
# ResNet-34 learner using BCE-with-logits loss, tracked with binary ROC AUC.
# NOTE(review): newer fastai releases name this factory `vision_learner`;
# confirm against the installed version.
learn = cnn_learner(dls, resnet34, loss_func=BCEWithLogitsLossFlat(), metrics=RocAucBinary())

In [None]:
# Train for 1 epoch with fit_one_cycle, passing 3e-4 as the learning rate.
learn.fit_one_cycle(1, 3e-4)

In [None]:
# Save the learner under the name 'model'.
learn.save('model')

In [None]:
# Plot the losses recorded during training.
learn.recorder.plot_loss()

# Inference

In [None]:
# Unshuffled loader over the test set so predictions stay in test_df row
# order; drop_last=False keeps the final partial batch.
test_dl = DataLoader(test_ds, bs=16, shuffle=False, drop_last=False)
# get_preds returns a pair; the second element (the placeholder targets from
# the test dataset, presumably) is discarded.
preds, _ = learn.get_preds(dl=test_dl)

In [None]:
# Flatten the predictions to 1-D so every row gets one scalar (preds is
# presumably shaped (n_rows, 1) — confirm). Bracket assignment is used because
# attribute-style assignment only updates a column that already exists;
# otherwise it sets a plain attribute and the CSV would miss the predictions.
test_df['target'] = np.asarray(preds).reshape(-1)
test_df.to_csv('submission.csv', index=False)
test_df.head()