This is a simple baseline using ResNet-18. The source code in this notebook is based on Abhishek Thakur's (@abhishek) [AAAMLP](https://github.com/abhi1thakur/approachingalmost) textbook.

In [None]:
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split
from sklearn import metrics
from tqdm import tqdm

In [None]:
!pip install -q pretrainedmodels

In [None]:
import pretrainedmodels
import albumentations

In [None]:
# Load the training labels table from the competition input directory.
train_labels = pd.read_csv('../input/seti-breakthrough-listen/train_labels.csv')

In [None]:
# Preview the first rows of the labels table.
train_labels.head()

In [None]:
import torch

class ClassificationDataset:
    """Map-style dataset that loads ``.npy`` arrays together with their targets.

    Each file is assumed to hold a channels-first ``(C, H, W)`` array. Samples
    are converted to channels-last for the optional resize/augmentation steps
    and returned channels-first again, as PyTorch expects.

    Args:
        image_paths: indexable collection of paths to ``.npy`` files.
        targets: indexable collection of labels, aligned with ``image_paths``.
        resize: optional size passed as ``dsize`` to ``cv2.resize``
            (OpenCV interprets it as ``(width, height)``); ``None`` keeps the
            stored resolution.
        augmentations: optional callable invoked as
            ``augmentations(image=hwc_array)`` that returns a mapping with an
            ``"image"`` entry (albumentations-style).
    """

    def __init__(self, image_paths, targets, resize=None, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load sample ``item``.

        Returns:
            dict with ``"image"`` (float32 tensor, channels-first) and
            ``"targets"`` (int64 tensor).
        """
        image = np.load(self.image_paths[item]).astype(float)
        targets = self.targets[item]

        # Always switch to HWC here. Doing this only when resizing (as before)
        # left the array in CHW for resize=None, so the final HWC -> CHW
        # transpose below scrambled the axes.
        image = np.transpose(image, (1, 2, 0))

        if self.resize is not None:
            image = cv2.resize(image, dsize=self.resize, interpolation=cv2.INTER_CUBIC)

        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]

        # pytorch expects CHW instead of HWC
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "targets": torch.tensor(targets, dtype=torch.long),
        }

In [None]:
# Build the path of each sample's .npy file; files are looked up in a
# sub-directory named after the first character of the sample id.
train_labels['img_path'] = train_labels['id'].apply(lambda x: f'../input/seti-breakthrough-listen/train/{x[0]}/{x}.npy')

In [None]:
# Preview the table again, now including the img_path column.
train_labels.head()

In [None]:
# NOTE(review): despite its name this is a ClassificationDataset, not a torch
# DataLoader, and nothing else in the visible notebook references it — the
# training cell builds its own datasets with resize=(224, 224).
train_dataloader = ClassificationDataset(image_paths=train_labels['img_path'], targets=train_labels['target'], resize=(256, 256))

In [None]:
def train(data_loader, model, optimizer, device):
    """Run one optimisation pass over every batch in ``data_loader``.

    Args:
        data_loader: iterable of dict batches with ``"image"`` and
            ``"targets"`` tensors.
        model: network to train; it is switched to training mode.
        optimizer: optimizer that holds the model parameters.
        device: device the batches are moved to before the forward pass.

    Returns:
        None. The model is updated in place.
    """
    model.train()

    # Binary cross-entropy on raw logits; the module is stateless, so one
    # instance serves every batch.
    criterion = nn.BCEWithLogitsLoss()

    for batch in tqdm(data_loader, position=0, leave=True, desc='Training'):
        images = batch["image"].to(device, dtype=torch.float)
        labels = batch['targets'].to(device, dtype=torch.float)

        optimizer.zero_grad()
        logits = model(images)
        # Targets are reshaped to a (batch, 1) column before the loss is computed.
        batch_loss = criterion(logits, labels.view(-1, 1))
        batch_loss.backward()
        optimizer.step()
        
def evaluate(data_loader, model, device):
    """Run the model over ``data_loader`` without gradient tracking.

    Args:
        data_loader: iterable of dict batches with ``"image"`` and
            ``"targets"`` tensors.
        model: network to evaluate; it is switched to eval mode.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(outputs, targets)`` of Python lists in loader order.
        ``outputs`` holds the raw model outputs (no activation is applied
        here) exactly as ``tolist()`` produces them per batch row;
        ``targets`` holds the batch targets converted to float.
    """
    model.eval()

    collected_outputs, collected_targets = [], []

    with torch.no_grad():
        for batch in tqdm(data_loader, position=0, leave=True, desc='Evaluating'):
            images = batch["image"].to(device, dtype=torch.float)
            labels = batch["targets"].to(device, dtype=torch.float)

            logits = model(images)

            # Move results back to host memory as plain Python lists.
            collected_targets += labels.detach().cpu().numpy().tolist()
            collected_outputs += logits.detach().cpu().numpy().tolist()

    return collected_outputs, collected_targets

In [None]:
import torch.nn as nn
import pretrainedmodels

def get_model(pretrained):
    """Create a ResNet-18 from ``pretrainedmodels`` with a single-output head.

    Args:
        pretrained: truthy to request the ``'imagenet'`` weights, falsy to
            start from freshly initialised weights.

    Returns:
        The model with ``last_linear`` replaced by a small MLP head
        (512 -> 1024 -> 1) that uses batch norm and dropout.
    """
    weights = 'imagenet' if pretrained else None
    model = pretrainedmodels.__dict__["resnet18"](pretrained=weights)

    head_layers = [
        nn.BatchNorm1d(512),
        nn.Dropout(p=0.25),
        nn.Linear(in_features=512, out_features=1024),
        nn.ReLU(),
        nn.BatchNorm1d(1024, eps=1e-05, momentum=0.1),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=1024, out_features=1),
    ]
    model.last_linear = nn.Sequential(*head_layers)

    return model

## Train

In [None]:
# Prefer the GPU, but fall back to the CPU so this cell still runs on a
# machine without CUDA instead of failing at model.to(device).
device = "cuda" if torch.cuda.is_available() else "cpu"

epochs = 5

images = train_labels.img_path.values

targets = train_labels.target.values

model = get_model(pretrained=False)
# Swap the first convolution for one that takes 6 input channels.
model.conv1 = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)

model.to(device)

# The ImageNet normalization below is kept for reference only: its statistics
# have 3 channels, while the model above takes 6-channel inputs.
# mean = (0.485, 0.456, 0.406)
# std = (0.229, 0.224, 0.225)

# aug = albumentations.Compose([albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)])
aug = None

# Stratified hold-out split (train_test_split's default test size), seeded
# for reproducibility.
train_images, valid_images, train_targets, valid_targets = train_test_split(images, targets, stratify=targets, random_state=42)

train_dataset = ClassificationDataset(image_paths=train_images,
                                      targets=train_targets,
                                      resize=(224, 224),
                                      augmentations=aug)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=16,
                                           shuffle=True,
                                           num_workers=4)

valid_dataset = ClassificationDataset(image_paths=valid_images,
                                      targets=valid_targets,
                                      resize=(224, 224),
                                      augmentations=aug)

# No shuffling for validation so predictions come back in dataset order.
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                           batch_size=16,
                                           shuffle=False,
                                           num_workers=4)

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

for epoch in range(epochs):
    train(train_loader, model, optimizer, device=device)
    # evaluate returns the targets in the order it iterated them; rebinding
    # valid_targets keeps them aligned with predictions for the metric.
    predictions, valid_targets = evaluate(valid_loader, model, device=device)
    roc_auc = metrics.roc_auc_score(valid_targets, predictions)
    print(f"Epoch={epoch}, Valid ROC AUC={roc_auc}")

In [None]:
# Persist only the model weights (state_dict); the file name records the
# configured number of epochs.
torch.save(model.state_dict(),f'resnet18_{epochs}.pt')

## Inference

In [None]:
# Load the sample submission and attach the path of each test .npy file,
# using a sub-directory named after the first character of the sample id.
submission = pd.read_csv('../input/seti-breakthrough-listen/sample_submission.csv')
submission['img_path'] = submission['id'].apply(lambda x: f'../input/seti-breakthrough-listen/test/{x[0]}/{x}.npy')

In [None]:
# Display the submission table, including the added img_path column.
submission

In [None]:
test_images = submission.img_path.values

# The dataset class requires targets, so the sample submission's target column
# is passed through as a placeholder.
dummy_targets = submission.target.values

# Same resize and augmentation settings as the training/validation datasets.
test_dataset = ClassificationDataset(image_paths=test_images,
                                     targets=dummy_targets,
                                     resize=(224, 224),
                                     augmentations=aug)

# shuffle=False keeps the loader order equal to the row order of `submission`.
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=16,
                                          shuffle=False,
                                          num_workers=4)

In [None]:
# Score the test set. evaluate also returns the targets it iterated over; for
# the test loader these are only placeholders, so they are bound to a
# throwaway name instead of overwriting valid_targets from the training cell.
predictions, _placeholder_targets = evaluate(test_loader, model, device=device)

In [None]:
# Min-max scale the raw model outputs so the smallest maps to 0 and the
# largest to 1.
predictions = np.array(predictions)

# Shift so the minimum is 0; the maximum is then the original value range.
predictions -= predictions.min()
predictions /= predictions.max()

In [None]:
# Store the scaled scores in the target column.
# NOTE(review): evaluate collects one list per sample, so `predictions` is
# presumably shaped (n, 1) here — confirm pandas accepts it as a single column.
submission.target = predictions

In [None]:
# Remove the helper path column added earlier so it is not written to the CSV.
submission.drop(['img_path'], axis=1, inplace=True)

In [None]:
# Write the submission file without the DataFrame index.
submission.to_csv('submission.csv', index=False)