In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
import torch
import time
import copy
import sys
sys.path.append(r'../input/pytorch-image-models/pytorch-image-models-master/')
import timm
import torch
import torch.nn as nn
import random
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, ReduceLROnPlateau
from sklearn.metrics import mean_absolute_error
from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
from tqdm import tqdm

In [2]:
# ====================================================
# CFG
# ====================================================
class CFG:
    """Notebook-wide settings, read everywhere as class attributes (``CFG.<name>``).

    Several entries (optimiser, scheduler, early stopping, logging) are not
    referenced by the inference code in this notebook; presumably they are
    carried over from the matching training notebook -- confirm before removing.
    """

    # --- Paths ---
    OUTPUT_DIR = './'
    DATA_ROOT = '../input/1st-reproduce-mel-spectrogram/test'

    # --- Model ---
    model_name = 'seresnet152d'  # alternative left by the author: 'resnext50_32x4d'
    target_size = 1

    # --- Data / loader ---
    seed = 22
    split_ratio = 0.33
    num_workers = 1
    batch_size = 32
    target_col = 'time_to_eruption'

    # --- Optimisation ---
    epochs = 300
    EARLY_STOP = True
    early_stop = 10
    lr = 5e-1
    min_lr = 1e-5
    weight_decay = 1e-6
    gradient_accumulation_steps = 1
    max_grad_norm = 1000

    # --- LR schedule ---
    scheduler = 'ReduceLROnPlateau'  # alternative left by the author: 'CosineAnnealingWarmRestarts'
    T_0 = 10    # for warm start
    T_max = 10  # for normal cosine

    # --- Logging ---
    print_freq = 20

    # --- Hardware: resolved once, when the class body is executed ---
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and configures
    cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects hash randomisation of Python processes started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without CUDA; covers every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may select different kernels from run to run, which
    # defeats `deterministic = True`; it has to be off for reproducible results.
    torch.backends.cudnn.benchmark = False

In [4]:
# Fix all RNG seeds up front, using the seed configured in CFG.
seed_everything(seed=CFG.seed)

In [5]:
# Mean of the training target; `inference` multiplies the raw model outputs by
# this value (presumably the model was trained on target / mean -- confirm
# against the training notebook).
time_to_eruption_mean = (
    pd.read_csv('../input/predict-volcanic-eruptions-ingv-oe/train.csv')['time_to_eruption']
    .mean()
)

In [6]:
class MyResNet(nn.Module):
    """timm backbone with a 10-channel input stem and a linear output head.

    Args:
        target_size: Number of output units of the final linear layer.
        model_name: Architecture name passed to ``timm.create_model``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, target_size, model_name, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Replace the stem with a single 7x7 convolution that accepts 10 input channels.
        backbone.conv1 = nn.Conv2d(
            10, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
        )
        # Swap the classifier for a head sized to this task's number of outputs.
        backbone.fc = nn.Linear(backbone.fc.in_features, target_size)
        # Attribute name must stay `model`: checkpoint keys are prefixed with it.
        self.model = backbone

    def forward(self, x):
        """Run the wrapped backbone on ``x`` and return its output unchanged."""
        return self.model(x)

In [7]:
# The sample submission supplies the segment ids to predict for, in submission order.
test = pd.read_csv('../input/predict-volcanic-eruptions-ingv-oe/sample_submission.csv')

In [8]:
class INGVDataset(Dataset):
    """Dataset that loads one ``<segment_id>-spec.npz`` array per DataFrame row.

    Args:
        df: DataFrame with a ``segment_id`` column, plus a ``time_to_eruption``
            column when ``output_label`` is true.
        data_root: Directory containing the ``<segment_id>-spec.npz`` files.
        transforms: Stored on the instance but not applied by ``__getitem__``.
        output_label: When true, items are ``(image, target)`` pairs; otherwise
            only the image tensor is returned.
    """

    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        self.df = df
        self.data_root = data_root
        self.transforms = transforms
        self.output_label = output_label

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, index: int):
        """Return the sample at positional ``index``.

        Rows are looked up by position (``iloc``), because samplers hand out
        positions in ``range(len(self))``; label-based lookup fails on frames
        whose index is not the default 0..n-1 (e.g. after a split or filter).
        """
        segment_id = self.df['segment_id'].iloc[index]
        path = os.path.join(self.data_root, str(segment_id) + '-spec.npz')

        # Close the archive once the array is materialised so file handles do
        # not accumulate across samples.
        # Original note gave the array shape as (10, 256, 256) -- not checked here.
        with np.load(path) as archive:
            img = torch.tensor(archive['arr_0'].astype('float32'))

        if not self.output_label:
            return img

        # Single-element float32 target tensor of shape (1,).
        target = [float(self.df['time_to_eruption'].iloc[index])]
        return img, torch.tensor(target, dtype=torch.float32)

In [9]:
# Restore the trained weights. `map_location` remaps the stored tensors onto
# CFG.device, so a checkpoint saved on a GPU still loads on a CPU-only machine.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
check_point = torch.load(
    r'../input/1st-reproduce-seresnet-train/seresnet152d_best.pth',
    map_location=CFG.device,
)
model = MyResNet(CFG.target_size, CFG.model_name, False)
# The checkpoint is a dict; the network's state_dict is stored under 'model'.
model.load_state_dict(check_point['model'])
model.to(CFG.device)

MyResNet(
  (model): ResNet(
    (conv1): Conv2d(10, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (

In [10]:
def inference(model, test):
    """Predict for every row of ``test`` and return the rescaled outputs.

    Args:
        model: Network to evaluate; it is switched to eval mode here.
        test: DataFrame handed to ``INGVDataset`` (labels are not read).

    Returns:
        NumPy array with the per-batch model outputs concatenated along the
        first axis, each multiplied by ``time_to_eruption_mean``.
    """
    test_dataset = INGVDataset(test, CFG.DATA_ROOT, output_label=False)
    # shuffle=False keeps predictions aligned with the row order of `test`.
    test_loader = DataLoader(
        test_dataset,
        batch_size=CFG.batch_size,
        num_workers=CFG.num_workers,
        shuffle=False,
    )
    model.eval()
    preds = []
    # No gradients are needed for prediction; disabling autograd avoids building
    # (and holding in memory) a graph for every forward pass.
    with torch.no_grad():
        for image in tqdm(test_loader):
            image = image.to(CFG.device)
            preds.append(model(image).to('cpu').numpy() * time_to_eruption_mean)

    return np.concatenate(preds)

In [11]:
# Run the restored model over every test segment.
preds = inference(model=model, test=test)

100%|██████████| 142/142 [01:40<00:00,  1.42it/s]


In [12]:
# The model has a single output unit, so `preds` has shape (n_rows, 1); flatten it
# so the column receives a plain 1-D vector instead of a single-column 2-D array.
test['time_to_eruption'] = np.asarray(preds).reshape(-1)

In [13]:
# Write the submission file without the DataFrame index column.
test.to_csv('submission.csv', index=False)