In [1]:
import gc
import os
import time
import numpy as np
import pandas as pd
from glob import glob
import tqdm
import argparse
import tqdm
from collections import defaultdict, Counter
from PIL import Image
import cv2

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch import nn, cuda
from torch.autograd import Variable 
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import CenterCrop

from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

# from efficientnet_pytorch import EfficientNet
import torchvision.models as models

In [2]:
import os
import math
import random
import numpy as np
import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


In [3]:
from sklearn.metrics import f1_score

def mae(y_true, y_pred) :
    y_true, y_pred = np.array(y_true.detach().numpy()), np.array(y_pred.detach().numpy())
    y_true = y_true.reshape(1, -1)[0]
    y_pred = y_pred.reshape(1, -1)[0]
    over_threshold = y_true >= 0.1
    return np.mean(np.abs(y_true[over_threshold] - y_pred[over_threshold]))

def fscore(y_true, y_pred):
    y_true, y_pred = np.array(y_true.detach().numpy()), np.array(y_pred.detach().numpy())
    y_true = y_true.reshape(1, -1)[0]
    y_pred = y_pred.reshape(1, -1)[0]
    remove_NAs = y_true >= 0
    y_true = np.where(y_true[remove_NAs] >= 0.1, 1, 0)
    y_pred = np.where(y_pred[remove_NAs] >= 0.1, 1, 0)
    return(f1_score(y_true, y_pred))

def maeOverFscore(y_true, y_pred):
    return mae(y_true, y_pred) / (fscore(y_true, y_pred) + 1e-07)

### **File info**
**ex. subset_010462_01**
> **orbit 010462**

> **subset 01**

> **ortbit 별로 subset 개수는 다를 수 있고 연속적이지 않을 수도 있음**


In [4]:
tr_df = pd.read_csv("../D_WEATHER//input/train_df.csv")
te_df = pd.read_csv("../D_WEATHER/input/test_df.csv")
tr_df.head()

Unnamed: 0,path,orbit,orbit_subset
0,../D_WEATHER//input/train/subset_010462_01.npy,10462,1
1,../D_WEATHER//input/train/subset_010462_02.npy,10462,2
2,../D_WEATHER//input/train/subset_010462_03.npy,10462,3
3,../D_WEATHER//input/train/subset_010462_04.npy,10462,4
4,../D_WEATHER//input/train/subset_010462_05.npy,10462,5


In [5]:
ids = tr_df['orbit'].value_counts()
unseen = list(ids[ids<4].index)

train_df = tr_df[~tr_df['orbit'].isin(unseen)]
valid_df = tr_df[tr_df['orbit'].isin(unseen)]

# train_df = tr_df[:int(len(tr_df)*0.8)]
# valid_df = tr_df[int(len(tr_df)*0.8):]

train_df.shape, valid_df.shape

((73113, 3), (3232, 3))

In [6]:
class Weather_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        self.image_list = []
        self.label_list = []

        for file in self.df['path']:
            data = np.load(file)
            image = data[:,:,:9] # use 14 channels except target
            image = np.transpose(image, (2,0,1))
            image = image.astype(np.float32)
            self.image_list.append(image)
            
            label = data[:,:,-1].reshape(40,40,1)
            label = np.transpose(label, (2,0,1))
            self.label_list.append(label)
            
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        image = self.image_list[idx]
        label = self.label_list[idx]
        
        return image, label

In [7]:
# def worker_init(worker_id):
#     np.random.seed(SEED)

def build_dataloader(df, batch_size, shuffle=False):
    dataset = Weather_Dataset(df)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0,
#                             worker_init_fn=worker_init
    )
    return dataloader

def build_te_dataloader(df, batch_size, shuffle=False):
    dataset = Test_Dataset(df)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0,
#                             worker_init_fn=worker_init
                            )
    return dataloader

# Build Model

In [8]:
class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels # 
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512, bilinear)
        self.up2 = Up(512, 256, bilinear)
        self.up3 = Up(256, 128, bilinear)
        self.up4 = Up(128, 64 * factor, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

In [9]:
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels // 2, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels , in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)


    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
        diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        return self.conv(x)

In [10]:
lr = 0.001
batch_size = 512

In [11]:
train_loader = build_dataloader(train_df, batch_size, shuffle=True)
valid_loader = build_dataloader(valid_df, batch_size, shuffle=False)

In [12]:
SEED = 42
seed_everything(SEED)

device = 'cuda:0'
use_gpu = cuda.is_available()
if use_gpu:
    print("enable gpu use")
else:
    print("enable cpu for debugging")

model = UNet(n_channels=9, n_classes=1, bilinear=False) # if bilinear = True -> non deterministic : not recommended
model = model.to(device)

optimizer = optim.Adam(model.parameters(), lr, weight_decay=0.00025)
# optimizer = AdamW(model.parameters(), 2.5e-4, weight_decay=0.000025)
#optimizer = optim.SGD(model.parameters(), args.lr, momentum=0.9, weight_decay=0.025)

###### SCHEDULER #######
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

#eta_min = 0.00001
#T_max = 10
#T_mult = 1
#restart_decay = 0.97
#scheduler = CosineAnnealingWithRestartsLR(optimizer, T_max=T_max, eta_min=eta_min, T_mult=T_mult, restart_decay=restart_decay)

#scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

#criterion = nn.CrossEntropyLoss() 
criterion = nn.L1Loss()

def to_numpy(t):
    return t.cpu().detach().numpy()

best_mae_score = 999
best_f_score = 999
best_mof_score = 999
grad_clip_step = 100
grad_clip = 100
step = 0
# accumulation_step = 2
EPOCH = 200

model_fname = '../D_WEATHER/weight/unet_ch9_shuffle_unseen_v1.pt'
# log file
log_df = pd.DataFrame(columns=['epoch_idx', 'train_loss', 'train_mae', 'train_fs', 'train_mof', 'valid_loss', 'valid_mae', 'valid_fs', 'valid_mof'])

print("start training")

for epoch_idx in range(1, EPOCH + 1):

    start_time = time.time()

    train_loss = 0
    train_mae = 0
    train_fs = 0
    train_mof = 0 
#     train_total_correct = 0
    model.train()
    optimizer.zero_grad()

    for batch_idx, (image, labels) in enumerate(train_loader):
        if use_gpu:
            image = image.to(device)
            labels = labels.to(device)

        output = model(image)
        loss = criterion(output, labels)
        mae_score = mae(labels.cpu(), output.cpu())
        f_score = fscore(labels.cpu(), output.cpu())
        mof_score = maeOverFscore(labels.cpu(), output.cpu())

        # gradient explosion prevention
        if step > grad_clip_step:
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

        step += 1

        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        train_loss += loss.item() / len(train_loader)
        train_mae += mae_score.item() / len(train_loader)
        train_fs += f_score.item() / len(train_loader)
        train_mof += mof_score.item() / len(train_loader)

    model.eval()
    valid_loss = 0
    valid_mae = 0
    valid_fs = 0
    valid_mof = 0

    with torch.no_grad():
        for batch_idx, (image, labels) in enumerate(valid_loader):
            if use_gpu:
                image = image.to(device)
                labels = labels.to(device)

            output = model(image)
            loss = criterion(output, labels)
            mae_score = mae(labels.cpu(), output.cpu())
            f_score = fscore(labels.cpu(), output.cpu())
            mof_score = maeOverFscore(labels.cpu(), output.cpu())

#             output_prob = F.sigmoid(output)

            predict_vector = to_numpy(output)

            valid_loss += loss.item() / len(valid_loader)
            valid_mae += mae_score.item() / len(valid_loader)
            valid_fs += f_score.item() / len(valid_loader)
            valid_mof += mof_score.item() / len(valid_loader)

    elapsed = time.time() - start_time

    # checkpoint
    if valid_mof < best_mof_score:
        best_mof_score = valid_mof
#         print("Improved !! ")
        torch.save(model.state_dict(), model_fname)
        print("================ ༼ つ ◕_◕ ༽つ BEST epoch : {}, MOF : {} ".format(epoch_idx, best_mof_score))
        #file_save_name = 'best_acc' + '_' + str(num_fold)
        #print(file_save_name)
#     else:
#         print("val acc has not improved")

    lr = [_['lr'] for _ in optimizer.param_groups]

    #if args.scheduler == 'plateau':
    scheduler.step(valid_mof)
    #else:
    #    scheduler.step()

    # nsml.save(epoch_idx)

    print("E {}/{} tr_loss: {:.5f} tr_mae: {:.5f} tr_fs: {:.5f} tr_mof: {:.5f} val_loss: {:.5f} val_mae: {:.5f} val_fs: {:.5f} val_mof: {:.5f} lr: {:.6f} elapsed: {:.0f}".format(
           epoch_idx, EPOCH, train_loss, train_mae, train_fs, train_mof, valid_loss, valid_mae, valid_fs, valid_mof, lr[0], elapsed))
            #epoch_idx, args.epochs, train_loss, valid_loss, val_acc, lr[0], elapsed
    # log file element
#     log = []
    log_data = [epoch_idx, train_loss, train_mae, train_fs, train_mof, valid_loss, valid_mae, valid_fs, valid_mof]
#     log.append(log_data)
    log_df.loc[epoch_idx] = log_data

enable gpu use
start training
E 1/200 tr_loss: 45.12921 tr_mae: 1.87091 tr_fs: 0.51683 tr_mof: 4.41580 val_loss: 30.29279 val_mae: 1.45559 val_fs: 0.43190 val_mof: 3.61002 lr: 0.001000 elapsed: 107
E 2/200 tr_loss: 45.05886 tr_mae: 1.64468 tr_fs: 0.64756 tr_mof: 2.54328 val_loss: 30.29034 val_mae: 1.40138 val_fs: 0.44739 val_mof: 3.24813 lr: 0.001000 elapsed: 107
E 3/200 tr_loss: 45.12081 tr_mae: 1.55962 tr_fs: 0.67502 tr_mof: 2.31370 val_loss: 30.32665 val_mae: 1.55309 val_fs: 0.33596 val_mof: 4.93530 lr: 0.001000 elapsed: 107
E 4/200 tr_loss: 45.18470 tr_mae: 1.50610 tr_fs: 0.69420 tr_mof: 2.17106 val_loss: 30.28037 val_mae: 1.31290 val_fs: 0.53114 val_mof: 2.57914 lr: 0.001000 elapsed: 107
E 5/200 tr_loss: 45.14270 tr_mae: 1.48812 tr_fs: 0.69576 tr_mof: 2.14053 val_loss: 32.05016 val_mae: 2.25307 val_fs: 0.15553 val_mof: 16.20737 lr: 0.001000 elapsed: 107
E 6/200 tr_loss: 45.07951 tr_mae: 1.48402 tr_fs: 0.69521 tr_mof: 2.13691 val_loss: 30.28964 val_mae: 1.44256 val_fs: 0.44078 val_

KeyboardInterrupt: 

In [13]:
log_df.tail()

Unnamed: 0,epoch_idx,train_loss,train_mae,train_fs,train_mof,valid_loss,valid_mae,valid_fs,valid_mof
109,109.0,45.200485,1.264625,0.779711,1.621797,30.263653,1.04253,0.733071,1.426718
110,110.0,45.131319,1.266962,0.779548,1.625037,30.263702,1.041367,0.733929,1.423325
111,111.0,45.09692,1.26574,0.779596,1.623522,30.263761,1.042779,0.732582,1.428084
112,112.0,45.028127,1.266425,0.779643,1.62446,30.263668,1.041878,0.733625,1.42464
113,113.0,45.096929,1.26623,0.779458,1.624235,30.263756,1.04112,0.732753,1.425474


### Log

In [14]:
log_df.to_csv("../D_WEATHER/log/unet_ch9_shuffle_unseen_v1.csv", index=False)

## Prediction

In [15]:
class Test_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        self.image_list = []
#         self.label_list = []

        for file in self.df['path']:
            data = np.load(file)
#             image = data[:,:,:]
            image = data[:,:,:9]#.reshape(40,40,-1)
            image = np.transpose(image, (2,0,1))
            image = image.astype(np.float32)
            self.image_list.append(image)
            
#             label = data[:,:,-1].reshape(-1)
#             self.label_list.append(label)
            
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        image = self.image_list[idx]
#         label = self.label_list[idx]
        
        return image#, label

In [16]:
test_loader = build_te_dataloader(te_df, batch_size, shuffle=False)

In [17]:
test_loader.dataset.df.shape

(2416, 3)

In [18]:
train_loader.dataset[0][0].shape

(9, 40, 40)

In [19]:
test_loader.dataset[0].shape

(9, 40, 40)

In [20]:
model.load_state_dict(torch.load(model_fname))
model.eval()
predictions = np.zeros((len(test_loader.dataset), 1600))
with torch.no_grad():
    for i, image in enumerate(test_loader):
        image = image.to(device)
        output = model(image)
        
        predictions[i*batch_size: (i+1)*batch_size] = output.detach().cpu().numpy().reshape(-1, 1600)
print("predict values check : ",predictions[0])

predict values check :  [ 6.95499068e-04  4.37678816e-03  5.14530344e-03 ... -8.45150589e-06
 -8.45150589e-06 -8.45150589e-06]


In [21]:
predictions.shape

(2416, 1600)

In [22]:
predictions[0]

array([ 6.95499068e-04,  4.37678816e-03,  5.14530344e-03, ...,
       -8.45150589e-06, -8.45150589e-06, -8.45150589e-06])

In [23]:
sub = pd.read_csv("../D_WEATHER/input/sample_submission.csv")

In [24]:
sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,029858_02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,029858_03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,029858_05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,029858_07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
sub.iloc[:,1:] = predictions

In [26]:
sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.000695,0.004377,0.005145,-6.1e-05,0.000293,0.001819,0.025605,0.023118,0.004287,...,-8e-06,-8e-06,0.000431,0.000636,0.000571,0.000727,0.000125,-8e-06,-8e-06,-8e-06
1,029858_02,-8e-06,-8e-06,0.000166,0.00049,0.000136,-8e-06,-8e-06,-8e-06,-8e-06,...,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06
2,029858_03,-8e-06,-0.000329,0.010504,0.15992,0.056207,-0.001732,-7.6e-05,-8e-06,-8e-06,...,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06
3,029858_05,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,...,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06
4,029858_07,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,...,1.71863,2.531649,1.663102,0.815817,0.862955,1.987553,5.634937,6.0039,2.731457,2.417581


In [27]:
sub.to_csv('../D_WEATHER/sub/unet_ch9_shuffle_unseen_v1.csv', index = False)

In [28]:
new_sub = sub.copy()

In [29]:
for i in tqdm.tqdm(range(1,1601)):
    new_sub.loc[new_sub[new_sub.columns[i]]<0, new_sub.columns[i]] = 0

100%|██████████| 1600/1600 [00:01<00:00, 1351.26it/s]


In [30]:
sub.describe()

Unnamed: 0,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,px_10,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,...,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,0.086599,0.110633,0.111881,0.132918,0.150624,0.158935,0.151734,0.154366,0.145773,0.134241,...,0.134724,0.125238,0.125947,0.120677,0.113612,0.119149,0.121848,0.120337,0.116552,0.098733
std,0.597793,0.7166,0.699595,0.825594,1.046199,1.189576,1.165583,1.180978,1.115131,1.077176,...,0.764317,0.653997,0.69118,0.766282,0.794808,0.835127,0.780684,0.728848,0.775481,0.604362
min,-0.026584,-0.011954,-0.04281,-0.077522,-0.13563,-0.165187,-0.191611,-0.145327,-0.172685,-0.194533,...,-0.01868,-0.019657,-0.020539,-0.010358,-0.033393,-0.014808,-0.013639,-0.021965,-0.016827,-0.010658
25%,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,...,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06
50%,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,...,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06
75%,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,...,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06,-8e-06
max,12.445879,13.825877,14.45206,16.526499,30.715355,38.28035,31.871758,35.959183,37.322262,31.481152,...,17.993885,11.759321,13.366447,16.802965,20.870436,19.221441,20.625252,17.121483,18.810265,13.138242


In [31]:
new_sub.describe()

Unnamed: 0,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,px_10,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,...,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,0.086669,0.110686,0.111978,0.133046,0.150787,0.159073,0.151891,0.154512,0.145905,0.134417,...,0.134779,0.125327,0.126021,0.120734,0.113719,0.119225,0.121947,0.120419,0.116623,0.09878
std,0.597783,0.716591,0.699579,0.825572,1.046172,1.189552,1.165556,1.180955,1.115108,1.077147,...,0.764307,0.653979,0.691166,0.766273,0.794792,0.835116,0.780668,0.728834,0.775471,0.604354
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,12.445879,13.825877,14.45206,16.526499,30.715355,38.28035,31.871758,35.959183,37.322262,31.481152,...,17.993885,11.759321,13.366447,16.802965,20.870436,19.221441,20.625252,17.121483,18.810265,13.138242


In [32]:
new_sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.000695,0.004377,0.005145,0.0,0.000293,0.001819,0.025605,0.023118,0.004287,...,0.0,0.0,0.000431,0.000636,0.000571,0.000727,0.000125,0.0,0.0,0.0
1,029858_02,0.0,0.0,0.000166,0.00049,0.000136,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,029858_03,0.0,0.0,0.010504,0.15992,0.056207,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,029858_05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,029858_07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.71863,2.531649,1.663102,0.815817,0.862955,1.987553,5.634937,6.0039,2.731457,2.417581


In [33]:
new_sub.to_csv('../D_WEATHER/sub/unet_ch9_shuffle_unseen_v1_postpro.csv', index = False)