In [1]:
import gc
import os
import time
import numpy as np
import pandas as pd
from glob import glob
import tqdm
import argparse
import tqdm
from collections import defaultdict, Counter
from PIL import Image
import cv2

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch import nn, cuda
from torch.autograd import Variable 
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import CenterCrop

from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

# from efficientnet_pytorch import EfficientNet
import torchvision.models as models

In [2]:
import os
import math
import random
import numpy as np
import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


In [3]:
from sklearn.metrics import f1_score

def mae(y_true, y_pred) :
    y_true, y_pred = np.array(y_true.detach().numpy()), np.array(y_pred.detach().numpy())
    y_true = y_true.reshape(1, -1)[0]
    y_pred = y_pred.reshape(1, -1)[0]
    over_threshold = y_true >= 0.1
    return np.mean(np.abs(y_true[over_threshold] - y_pred[over_threshold]))

def fscore(y_true, y_pred):
    y_true, y_pred = np.array(y_true.detach().numpy()), np.array(y_pred.detach().numpy())
    y_true = y_true.reshape(1, -1)[0]
    y_pred = y_pred.reshape(1, -1)[0]
    remove_NAs = y_true >= 0
    y_true = np.where(y_true[remove_NAs] >= 0.1, 1, 0)
    y_pred = np.where(y_pred[remove_NAs] >= 0.1, 1, 0)
    return(f1_score(y_true, y_pred))

def maeOverFscore(y_true, y_pred):
    return mae(y_true, y_pred) / (fscore(y_true, y_pred) + 1e-07)

### **File info**
**ex. subset_010462_01**
> **orbit 010462**

> **subset 01**

> **ortbit 별로 subset 개수는 다를 수 있고 연속적이지 않을 수도 있음**


In [4]:
tr_df = pd.read_csv("../D_WEATHER//input/train_df.csv")
te_df = pd.read_csv("../D_WEATHER/input/test_df.csv")
tr_df.head()

Unnamed: 0,path,orbit,orbit_subset
0,../D_WEATHER//input/train/subset_010462_01.npy,10462,1
1,../D_WEATHER//input/train/subset_010462_02.npy,10462,2
2,../D_WEATHER//input/train/subset_010462_03.npy,10462,3
3,../D_WEATHER//input/train/subset_010462_04.npy,10462,4
4,../D_WEATHER//input/train/subset_010462_05.npy,10462,5


In [5]:
train_df = tr_df[:int(len(tr_df)*0.8)]
valid_df = tr_df[int(len(tr_df)*0.8):]

train_df.shape, valid_df.shape

((61076, 3), (15269, 3))

In [6]:
class Weather_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        self.image_list = []
        self.label_list = []

        for file in self.df['path']:
            data = np.load(file)
            image = data[:,:,:9] # use 14 channels except target
            image = np.transpose(image, (2,0,1))
            image = image.astype(np.float32)
            self.image_list.append(image)
            
            label = data[:,:,-1].reshape(40,40,1)
            label = np.transpose(label, (2,0,1))
            self.label_list.append(label)
            
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        image = self.image_list[idx]
        label = self.label_list[idx]
        
        return image, label

In [7]:
# def worker_init(worker_id):
#     np.random.seed(SEED)

def build_dataloader(df, batch_size, shuffle=False):
    dataset = Weather_Dataset(df)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0,
#                             worker_init_fn=worker_init
    )
    return dataloader

def build_te_dataloader(df, batch_size, shuffle=False):
    dataset = Test_Dataset(df)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0,
#                             worker_init_fn=worker_init
                            )
    return dataloader

# Build Model

In [8]:
class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels # 
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512, bilinear)
        self.up2 = Up(512, 256, bilinear)
        self.up3 = Up(256, 128, bilinear)
        self.up4 = Up(128, 64 * factor, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

In [9]:
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels // 2, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels , in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)


    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
        diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        return self.conv(x)

In [10]:
lr = 0.001
batch_size = 512

In [11]:
train_loader = build_dataloader(tr_df, batch_size, shuffle=True)
# valid_loader = build_dataloader(valid_df, batch_size, shuffle=False)

In [12]:
SEED = 42
seed_everything(SEED)

device = 'cuda:0'
use_gpu = cuda.is_available()
if use_gpu:
    print("enable gpu use")
else:
    print("enable cpu for debugging")

model = UNet(n_channels=9, n_classes=1, bilinear=False) # if bilinear = True -> non deterministic : not recommended
model = model.to(device)

enable gpu use


In [13]:
from torchsummary import summary
summary(model , input_size=(9,40,40))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 40, 40]           5,248
       BatchNorm2d-2           [-1, 64, 40, 40]             128
              ReLU-3           [-1, 64, 40, 40]               0
            Conv2d-4           [-1, 64, 40, 40]          36,928
       BatchNorm2d-5           [-1, 64, 40, 40]             128
              ReLU-6           [-1, 64, 40, 40]               0
        DoubleConv-7           [-1, 64, 40, 40]               0
         MaxPool2d-8           [-1, 64, 20, 20]               0
            Conv2d-9          [-1, 128, 20, 20]          73,856
      BatchNorm2d-10          [-1, 128, 20, 20]             256
             ReLU-11          [-1, 128, 20, 20]               0
           Conv2d-12          [-1, 128, 20, 20]         147,584
      BatchNorm2d-13          [-1, 128, 20, 20]             256
             ReLU-14          [-1, 128,

In [14]:
# optimizer = optim.Adam(model.parameters(), lr, weight_decay=0.00025)
# optimizer = AdamW(model.parameters(), 2.5e-4, weight_decay=0.000025)
optimizer = optim.SGD(model.parameters(), lr, momentum=0.9, weight_decay=0.025)

###### SCHEDULER #######
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

#eta_min = 0.00001
#T_max = 10
#T_mult = 1
#restart_decay = 0.97
#scheduler = CosineAnnealingWithRestartsLR(optimizer, T_max=T_max, eta_min=eta_min, T_mult=T_mult, restart_decay=restart_decay)

#scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

#criterion = nn.CrossEntropyLoss() 
criterion = nn.L1Loss()

def to_numpy(t):
    return t.cpu().detach().numpy()

best_mae_score = 999
best_f_score = 999
best_mof_score = 999
grad_clip_step = 100
grad_clip = 100
step = 0
# accumulation_step = 2
EPOCH = 200

model_fname = '../D_WEATHER/weight/unet_ch9_shuffle_trainall_sgd.pt'
# log file
log_df = pd.DataFrame(columns=['epoch_idx', 'train_loss', 'train_mae', 'train_fs', 'train_mof'])

print("start training")

for epoch_idx in range(1, EPOCH + 1):

    start_time = time.time()

    train_loss = 0
    train_mae = 0
    train_fs = 0
    train_mof = 0 
#     train_total_correct = 0
    model.train()
    optimizer.zero_grad()

    for batch_idx, (image, labels) in enumerate(train_loader):
        if use_gpu:
            image = image.to(device)
            labels = labels.to(device)

        output = model(image)
        loss = criterion(output, labels)
        mae_score = mae(labels.cpu(), output.cpu())
        f_score = fscore(labels.cpu(), output.cpu())
        mof_score = maeOverFscore(labels.cpu(), output.cpu())

        # gradient explosion prevention
        if step > grad_clip_step:
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

        step += 1

        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        train_loss += loss.item() / len(train_loader)
        train_mae += mae_score.item() / len(train_loader)
        train_fs += f_score.item() / len(train_loader)
        train_mof += mof_score.item() / len(train_loader)

    # checkpoint
    if train_mof < best_mof_score:
        best_mof_score = train_mof
#         print("Improved !! ")
        torch.save(model.state_dict(), model_fname)
        print("================ ༼ つ ◕_◕ ༽つ BEST epoch : {}, MOF : {} ".format(epoch_idx, best_mof_score))
        #file_save_name = 'best_acc' + '_' + str(num_fold)
        #print(file_save_name)
#     else:
#         print("val acc has not improved")
    elapsed = time.time() - start_time
    lr = [_['lr'] for _ in optimizer.param_groups]

    #if args.scheduler == 'plateau':
    scheduler.step(train_mof)
    
    print("E {}/{} tr_loss: {:.5f} tr_mae: {:.5f} tr_fs: {:.5f} tr_mof: {:.5f} lr: {:.6f} elapsed: {:.0f}".format(
           epoch_idx, EPOCH, train_loss, train_mae, train_fs, train_mof, lr[0], elapsed))
            #epoch_idx, args.epochs, train_loss, valid_loss, val_acc, lr[0], elapsed
    # log file element
#     log = []
    log_data = [epoch_idx, train_loss, train_mae, train_fs, train_mof]
#     log.append(log_data)
    log_df.loc[epoch_idx] = log_data

start training
E 1/200 tr_loss: 46.51867 tr_mae: 1.99645 tr_fs: 0.38995 tr_mof: 5.99492 lr: 0.001000 elapsed: 111
E 2/200 tr_loss: 44.40150 tr_mae: 1.81878 tr_fs: 0.55660 tr_mof: 3.29276 lr: 0.001000 elapsed: 112
E 3/200 tr_loss: 44.39545 tr_mae: 1.76572 tr_fs: 0.59264 tr_mof: 2.99097 lr: 0.001000 elapsed: 111
E 4/200 tr_loss: 44.39138 tr_mae: 1.73282 tr_fs: 0.61324 tr_mof: 2.84189 lr: 0.001000 elapsed: 110
E 5/200 tr_loss: 44.95878 tr_mae: 1.70860 tr_fs: 0.63049 tr_mof: 2.72577 lr: 0.001000 elapsed: 110
E 6/200 tr_loss: 45.42279 tr_mae: 1.68703 tr_fs: 0.64493 tr_mof: 2.61846 lr: 0.001000 elapsed: 110
E 7/200 tr_loss: 44.37979 tr_mae: 1.66674 tr_fs: 0.65433 tr_mof: 2.54733 lr: 0.001000 elapsed: 110
E 8/200 tr_loss: 44.37779 tr_mae: 1.65033 tr_fs: 0.65655 tr_mof: 2.51594 lr: 0.001000 elapsed: 110
E 9/200 tr_loss: 44.37756 tr_mae: 1.63040 tr_fs: 0.66121 tr_mof: 2.46647 lr: 0.001000 elapsed: 110
E 10/200 tr_loss: 44.37219 tr_mae: 1.61517 tr_fs: 0.66844 tr_mof: 2.41765 lr: 0.001000 elapsed

### Log

In [15]:
log_df.to_csv("../D_WEATHER/log/unet_ch9_shuffle_trainall_sgd.csv", index=False)

## Prediction

In [16]:
class Test_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        self.image_list = []
#         self.label_list = []

        for file in self.df['path']:
            data = np.load(file)
#             image = data[:,:,:]
            image = data[:,:,:9]#.reshape(40,40,-1)
            image = np.transpose(image, (2,0,1))
            image = image.astype(np.float32)
            self.image_list.append(image)
            
#             label = data[:,:,-1].reshape(-1)
#             self.label_list.append(label)
            
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        image = self.image_list[idx]
#         label = self.label_list[idx]
        
        return image#, label

In [17]:
test_loader = build_te_dataloader(te_df, batch_size, shuffle=False)

In [18]:
test_loader.dataset.df.shape

(2416, 3)

In [19]:
train_loader.dataset[0][0].shape

(9, 40, 40)

In [20]:
test_loader.dataset[0].shape

(9, 40, 40)

In [21]:
model.load_state_dict(torch.load(model_fname))
model.eval()
predictions = np.zeros((len(test_loader.dataset), 1600))
with torch.no_grad():
    for i, image in enumerate(test_loader):
        image = image.to(device)
        output = model(image)
        
        predictions[i*batch_size: (i+1)*batch_size] = output.detach().cpu().numpy().reshape(-1, 1600)
print("predict values check : ",predictions[0])

predict values check :  [ 1.42045435e-04 -1.66748092e-03 -3.56543064e-03 ...  2.98402647e-05
  8.95622907e-06 -2.74365830e-05]


In [22]:
predictions.shape

(2416, 1600)

In [23]:
predictions[0]

array([ 1.42045435e-04, -1.66748092e-03, -3.56543064e-03, ...,
        2.98402647e-05,  8.95622907e-06, -2.74365830e-05])

In [24]:
sub = pd.read_csv("../D_WEATHER/input/sample_submission.csv")

In [25]:
sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,029858_02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,029858_03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,029858_05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,029858_07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
sub.iloc[:,1:] = predictions

In [27]:
sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.0001420454,-0.001667481,-0.003565431,-0.001482815,0.002971598,0.01805135,0.0208311,0.01291,0.001236481,...,-5e-06,-4e-06,-3e-06,-2e-06,-1e-06,-9.692004e-07,-2e-06,3e-05,9e-06,-2.7e-05
1,029858_02,-6.313867e-07,3.748894e-06,-2.570984e-06,-2.502276e-06,-1.541049e-07,1.230757e-06,-3.127893e-07,-2e-06,-1.131683e-07,...,-4e-06,-5e-06,-4e-06,-4e-06,-4e-06,-2.549684e-06,-1e-06,3.7e-05,8e-06,-2.9e-05
2,029858_03,2.840601e-06,4.038839e-06,-3.212639e-05,0.001005665,1.169641e-05,-0.001232316,-0.0001662385,1.4e-05,-2.79661e-06,...,-7e-06,-6e-06,-5e-06,-5e-06,-5e-06,-4.840686e-06,-3e-06,4.2e-05,-1e-06,-4.4e-05
3,029858_05,3.829724e-06,6.457018e-06,-1.508675e-07,2.777665e-08,3.525572e-06,4.301689e-06,6.308465e-06,5e-06,3.379578e-06,...,-6e-06,-6e-06,-6e-06,-6e-06,-5e-06,-3.18204e-06,-2e-06,3.4e-05,1.2e-05,-2.8e-05
4,029858_07,-2.082747e-06,-6.772109e-08,-5.581309e-06,2.675309e-06,9.427983e-07,3.519585e-07,-9.359679e-07,-2e-06,-1.133617e-06,...,1.691117,2.034608,1.726876,1.293826,1.495271,2.457459,3.490596,3.771606,3.020589,1.592311


In [28]:
sub.to_csv('../D_WEATHER/sub/unet_ch9_shuffle_trainall_sgd.csv', index = False)

In [29]:
new_sub = sub.copy()

In [30]:
for i in tqdm.tqdm(range(1,1601)):
    new_sub.loc[new_sub[new_sub.columns[i]]<0, new_sub.columns[i]] = 0

100%|██████████| 1600/1600 [00:01<00:00, 1336.26it/s]


In [31]:
sub.describe()

Unnamed: 0,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,px_10,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,...,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,0.05559341,0.08875949,0.09809333,0.107009,0.11284,0.1116722,0.1072851,0.1036032,0.1004457,0.09757449,...,0.1015955,0.09986147,0.09565565,0.08982704,0.08504089,0.08358944,0.084432,0.088947,0.087128,0.057302
std,0.310465,0.5024365,0.566161,0.607298,0.627617,0.6153193,0.5870121,0.572203,0.5694665,0.5549266,...,0.5036964,0.4871227,0.4753048,0.4594024,0.4432956,0.4419767,0.448452,0.456458,0.425732,0.272556
min,-0.001643382,-0.00443458,-0.004922745,-0.007161,-0.005236,-0.00737753,-0.01017762,-0.006293891,-0.008222144,-0.006106488,...,-0.008168703,-0.008157551,-0.0120322,-0.01532285,-0.01106421,-0.01594329,-0.017979,-0.014005,-0.005273,-0.0039
25%,-9.45612e-07,-3.403284e-07,-5.035908e-06,-2e-06,-2e-06,-8.228396e-07,-8.551439e-07,-8.390618e-07,-6.641594e-07,-7.745552e-07,...,-4.857403e-06,-4.74788e-06,-4.624227e-06,-4.688958e-06,-4.701681e-06,-3.941757e-06,-6e-06,2.7e-05,-5e-06,-3.6e-05
50%,2.906165e-06,4.838827e-06,-1.578589e-07,3e-06,5e-06,5.17019e-06,5.070242e-06,5.09308e-06,5.11847e-06,5.319169e-06,...,3.355035e-08,1.797968e-07,2.258598e-07,1.017508e-07,1.97525e-07,6.587668e-07,-2e-06,3.4e-05,9e-06,-2.7e-05
75%,5.104125e-06,7.994816e-06,5.492681e-06,8e-06,1.2e-05,1.102107e-05,1.092714e-05,1.104188e-05,1.111188e-05,1.126517e-05,...,4.396179e-06,4.536681e-06,4.534815e-06,4.563564e-06,4.458083e-06,4.575743e-06,3e-06,3.8e-05,2.2e-05,-1.4e-05
max,3.710314,6.214237,7.453029,9.106943,8.183431,8.330743,8.737432,9.534902,9.696298,10.10775,...,7.304361,6.38633,5.869792,6.304834,7.08817,7.578837,7.628572,7.20071,5.871171,3.314133


In [32]:
new_sub.describe()

Unnamed: 0,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,px_10,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,...,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,0.055601,0.088781,0.098126,0.107047,0.11287,0.111705,0.107324,0.103639,0.100494,0.097617,...,0.1016247,0.09988687,0.09568436,0.08986553,0.08508066,0.08363517,0.08449,0.088991,0.087169,0.057344
std,0.310464,0.502433,0.566155,0.607292,0.627612,0.615313,0.587005,0.572196,0.569458,0.554919,...,0.5036904,0.4871174,0.4752989,0.4593947,0.4432878,0.4419678,0.44844,0.456449,0.425724,0.272547
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.7e-05,0.0,0.0
50%,3e-06,5e-06,0.0,3e-06,5e-06,5e-06,5e-06,5e-06,5e-06,5e-06,...,3.355035e-08,1.797968e-07,2.258598e-07,1.017508e-07,1.97525e-07,6.587668e-07,0.0,3.4e-05,9e-06,0.0
75%,5e-06,8e-06,5e-06,8e-06,1.2e-05,1.1e-05,1.1e-05,1.1e-05,1.1e-05,1.1e-05,...,4.396179e-06,4.536681e-06,4.534815e-06,4.563564e-06,4.458083e-06,4.575743e-06,3e-06,3.8e-05,2.2e-05,0.0
max,3.710314,6.214237,7.453029,9.106943,8.183431,8.330743,8.737432,9.534902,9.696298,10.107747,...,7.304361,6.38633,5.869792,6.304834,7.08817,7.578837,7.628572,7.20071,5.871171,3.314133


In [33]:
new_sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.000142,0.0,0.0,0.0,0.002971598,0.01805135,0.020831,0.01291,0.001236,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3e-05,9e-06,0.0
1,029858_02,0.0,4e-06,0.0,0.0,0.0,1.230757e-06,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.7e-05,8e-06,0.0
2,029858_03,3e-06,4e-06,0.0,0.001005665,1.169641e-05,0.0,0.0,1.4e-05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.2e-05,0.0,0.0
3,029858_05,4e-06,6e-06,0.0,2.777665e-08,3.525572e-06,4.301689e-06,6e-06,5e-06,3e-06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.4e-05,1.2e-05,0.0
4,029858_07,0.0,0.0,0.0,2.675309e-06,9.427983e-07,3.519585e-07,0.0,0.0,0.0,...,1.691117,2.034608,1.726876,1.293826,1.495271,2.457459,3.490596,3.771606,3.020589,1.592311


In [34]:
new_sub.to_csv('../D_WEATHER/sub/unet_ch9_shuffle_trainall_sgd_postpro.csv', index = False)