In [1]:
import gc
import os
import time
import numpy as np
import pandas as pd
from glob import glob
import tqdm
import argparse
import tqdm
from collections import defaultdict, Counter
from PIL import Image
import cv2

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch import nn, cuda
from torch.autograd import Variable 
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import CenterCrop

from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

# from efficientnet_pytorch import EfficientNet
import torchvision.models as models

In [2]:
import os
import math
import random
import numpy as np
import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


In [3]:
from sklearn.metrics import f1_score

def mae(y_true, y_pred) :
    y_true, y_pred = np.array(y_true.detach().numpy()), np.array(y_pred.detach().numpy())
    y_true = y_true.reshape(1, -1)[0]
    y_pred = y_pred.reshape(1, -1)[0]
    over_threshold = y_true >= 0.1
    return np.mean(np.abs(y_true[over_threshold] - y_pred[over_threshold]))

def fscore(y_true, y_pred):
    y_true, y_pred = np.array(y_true.detach().numpy()), np.array(y_pred.detach().numpy())
    y_true = y_true.reshape(1, -1)[0]
    y_pred = y_pred.reshape(1, -1)[0]
    remove_NAs = y_true >= 0
    y_true = np.where(y_true[remove_NAs] >= 0.1, 1, 0)
    y_pred = np.where(y_pred[remove_NAs] >= 0.1, 1, 0)
    return(f1_score(y_true, y_pred))

def maeOverFscore(y_true, y_pred):
    return mae(y_true, y_pred) / (fscore(y_true, y_pred) + 1e-07)

### **File info**
**ex. subset_010462_01**
> **orbit 010462**

> **subset 01**

> **ortbit 별로 subset 개수는 다를 수 있고 연속적이지 않을 수도 있음**


In [4]:
tr_df = pd.read_csv("../D_WEATHER//input/train_df.csv")
te_df = pd.read_csv("../D_WEATHER/input/test_df.csv")
tr_df.head()

Unnamed: 0,path,orbit,orbit_subset
0,../D_WEATHER//input/train/subset_010462_01.npy,10462,1
1,../D_WEATHER//input/train/subset_010462_02.npy,10462,2
2,../D_WEATHER//input/train/subset_010462_03.npy,10462,3
3,../D_WEATHER//input/train/subset_010462_04.npy,10462,4
4,../D_WEATHER//input/train/subset_010462_05.npy,10462,5


In [5]:
train_df = tr_df[:int(len(tr_df)*0.8)]
valid_df = tr_df[int(len(tr_df)*0.8):]

In [6]:
train_df.shape, valid_df.shape

((61076, 3), (15269, 3))

In [7]:
def resize(image, size=(80, 80)):
    return cv2.resize(image, size)

class Weather_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        self.image_list = []
        self.label_list = []

        for file in self.df['path']:
            data = np.load(file)
            image = data[:,:,:9] # use 14 channels except target
            image = resize(image)
            image = np.transpose(image, (2,0,1))
            image = image.astype(np.float32)
            self.image_list.append(image)
            
            
            label = data[:,:,-1].reshape(40,40,1)
            label = np.transpose(label, (2,0,1))
            self.label_list.append(label)
            
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        image = self.image_list[idx]
        label = self.label_list[idx]
        
        return image, label

In [8]:
# def worker_init(worker_id):
#     np.random.seed(SEED)

def build_dataloader(df, batch_size, shuffle=False):
    dataset = Weather_Dataset(df)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0,
#                             worker_init_fn=worker_init
    )
    return dataloader

def build_te_dataloader(df, batch_size, shuffle=False):
    dataset = Test_Dataset(df)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0,
#                             worker_init_fn=worker_init
                            )
    return dataloader

# Build Model

In [9]:
class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels # 
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 1024)
        factor = 2 if bilinear else 1
        self.down5 = Down(1024, 2048 // factor)
        
        self.up1 = Up(2048, 1024, bilinear)
        self.up2 = Up(1024, 512, bilinear)
        self.up3 = Up(512, 256, bilinear)
        self.up4 = Up(256, 128, bilinear)
        self.up5 = Up(128, 64 * factor, bilinear)
        self.outc = OutConv(64, n_classes)
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x6 = self.down5(x5)
        
        x = self.up1(x6, x5)
        x = self.up2(x, x4)
        x = self.up3(x, x3)
        x = self.up4(x, x2)
        x = self.up5(x, x1)
        x = self.pool(x)
        logits = self.outc(x)
        return logits

In [10]:
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels // 2, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels , in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)


    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
        diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        return self.conv(x)

In [11]:
lr = 0.001
batch_size = 128

In [12]:
%%time
train_loader = build_dataloader(train_df, batch_size, shuffle=True)
valid_loader = build_dataloader(valid_df, batch_size, shuffle=False)

CPU times: user 32.7 s, sys: 13.7 s, total: 46.4 s
Wall time: 4min 19s


In [89]:
SEED = 42
seed_everything(SEED)

device = 'cuda:0'
use_gpu = cuda.is_available()
if use_gpu:
    print("enable gpu use")
else:
    print("enable cpu for debugging")

model = UNet(n_channels=9, n_classes=1, bilinear=False) # if bilinear = True -> non deterministic : not recommended
model = model.to(device)

enable gpu use


In [90]:
from torchsummary import summary
summary(model, input_size=(9,80,80))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 80, 80]           5,248
       BatchNorm2d-2           [-1, 64, 80, 80]             128
              ReLU-3           [-1, 64, 80, 80]               0
            Conv2d-4           [-1, 64, 80, 80]          36,928
       BatchNorm2d-5           [-1, 64, 80, 80]             128
              ReLU-6           [-1, 64, 80, 80]               0
        DoubleConv-7           [-1, 64, 80, 80]               0
         MaxPool2d-8           [-1, 64, 40, 40]               0
            Conv2d-9          [-1, 128, 40, 40]          73,856
      BatchNorm2d-10          [-1, 128, 40, 40]             256
             ReLU-11          [-1, 128, 40, 40]               0
           Conv2d-12          [-1, 128, 40, 40]         147,584
      BatchNorm2d-13          [-1, 128, 40, 40]             256
             ReLU-14          [-1, 128,

In [91]:
optimizer = optim.Adam(model.parameters(), lr, weight_decay=0.00025)
# optimizer = AdamW(model.parameters(), 2.5e-4, weight_decay=0.000025)
#optimizer = optim.SGD(model.parameters(), args.lr, momentum=0.9, weight_decay=0.025)

###### SCHEDULER #######
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

#eta_min = 0.00001
#T_max = 10
#T_mult = 1
#restart_decay = 0.97
#scheduler = CosineAnnealingWithRestartsLR(optimizer, T_max=T_max, eta_min=eta_min, T_mult=T_mult, restart_decay=restart_decay)

#scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

#criterion = nn.CrossEntropyLoss() 
criterion = nn.L1Loss()

def to_numpy(t):
    return t.cpu().detach().numpy()

best_mae_score = 999
best_f_score = 999
best_mof_score = 999
grad_clip_step = 100
grad_clip = 100
step = 0
# accumulation_step = 2
EPOCH = 200

model_fname = '../D_WEATHER/weight/unet_ch9_shuffle_80_addlayer.pt'
# log file
log_df = pd.DataFrame(columns=['epoch_idx', 'train_loss', 'train_mae', 'train_fs', 'train_mof', 'valid_loss', 'valid_mae', 'valid_fs', 'valid_mof'])

print("start training")

for epoch_idx in range(1, EPOCH + 1):

    start_time = time.time()

    train_loss = 0
    train_mae = 0
    train_fs = 0
    train_mof = 0 
#     train_total_correct = 0
    model.train()
    optimizer.zero_grad()

    for batch_idx, (image, labels) in enumerate(train_loader):
        if use_gpu:
            image = image.to(device)
            labels = labels.to(device)

        output = model(image)
        loss = criterion(output, labels)
        mae_score = mae(labels.cpu(), output.cpu())
        f_score = fscore(labels.cpu(), output.cpu())
        mof_score = maeOverFscore(labels.cpu(), output.cpu())

        # gradient explosion prevention
        if step > grad_clip_step:
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

        step += 1

        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        train_loss += loss.item() / len(train_loader)
        train_mae += mae_score.item() / len(train_loader)
        train_fs += f_score.item() / len(train_loader)
        train_mof += mof_score.item() / len(train_loader)

    model.eval()
    valid_loss = 0
    valid_mae = 0
    valid_fs = 0
    valid_mof = 0

    with torch.no_grad():
        for batch_idx, (image, labels) in enumerate(valid_loader):
            if use_gpu:
                image = image.to(device)
                labels = labels.to(device)

            output = model(image)
            loss = criterion(output, labels)
            mae_score = mae(labels.cpu(), output.cpu())
            f_score = fscore(labels.cpu(), output.cpu())
            mof_score = maeOverFscore(labels.cpu(), output.cpu())

#             output_prob = F.sigmoid(output)

            predict_vector = to_numpy(output)

            valid_loss += loss.item() / len(valid_loader)
            valid_mae += mae_score.item() / len(valid_loader)
            valid_fs += f_score.item() / len(valid_loader)
            valid_mof += mof_score.item() / len(valid_loader)

    elapsed = time.time() - start_time

    # checkpoint
    if valid_mof < best_mof_score:
        best_mof_score = valid_mof
#         print("Improved !! ")
        torch.save(model.state_dict(), model_fname)
        print("================ ༼ つ ◕_◕ ༽つ BEST epoch : {}, MOF : {} ".format(epoch_idx, best_mof_score))
        #file_save_name = 'best_acc' + '_' + str(num_fold)
        #print(file_save_name)
#     else:
#         print("val acc has not improved")

    lr = [_['lr'] for _ in optimizer.param_groups]

    #if args.scheduler == 'plateau':
    scheduler.step(valid_mof)
    #else:
    #    scheduler.step()

    # nsml.save(epoch_idx)

    print("E {}/{} tr_loss: {:.5f} tr_mae: {:.5f} tr_fs: {:.5f} tr_mof: {:.5f} val_loss: {:.5f} val_mae: {:.5f} val_fs: {:.5f} val_mof: {:.5f} lr: {:.6f} elapsed: {:.0f}".format(
           epoch_idx, EPOCH, train_loss, train_mae, train_fs, train_mof, valid_loss, valid_mae, valid_fs, valid_mof, lr[0], elapsed))
            #epoch_idx, args.epochs, train_loss, valid_loss, val_acc, lr[0], elapsed
    # log file element
#     log = []
    log_data = [epoch_idx, train_loss, train_mae, train_fs, train_mof, valid_loss, valid_mae, valid_fs, valid_mof]
#     log.append(log_data)
    log_df.loc[epoch_idx] = log_data

start training
E 1/200 tr_loss: 42.64174 tr_mae: 1.71046 tr_fs: 0.57175 tr_mof: 3.34741 val_loss: 52.44971 val_mae: 1.78854 val_fs: 0.24516 val_mof: 8.21263 lr: 0.001000 elapsed: 306
E 2/200 tr_loss: 42.61876 tr_mae: 1.53132 tr_fs: 0.65577 tr_mof: 2.34345 val_loss: 52.19124 val_mae: 1.65210 val_fs: 0.29952 val_mof: 5.90350 lr: 0.001000 elapsed: 307
E 3/200 tr_loss: 42.61553 tr_mae: 1.49910 tr_fs: 0.66749 tr_mof: 2.25218 val_loss: 52.06011 val_mae: 1.67151 val_fs: 0.63720 val_mof: 2.61673 lr: 0.001000 elapsed: 305
E 4/200 tr_loss: 42.61275 tr_mae: 1.46271 tr_fs: 0.68183 tr_mof: 2.15095 val_loss: 52.04921 val_mae: 1.70354 val_fs: 0.64347 val_mof: 2.64899 lr: 0.001000 elapsed: 304
E 5/200 tr_loss: 42.61196 tr_mae: 1.44434 tr_fs: 0.68936 tr_mof: 2.09799 val_loss: 52.05238 val_mae: 1.80031 val_fs: 0.54599 val_mof: 3.36902 lr: 0.001000 elapsed: 302
E 6/200 tr_loss: 42.61010 tr_mae: 1.42125 tr_fs: 0.70317 tr_mof: 2.02196 val_loss: 52.04537 val_mae: 1.69844 val_fs: 0.59079 val_mof: 2.92217 lr:

KeyboardInterrupt: 

### Log

In [92]:
log_df.to_csv("../D_WEATHER/log/unet_ch9_shuffle_80_addlayer.csv", index=False)

## Prediction

In [93]:
class Test_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        self.image_list = []
#         self.label_list = []

        for file in self.df['path']:
            data = np.load(file)
#             image = data[:,:,:]
            image = data[:,:,:9]#.reshape(40,40,-1)
            image = resize(image)
            image = np.transpose(image, (2,0,1))
            image = image.astype(np.float32)
            self.image_list.append(image)
            
#             label = data[:,:,-1].reshape(-1)
#             self.label_list.append(label)
            
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        image = self.image_list[idx]
#         label = self.label_list[idx]
        
        return image#, label

In [94]:
test_loader = build_te_dataloader(te_df, batch_size, shuffle=False)

In [95]:
test_loader.dataset.df.shape

(2416, 3)

In [96]:
train_loader.dataset[0][0].shape

(9, 80, 80)

In [97]:
test_loader.dataset[0].shape

(9, 80, 80)

In [98]:
model.load_state_dict(torch.load(model_fname))
model.eval()
predictions = np.zeros((len(test_loader.dataset), 1600))
with torch.no_grad():
    for i, image in enumerate(test_loader):
        image = image.to(device)
        output = model(image)
        
        predictions[i*batch_size: (i+1)*batch_size] = output.detach().cpu().numpy().reshape(-1, 1600)
print("predict values check : ",predictions[0])

predict values check :  [-1.05879700e-03 -5.70502423e-04  2.64364062e-04 ...  1.41910368e-06
  1.41910368e-06  1.41910368e-06]


In [99]:
predictions.shape

(2416, 1600)

In [100]:
predictions[0]

array([-1.05879700e-03, -5.70502423e-04,  2.64364062e-04, ...,
        1.41910368e-06,  1.41910368e-06,  1.41910368e-06])

In [101]:
sub = pd.read_csv("../D_WEATHER/input/sample_submission.csv")

In [102]:
sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,029858_02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,029858_03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,029858_05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,029858_07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [103]:
sub.iloc[:,1:] = predictions

In [104]:
sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,-0.001059,-0.000571,0.000264,-9.9e-05,-0.000539,0.00049,-0.00044,6.1e-05,1.9e-05,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
1,029858_02,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
2,029858_03,1e-06,1e-06,0.00248,0.194166,0.190048,-0.003235,-3.5e-05,1e-06,2e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
3,029858_05,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
4,029858_07,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1.412889,1.795902,1.728144,1.068403,1.261538,2.285024,5.272021,4.55189,2.911235,1.786888


In [105]:
sub.to_csv('../D_WEATHER/sub/unet_ch9_shuffle_80_addlayer.csv', index = False)

In [106]:
new_sub = sub.copy()

In [107]:
for i in tqdm.tqdm(range(1,1601)):
    new_sub.loc[new_sub[new_sub.columns[i]]<0, new_sub.columns[i]] = 0

100%|██████████| 1600/1600 [00:01<00:00, 1348.72it/s]


In [108]:
sub.describe()

Unnamed: 0,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,px_10,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,...,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,0.087703,0.098287,0.115198,0.130252,0.142543,0.152179,0.14445,0.145486,0.140736,0.137137,...,0.136138,0.123536,0.127918,0.118868,0.111941,0.117313,0.11739,0.115735,0.118482,0.109388
std,0.550729,0.638534,0.730233,0.84101,0.995037,1.174311,1.101458,1.083037,1.126396,1.319877,...,0.789923,0.626655,0.671476,0.723412,0.719121,0.761327,0.688016,0.652214,0.729394,0.691027
min,-0.012213,-0.006044,-0.00632,-0.007172,-0.010134,-0.004591,-0.007038,-0.007392,-0.00643,-0.006304,...,-0.004905,-0.006951,-0.004216,-0.006556,-0.007774,-0.007544,-0.011743,-0.008487,-0.013675,-0.003485
25%,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
50%,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
75%,1e-06,1e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
max,10.572797,15.632951,16.070831,19.92112,28.26049,39.819275,32.074749,33.545818,34.654499,40.028915,...,19.898088,10.025855,13.49853,19.613789,17.500278,16.960773,16.343311,13.875289,16.494083,15.824573


In [109]:
new_sub.describe()

Unnamed: 0,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,px_10,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,...,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,0.087726,0.098296,0.115222,0.130266,0.142561,0.152202,0.144467,0.1455,0.14076,0.137155,...,0.136152,0.123548,0.127929,0.118877,0.111967,0.117328,0.117407,0.115754,0.118499,0.109392
std,0.550726,0.638532,0.730229,0.841008,0.995034,1.174308,1.101456,1.083036,1.126393,1.319875,...,0.78992,0.626653,0.671474,0.72341,0.719117,0.761325,0.688012,0.65221,0.729391,0.691026
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
50%,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
75%,1e-06,1e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,2e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
max,10.572797,15.632951,16.070831,19.92112,28.26049,39.819275,32.074749,33.545818,34.654499,40.028915,...,19.898088,10.025855,13.49853,19.613789,17.500278,16.960773,16.343311,13.875289,16.494083,15.824573


In [110]:
new_sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.0,0.0,0.000264,0.0,0.0,0.00049,0.0,6.1e-05,1.9e-05,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
1,029858_02,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
2,029858_03,1e-06,1e-06,0.00248,0.194166,0.190048,0.0,0.0,1e-06,2e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
3,029858_05,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06
4,029858_07,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,...,1.412889,1.795902,1.728144,1.068403,1.261538,2.285024,5.272021,4.55189,2.911235,1.786888


In [111]:
new_sub.to_csv('../D_WEATHER/sub/unet_ch9_shuffle_80_addlayer_postpro.csv', index = False)