In [1]:
import gc
import os
import time
import numpy as np
import pandas as pd
from glob import glob
import tqdm
import argparse
import tqdm
from collections import defaultdict, Counter
from PIL import Image
import cv2

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch import nn, cuda
from torch.autograd import Variable 
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import CenterCrop

from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR

# from efficientnet_pytorch import EfficientNet
import torchvision.models as models

In [2]:
import os
import math
import random
import numpy as np
import torch
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


In [3]:
from sklearn.metrics import f1_score

def mae(y_true, y_pred) :
    y_true, y_pred = np.array(y_true.detach().numpy()), np.array(y_pred.detach().numpy())
    y_true = y_true.reshape(1, -1)[0]
    y_pred = y_pred.reshape(1, -1)[0]
    over_threshold = y_true >= 0.1
    return np.mean(np.abs(y_true[over_threshold] - y_pred[over_threshold]))

def fscore(y_true, y_pred):
    y_true, y_pred = np.array(y_true.detach().numpy()), np.array(y_pred.detach().numpy())
    y_true = y_true.reshape(1, -1)[0]
    y_pred = y_pred.reshape(1, -1)[0]
    remove_NAs = y_true >= 0
    y_true = np.where(y_true[remove_NAs] >= 0.1, 1, 0)
    y_pred = np.where(y_pred[remove_NAs] >= 0.1, 1, 0)
    return(f1_score(y_true, y_pred))

def maeOverFscore(y_true, y_pred):
    return mae(y_true, y_pred) / (fscore(y_true, y_pred) + 1e-07)

### **File info**
**ex. subset_010462_01**
> **orbit 010462**

> **subset 01**

> **ortbit 별로 subset 개수는 다를 수 있고 연속적이지 않을 수도 있음**


In [4]:
tr_df = pd.read_csv("../D_WEATHER//input/train_df.csv")
te_df = pd.read_csv("../D_WEATHER/input/test_df.csv")
tr_df.head()

Unnamed: 0,path,orbit,orbit_subset
0,../D_WEATHER//input/train/subset_010462_01.npy,10462,1
1,../D_WEATHER//input/train/subset_010462_02.npy,10462,2
2,../D_WEATHER//input/train/subset_010462_03.npy,10462,3
3,../D_WEATHER//input/train/subset_010462_04.npy,10462,4
4,../D_WEATHER//input/train/subset_010462_05.npy,10462,5


In [5]:
train_df = tr_df[:int(len(tr_df)*0.967)]
valid_df = tr_df[int(len(tr_df)*0.967):]

train_df.shape, valid_df.shape

((73825, 3), (2520, 3))

In [6]:
class Weather_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        self.image_list = []
        self.label_list = []

        for file in self.df['path']:
            data = np.load(file)
            image = data[:,:,:9] # use 14 channels except target
            image = np.transpose(image, (2,0,1))
            image = image.astype(np.float32)
            self.image_list.append(image)
            
            label = data[:,:,-1].reshape(40,40,1)
            label = np.transpose(label, (2,0,1))
            self.label_list.append(label)
            
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        image = self.image_list[idx]
        label = self.label_list[idx]
        
        return image, label

In [7]:
# def worker_init(worker_id):
#     np.random.seed(SEED)

def build_dataloader(df, batch_size, shuffle=False):
    dataset = Weather_Dataset(df)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0,
#                             worker_init_fn=worker_init
    )
    return dataloader

def build_te_dataloader(df, batch_size, shuffle=False):
    dataset = Test_Dataset(df)
    dataloader = DataLoader(
                            dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=0,
#                             worker_init_fn=worker_init
                            )
    return dataloader

# Build Model

In [8]:
class UNetX2(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNetX2, self).__init__()
        self.n_channels = n_channels # 
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)
        self.up1 = Up(1024, 512, bilinear)
        self.up2 = Up(512, 256, bilinear)
        self.up3 = Up(256, 128, bilinear)
        self.up4 = Up(128, 64 * factor, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1) #64
        x3 = self.down2(x2) #128
        x4 = self.down3(x3) #256
        x5 = self.down4(x4) #512
        x = self.up1(x5, x4) #1024
        x = self.up2(x, x3) #512
        x = self.up3(x, x2) #256
        x = self.up4(x, x1) #128 - 64
        ####################
        x2 = self.down1(x) #64
        x3 = self.down2(x2) #128
        x4 = self.down3(x3) #256
        x5 = self.down4(x4) #512
        x = self.up1(x5, x4) #1024
        x = self.up2(x, x3) #512
        x = self.up3(x, x2) #256
        x = self.up4(x, x1) #128 - 64
        ###
        logits = self.outc(x) # 64
        return logits

In [9]:
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels // 2, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels , in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)


    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = torch.tensor([x2.size()[2] - x1.size()[2]])
        diffX = torch.tensor([x2.size()[3] - x1.size()[3]])

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        return self.conv(x)

In [10]:
lr = 0.001
batch_size = 512

In [11]:
train_loader = build_dataloader(train_df, batch_size, shuffle=True)
valid_loader = build_dataloader(valid_df, batch_size, shuffle=False)

In [12]:
SEED = 42
seed_everything(SEED)

device = 'cuda:0'
use_gpu = cuda.is_available()
if use_gpu:
    print("enable gpu use")
else:
    print("enable cpu for debugging")

model = UNetX2(n_channels=9, n_classes=1, bilinear=False) # if bilinear = True -> non deterministic : not recommended
model = model.to(device)

enable gpu use


In [13]:
from torchsummary import summary
summary(model, input_size=(9,40,40))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 40, 40]           5,248
       BatchNorm2d-2           [-1, 64, 40, 40]             128
              ReLU-3           [-1, 64, 40, 40]               0
            Conv2d-4           [-1, 64, 40, 40]          36,928
       BatchNorm2d-5           [-1, 64, 40, 40]             128
              ReLU-6           [-1, 64, 40, 40]               0
        DoubleConv-7           [-1, 64, 40, 40]               0
         MaxPool2d-8           [-1, 64, 20, 20]               0
            Conv2d-9          [-1, 128, 20, 20]          73,856
      BatchNorm2d-10          [-1, 128, 20, 20]             256
             ReLU-11          [-1, 128, 20, 20]               0
           Conv2d-12          [-1, 128, 20, 20]         147,584
      BatchNorm2d-13          [-1, 128, 20, 20]             256
             ReLU-14          [-1, 128,

In [14]:
optimizer = optim.Adam(model.parameters(), lr, weight_decay=0.00025)
# optimizer = AdamW(model.parameters(), 2.5e-4, weight_decay=0.000025)
#optimizer = optim.SGD(model.parameters(), args.lr, momentum=0.9, weight_decay=0.025)

###### SCHEDULER #######
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

#eta_min = 0.00001
#T_max = 10
#T_mult = 1
#restart_decay = 0.97
#scheduler = CosineAnnealingWithRestartsLR(optimizer, T_max=T_max, eta_min=eta_min, T_mult=T_mult, restart_decay=restart_decay)

#scheduler = StepLR(optimizer, step_size=5, gamma=0.5)

#criterion = nn.CrossEntropyLoss() 
criterion = nn.L1Loss()

def to_numpy(t):
    return t.cpu().detach().numpy()

best_mae_score = 999
best_f_score = 999
best_mof_score = 999
grad_clip_step = 100
grad_clip = 100
step = 0
# accumulation_step = 2
EPOCH = 200

model_fname = '../D_WEATHER/weight/unetx2_ch9_shuffle_ho0.967.pt'
# log file
log_df = pd.DataFrame(columns=['epoch_idx', 'train_loss', 'train_mae', 'train_fs', 'train_mof', 'valid_loss', 'valid_mae', 'valid_fs', 'valid_mof'])

print("start training")

for epoch_idx in range(1, EPOCH + 1):

    start_time = time.time()

    train_loss = 0
    train_mae = 0
    train_fs = 0
    train_mof = 0 
#     train_total_correct = 0
    model.train()
    optimizer.zero_grad()

    for batch_idx, (image, labels) in enumerate(train_loader):
        if use_gpu:
            image = image.to(device)
            labels = labels.to(device)

        output = model(image)
        loss = criterion(output, labels)
        mae_score = mae(labels.cpu(), output.cpu())
        f_score = fscore(labels.cpu(), output.cpu())
        mof_score = maeOverFscore(labels.cpu(), output.cpu())

        # gradient explosion prevention
        if step > grad_clip_step:
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

        step += 1

        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        train_loss += loss.item() / len(train_loader)
        train_mae += mae_score.item() / len(train_loader)
        train_fs += f_score.item() / len(train_loader)
        train_mof += mof_score.item() / len(train_loader)

    model.eval()
    valid_loss = 0
    valid_mae = 0
    valid_fs = 0
    valid_mof = 0

    with torch.no_grad():
        for batch_idx, (image, labels) in enumerate(valid_loader):
            if use_gpu:
                image = image.to(device)
                labels = labels.to(device)

            output = model(image)
            loss = criterion(output, labels)
            mae_score = mae(labels.cpu(), output.cpu())
            f_score = fscore(labels.cpu(), output.cpu())
            mof_score = maeOverFscore(labels.cpu(), output.cpu())

#             output_prob = F.sigmoid(output)

            predict_vector = to_numpy(output)

            valid_loss += loss.item() / len(valid_loader)
            valid_mae += mae_score.item() / len(valid_loader)
            valid_fs += f_score.item() / len(valid_loader)
            valid_mof += mof_score.item() / len(valid_loader)

    elapsed = time.time() - start_time

    # checkpoint
    if valid_mof < best_mof_score:
        best_mof_score = valid_mof
#         print("Improved !! ")
        torch.save(model.state_dict(), model_fname)
        print("================ ༼ つ ◕_◕ ༽つ BEST epoch : {}, MOF : {} ".format(epoch_idx, best_mof_score))
        #file_save_name = 'best_acc' + '_' + str(num_fold)
        #print(file_save_name)
#     else:
#         print("val acc has not improved")

    lr = [_['lr'] for _ in optimizer.param_groups]

    #if args.scheduler == 'plateau':
    scheduler.step(valid_mof)
    #else:
    #    scheduler.step()

    # nsml.save(epoch_idx)

    print("E {}/{} tr_loss: {:.5f} tr_mae: {:.5f} tr_fs: {:.5f} tr_mof: {:.5f} val_loss: {:.5f} val_mae: {:.5f} val_fs: {:.5f} val_mof: {:.5f} lr: {:.6f} elapsed: {:.0f}".format(
           epoch_idx, EPOCH, train_loss, train_mae, train_fs, train_mof, valid_loss, valid_mae, valid_fs, valid_mof, lr[0], elapsed))
            #epoch_idx, args.epochs, train_loss, valid_loss, val_acc, lr[0], elapsed
    # log file element
#     log = []
    log_data = [epoch_idx, train_loss, train_mae, train_fs, train_mof, valid_loss, valid_mae, valid_fs, valid_mof]
#     log.append(log_data)
    log_df.loc[epoch_idx] = log_data

start training
E 1/200 tr_loss: 44.74203 tr_mae: 1.87819 tr_fs: 0.53167 tr_mof: 4.14861 val_loss: 67.92583 val_mae: 1.44039 val_fs: 0.51176 val_mof: 2.81461 lr: 0.001000 elapsed: 154
E 2/200 tr_loss: 44.13590 tr_mae: 1.67777 tr_fs: 0.63269 tr_mof: 2.65587 val_loss: 67.91354 val_mae: 1.39849 val_fs: 0.55376 val_mof: 2.52459 lr: 0.001000 elapsed: 157
E 3/200 tr_loss: 43.80403 tr_mae: 1.57415 tr_fs: 0.66437 tr_mof: 2.37181 val_loss: 67.92707 val_mae: 1.72045 val_fs: 0.25551 val_mof: 7.00667 lr: 0.001000 elapsed: 155
E 4/200 tr_loss: 43.54637 tr_mae: 1.51016 tr_fs: 0.69131 tr_mof: 2.18604 val_loss: 68.17601 val_mae: 1.33588 val_fs: 0.23486 val_mof: 5.76153 lr: 0.001000 elapsed: 155
E 5/200 tr_loss: 44.12064 tr_mae: 1.48453 tr_fs: 0.69807 tr_mof: 2.12864 val_loss: 68.49149 val_mae: 1.32259 val_fs: 0.23923 val_mof: 5.63568 lr: 0.001000 elapsed: 155
E 6/200 tr_loss: 44.12463 tr_mae: 1.53192 tr_fs: 0.67229 tr_mof: 2.28763 val_loss: 68.49422 val_mae: 1.62105 val_fs: 0.12752 val_mof: 13.05534 lr

### Log

In [15]:
log_df.to_csv("../D_WEATHER/log/unetx2_ch9_shuffle_ho0.967.csv", index=False)

## Prediction

In [16]:
class Test_Dataset(Dataset):
    def __init__(self, df):
        self.df = df
        
        self.image_list = []
#         self.label_list = []

        for file in self.df['path']:
            data = np.load(file)
#             image = data[:,:,:]
            image = data[:,:,:9]#.reshape(40,40,-1)
            image = np.transpose(image, (2,0,1))
            image = image.astype(np.float32)
            self.image_list.append(image)
            
#             label = data[:,:,-1].reshape(-1)
#             self.label_list.append(label)
            
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        image = self.image_list[idx]
#         label = self.label_list[idx]
        
        return image#, label

In [17]:
test_loader = build_te_dataloader(te_df, batch_size, shuffle=False)

In [18]:
test_loader.dataset.df.shape

(2416, 3)

In [19]:
train_loader.dataset[0][0].shape

(9, 40, 40)

In [20]:
test_loader.dataset[0].shape

(9, 40, 40)

In [21]:
model.load_state_dict(torch.load(model_fname))
model.eval()
predictions = np.zeros((len(test_loader.dataset), 1600))
with torch.no_grad():
    for i, image in enumerate(test_loader):
        image = image.to(device)
        output = model(image)
        
        predictions[i*batch_size: (i+1)*batch_size] = output.detach().cpu().numpy().reshape(-1, 1600)
print("predict values check : ",predictions[0])

predict values check :  [-1.45988958e-03 -2.75963498e-03 -3.00486805e-04 ... -2.01881267e-06
  1.17579475e-06  4.57767674e-05]


In [22]:
predictions.shape

(2416, 1600)

In [23]:
predictions[0]

array([-1.45988958e-03, -2.75963498e-03, -3.00486805e-04, ...,
       -2.01881267e-06,  1.17579475e-06,  4.57767674e-05])

In [24]:
sub = pd.read_csv("../D_WEATHER/input/sample_submission.csv")

In [25]:
sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,029858_02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,029858_03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,029858_05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,029858_07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
sub.iloc[:,1:] = predictions

In [27]:
sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,-0.00146,-0.00276,-0.0003,6e-05,-0.000465,-0.003905,0.003824,0.00885,0.001254,...,1.5e-05,1.5e-05,1.3e-05,1.1e-05,1.3e-05,1.3e-05,1.5e-05,-2e-06,1e-06,4.6e-05
1,029858_02,-2.1e-05,4e-06,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.5e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,2e-05,1.1e-05,5e-06,1.6e-05
2,029858_03,1.4e-05,5.1e-05,0.001456,0.075137,0.07536,0.007247,0.000122,1.5e-05,1.5e-05,...,1.4e-05,1.4e-05,1.4e-05,1.3e-05,1.4e-05,1.3e-05,2.1e-05,1.3e-05,5e-06,1.2e-05
3,029858_05,-5.6e-05,-6e-06,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.3e-05,1.6e-05,1.1e-05,1e-05,1.7e-05
4,029858_07,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.545871,2.178585,1.545741,0.6424,0.609874,1.825723,4.686034,4.102203,2.479179,1.916396


In [28]:
sub.to_csv('../D_WEATHER/sub/unetx2_ch9_shuffle_ho0.967.csv', index = False)

In [29]:
new_sub = sub.copy()

In [30]:
for i in tqdm.tqdm(range(1,1601)):
    new_sub.loc[new_sub[new_sub.columns[i]]<0, new_sub.columns[i]] = 0

100%|██████████| 1600/1600 [00:01<00:00, 1332.72it/s]


In [31]:
sub.describe()

Unnamed: 0,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,px_10,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,...,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,0.104913,0.111849,0.117341,0.139682,0.154402,0.160311,0.155682,0.15867,0.150576,0.13643,...,0.141868,0.131865,0.137651,0.132617,0.124986,0.129675,0.127915,0.124093,0.134919,0.118554
std,0.709942,0.824579,0.825175,0.958694,1.209006,1.318852,1.250994,1.233046,1.189687,1.164012,...,0.804105,0.679419,0.76583,0.891037,0.956082,0.931358,0.851681,0.787558,0.947178,0.841641
min,-0.003111,-0.006097,-0.008363,-0.0201,-0.009402,-0.008624,-0.011399,-0.019055,-0.011426,-0.018408,...,-0.017196,-0.006861,-0.005564,-0.01001,-0.015025,-0.012802,-0.024759,-0.008956,-0.005438,-0.002782
25%,-3.3e-05,-2e-06,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.3e-05,1.2e-05,1.3e-05,5e-06,1.2e-05
50%,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.2e-05
75%,1.4e-05,1.5e-05,1.5e-05,1.6e-05,1.6e-05,1.5e-05,1.5e-05,1.5e-05,1.5e-05,1.5e-05,...,1.6e-05,1.6e-05,1.7e-05,1.6e-05,1.6e-05,1.6e-05,1.6e-05,3.9e-05,2.1e-05,3.2e-05
max,17.829464,24.760633,24.395767,19.312544,40.705914,44.983555,36.911903,35.060871,38.008114,32.354378,...,20.393888,9.897181,18.444706,22.834747,25.35231,21.755194,22.44739,19.4582,25.406208,25.454906


In [32]:
new_sub.describe()

Unnamed: 0,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,px_10,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
count,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,...,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0,2416.0
mean,0.104942,0.111881,0.117359,0.139719,0.154439,0.16034,0.155714,0.158745,0.15061,0.136497,...,0.141888,0.131883,0.137661,0.132635,0.125008,0.129701,0.127943,0.12412,0.134936,0.118565
std,0.709938,0.824574,0.825172,0.958689,1.209001,1.318849,1.25099,1.233036,1.189683,1.164004,...,0.804101,0.679416,0.765829,0.891035,0.956079,0.931354,0.851677,0.787554,0.947176,0.841639
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.3e-05,1.2e-05,1.3e-05,5e-06,1.2e-05
50%,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.2e-05
75%,1.4e-05,1.5e-05,1.5e-05,1.6e-05,1.6e-05,1.5e-05,1.5e-05,1.5e-05,1.5e-05,1.5e-05,...,1.6e-05,1.6e-05,1.7e-05,1.6e-05,1.6e-05,1.6e-05,1.6e-05,3.9e-05,2.1e-05,3.2e-05
max,17.829464,24.760633,24.395767,19.312544,40.705914,44.983555,36.911903,35.060871,38.008114,32.354378,...,20.393888,9.897181,18.444706,22.834747,25.35231,21.755194,22.44739,19.4582,25.406208,25.454906


In [33]:
new_sub.head()

Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.0,0.0,0.0,6e-05,0.0,0.0,0.003824,0.00885,0.001254,...,1.5e-05,1.5e-05,1.3e-05,1.1e-05,1.3e-05,1.3e-05,1.5e-05,0.0,1e-06,4.6e-05
1,029858_02,0.0,4e-06,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.5e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,2e-05,1.1e-05,5e-06,1.6e-05
2,029858_03,1.4e-05,5.1e-05,0.001456,0.075137,0.07536,0.007247,0.000122,1.5e-05,1.5e-05,...,1.4e-05,1.4e-05,1.4e-05,1.3e-05,1.4e-05,1.3e-05,2.1e-05,1.3e-05,5e-06,1.2e-05
3,029858_05,0.0,0.0,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.3e-05,1.6e-05,1.1e-05,1e-05,1.7e-05
4,029858_07,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,1.4e-05,...,1.545871,2.178585,1.545741,0.6424,0.609874,1.825723,4.686034,4.102203,2.479179,1.916396


In [34]:
new_sub.to_csv('../D_WEATHER/sub/unetx2_ch9_shuffle_ho0.967_postpro.csv', index = False)