# Libraries import 

In [1]:
import random
import os
import math
from typing import List

import pickle
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
from torchsummary import summary


# Set parameters

In [2]:
CFG = {
    "epochs": 3000,
    "batch_size": 16,
    "lr": 1e-6,
    "generator_hidden_size": [16, 8],
    "discriminator_hidden_size": [8],
    "alpha": 0.9,
}


# Seed hold

In [3]:
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    # torch.backends.cudnn.deterministic = True  # type: ignore # 속도 이슈
    # torch.backends.cudnn.benchmark = False  # type: ignore # 속도 이슈


seed = 42
seed_everything(seed)

# Data load

In [4]:
# load pickle
with open("train_list.pkl", "rb") as f:
    train_list = pickle.load(f)
with open("test_list.pkl", "rb") as f:
    test_list = pickle.load(f)

In [5]:
train_list[0].shape


(432, 7)

In [6]:
# for i in range(len(train_list)):
#     train_list[i] = train_list[i] * 10
#     test_list[i] = test_list[i] * 10


# Dataset class

In [7]:
# set device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class CustomDataset(Dataset):
    def __init__(self, dataset):  # 초기 데이터 생성 방법을 지정
        self.dataset = dataset

    def __len__(self):  # 데이터의 전체 길이
        return len(self.dataset)

    def __getitem__(self, idx):
        self.data = self.dataset[idx]
        return torch.tensor(self.data, dtype=torch.float32).to(device)
        # "y":torch.tensor(data, dtype=torch.float32)

# Model

In [8]:
class Generator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Generator, self).__init__()

        self.fc1 = nn.Linear(input_size, hidden_size[0])
        self.fc2 = nn.Linear(hidden_size[0], hidden_size[1])
        self.fc3 = nn.Linear(hidden_size[1], hidden_size[0])
        self.fc4 = nn.Linear(hidden_size[0], output_size)
        self.layer_norm_hidden_0 = nn.LayerNorm(hidden_size[0])
        self.layer_norm_hidden_1 = nn.LayerNorm(hidden_size[1])
        self.rrelu = nn.RReLU()
        self.drop = nn.Dropout(0.0)

        self.apply(self._init_weights)

    def forward(self, x):
        x = self.rrelu(self.fc1(x))
        x = self.drop(x)
        x = self.layer_norm_hidden_0(x)
        x = self.rrelu(self.fc2(x))
        x = self.drop(x)
        x = self.layer_norm_hidden_1(x)
        x = self.rrelu(self.fc3(x))
        x = self.drop(x)
        x = self.layer_norm_hidden_0(x)
        x = self.fc4(x)
        return x

    def _init_weights(self, module):
        for layer in module.modules():
            if isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight)


class Discriminator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Discriminator, self).__init__()

        self.fc1 = nn.Linear(input_size, hidden_size[0])
        self.fc2 = nn.Linear(hidden_size[0], output_size)
        self.layer_norm_hidden_0 = nn.LayerNorm(hidden_size[0])
        self.rrelu = nn.RReLU()
        self.sigmoid = nn.Sigmoid()
        self.drop = nn.Dropout(0.0)

        self.apply(self._init_weights)

    def forward(self, x):
        x = self.rrelu(self.fc1(x))
        x = self.drop(x)
        x = self.layer_norm_hidden_0(x)
        x = self.fc2(x)
        # x = self.sigmoid(x)
        return x

    def _init_weights(self, module):
        for layer in module.modules():
            if isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(layer.weight)

# Util

## FocalLoss

In [9]:
class FocalLoss(nn.Module):
    def __init__(self, gamma=2, alpha=0.25):
        super(FocalLoss, self).__init__()
        self.loss_fn = nn.BCEWithLogitsLoss()
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = self.loss_fn.reduction  # mean, sum, etc..

    def forward(self, pred, true):
        bceloss = self.loss_fn(pred, true)

        pred_prob = torch.sigmoid(
            pred
        )  # p  pt는 p가 true 이면 pt = p / false 이면 pt = 1 - p
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)  # add balance
        modulating_factor = torch.abs(true - pred_prob) ** self.gamma  # focal term
        loss = alpha_factor * modulating_factor * bceloss  # bceloss에 이미 음수가 들어가 있음

        if self.reduction == "mean":
            return loss.mean()

        elif self.reduction == "sum":
            return loss.sum()

        else:  # 'none'
            return loss


## Earlystopping

In [10]:
class EarlyStopping:
    def __init__(
        self, patience=5, delta=0, checkpoint_path="checkpoint.pth", device="cpu"
    ):
        self.patience = patience
        self.delta = delta
        self.checkpoint_path = checkpoint_path
        self.device = device
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, val_loss, model):
        score = -val_loss
        # score = val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        torch.save(
            {"model_state_dict": model.state_dict(), "val_loss": val_loss},
            self.checkpoint_path,
        )

    def load_checkpoint(self, model):
        checkpoint = torch.load(self.checkpoint_path, map_location=self.device)
        model.load_state_dict(checkpoint["model_state_dict"])
        return checkpoint["val_loss"]

## Cosineannealingwarmrestarts|

In [11]:
class CosineAnnealingWarmUpRestarts(_LRScheduler):
    def __init__(
        self, optimizer, T_0, T_mult=1, eta_max=0.1, T_up=0, gamma=1.0, last_epoch=-1
    ):
        if T_0 <= 0 or not isinstance(T_0, int):
            raise ValueError("Expected positive integer T_0, but got {}".format(T_0))
        if T_mult < 1 or not isinstance(T_mult, int):
            raise ValueError("Expected integer T_mult >= 1, but got {}".format(T_mult))
        if T_up < 0 or not isinstance(T_up, int):
            raise ValueError("Expected positive integer T_up, but got {}".format(T_up))
        self.T_0 = T_0
        self.T_mult = T_mult
        self.base_eta_max = eta_max
        self.eta_max = eta_max
        self.T_up = T_up
        self.T_i = T_0
        self.gamma = gamma
        self.cycle = 0
        self.T_cur = last_epoch
        super(CosineAnnealingWarmUpRestarts, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.T_cur == -1:
            return self.base_lrs
        elif self.T_cur < self.T_up:
            return [
                (self.eta_max - base_lr) * self.T_cur / self.T_up + base_lr
                for base_lr in self.base_lrs
            ]
        else:
            return [
                base_lr
                + (self.eta_max - base_lr)
                * (
                    1
                    + math.cos(
                        math.pi * (self.T_cur - self.T_up) / (self.T_i - self.T_up)
                    )
                )
                / 2
                for base_lr in self.base_lrs
            ]

    def step(self, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
            self.T_cur = self.T_cur + 1
            if self.T_cur >= self.T_i:
                self.cycle += 1
                self.T_cur = self.T_cur - self.T_i
                self.T_i = (self.T_i - self.T_up) * self.T_mult + self.T_up
        else:
            if epoch >= self.T_0:
                if self.T_mult == 1:
                    self.T_cur = epoch % self.T_0
                    self.cycle = epoch // self.T_0
                else:
                    n = int(
                        math.log(
                            (epoch / self.T_0 * (self.T_mult - 1) + 1), self.T_mult
                        )
                    )
                    self.cycle = n
                    self.T_cur = epoch - self.T_0 * (self.T_mult**n - 1) / (
                        self.T_mult - 1
                    )
                    self.T_i = self.T_0 * self.T_mult ** (n)
            else:
                self.T_i = self.T_0
                self.T_cur = epoch

        self.eta_max = self.base_eta_max * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group["lr"] = lr

## Lion

In [12]:
class Lion(Optimizer):
    r"""Implements Lion algorithm."""

    def __init__(
        self,
        params,
        lr=1e-4,
        betas=(0.9, 0.99),
        weight_decay=0.0,
        maximize=False,
        foreach=None,
    ):
        """Initialize the hyperparameters.
        Args:
          params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
          lr (float, optional): learning rate (default: 1e-4)
          betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.99))
          weight_decay (float, optional): weight decay coefficient (default: 0)
        """

        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        defaults = dict(
            lr=lr,
            betas=betas,
            weight_decay=weight_decay,
            foreach=foreach,
            maximize=maximize,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault("maximize", False)
            group.setdefault("foreach", None)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.
        Args:
          closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
        Returns:
          the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            exp_avgs = []
            beta1, beta2 = group["betas"]

            for p in group["params"]:
                if p.grad is None:
                    continue
                params_with_grad.append(p)
                if p.grad.is_sparse:
                    raise RuntimeError("Lion does not support sparse gradients")
                grads.append(p.grad)

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state["exp_avg"] = torch.zeros_like(
                        p, memory_format=torch.preserve_format
                    )

                exp_avgs.append(state["exp_avg"])

            lion(
                params_with_grad,
                grads,
                exp_avgs,
                beta1=beta1,
                beta2=beta2,
                lr=group["lr"],
                weight_decay=group["weight_decay"],
                maximize=group["maximize"],
                foreach=group["foreach"],
            )

        return loss


def lion(
    params: List[torch.Tensor],
    grads: List[torch.Tensor],
    exp_avgs: List[torch.Tensor],
    # kwonly args with defaults are not supported by functions compiled with torchscript issue #70627
    # setting this as kwarg for now as functional API is compiled by torch/distributed/optim
    maximize: bool = False,
    foreach: bool = None,
    *,
    beta1: float,
    beta2: float,
    lr: float,
    weight_decay: float,
):
    r"""Functional API that performs Lion algorithm computation."""
    if foreach is None:
        # Placeholder for more complex foreach logic to be added when value is not set
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")

    if foreach and not torch.jit.is_scripting():
        func = _multi_tensor_lion
    else:
        func = _single_tensor_lion

    func(
        params,
        grads,
        exp_avgs,
        beta1=beta1,
        beta2=beta2,
        lr=lr,
        weight_decay=weight_decay,
        maximize=maximize,
    )


def _single_tensor_lion(
    params: List[torch.Tensor],
    grads: List[torch.Tensor],
    exp_avgs: List[torch.Tensor],
    *,
    beta1: float,
    beta2: float,
    lr: float,
    weight_decay: float,
    maximize: bool,
):
    for i, param in enumerate(params):
        grad = grads[i] if not maximize else -grads[i]
        exp_avg = exp_avgs[i]

        if torch.is_complex(param):
            grad = torch.view_as_real(grad)
            exp_avg = torch.view_as_real(exp_avg)
            param = torch.view_as_real(param)

        # Perform stepweight decay
        param.mul_(1 - lr * weight_decay)

        # Weight update
        update = exp_avg.mul(beta1).add_(grad, alpha=1 - beta1)
        param.add_(torch.sign(update), alpha=-lr)

        # Decay the momentum running average coefficient
        exp_avg.lerp_(grad, 1 - beta2)


def _multi_tensor_lion(
    params: List[torch.Tensor],
    grads: List[torch.Tensor],
    exp_avgs: List[torch.Tensor],
    *,
    beta1: float,
    beta2: float,
    lr: float,
    weight_decay: float,
    maximize: bool,
):
    if len(params) == 0:
        return

    if maximize:
        grads = torch._foreach_neg(tuple(grads))  # type: ignore[assignment]

    grads = [torch.view_as_real(x) if torch.is_complex(x) else x for x in grads]
    exp_avgs = [torch.view_as_real(x) if torch.is_complex(x) else x for x in exp_avgs]
    params = [torch.view_as_real(x) if torch.is_complex(x) else x for x in params]

    # Perform stepweight decay
    torch._foreach_mul_(params, 1 - lr * weight_decay)

    # Weight update
    updates = torch._foreach_mul(exp_avgs, beta1)
    torch._foreach_add_(updates, grads, alpha=1 - beta1)

    updates = [u.sign() for u in updates]
    torch._foreach_add_(params, updates, alpha=-lr)

    # Decay the momentum running average coefficient
    torch._foreach_mul_(exp_avgs, beta2)
    torch._foreach_add_(exp_avgs, grads, alpha=1 - beta2)

# Train

## train definition

In [13]:
def worker_init_fn(worker_id):
    np.random.seed(torch.initial_seed() % (2**32 - 1))


def train_gan(
    generator, discriminator, data, epochs=1000, batch_size=32, lr=1e-6, alpha=0.5
):
    # optimizers
    # optimizer_G = Lion(generator.parameters(), lr=lr)
    # optimizer_D = Lion(discriminator.parameters(), lr=lr)
    optimizer_G = torch.optim.Adam(generator.parameters(), lr=lr)
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=lr)

    # loss function
    criterion_G = F.mse_loss
    # criterion_D = FocalLoss()
    criterion_D = F.binary_cross_entropy_with_logits

    # early stopping
    early_stopping = EarlyStopping(
        patience=200, checkpoint_path="model_checkpoint.pth", device=device
    )

    # lr scheduler
    scheduler_G = CosineAnnealingWarmUpRestarts(
        optimizer_G, T_0=100, T_mult=2, eta_max=0.01, T_up=10, gamma=0.9
    )
    scheduler_D = CosineAnnealingWarmUpRestarts(
        optimizer_D, T_0=100, T_mult=2, eta_max=0.1, T_up=10, gamma=0.9
    )

    for epoch in range(1, epochs + 1):
        dataset = CustomDataset(data)
        train_size = int(len(dataset) * 0.8)
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

        train_dataloader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            worker_init_fn=worker_init_fn,
            num_workers=0,
        )
        val_dataloader = DataLoader(
            val_dataset,
            batch_size=batch_size,
            shuffle=False,
            worker_init_fn=worker_init_fn,
            num_workers=0,
        )

        # Train
        train_g_loss = 0.0
        train_d_loss = 0.0

        generator.train()
        discriminator.train()

        for inputs in train_dataloader:
            # train discriminator
            optimizer_D.zero_grad()
            batch_size = inputs.size(0)
            real_labels = torch.ones(batch_size, 1).to(device)
            fake_labels = torch.zeros(batch_size, 1).to(device)

            real_outputs = discriminator(inputs)
            real_loss = criterion_D(real_outputs, real_labels)

            fake_inputs = generator(inputs)
            fake_outputs = discriminator(fake_inputs.detach())
            fake_loss = criterion_D(fake_outputs, fake_labels)

            d_loss = real_loss + fake_loss
            d_loss.backward()
            optimizer_D.step()
            train_d_loss += d_loss.item()

            # train generator
            optimizer_G.zero_grad()
            fake_outputs = discriminator(fake_inputs)

            # generator가 생성한 가짜 데이터와 실제 데이터 사이의 차이를 최소화, 생성된 가짜 데이터에 대해 discriminator가 1을 출력한 결과를 실제 레이블과 비교
            g_loss = criterion_G(fake_inputs, inputs) * alpha + criterion_D(
                fake_outputs, real_labels
            ) * (1 - alpha)
            g_loss.backward()
            optimizer_G.step()
            train_g_loss += g_loss.item()

        train_d_loss /= len(train_dataloader)
        train_g_loss /= len(train_dataloader)

        # Validate
        with torch.no_grad():
            val_g_loss = 0.0
            val_d_loss = 0.0

            generator.eval()
            discriminator.eval()

            for inputs in val_dataloader:
                batch_size = inputs.size(0)
                real_labels = torch.ones(batch_size, 1).to(device)
                fake_labels = torch.zeros(batch_size, 1).to(device)

                real_outputs = discriminator(inputs)
                real_loss = criterion_D(real_outputs, real_labels)

                fake_inputs = generator(inputs)
                fake_outputs = discriminator(fake_inputs.detach())
                fake_loss = criterion_D(fake_outputs, fake_labels)

                d_loss = real_loss + fake_loss
                val_d_loss += d_loss.item()

                fake_outputs = discriminator(fake_inputs)
                g_loss = criterion_G(fake_inputs, inputs) * alpha + criterion_D(
                    fake_outputs, real_labels
                ) * (1 - alpha)
                val_g_loss += g_loss.item()

            val_d_loss /= len(val_dataloader)
            val_g_loss /= len(val_dataloader)

        # lr scheduler step
        scheduler_G.step(epoch)
        scheduler_D.step(epoch)

        # early stopping 호출
        early_stopping(val_g_loss, generator)

        if early_stopping.early_stop:
            # load the last checkpoint with the best model
            early_stopping.load_checkpoint(generator)
            print("Early stopping")
            break

        if epoch % 10 == 0:
            print(
                f"Epoch {epoch}/{epochs} | Train D Loss: {train_d_loss:.4f} | Train G Loss: {train_g_loss:.4f} | Val D Loss: {val_d_loss:.4f} | Val G Loss: {val_g_loss:.4f}"
            )

## predict definition

In [14]:
def predict(generator, data):
    prediction = []
    dataset = CustomDataset(data)
    dataloader = DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        worker_init_fn=worker_init_fn,
        num_workers=0,
    )

    generator.eval()
    with torch.no_grad():
        for inputs in dataloader:
            output = generator(inputs).float()
            prediction.append(output.numpy())

    return prediction

# check

In [15]:
predicted_labels = []

for i in range(len(train_list)):
    print()
    print(f"{i} 번째 설비 학습 & 예측 시작")

    # generator and discriminator이라는 변수명이 존재하면 삭제
    if "generator" in globals():
        del generator
        print("delete generator completed")
    if "discriminator" in globals():
        del discriminator
        print("delete discriminator completed")

    input_size = train_list[i].shape[1]
    output_size = train_list[i].shape[1]

    generator = Generator(input_size, CFG["generator_hidden_size"], output_size)
    discriminator = Discriminator(output_size, CFG["discriminator_hidden_size"], 1)
    summary(generator, (input_size,))
    generator.to(device)

    # train
    train_gan(
        generator=generator,
        discriminator=discriminator,
        data=train_list[i],
        epochs=CFG["epochs"],
        batch_size=CFG["batch_size"],
        lr=CFG["lr"],
        alpha=CFG["alpha"],
    )

    # set threshold MSE
    threshold = np.max(
        np.mean(
            np.power(
                train_list[i]
                - np.array(predict(generator, CustomDataset(train_list[i]))).squeeze(1),
                2,
            ),
            axis=1,
        )
    )
    # set threshold MAE
    # threshold = np.max(np.mean(np.abs(train_list[i] - np.array(predict(generator, CustomDataset(train_list[i]))).squeeze(1)), axis=1))
    print(f"{i} 번째 설비의 threshold:", f"{threshold}")

    # predict MSE
    predicted_label = np.where(
        np.mean(
            np.power(
                test_list[i]
                - np.array(predict(generator, CustomDataset(test_list[i]))).squeeze(1),
                2,
            ),
            axis=1,
        )
        > threshold,
        1,
        0,
    )
    # predict MAE
    # predicted_label = np.where(np.mean(np.abs(test_list[i] - np.array(predict(generator, CustomDataset(test_list[i]))).squeeze(1)), axis=1) > threshold, 1, 0)
    predicted_labels.append(predicted_label)



0 번째 설비 학습 & 예측 시작
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 16]             128
             RReLU-2                   [-1, 16]               0
           Dropout-3                   [-1, 16]               0
         LayerNorm-4                   [-1, 16]              32
            Linear-5                    [-1, 8]             136
             RReLU-6                    [-1, 8]               0
           Dropout-7                    [-1, 8]               0
         LayerNorm-8                    [-1, 8]              16
            Linear-9                   [-1, 16]             144
            RReLU-10                   [-1, 16]               0
          Dropout-11                   [-1, 16]               0
        LayerNorm-12                   [-1, 16]              32
           Linear-13                    [-1, 7]             119
Total params: 607
T

  return torch.tensor(self.data, dtype=torch.float32).to(device)


Epoch 10/3000 | Train D Loss: 1.5706 | Train G Loss: 0.1362 | Val D Loss: 1.5642 | Val G Loss: 0.1057
Epoch 20/3000 | Train D Loss: 1.3862 | Train G Loss: 0.0850 | Val D Loss: 1.3830 | Val G Loss: 0.0745
Epoch 30/3000 | Train D Loss: 1.3863 | Train G Loss: 0.0762 | Val D Loss: 1.3870 | Val G Loss: 0.0758
Epoch 40/3000 | Train D Loss: 1.3864 | Train G Loss: 0.0780 | Val D Loss: 1.3863 | Val G Loss: 0.0781
Epoch 50/3000 | Train D Loss: 1.3863 | Train G Loss: 0.0734 | Val D Loss: 1.3863 | Val G Loss: 0.0728
Epoch 60/3000 | Train D Loss: 1.3863 | Train G Loss: 0.0714 | Val D Loss: 1.3863 | Val G Loss: 0.0703
Epoch 70/3000 | Train D Loss: 1.3863 | Train G Loss: 0.0705 | Val D Loss: 1.3863 | Val G Loss: 0.0698
Epoch 80/3000 | Train D Loss: 1.3863 | Train G Loss: 0.0700 | Val D Loss: 1.3863 | Val G Loss: 0.0699
Epoch 90/3000 | Train D Loss: 1.3863 | Train G Loss: 0.0699 | Val D Loss: 1.3863 | Val G Loss: 0.0696
Epoch 100/3000 | Train D Loss: 1.3863 | Train G Loss: 0.0698 | Val D Loss: 1.3863 

# Submission

In [16]:
from itertools import chain

subminssion = pd.read_csv("./dataset/answer_sample.csv")
subminssion["label"] = list(chain(*predicted_labels))
subminssion.to_csv("answer.csv", index=False)
print(subminssion["label"].value_counts())
display(subminssion)

0    7032
1     357
Name: label, dtype: int64


Unnamed: 0,type,label
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
7384,7,0
7385,7,0
7386,7,0
7387,7,0


In [17]:
subminssion_sota = pd.read_csv(
    "sota/robust_8_4_RReLU_layernorm_henormal_lion_lrsche_etamax001_batch16_95점.csv"
)
display(subminssion_sota[subminssion_sota["label"] == 1])
len(
    [
        i
        for i in subminssion_sota[subminssion_sota["label"] == 1].index
        if i in subminssion[subminssion["label"] == 1].index
    ]
)

Unnamed: 0,type,label
585,0,1
586,0,1
587,0,1
588,0,1
589,0,1
...,...,...
7342,7,1
7348,7,1
7355,7,1
7359,7,1


340