In [1]:
# PlantXMamba/mamba_block/pscan.py
import math

import torch
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os
from tqdm import tqdm
import torchvision.models as models
from torchvision.models import VGG16_Weights


def npo2(len):
    """
    Returns the smallest power of 2 that is greater than or equal to len
    (a length that already is a power of 2 is returned unchanged).

    The computation uses integer arithmetic only, so it stays exact for
    lengths that do not fit in a float mantissa (going through math.log2
    rounds e.g. 2**60 + 1 down to an exponent of exactly 60).

    Args:
        len : positive length. The parameter name shadows the builtin inside
              this function; it is kept so keyword callers keep working.

    Returns:
        int : the power of 2 described above

    Raises:
        ValueError : if len is not strictly positive
    """

    if len <= 0:
        raise ValueError(f"npo2 expects a strictly positive length, got {len!r}")

    # ceil is the identity on ints and rounds fractional lengths up
    n = math.ceil(len)

    # (n - 1).bit_length() is the exponent of the first power of 2 that is >= n
    return 1 << (n - 1).bit_length()


def pad_npo2(X):
    """
    Zero-pads the length dimension (dim 1) on its right side so that it
    reaches the next power of 2.

    Args:
        X : (B, L, D, N)

    Returns:
        Y : (B, npo2(L), D, N)
    """

    seq_len = X.size(1)
    missing = npo2(seq_len) - seq_len

    # F.pad reads (left, right) pairs starting from the last dim:
    # nothing on N, nothing on D, `missing` zeros appended on L
    return F.pad(X, (0, 0, 0, 0, 0, missing), "constant", 0)


class PScan(torch.autograd.Function):
    """
    Autograd function computing the linear recurrence
    H[t] = A[t] * H[t-1] + X[t] along the length dimension with an
    up-sweep / down-sweep parallel scan.

    `pscan` and `pscan_rev` are in-place helpers working on (B, D, L, N)
    tensors whose L is a power of two; `forward` / `backward` take care of
    cloning, padding and transposing the (B, L, D, N) tensors seen by callers.
    """

    @staticmethod
    def pscan(A, X):
        # A : (B, D, L, N)
        # X : (B, D, L, N)

        # modifies X in place by doing a parallel scan.
        # more formally, X will be populated by these values :
        # H[t] = A[t] * H[t-1] + X[t] with H[0] = 0
        # which are computed in parallel (2*log2(T) sequential steps (ideally), instead of T sequential steps)

        # NOTE: A is overwritten as well (the mul_ calls below accumulate products into views of A),
        # so callers must pass tensors they are allowed to destroy.

        # only supports L that is a power of two (mainly for a clearer code)

        B, D, L, _ = A.size()
        num_steps = int(math.log2(L))

        # up sweep (last 2 steps unfolded)
        Aa = A
        Xa = X
        for _ in range(num_steps - 2):
            T = Xa.size(2)
            # group consecutive time steps in pairs: index 0 = left element, index 1 = right element
            Aa = Aa.view(B, D, T // 2, 2, -1)
            Xa = Xa.view(B, D, T // 2, 2, -1)

            # fold the left element of each pair into the right one
            Xa[:, :, :, 1].add_(Aa[:, :, :, 1].mul(Xa[:, :, :, 0]))
            Aa[:, :, :, 1].mul_(Aa[:, :, :, 0])

            # continue with the right elements only (half as many nodes)
            Aa = Aa[:, :, :, 1]
            Xa = Xa[:, :, :, 1]

        # we have only 4, 2 or 1 nodes left
        if Xa.size(2) == 4:
            Xa[:, :, 1].add_(Aa[:, :, 1].mul(Xa[:, :, 0]))
            Aa[:, :, 1].mul_(Aa[:, :, 0])

            Xa[:, :, 3].add_(
                Aa[:, :, 3].mul(Xa[:, :, 2] + Aa[:, :, 2].mul(Xa[:, :, 1]))
            )
        elif Xa.size(2) == 2:
            # L == 2 : a single combination finishes the scan, no down sweep needed
            Xa[:, :, 1].add_(Aa[:, :, 1].mul(Xa[:, :, 0]))
            return
        else:
            # L == 1 : nothing to combine
            return

        # down sweep (first 2 steps unfolded)
        # strided slices select the nodes that were reduced at the corresponding up-sweep level
        Aa = A[:, :, 2 ** (num_steps - 2) - 1 : L : 2 ** (num_steps - 2)]
        Xa = X[:, :, 2 ** (num_steps - 2) - 1 : L : 2 ** (num_steps - 2)]
        Xa[:, :, 2].add_(Aa[:, :, 2].mul(Xa[:, :, 1]))
        Aa[:, :, 2].mul_(Aa[:, :, 1])

        for k in range(num_steps - 3, -1, -1):
            Aa = A[:, :, 2**k - 1 : L : 2**k]
            Xa = X[:, :, 2**k - 1 : L : 2**k]

            T = Xa.size(2)
            Aa = Aa.view(B, D, T // 2, 2, -1)
            Xa = Xa.view(B, D, T // 2, 2, -1)

            # propagate the right element of the previous pair into the left element of the next pair
            Xa[:, :, 1:, 0].add_(Aa[:, :, 1:, 0].mul(Xa[:, :, :-1, 1]))
            Aa[:, :, 1:, 0].mul_(Aa[:, :, :-1, 1])

    @staticmethod
    def pscan_rev(A, X):
        # A : (B, D, L, N)
        # X : (B, D, L, N)

        # the same function as above, but in reverse
        # (if you flip the input, call pscan, then flip the output, you get what this function outputs)
        # it is used in the backward pass

        # like pscan, both X and A are modified in place

        # only supports L that is a power of two (mainly for a clearer code)

        B, D, L, _ = A.size()
        num_steps = int(math.log2(L))

        # up sweep (last 2 steps unfolded)
        Aa = A
        Xa = X
        for _ in range(num_steps - 2):
            T = Xa.size(2)
            Aa = Aa.view(B, D, T // 2, 2, -1)
            Xa = Xa.view(B, D, T // 2, 2, -1)

            # mirror image of pscan : fold the right element of each pair into the left one
            Xa[:, :, :, 0].add_(Aa[:, :, :, 0].mul(Xa[:, :, :, 1]))
            Aa[:, :, :, 0].mul_(Aa[:, :, :, 1])

            # continue with the left elements only
            Aa = Aa[:, :, :, 0]
            Xa = Xa[:, :, :, 0]

        # we have only 4, 2 or 1 nodes left
        if Xa.size(2) == 4:
            Xa[:, :, 2].add_(Aa[:, :, 2].mul(Xa[:, :, 3]))
            Aa[:, :, 2].mul_(Aa[:, :, 3])

            Xa[:, :, 0].add_(
                Aa[:, :, 0].mul(Xa[:, :, 1].add(Aa[:, :, 1].mul(Xa[:, :, 2])))
            )
        elif Xa.size(2) == 2:
            # L == 2 : a single combination finishes the scan
            Xa[:, :, 0].add_(Aa[:, :, 0].mul(Xa[:, :, 1]))
            return
        else:
            # L == 1 : nothing to combine
            return

        # down sweep (first 2 steps unfolded)
        Aa = A[:, :, 0 : L : 2 ** (num_steps - 2)]
        Xa = X[:, :, 0 : L : 2 ** (num_steps - 2)]
        Xa[:, :, 1].add_(Aa[:, :, 1].mul(Xa[:, :, 2]))
        Aa[:, :, 1].mul_(Aa[:, :, 2])

        for k in range(num_steps - 3, -1, -1):
            Aa = A[:, :, 0 : L : 2**k]
            Xa = X[:, :, 0 : L : 2**k]

            T = Xa.size(2)
            Aa = Aa.view(B, D, T // 2, 2, -1)
            Xa = Xa.view(B, D, T // 2, 2, -1)

            # propagate the left element of the next pair into the right element of the previous pair
            Xa[:, :, :-1, 1].add_(Aa[:, :, :-1, 1].mul(Xa[:, :, 1:, 0]))
            Aa[:, :, :-1, 1].mul_(Aa[:, :, 1:, 0])

    @staticmethod
    def forward(ctx, A_in, X_in):
        """
        Applies the parallel scan operation, as defined above. Returns a new tensor.
        Prefer sequence lengths that are powers of two when possible: other
        lengths are zero-padded up to the next power of two before the scan.

        Args:
            A_in : (B, L, D, N)
            X_in : (B, L, D, N)

        Returns:
            H : (B, L, D, N)
        """

        L = X_in.size(1)

        # cloning is required because of the in-place ops
        if L == npo2(L):
            A = A_in.clone()
            X = X_in.clone()
        else:
            # pad tensors (and clone btw)
            A = pad_npo2(A_in)  # (B, npo2(L), D, N)
            X = pad_npo2(X_in)  # (B, npo2(L), D, N)

        # prepare tensors
        A = A.transpose(2, 1)  # (B, D, npo2(L), N)
        X = X.transpose(2, 1)  # (B, D, npo2(L), N)

        # parallel scan (modifies X in-place)
        PScan.pscan(A, X)

        # backward needs the untouched input A_in and the scanned states X (still padded, (B, D, npo2(L), N))
        ctx.save_for_backward(A_in, X)

        # slice [:, :L] (cut if there was padding)
        return X.transpose(2, 1)[:, :L]

    @staticmethod
    def backward(ctx, grad_output_in):
        """
        Flows the gradient from the output to the input. Returns two new tensors.

        Args:
            ctx : A_in : (B, L, D, N), X : (B, D, L, N)
            grad_output_in : (B, L, D, N)

        Returns:
            gradA : (B, L, D, N), gradX : (B, L, D, N)
        """

        A_in, X = ctx.saved_tensors

        L = grad_output_in.size(1)

        # cloning is required because of the in-place ops
        if L == npo2(L):
            grad_output = grad_output_in.clone()
            # the next padding will clone A_in
        else:
            grad_output = pad_npo2(grad_output_in)  # (B, npo2(L), D, N)
            A_in = pad_npo2(A_in)  # (B, npo2(L), D, N)

        # prepare tensors
        grad_output = grad_output.transpose(2, 1)
        A_in = A_in.transpose(2, 1)  # (B, D, npo2(L), N)
        A = torch.nn.functional.pad(
            A_in[:, :, 1:], (0, 0, 0, 1)
        )  # (B, D, npo2(L), N) shift 1 to the left (see hand derivation)

        # reverse parallel scan (modifies grad_output in-place)
        PScan.pscan_rev(A, grad_output)

        # gradient w.r.t. A : previous hidden state times the scanned gradient; position 0 stays zero
        Q = torch.zeros_like(X)
        Q[:, :, 1:].add_(X[:, :, :-1] * grad_output[:, :, 1:])

        # back to (B, L, D, N), dropping any padded positions
        return Q.transpose(2, 1)[:, :L], grad_output.transpose(2, 1)[:, :L]


# functional entry point: pscan(A, X) runs PScan.forward and registers PScan.backward with autograd
pscan = PScan.apply


In [2]:
# PlantXMamba/mamba_block/backbone.py
import math
from dataclasses import dataclass
from typing import Union

@dataclass
class MambaConfig:
    """
    Hyper-parameters of the Mamba backbone.

    Derived attributes set in __post_init__ (they are not constructor fields):
        d_inner        : expand_factor * d_model (ED in comments)
        mup_width_mult : d_model / mup_base_width, only set when mup is True
    """

    d_model: int  # D
    n_layers: int
    dt_rank: Union[int, str] = "auto"  # "auto" resolves to ceil(d_model / 16)
    d_state: int = 16  # N in paper/comments
    expand_factor: int = 2  # E in paper/comments
    d_conv: int = 4

    dt_min: float = 0.001
    dt_max: float = 0.1
    dt_init: str = "random"  # "random" or "constant"
    dt_scale: float = 1.0

    rms_norm_eps: float = 1e-5
    base_std: float = 0.02

    dropout: float = 0.1

    bias: bool = False
    conv_bias: bool = True
    inner_layernorms: bool = False  # apply layernorms to internal activations

    mup: bool = False
    mup_base_width: float = 128  # width=d_model

    pscan: bool = True  # use parallel scan mode or sequential mode when training
    use_cuda: bool = False  # use official CUDA implementation when training (not compatible with (b)float16)

    # lower clamp applied to the sampled dt at initialisation.
    # It was previously declared without a type annotation, which made it a plain
    # class attribute that the constructor could not override. It is declared last
    # so the positional order of every pre-existing field is unchanged.
    dt_init_floor: float = 1e-4

    def __post_init__(self):
        self.d_inner = self.expand_factor * self.d_model  # E*D = ED in comments

        if self.dt_rank == "auto":
            self.dt_rank = math.ceil(self.d_model / 16)

        # muP
        if self.mup:
            self.mup_width_mult = self.d_model / self.mup_base_width


class Mamba(nn.Module):
    """Stack of `config.n_layers` ResidualBlock layers applied one after another."""

    def __init__(self, config: MambaConfig):
        super().__init__()

        self.config = config

        blocks = [ResidualBlock(config) for _ in range(config.n_layers)]
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        """Runs x through every layer in order and returns the final activation."""
        hidden = x
        for block in self.layers:
            hidden = block(hidden)
        return hidden

    def step(self, x, caches):
        """
        Single-step variant: each layer receives its own cache entry and the
        updated entry is written back into `caches` (mutated in place).
        """
        hidden = x
        for idx, block in enumerate(self.layers):
            hidden, caches[idx] = block.step(hidden, caches[idx])
        return hidden, caches


class ResidualBlock(nn.Module):
    """Pre-norm residual wrapper: output = MambaBlock(RMSNorm(x)) + x."""

    def __init__(self, config: MambaConfig):
        super().__init__()

        self.mixer = MambaBlock(config)
        self.norm = RMSNorm(config.d_model, config.rms_norm_eps, config.mup)

    def forward(self, x):
        normed = self.norm(x)
        return self.mixer(normed) + x

    def step(self, x, cache):
        # same residual structure as forward, threading the mixer's cache through
        mixed, cache = self.mixer.step(self.norm(x), cache)
        return mixed + x, cache


class MambaBlock(nn.Module):
    """
    Selective state-space (Mamba) mixer block.

    `forward` processes a whole sequence (B, L, D); `step` processes a single
    time step (B, D) together with a cache holding the SSM state and the last
    inputs of the convolution.
    """

    def __init__(self, config: MambaConfig):
        super().__init__()

        self.config = config

        # projects block input from D to 2*ED (two branches)
        self.in_proj = nn.Linear(config.d_model, 2 * config.d_inner, bias=config.bias)

        # depthwise (groups == channels) convolution over time; padding d_conv-1 on both
        # sides, the surplus on the right is cut off by the callers
        self.conv1d = nn.Conv1d(
            in_channels=config.d_inner,
            out_channels=config.d_inner,
            kernel_size=config.d_conv,
            bias=config.conv_bias,
            groups=config.d_inner,
            padding=config.d_conv - 1,
        )

        # projects x to input-dependent delta, B, C
        self.x_proj = nn.Linear(
            config.d_inner, config.dt_rank + 2 * config.d_state, bias=False
        )

        # projects delta from dt_rank to d_inner
        self.dt_proj = nn.Linear(config.dt_rank, config.d_inner, bias=True)

        # dt initialization
        # dt weights
        dt_init_std = config.dt_rank**-0.5 * config.dt_scale
        if config.dt_init == "constant":
            nn.init.constant_(self.dt_proj.weight, dt_init_std)
        elif config.dt_init == "random":
            nn.init.uniform_(self.dt_proj.weight, -dt_init_std, dt_init_std)
        else:
            raise NotImplementedError

        # delta bias
        # dt is sampled log-uniformly in [dt_min, dt_max], then clamped from below
        dt = torch.exp(
            torch.rand(config.d_inner)
            * (math.log(config.dt_max) - math.log(config.dt_min))
            + math.log(config.dt_min)
        ).clamp(min=config.dt_init_floor)
        inv_dt = dt + torch.log(
            -torch.expm1(-dt)
        )  # inverse of softplus: https://github.com/pytorch/pytorch/issues/72759
        with torch.no_grad():
            self.dt_proj.bias.copy_(inv_dt)

        # S4D real initialization
        # every one of the d_inner rows holds [1, 2, ..., d_state]
        A = torch.arange(1, config.d_state + 1, dtype=torch.float32).repeat(
            config.d_inner, 1
        )
        self.A_log = nn.Parameter(
            torch.log(A)
        )  # why store A in log ? to keep A < 0 (cf -torch.exp(...)) ? for gradient stability ?
        # marker attribute; presumably read by the optimizer setup to exclude this parameter from weight decay
        self.A_log._no_weight_decay = True

        self.D = nn.Parameter(torch.ones(config.d_inner))
        self.D._no_weight_decay = True

        # projects block output from ED back to D
        self.out_proj = nn.Linear(config.d_inner, config.d_model, bias=config.bias)

        # used in jamba
        if self.config.inner_layernorms:
            self.dt_layernorm = RMSNorm(
                self.config.dt_rank, config.rms_norm_eps, config.mup
            )
            self.B_layernorm = RMSNorm(
                self.config.d_state, config.rms_norm_eps, config.mup
            )
            self.C_layernorm = RMSNorm(
                self.config.d_state, config.rms_norm_eps, config.mup
            )
        else:
            self.dt_layernorm = None
            self.B_layernorm = None
            self.C_layernorm = None

        if self.config.use_cuda:
            try:
                from mamba_ssm.ops.selective_scan_interface import selective_scan_fn

                self.selective_scan_cuda = selective_scan_fn
            except ImportError:
                print("Failed to import mamba_ssm. Falling back to mamba.py.")
                # NOTE: this writes to the config object itself, so every block sharing it sees the change
                self.config.use_cuda = False

    def _apply_layernorms(self, dt, B, C):
        """Applies the optional inner RMSNorm layers to dt, B and C (identity for any layer that is None)."""
        if self.dt_layernorm is not None:
            dt = self.dt_layernorm(dt)
        if self.B_layernorm is not None:
            B = self.B_layernorm(B)
        if self.C_layernorm is not None:
            C = self.C_layernorm(C)
        return dt, B, C

    def forward(self, x):
        """
        Full-sequence pass.

        Args:
            x : (B, L, D)

        Returns:
            output : (B, L, D)
        """

        _, L, _ = x.shape

        xz = self.in_proj(x)  # (B, L, 2*ED)
        x, z = xz.chunk(2, dim=-1)  # (B, L, ED), (B, L, ED)

        # x branch
        x = x.transpose(1, 2)  # (B, ED, L)
        x = self.conv1d(x)[
            :, :, :L
        ]  # depthwise convolution over time, with a short filter
        x = x.transpose(1, 2)  # (B, L, ED)

        x = F.silu(x)
        y = self.ssm(x, z)

        if self.config.use_cuda:
            output = self.out_proj(y)  # (B, L, D)
            return output  # the rest of the operations are done in the ssm function (fused with the CUDA pscan)

        # z branch
        z = F.silu(z)

        # gate the SSM output with the z branch
        output = y * z
        output = self.out_proj(output)  # (B, L, D)

        return output

    def ssm(self, x, z):
        """
        Computes the input-dependent delta, B, C and runs the selective scan.

        Args:
            x : (B, L, ED)
            z : (B, L, ED), only used by the CUDA path where the gating is fused

        Returns:
            y : (B, L, ED)
        """

        A = -torch.exp(self.A_log.float())  # (ED, N)
        D = self.D.float()

        deltaBC = self.x_proj(x)  # (B, L, dt_rank+2*N)
        delta, B, C = torch.split(
            deltaBC,
            [self.config.dt_rank, self.config.d_state, self.config.d_state],
            dim=-1,
        )  # (B, L, dt_rank), (B, L, N), (B, L, N)
        delta, B, C = self._apply_layernorms(delta, B, C)
        # only the weight of dt_proj is applied here; its bias is added in the branches below
        delta = self.dt_proj.weight @ delta.transpose(
            1, 2
        )  # (ED, dt_rank) @ (B, L, dt_rank) -> (B, ED, L)

        if self.config.use_cuda:
            # these are unfortunately needed for the selective_scan_cuda function
            x = x.transpose(1, 2)
            B = B.transpose(1, 2)
            C = C.transpose(1, 2)
            z = z.transpose(1, 2)

            # "softplus" + "bias" + "y * silu(z)" operations are fused
            y = self.selective_scan_cuda(
                x,
                delta,
                A,
                B,
                C,
                D,
                z=z,
                delta_softplus=True,
                delta_bias=self.dt_proj.bias.float(),
            )
            y = y.transpose(1, 2)  # (B, L, ED)

        else:
            delta = delta.transpose(1, 2)  # (B, L, ED)
            delta = F.softplus(delta + self.dt_proj.bias)

            if self.config.pscan:
                y = self.selective_scan(x, delta, A, B, C, D)
            else:
                y = self.selective_scan_seq(x, delta, A, B, C, D)

        return y

    def selective_scan(self, x, delta, A, B, C, D):
        """Selective scan using the parallel scan `pscan`; returns y of shape (B, L, ED)."""

        deltaA = torch.exp(delta.unsqueeze(-1) * A)  # (B, L, ED, N)
        deltaB = delta.unsqueeze(-1) * B.unsqueeze(2)  # (B, L, ED, N)

        BX = deltaB * (x.unsqueeze(-1))  # (B, L, ED, N)

        # hidden states for every time step
        hs = pscan(deltaA, BX)

        y = (hs @ C.unsqueeze(-1)).squeeze(
            3
        )  # (B, L, ED, N) @ (B, L, N, 1) -> (B, L, ED, 1)

        # skip connection scaled by D
        y = y + D * x

        return y

    def selective_scan_seq(self, x, delta, A, B, C, D):
        """Same computation as `selective_scan`, with an explicit Python loop over the L time steps."""
        _, L, _ = x.shape

        deltaA = torch.exp(delta.unsqueeze(-1) * A)  # (B, L, ED, N)
        deltaB = delta.unsqueeze(-1) * B.unsqueeze(2)  # (B, L, ED, N)

        BX = deltaB * (x.unsqueeze(-1))  # (B, L, ED, N)

        # initial hidden state is zero
        h = torch.zeros(
            x.size(0), self.config.d_inner, self.config.d_state, device=deltaA.device
        )  # (B, ED, N)
        hs = []

        for t in range(0, L):
            h = deltaA[:, t] * h + BX[:, t]
            hs.append(h)

        hs = torch.stack(hs, dim=1)  # (B, L, ED, N)

        y = (hs @ C.unsqueeze(-1)).squeeze(
            3
        )  # (B, L, ED, N) @ (B, L, N, 1) -> (B, L, ED, 1)

        y = y + D * x

        return y

    # -------------------------- inference -------------------------- #

    def step(self, x, cache):
        """Processes one time step and returns the output together with the updated cache."""
        # x : (B, D)
        # cache : (h, inputs)
        # h : (B, ED, N)
        # inputs : (B, ED, d_conv-1)

        # y : (B, D)
        # cache : (h, inputs)

        h, inputs = cache

        xz = self.in_proj(x)  # (B, 2*ED)
        x, z = xz.chunk(2, dim=1)  # (B, ED), (B, ED)

        # x branch
        x_cache = x.unsqueeze(2)
        # convolve over the d_conv-1 cached inputs plus the new one and keep the output at index d_conv-1
        x = self.conv1d(torch.cat([inputs, x_cache], dim=2))[
            :, :, self.config.d_conv - 1
        ]  # (B, ED)

        x = F.silu(x)
        y, h = self.ssm_step(x, h)

        # z branch
        z = F.silu(z)

        output = y * z
        output = self.out_proj(output)  # (B, D)

        # prepare cache for next call
        # drop the oldest cached input and append the new (pre-convolution) one
        inputs = torch.cat([inputs[:, :, 1:], x_cache], dim=2)  # (B, ED, d_conv-1)
        cache = (h, inputs)

        return output, cache

    def ssm_step(self, x, h):
        """Single-step state update; h may be None, in which case a zero state is used."""
        # x : (B, ED)
        # h : (B, ED, N)

        # y : (B, ED)
        # h : (B, ED, N)

        A = -torch.exp(
            self.A_log.float()
        )  # (ED, N) # todo : do not recompute this on every call, since it does not depend on the timestep
        D = self.D.float()

        deltaBC = self.x_proj(x)  # (B, dt_rank+2*N)

        delta, B, C = torch.split(
            deltaBC,
            [self.config.dt_rank, self.config.d_state, self.config.d_state],
            dim=-1,
        )  # (B, dt_rank), (B, N), (B, N)
        delta, B, C = self._apply_layernorms(delta, B, C)
        delta = F.softplus(self.dt_proj(delta))  # (B, ED)

        deltaA = torch.exp(delta.unsqueeze(-1) * A)  # (B, ED, N)
        deltaB = delta.unsqueeze(-1) * B.unsqueeze(1)  # (B, ED, N)

        BX = deltaB * (x.unsqueeze(-1))  # (B, ED, N)

        if h is None:
            h = torch.zeros(
                x.size(0),
                self.config.d_inner,
                self.config.d_state,
                device=deltaA.device,
            )  # (B, ED, N)

        h = deltaA * h + BX  # (B, ED, N)

        y = (h @ C.unsqueeze(-1)).squeeze(2)  # (B, ED, N) @ (B, N, 1) -> (B, ED, 1)

        y = y + D * x

        return y, h


class RMSNorm(nn.Module):
    """
    Root-mean-square normalisation over the last dimension, followed by a
    learned per-feature gain unless `use_mup` is set.
    """

    def __init__(self, d_model: int, eps: float = 1e-5, use_mup: bool = False):
        super().__init__()

        self.use_mup = use_mup
        self.eps = eps

        # https://arxiv.org/abs/2404.05728, RMSNorm gains prevents muTransfer (section 4.2.3)
        if not use_mup:
            self.weight = nn.Parameter(torch.ones(d_model))

    def forward(self, x):
        mean_square = x.pow(2).mean(-1, keepdim=True)
        normed = x * torch.rsqrt(mean_square + self.eps)

        # under muP there is no gain parameter to apply
        if self.use_mup:
            return normed
        return normed * self.weight


In [3]:
# PlantXMamba/mamba_block/head.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class MambaHead(nn.Module):
    """Output head applying LayerNorm and then dropout; the tensor shape is left unchanged."""

    def __init__(self, d_model: int, dropout: float = 0.0):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # (batch_size, seq_len, d_model) in, same shape out
        normed = self.norm(x)
        return self.dropout(normed)


In [4]:
# PlantXMamba/mamba_block/model.py
from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F

class MambaModule(nn.Module):
    """
    Mamba backbone followed by a MambaHead.

    `args` must expose d_model, n_layers, d_state, d_conv, expand and dropout.
    """

    def __init__(self, args):
        super().__init__()
        self.args = args
        self.d_model = args.d_model
        self.n_layers = args.n_layers

        backbone_config = MambaConfig(
            d_model=self.d_model,
            n_layers=self.n_layers,
            d_state=args.d_state,
            d_conv=args.d_conv,
            expand_factor=args.expand,
            dropout=args.dropout,
        )
        self.backbone = Mamba(backbone_config)
        self.head = MambaHead(d_model=self.d_model, dropout=args.dropout)

    def forward(self, x):
        # (batch_size, seq_len, d_model) throughout
        return self.head(self.backbone(x))

In [5]:
!git clone https://github.com/sakanaowo/PlantXViT

Cloning into 'PlantXViT'...
remote: Enumerating objects: 104825, done.[K
remote: Counting objects: 100% (23/23), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 104825 (delta 6), reused 20 (delta 4), pack-reused 104802 (from 1)[K
Receiving objects: 100% (104825/104825), 2.45 GiB | 54.30 MiB/s, done.
Resolving deltas: 100% (30447/30447), done.
Updating files: 100% (104353/104353), done.


In [6]:
from PlantXViT.utils.config_loader import load_config
# project configuration loaded from the YAML file shipped with the cloned repository;
# later cells read values such as config["training"]["batch_size"] from it
config=load_config('PlantXViT/configs/config.yaml')

In [7]:
import pandas as pd
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset
from torchvision import transforms
import pickle

In [9]:
# raw training metadata of the Plant Pathology 2020 (FGVC7) dataset
csv_path='./PlantXViT/data/raw/plant-pathology-2020-fgvc7/train.csv'
# pickle location for the fitted label encoder (only printed here; not read or written in this cell)
label_encoder_path='./PlantXViT/data/processed/apple_label_encoder.pkl'
print(label_encoder_path)

./PlantXViT/data/processed/apple_label_encoder.pkl


In [10]:
df = pd.read_csv(csv_path)
# collapse the four target columns into one class-name column
# (presumably the columns are one-hot, so idxmax picks the active class — confirm)
df['label'] = df[['healthy', 'multiple_diseases', 'rust', 'scab']].idxmax(axis=1)

# encode class names as integer ids; label_encoder.classes_ keeps the id -> name mapping.
# The encoder is fitted exactly once: refitting a fresh encoder on the already-encoded
# integers would replace the class names in classes_ with the integers themselves.
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# stratified 80/20 split; copy so that adding a column never writes into a view of df
train_df, val_df = train_test_split(df, test_size=0.2,
                                    stratify=df['label'],
                                    random_state=42)
train_df = train_df.copy()
val_df = val_df.copy()

# 'label' already holds the integer id, so label_idx is the same value
train_df['label_idx'] = train_df['label']
val_df['label_idx'] = val_df['label']

image_dir = "./PlantXViT/data/raw/plant-pathology-2020-fgvc7/images"
# NOTE(review): the pre-computed split files below replace the in-memory split made above —
# confirm which of the two is meant to be used
train_df = pd.read_csv("./PlantXViT/data/processed/apple/apple_train.csv")
val_df = pd.read_csv("./PlantXViT/data/processed/apple/apple_val.csv")

In [11]:
# preprocessing applied to every image: resize, convert to a tensor, then normalise
# each channel with the mean/std below (these look like the usual ImageNet statistics — confirm)
# NOTE(review): image_size is not defined in any visible cell — confirm it is set before this cell runs
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

class AppleDataset(Dataset):
    """
    Dataset over a dataframe with `image_id` and `label_idx` columns.

    Each item is (transformed RGB image, label tensor); the image is read
    from `<image_dir>/<image_id>.jpg`.
    """

    def __init__(self, dataframe, image_dir, transform):
        self.df = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]

        file_name = record['image_id'] + ".jpg"
        rgb = Image.open(os.path.join(self.image_dir, file_name)).convert("RGB")

        return self.transform(rgb), torch.tensor(record['label_idx'])

In [12]:
from torch.utils.data import DataLoader

batch_size = config["training"]["batch_size"]

# the images folder is bound to `image_dir` in the data-loading cell;
# `img_dir` is not defined anywhere in the visible notebook and raised a NameError
train_dataset = AppleDataset(train_df, image_dir, transform)
val_dataset = AppleDataset(val_df, image_dir, transform)

# only the training set is shuffled; validation keeps the dataframe order
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# InceptionBlock class (adapted from the original code)
class InceptionBlock(nn.Module):
    """
    Four parallel convolutional branches whose outputs are concatenated on the
    channel axis (128 + 128 + 192 + 64 channels) and mapped to `out_channels`
    by a final 1x1 convolution. Spatial size is preserved.
    """

    @staticmethod
    def _conv_unit(c_in, c_out, kernel_size, padding=0):
        """Returns the [Conv2d, ReLU, BatchNorm2d] triple every branch is built from."""
        return [
            nn.Conv2d(c_in, c_out, kernel_size=kernel_size, padding=padding),
            nn.ReLU(),
            nn.BatchNorm2d(c_out),
        ]

    def __init__(self, in_channels, out_channels=512):
        super().__init__()
        unit = self._conv_unit

        # Branch 1: 1x1
        self.branch1x1 = nn.Sequential(*unit(in_channels, 128, 1))

        # Branch 2: 1x1 -> 3x1 + 1x3
        self.branch3x3 = nn.Sequential(
            *unit(in_channels, 96, 1),
            *unit(96, 128, (3, 1), (1, 0)),
            *unit(128, 128, (1, 3), (0, 1)),
        )

        # Branch 3: 1x1 -> 3x1 + 1x3 -> 3x1 + 1x3
        self.branch5x5 = nn.Sequential(
            *unit(in_channels, 64, 1),
            *unit(64, 96, (3, 1), (1, 0)),
            *unit(96, 96, (1, 3), (0, 1)),
            *unit(96, 192, (3, 1), (1, 0)),
            *unit(192, 192, (1, 3), (0, 1)),
        )

        # Branch 4: MaxPool -> 1x1
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            *unit(in_channels, 64, 1),
        )

        # Final layer adjusting the number of output channels
        self.adjust_channels = nn.Conv2d(128 + 128 + 192 + 64, out_channels, kernel_size=1)

    def forward(self, x):
        branches = (self.branch1x1, self.branch3x3, self.branch5x5, self.branch_pool)
        stacked = torch.cat([branch(x) for branch in branches], dim=1)
        return self.adjust_channels(stacked)

# PatchEmbedding class (kept unchanged)
class PatchEmbedding(nn.Module):
    """
    Cuts a (B, C, H, W) feature map into non-overlapping patch_size x patch_size
    patches and linearly projects each flattened patch to `emb_size` features.
    Rows/columns that do not fill a complete patch are dropped by `unfold`.
    """

    def __init__(self, in_channels, patch_size=5, emb_size=16):
        super().__init__()
        self.patch_size = patch_size
        self.emb_size = emb_size
        patch_dim = in_channels * patch_size * patch_size
        self.proj = nn.Linear(patch_dim, emb_size)

    def forward(self, x):
        p = self.patch_size
        batch, channels, _, _ = x.shape

        # windows along H, then along W -> (B, C, nH, nW, p, p)
        windows = x.unfold(2, p, p).unfold(3, p, p)
        # put the patch grid first -> (B, nH, nW, C, p, p)
        windows = windows.permute(0, 2, 3, 1, 4, 5).contiguous()
        # one flattened vector per patch -> (B, nH * nW, C * p * p)
        tokens = windows.view(batch, -1, channels * p * p)

        return self.proj(tokens)

# MambaModule class (kept unchanged)
class MambaModule(nn.Module):
    """
    Mamba backbone followed by a MambaHead.

    `args` must expose d_model, n_layers, d_state, d_conv, expand and dropout.
    """

    def __init__(self, args):
        super().__init__()
        self.args = args
        self.d_model = args.d_model
        self.n_layers = args.n_layers

        backbone_config = MambaConfig(
            d_model=self.d_model,
            n_layers=self.n_layers,
            d_state=args.d_state,
            d_conv=args.d_conv,
            expand_factor=args.expand,
            dropout=args.dropout,
        )
        self.backbone = Mamba(backbone_config)
        self.head = MambaHead(d_model=self.d_model, dropout=args.dropout)

    def forward(self, x):
        features = self.backbone(x)
        return self.head(features)

# MambaHead class (kept unchanged)
class MambaHead(nn.Module):
    """Output head applying LayerNorm and then dropout; the tensor shape is left unchanged."""

    def __init__(self, d_model: int, dropout: float = 0.0):
        super().__init__()
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.dropout(self.norm(x))

# PlantXMamba model that uses only Inception blocks as its convolutional stem
class InceptionPlantXMamba(nn.Module):
    """
    Image classifier: Inception stem -> patch embedding -> stack of
    MambaModule blocks -> LayerNorm -> mean over patches -> linear classifier.

    Shape comments below assume a 224x224 input and the default
    patch_size=5 / emb_size=16.
    """

    def __init__(self, num_classes=93, patch_size=5, emb_size=16, d_state=64, d_conv=64, expand=4, n_layers=2, num_blocks=4, dropout=0.1):
        super().__init__()

        # Inception blocks, each followed by a 2x2 max-pool that halves the resolution
        stem_layers = [
            InceptionBlock(in_channels=3, out_channels=256),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 224x224 -> 112x112
            InceptionBlock(in_channels=256, out_channels=512),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 112x112 -> 56x56
        ]
        self.inception = nn.Sequential(*stem_layers)

        # Patch embedding
        self.patch_embed = PatchEmbedding(in_channels=512, patch_size=patch_size, emb_size=emb_size)

        # Mamba blocks: MambaModule reads its settings as attributes of one shared args object
        arg_values = {
            'd_model': emb_size,
            'd_state': d_state,
            'd_conv': d_conv,
            'expand': expand,
            'n_layers': n_layers,
            'dropout': dropout
        }
        mamba_args = type('Args', (), arg_values)()
        mamba_blocks = [MambaModule(mamba_args) for _ in range(num_blocks)]
        self.mamba = nn.Sequential(*mamba_blocks)

        # Classification head
        self.norm = nn.LayerNorm(emb_size)
        self.global_pool = nn.AdaptiveAvgPool1d(1)
        self.classifier = nn.Linear(emb_size, num_classes)

    def forward(self, x):
        feature_map = self.inception(x)  # (B, 512, 56, 56)
        tokens = self.patch_embed(feature_map)  # (B, 121, 16)
        tokens = self.norm(self.mamba(tokens))  # (B, 121, 16)

        # average over the patch axis: AdaptiveAvgPool1d pools the last dim
        pooled = self.global_pool(tokens.permute(0, 2, 1)).squeeze(-1)  # (B, 16)
        return self.classifier(pooled)  # (B, num_classes)

In [18]:
%cd PlantXViT

/content/PlantXViT


In [19]:
import torch
import torch.optim as optim
from tqdm import tqdm

# use the GPU when one is visible, otherwise fall back to the CPU
# (a hard-coded 'cuda' device fails at the first .to(DEVICE) on CPU-only machines)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = InceptionPlantXMamba(num_classes=4).to(DEVICE)  # or another InceptionPlantXMamba variant
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)

def train_one_epoch(model, loader, optimizer, criterion):
    """
    Runs one optimisation pass over `loader` on the global DEVICE.

    Returns:
        (mean loss per sample, accuracy) over all samples seen
    """
    model.train()

    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for inputs, labels in tqdm(loader, desc="Training"):
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # weight by batch size so the final value is a per-sample mean
        loss_sum += batch_loss.item() * inputs.size(0)
        predicted = torch.max(logits, 1).indices
        n_correct += (predicted == labels).sum().item()
        n_seen += labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen

def evaluate(model, loader, criterion):
    """Score ``model`` on ``loader`` without updating any weights.

    Switches ``model`` to eval mode and runs every batch on the module-level
    ``DEVICE`` with gradient tracking disabled.

    Returns:
        ``(avg_loss, acc)``: the accumulated loss and the number of correct
        predictions, each divided by the number of samples seen.
    """
    model.eval()
    loss_sum = 0
    num_correct = 0
    num_seen = 0

    with torch.no_grad():
        for inputs, labels in tqdm(loader, desc="Evaluating"):
            inputs = inputs.to(DEVICE)
            labels = labels.to(DEVICE)

            logits = model(inputs)
            batch_loss = criterion(logits, labels)

            # The batch loss is scaled by the batch size before accumulation
            # and the total is divided by the sample count at the end.
            loss_sum += batch_loss.item() * inputs.size(0)
            predicted = torch.max(logits, 1).indices
            num_correct += (predicted == labels).sum().item()
            num_seen += labels.size(0)

    return loss_sum / num_seen, num_correct / num_seen

# Training loop with early stopping on validation accuracy
EPOCHS = 50
MODEL_PATH = "./outputs/apple/models/InceptionMamba_apple_286.pth"
# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first save is attempted.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
best_val_acc = 0
# `patience` is the number of consecutive non-improving epochs tolerated;
# `wait` counts how many have elapsed since the last improvement.
patience, wait = 5, 0
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)
    print(f"Train Loss: {train_loss:.4f} | Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f} | Acc: {val_acc:.4f}")
    # Only a strict improvement counts: an epoch that merely ties the best
    # validation accuracy does not reset the early-stopping counter.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), MODEL_PATH)
        print(f"✅ Saved best model to {MODEL_PATH}")
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break


Epoch 1/50


Training: 100%|██████████| 91/91 [01:05<00:00,  1.39it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.74it/s]


Train Loss: 1.2084 | Acc: 0.4224
Val   Loss: 1.1502 | Acc: 0.4548
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 2/50


Training: 100%|██████████| 91/91 [01:07<00:00,  1.35it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.61it/s]


Train Loss: 1.1433 | Acc: 0.4725
Val   Loss: 1.0677 | Acc: 0.5342
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 3/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.32it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.61it/s]


Train Loss: 1.1126 | Acc: 0.5062
Val   Loss: 1.0695 | Acc: 0.5342

Epoch 4/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.32it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.63it/s]


Train Loss: 1.0768 | Acc: 0.5460
Val   Loss: 1.0215 | Acc: 0.5644
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 5/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.32it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.62it/s]


Train Loss: 1.0388 | Acc: 0.5831
Val   Loss: 0.9527 | Acc: 0.6466
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 6/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.33it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.63it/s]


Train Loss: 1.0053 | Acc: 0.5975
Val   Loss: 0.9200 | Acc: 0.6466

Epoch 7/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.32it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.61it/s]


Train Loss: 0.9448 | Acc: 0.6209
Val   Loss: 0.8977 | Acc: 0.6493
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 8/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.33it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.61it/s]


Train Loss: 0.8974 | Acc: 0.6587
Val   Loss: 0.9555 | Acc: 0.6082

Epoch 9/50


Training: 100%|██████████| 91/91 [01:07<00:00,  1.35it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.65it/s]


Train Loss: 0.8522 | Acc: 0.6731
Val   Loss: 0.7848 | Acc: 0.7151
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 10/50


Training: 100%|██████████| 91/91 [01:06<00:00,  1.37it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.68it/s]


Train Loss: 0.7986 | Acc: 0.7115
Val   Loss: 0.8027 | Acc: 0.7151

Epoch 11/50


Training: 100%|██████████| 91/91 [01:06<00:00,  1.36it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.65it/s]


Train Loss: 0.7570 | Acc: 0.7301
Val   Loss: 0.7390 | Acc: 0.7452
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 12/50


Training: 100%|██████████| 91/91 [01:06<00:00,  1.37it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.66it/s]


Train Loss: 0.7139 | Acc: 0.7603
Val   Loss: 0.6826 | Acc: 0.7781
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 13/50


Training: 100%|██████████| 91/91 [01:05<00:00,  1.40it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.66it/s]


Train Loss: 0.6718 | Acc: 0.7802
Val   Loss: 0.6274 | Acc: 0.8110
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 14/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.33it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.64it/s]


Train Loss: 0.6247 | Acc: 0.7988
Val   Loss: 0.5701 | Acc: 0.8247
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 15/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.34it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.73it/s]


Train Loss: 0.5648 | Acc: 0.8413
Val   Loss: 0.6007 | Acc: 0.8082

Epoch 16/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.32it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.73it/s]


Train Loss: 0.5524 | Acc: 0.8310
Val   Loss: 0.6289 | Acc: 0.7890

Epoch 17/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.32it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.61it/s]


Train Loss: 0.5436 | Acc: 0.8331
Val   Loss: 0.5873 | Acc: 0.8027

Epoch 18/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.33it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.62it/s]


Train Loss: 0.4958 | Acc: 0.8537
Val   Loss: 0.5258 | Acc: 0.8411
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 19/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.33it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.66it/s]


Train Loss: 0.4830 | Acc: 0.8647
Val   Loss: 0.6342 | Acc: 0.7753

Epoch 20/50


Training: 100%|██████████| 91/91 [01:07<00:00,  1.36it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.67it/s]


Train Loss: 0.4475 | Acc: 0.8784
Val   Loss: 0.5613 | Acc: 0.8192

Epoch 21/50


Training: 100%|██████████| 91/91 [01:06<00:00,  1.36it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.67it/s]


Train Loss: 0.4256 | Acc: 0.8832
Val   Loss: 0.4757 | Acc: 0.8630
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 22/50


Training: 100%|██████████| 91/91 [01:07<00:00,  1.36it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.60it/s]


Train Loss: 0.4319 | Acc: 0.8757
Val   Loss: 0.4779 | Acc: 0.8411

Epoch 23/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.32it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.60it/s]


Train Loss: 0.3968 | Acc: 0.8832
Val   Loss: 0.4592 | Acc: 0.8548

Epoch 24/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.32it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.62it/s]


Train Loss: 0.4104 | Acc: 0.8723
Val   Loss: 0.4517 | Acc: 0.8658
✅ Saved best model to ./outputs/apple/models/InceptionMamba_apple_286.pth

Epoch 25/50


Training: 100%|██████████| 91/91 [01:08<00:00,  1.33it/s]
Evaluating: 100%|██████████| 23/23 [00:14<00:00,  1.59it/s]


Train Loss: 0.3515 | Acc: 0.9073
Val   Loss: 0.4801 | Acc: 0.8548

Epoch 26/50


Training: 100%|██████████| 91/91 [01:07<00:00,  1.35it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.65it/s]


Train Loss: 0.3409 | Acc: 0.9093
Val   Loss: 0.4329 | Acc: 0.8466

Epoch 27/50


Training: 100%|██████████| 91/91 [01:06<00:00,  1.37it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.65it/s]


Train Loss: 0.3255 | Acc: 0.9087
Val   Loss: 0.4537 | Acc: 0.8658

Epoch 28/50


Training: 100%|██████████| 91/91 [01:06<00:00,  1.37it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.67it/s]


Train Loss: 0.3188 | Acc: 0.9032
Val   Loss: 0.3995 | Acc: 0.8658

Epoch 29/50


Training: 100%|██████████| 91/91 [01:07<00:00,  1.36it/s]
Evaluating: 100%|██████████| 23/23 [00:13<00:00,  1.65it/s]

Train Loss: 0.2890 | Acc: 0.9190
Val   Loss: 0.4711 | Acc: 0.8548
Early stopping at epoch 29





In [21]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score, accuracy_score
from torch.nn.functional import softmax
from torchvision import transforms
import pandas as pd
from PIL import Image
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Class and function definitions taken from the log
# ... (Copy the class definitions: PScan, MambaConfig, Mamba, ResidualBlock, MambaBlock, RMSNorm, MambaHead, MambaModule, PatchEmbedding, VGGPlantXMamba from the log)

# AppleDataset definition
class AppleDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs described by a dataframe.

    Every row is expected to provide an ``image_id`` (the stem of a ``.jpg``
    file inside ``image_dir``) and a ``label_idx`` value.
    """

    def __init__(self, dataframe, image_dir, transform):
        """Store the row table, the image folder and the image transform."""
        self.df, self.image_dir, self.transform = dataframe, image_dir, transform

    def __len__(self):
        """Return the number of rows in the backing dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` as RGB, transform it, and pair it with its label tensor."""
        record = self.df.iloc[idx]
        filename = record['image_id'] + ".jpg"
        rgb_image = Image.open(os.path.join(self.image_dir, filename)).convert("RGB")
        return self.transform(rgb_image), torch.tensor(record['label_idx'])

# Build the model (4 classes for the Apple dataset)
model = InceptionPlantXMamba(num_classes=4)
MODEL_PATH = "./outputs/apple/models/InceptionMamba_apple_286.pth"
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the trained weights. `map_location` remaps the stored tensors onto
# DEVICE, so a checkpoint written during a CUDA run can still be loaded when
# this cell falls back to the CPU.
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
model.to(DEVICE)
model.eval()

# Test-time preprocessing: resize, convert to tensor, normalise per channel
image_size = (224, 224)  # value taken from the log
transform = transforms.Compose(
    [
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# The test set is carved out of train.csv
csv_path = "./data/raw/plant-pathology-2020-fgvc7/train.csv"
image_dir = "./data/raw/plant-pathology-2020-fgvc7/images"
df = pd.read_csv(csv_path)

# Label handling: take the column holding the row maximum among the four
# class columns as the class name, then encode the names as integer indices.
df['label'] = df.loc[:, ['healthy', 'multiple_diseases', 'rust', 'scab']].idxmax(axis="columns")
label_encoder = LabelEncoder()
df['label_idx'] = label_encoder.fit_transform(df['label'])

# Stratified 70 / 15 / 15 split into train, validation and test.
# NOTE(review): the test rows are only truly held out if the training cell
# used this same split and seed -- confirm against the data-loading cell.
train_df, temp_df = train_test_split(
    df,
    test_size=0.3,
    random_state=42,
    stratify=df['label'],
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=42,
    stratify=temp_df['label'],
)

# Dataset and dataloader for the test split
test_dataset = AppleDataset(dataframe=test_df, image_dir=image_dir, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

# Evaluation pass: accumulate the loss, predicted labels, true labels and
# class probabilities over the whole test loader.
criterion = nn.CrossEntropyLoss()
all_preds, all_labels, all_probs = [], [], []
total_loss = 0.0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Scale by the batch size; the total is divided by the dataset size later
        total_loss += loss.item() * inputs.size(0)

        # Per-class probabilities and predicted labels, moved to host memory
        probs = softmax(outputs, dim=1).cpu().numpy()
        preds = outputs.argmax(dim=1).cpu().numpy()
        labels = labels.cpu().numpy()

        all_preds.extend(preds)
        all_labels.extend(labels)
        all_probs.extend(probs)

# Convert the accumulated lists to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)
all_probs = np.array(all_probs)

# Compute the metrics
loss = total_loss / len(test_dataset)  # mean loss per test sample
accuracy = accuracy_score(all_labels, all_preds)

# Class-wise scores averaged with support weights
precision = precision_score(all_labels, all_preds, average='weighted')
recall = recall_score(all_labels, all_preds, average='weighted')
f1 = f1_score(all_labels, all_preds, average='weighted')

# Multi-class AUC (one-vs-rest) computed from the predicted probabilities
auc = roc_auc_score(
    all_labels,
    all_probs,
    multi_class='ovr',
    average='weighted',
)
kappa = cohen_kappa_score(all_labels, all_preds)

# Print the results, one metric per line
print(
    f"Loss (Mất mát): {loss:.4f}",
    f"Accuracy (Độ chính xác): {accuracy:.4f}",
    f"Precision (Độ chính xác dự đoán dương): {precision:.4f}",
    f"Recall (Tỷ lệ phát hiện dương): {recall:.4f}",
    f"F1 Score (Trung bình điều hòa): {f1:.4f}",
    f"AUC (Diện tích dưới đường cong ROC): {auc:.4f}",
    f"Kappa Score (Độ đo Cohen’s Kappa): {kappa:.4f}",
    sep="\n",
)

Loss (Mất mát): 0.4538
Accuracy (Độ chính xác): 0.8613
Precision (Độ chính xác dự đoán dương): 0.8745
Recall (Tỷ lệ phát hiện dương): 0.8613
F1 Score (Trung bình điều hòa): 0.8463
AUC (Diện tích dưới đường cong ROC): 0.9624
Kappa Score (Độ đo Cohen’s Kappa): 0.7973
