## Notebook for the Crossflow Paper

### Import Libaries

In [1]:
#!mkdir data
#!gdown 1CVAQDuPOiwm8h9LJ8a_oOs6zOWS6EgkB
#!gdown 1ykZ9fjTxUwdiEwqagoYZiMcD5aG-7rHe
#!unzip -o test.zip -d data
#!unzip -o train.zip -d data
# from google.colab import drive
# drive.mount('/content/drive')
!git clone https://github.com/Mamiglia/challenge.git

fatal: destination path 'challenge' already exists and is not an empty directory.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from pathlib import Path
from tqdm import tqdm

from challenge.src.common import load_data, prepare_train_data, generate_submission

### Create Neural Network Architectures

- VAE-ENCODER (1024) -> LATENT SPACE (1536) -> VAE-DECODER (1024) train a VAE in parallel with the crossflow network
- CROSSFLOW GETS THE LATENT SPACE FROM VAE AS INPUT
-> INPUT (1024) -> VAE-ENCODER (1536) -> INPUT FOR CROSSFLOW -> CROSSFLOW TRANSFORMER -> OUTPUT FOR CROSSFLOW (1536)
- Use Text Embeddings as input for vae and image embeddings for crossflow
![image.png](attachment:image.png)

In [3]:
class MLPBlock(nn.Module):
    """Simple MLP block with LayerNorm, activation, and dropout"""
    def __init__(self, in_dim, out_dim, dropout=0.0, activation=nn.GELU):
        super().__init__()
        self.block = nn.Sequential(
            nn.LayerNorm(in_dim),
            nn.Linear(in_dim, out_dim),
            activation(),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.block(x)


class ContextMapperVAE(nn.Module):
    """
    Configurable VAE:
    - input_dim: dimension of the context embeddings
    - latent_dim: dimension of the target/image latent
    - num_layers: number of hidden layers for encoder/decoder
    - hidden_dim: width of hidden layers
    - dropout: dropout probability
    """
    def __init__(self, input_dim, latent_dim, num_layers=6, hidden_dim=512, dropout=0.1):
        super().__init__()
        # ---------------- Encoder ----------------
        enc_layers = []
        dim_in = input_dim
        for _ in range(num_layers):
            enc_layers.append(MLPBlock(dim_in, hidden_dim, dropout))
            dim_in = hidden_dim
        self.encoder_backbone = nn.Sequential(*enc_layers)
        self.encoder_head = nn.Linear(hidden_dim, latent_dim * 2)  # μ and logσ

        # ---------------- Decoder ----------------
        dec_layers = []
        dim_in = latent_dim
        for _ in range(num_layers):
            dec_layers.append(MLPBlock(dim_in, hidden_dim, dropout))
            dim_in = hidden_dim
        self.decoder_backbone = nn.Sequential(*dec_layers)
        self.decoder_head = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        # --- Encoder path ---
        h = self.encoder_backbone(x)
        stats = self.encoder_head(h)
        mu, log_sigma = stats.chunk(2, dim=-1)
        sigma = torch.exp(log_sigma)
        eps = torch.randn_like(mu)
        z0 = mu + sigma * eps

        # --- Decoder path ---
        h_dec = self.decoder_backbone(z0)
        x_recon = self.decoder_head(h_dec)
        return z0, mu, log_sigma, x_recon

    def kl_loss(self, mu, log_sigma):
        # KL(q(z|x) || N(0,1))
        return -0.5 * torch.sum(1 + 2 * log_sigma - mu.pow(2) - torch.exp(2 * log_sigma), dim=-1).mean()


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

# --- Classi di Supporto per il U-ViT ---

class TimestepEmbedding(nn.Module):
    """
    Crea embedding sinusoidali per il tempo e li proietta con un MLP.
    Questo è un modo standard e potente per condizionare sul tempo.
    """
    def __init__(self, embed_dim, hidden_dim, out_dim):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(embed_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, out_dim),
        )
        self.freq_embed = nn.Parameter(torch.randn(1, embed_dim // 2), requires_grad=False)

    def forward(self, t):
        # Crea l'embedding sinusoidale
        t_freq = t.unsqueeze(-1) * self.freq_embed.to(t.device) * 2 * math.pi
        emb = torch.cat([torch.sin(t_freq), torch.cos(t_freq)], dim=-1)
        
        # Proietta attraverso l'MLP
        return self.mlp(emb)

class AdaLNorm(nn.Module):
    """
    Adaptive Layer Normalization.
    Usa l'embedding temporale per calcolare scale e shift.
    """
    def __init__(self, embed_dim, time_embed_dim):
        super().__init__()
        self.norm = nn.LayerNorm(embed_dim, elementwise_affine=False)
        self.proj = nn.Linear(time_embed_dim, 2 * embed_dim)

    def forward(self, x, time_emb):
        # Proietta l'embedding temporale per ottenere scale e shift
        scale, shift = self.proj(time_emb).chunk(2, dim=-1)
        # Applica la modulazione: y = scale * norm(x) + shift
        # Aggiungiamo le dimensioni necessarie per il broadcasting
        return self.norm(x) * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)

class UViTBlock(nn.Module):
    """
    Un singolo blocco del nostro U-ViT.
    Contiene Self-Attention e un Feed-Forward, entrambi modulati dal tempo.
    """
    def __init__(self, embed_dim, num_heads, time_embed_dim, ff_dim, dropout=0.1):
        super().__init__()
        self.norm1 = AdaLNorm(embed_dim, time_embed_dim)
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        
        self.norm2 = AdaLNorm(embed_dim, time_embed_dim)
        self.ffn = nn.Sequential(
            nn.Linear(embed_dim, ff_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(ff_dim, embed_dim),
        )

    def forward(self, x, time_emb):
        # Blocco di attenzione con connessione residua
        x = x + self.attn(self.norm1(x, time_emb), self.norm1(x, time_emb), self.norm1(x, time_emb))[0]
        # Blocco feed-forward con connessione residua
        x = x + self.ffn(self.norm2(x, time_emb))
        return x

# --- La Nuova Architettura: UViTFlow ---

class UViTFlow(nn.Module):
    """
    Transformer Flow basato su architettura U-ViT.
    Prende un vettore latente, lo rimodella in un formato spaziale,
    lo processa con un'architettura U-Net di blocchi Transformer,
    e lo riappiattisce in un vettore.
    """
    def __init__(self, latent_dim=1536, channels=24, size=8, patch_size=2,
                 embed_dim=768, depth=6, num_heads=8, ff_dim=2048, dropout=0.1):
        super().__init__()
        
        assert latent_dim == channels * size * size, "Dimensioni latenti non compatibili con la forma spaziale"
        assert size % patch_size == 0, "La dimensione deve essere divisibile per la patch_size"
        
        self.latent_dim = latent_dim
        self.channels = channels
        self.size = size
        self.patch_size = patch_size
        num_patches = (size // patch_size) ** 2

        # 1. Embedding per il tempo
        self.time_embed = TimestepEmbedding(256, 1024, embed_dim)

        # 2. Patch Embedding
        self.patch_embed = nn.Conv2d(channels, embed_dim, kernel_size=patch_size, stride=patch_size)
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches, embed_dim))

        # 3. Blocchi U-ViT (Encoder + Decoder con skip connections)
        self.depth = depth
        self.encoder_blocks = nn.ModuleList([
            UViTBlock(embed_dim, num_heads, embed_dim, ff_dim, dropout) for _ in range(depth)
        ])
        self.decoder_blocks = nn.ModuleList([
            UViTBlock(embed_dim, num_heads, embed_dim, ff_dim, dropout) for _ in range(depth)
        ])

        # 4. Layer finale
        self.final_norm = nn.LayerNorm(embed_dim)
        self.final_proj = nn.Linear(embed_dim, channels * patch_size * patch_size)

    def forward(self, z_t, t):
        # --- Preparazione Input ---
        # 0. Rimodella il vettore in un formato spaziale
        # Input z_t: [B, 1536] -> [B, 24, 8, 8]
        x = z_t.view(-1, self.channels, self.size, self.size)

        # 1. Calcola l'embedding temporale
        # Input t: [B] -> time_emb: [B, embed_dim]
        time_emb = self.time_embed(t)

        # --- Encoder U-ViT ---
        # 2. Patch & Position Embedding
        # x: [B, 24, 8, 8] -> [B, 768, 4, 4]
        x = self.patch_embed(x)
        # x: [B, 768, 4, 4] -> [B, 16, 768] (flatten spaziale)
        x = x.flatten(2).transpose(1, 2)
        x = x + self.pos_embed

        # 3. Applica i blocchi dell'encoder, salvando gli output per le skip connections
        skip_connections = []
        for block in self.encoder_blocks:
            x = block(x, time_emb)
            skip_connections.append(x)

        # --- Decoder U-ViT ---
        # 4. Applica i blocchi del decoder, aggiungendo le skip connections
        for block in self.decoder_blocks:
            x = x + skip_connections.pop() # Aggiunge la skip connection
            x = block(x, time_emb)

        # --- Output ---
        # 5. Proiezione finale per tornare allo spazio originale
        x = self.final_norm(x)
        x = self.final_proj(x)
        
        # 6. "Un-patch" e appiattimento
        # x: [B, 16, 24*2*2] -> [B, 16, 96]
        # Ricostruisci la forma spaziale [B, C, H, W]
        # Questo è l'inverso del patch embedding
        num_patches_h = self.size // self.patch_size
        x = x.transpose(1, 2).view(-1, self.channels, self.patch_size, self.patch_size, num_patches_h, num_patches_h)
        x = x.permute(0, 1, 4, 2, 5, 3).reshape(-1, self.channels, self.size, self.size)
        
        # 7. Appiattisci l'output per tornare a un vettore
        # x: [B, 24, 8, 8] -> [B, 1536]
        v_pred = x.view(-1, self.latent_dim)
        
        return v_pred

In [5]:
@torch.no_grad()
def integrate_flow(flow, z0, n_steps=20):
    z = z0.clone()
    t_values = torch.linspace(0, 1, n_steps, device=z0.device)
    dt = 1.0 / n_steps
    for t in t_values:
        v = flow(z, t.repeat(z.size(0)))
        z = z + dt * v
    return z


In [6]:
import torch
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np

def train_epoch(train_loader, vae, flow, optimizer,
                lambda_kl=1e-4,
                temperature=0.07, queue_size=4098, device="cuda", epoch = 0):
    vae.train()
    flow.train()
    total_loss, total_fm, total_enc, total_kl = 0, 0, 0, 0
    criterion = QueueInfoNCELoss(dim=1536, temperature=temperature, queue_size=queue_size).to(device)

    for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch: {epoch}"):
        context = X_batch.to(device)
        image = y_batch.to(device)

        z0, mu, log_sigma, recon = vae(context)
        z1 = image

        t = torch.rand(z0.size(0), 1, device=device)
        z_t = (1 - t) * z0 + t * z1
        v_hat = z1 - z0
        v_pred = flow(z_t, t.squeeze())

        L_Enc = criterion(z0, z1)
        L_FM = F.mse_loss(v_pred, v_hat)
        L_KL = vae.kl_loss(mu, log_sigma)

        loss = L_FM + L_Enc + lambda_kl * L_KL

        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_fm += L_FM.item()
        total_enc += L_Enc.item()
        total_kl += L_KL.item()

        with torch.no_grad():
          img_keys = F.normalize(image, dim=1).detach()
          # put them into the queue
          criterion._enqueue(keys=img_keys)

    n = len(train_loader)
    return {
        "loss": total_loss / n,
        "L_FM": total_fm / n,
        "L_Enc": total_enc / n,
        "L_KL": total_kl / n
    }


@torch.no_grad()
def validate_epoch(val_loader, vae, flow, device="cuda", n_steps=20):
    vae.eval()
    flow.eval()
    cos_sims = []

    for X_batch, y_batch in tqdm(val_loader, desc="Validation"):
        context = X_batch.to(device)
        image = y_batch.to(device)

        # Encode to z0
        z0, _, _, _ = vae(context)

        # Integrate flow to predict target embedding
        z1_pred = integrate_flow(flow, z0, n_steps=n_steps)
        z1_true = image

        # Compute cosine similarity between predicted and true image embeddings
        cos_sim = F.cosine_similarity(z1_pred, z1_true, dim=-1)
        cos_sims.append(cos_sim.cpu().numpy())

    cos_sims = np.concatenate(cos_sims)
    mean_cosine = np.mean(cos_sims)
    acc_80 = np.mean(cos_sims > 0.8)  # how often similarity > 0.8
    acc_90 = np.mean(cos_sims > 0.9)

    return {
        "mean_cosine": mean_cosine,
        "acc@0.8": acc_80,
        "acc@0.9": acc_90
    }


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class QueueInfoNCELoss(nn.Module):
    """
    One-directional (text → image) InfoNCE with optional sinusoidal time embedding.
    """
    def __init__(self, dim, temperature=0.07, queue_size=4096, use_time_embedding=True):
        super().__init__()
        self.temperature = temperature
        self.queue_size = queue_size

        # Single queue for image embeddings
        self.register_buffer("queue", F.normalize(torch.randn(queue_size, dim), dim=1))
        self.register_buffer("queue_ptr", torch.zeros(1, dtype=torch.long))



    # ------------------------------
    # Queue Management
    # ------------------------------
    @torch.no_grad()
    def _enqueue(self, keys):
        """Add image keys (B, dim) to the circular queue after backward()."""
        bsz = keys.shape[0]
        keys = F.normalize(keys, dim=1)

        ptr = int(self.queue_ptr.item())
        end_ptr = (ptr + bsz) % self.queue_size

        if end_ptr > ptr:
            self.queue[ptr:end_ptr] = keys
        else:
            first_len = self.queue_size - ptr
            self.queue[ptr:] = keys[:first_len]
            self.queue[:end_ptr] = keys[first_len:]

        self.queue_ptr[0] = end_ptr

    # ------------------------------
    # Forward Pass
    # ------------------------------
    def forward(self, z_text, z_img):
        """
        z_text: (B, dim) predicted text→image latent
        z_img:  (B, dim) target image latent
        """

        # Normalize embeddings
        z_text = F.normalize(z_text, dim=1)
        z_img = F.normalize(z_img, dim=1)

        # Positive logits: (B, 1)
        l_pos = torch.sum(z_text * z_img, dim=-1, keepdim=True)

        # Negatives: (B, queue_size)
        l_neg = torch.matmul(z_text, self.queue.T)

        # Combine and scale by temperature
        logits = torch.cat([l_pos, l_neg], dim=1) / self.temperature
        labels = torch.zeros(logits.size(0), dtype=torch.long, device=z_text.device)

        loss = F.cross_entropy(logits, labels)
        return loss


In [8]:
# ====== Procrustes initialization ======
def procrustes_init(text_embs, img_embs):
    """
    text_embs: (N, d_text)
    img_embs:  (N, d_img)
    returns: weight matrix (d_img, d_text)
    """
    # Center both
    X = text_embs - text_embs.mean(0, keepdim=True)
    Y = img_embs - img_embs.mean(0, keepdim=True)

    # Compute SVD of cross-covariance
    U, _, Vt = torch.linalg.svd(X.T @ Y, full_matrices=False)
    W = U @ Vt  # orthogonal map d_text→d_img
    return W.T   # shape (d_img, d_text) for nn.Linear weight


def apply_procrustes_init_to_final(model, text_sample, img_sample):
    """
    Apply Procrustes initialization to a model
    """
    with torch.no_grad():
        # Compute Procrustes matrix
        W = procrustes_init(text_embs=text_sample, img_embs=img_sample)

        # Apply to the appropriate layer
        applied = False
        for name, m in model.named_modules():
            if isinstance(m, nn.Linear):
                # Transformer: apply to first projection (proj_in)
                if isinstance(model, TransformerFlow) and name.endswith("output"):
                    print(m.weight.shape, W.shape)
                    if m.weight.shape == W.shape:
                        m.weight.copy_(W)
                        applied = True
                        break
        if not applied:
            print("⚠️ Warning: Could not find matching layer for Procrustes init")
    return model


### Load Data

In [9]:
# 3. Crossflow
# 4. Data Augmentation
# 5. Zero Shot Stitching
# 6. Diffusion Priors
# Configuration
EPOCHS = 60
BATCH_SIZE = 256
LR = 0.0001
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load data
train_data = load_data("./data/train/train.npz")
X, y, label = prepare_train_data(train_data)
DATASET_SIZE = len(X)
# Split train/val
# This is done only to measure generalization capabilities, you don't have to
# use a validation set (though we encourage this)
n_train = int(0.9 * len(X))
TRAIN_SPLIT = torch.zeros(len(X), dtype=torch.bool)
TRAIN_SPLIT[:n_train] = 1
X_train, X_val = X[TRAIN_SPLIT], X[~TRAIN_SPLIT]
y_train, y_val = y[TRAIN_SPLIT], y[~TRAIN_SPLIT]


train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
y_train.shape, X_train.shape, train_loader.batch_size, val_loader.batch_size

(125000,)
Train data: 125000 captions, 125000 images


(torch.Size([112500, 1536]), torch.Size([112500, 1024]), 256, 256)

In [None]:

vae = ContextMapperVAE(
    input_dim=1024, latent_dim=1536,
    num_layers=3, hidden_dim=1024, dropout=0.1
).to(DEVICE)

# I parametri sono scelti per essere potenti ma gestibili.
# Puoi fare tuning su embed_dim, depth, num_heads, etc.
flow = UViTFlow(
    latent_dim=1536,    # Dimensione totale del vettore
    channels=24,        # Canali della rappresentazione spaziale (24*8*8=1536)
    size=8,             # Altezza/Larghezza della rappresentazione spaziale
    patch_size=2,       # Dimensione di ogni patch (8x8 -> 4x4 patches)
    embed_dim=768,      # Dimensione interna del Transformer
    depth=6,            # Numero di blocchi encoder/decoder
    num_heads=12,       # Numero di teste di attenzione
    ff_dim=3072,        # Dimensione del layer feed-forward (spesso 4*embed_dim)
    dropout=0.1
).to(DEVICE)

optimizer = torch.optim.AdamW(
    list(vae.parameters()) + list(flow.parameters()),
    lr=1e-4, weight_decay=0.03, betas=(0.9, 0.9)
)

procrustes_init = False
if procrustes_init:
  print("Computing Procrustes initialization...")
  text_list, img_list = [], []
  for i, (X, y) in enumerate(train_loader):
      text_list.append(X.cpu())
      img_list.append(y.cpu())
      if sum(t.shape[0] for t in text_list) >= 20000:
            break
  text_sample = torch.cat(text_list, dim=0)[:20000]
  img_sample = torch.cat(img_list, dim=0)[:20000]
  flow = apply_procrustes_init_to_final(flow, text_sample, img_sample)

for epoch in range(EPOCHS):
    train_metrics = train_epoch(train_loader, vae, flow, optimizer, device=DEVICE, epoch=epoch)
    val_metrics = validate_epoch(val_loader, vae, flow, device=DEVICE)
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    print(f"Train: {train_metrics}")
    print(f"Val: {val_metrics}")

Epoch: 0: 100%|██████████| 440/440 [02:32<00:00,  2.88it/s]
Validation: 100%|██████████| 49/49 [01:40<00:00,  2.05s/it]



Epoch 1/60
Train: {'loss': 6.5542689182541585, 'L_FM': 0.30120736418122596, 'L_Enc': 6.02543553980914, 'L_KL': 2276.2602645007046}
Val: {'mean_cosine': np.float32(0.7561125), 'acc@0.8': np.float64(0.21416), 'acc@0.9': np.float64(8e-05)}


Epoch: 1: 100%|██████████| 440/440 [02:27<00:00,  2.99it/s]
Validation: 100%|██████████| 49/49 [01:40<00:00,  2.06s/it]



Epoch 2/60
Train: {'loss': 5.676858457652005, 'L_FM': 0.19580019099468535, 'L_Enc': 5.220968061143702, 'L_KL': 2600.902276056463}
Val: {'mean_cosine': np.float32(0.7567622), 'acc@0.8': np.float64(0.22808), 'acc@0.9': np.float64(0.00096)}


Epoch: 2: 100%|██████████| 440/440 [02:31<00:00,  2.90it/s]
Validation: 100%|██████████| 49/49 [01:44<00:00,  2.14s/it]



Epoch 3/60
Train: {'loss': 5.422777050191706, 'L_FM': 0.17212211608209393, 'L_Enc': 4.983886927366257, 'L_KL': 2667.680331143466}
Val: {'mean_cosine': np.float32(0.76182187), 'acc@0.8': np.float64(0.25984), 'acc@0.9': np.float64(0.0016)}


Epoch: 3: 100%|██████████| 440/440 [02:33<00:00,  2.86it/s]
Validation: 100%|██████████| 49/49 [01:46<00:00,  2.17s/it]



Epoch 4/60
Train: {'loss': 5.2583437702872535, 'L_FM': 0.15989220941608603, 'L_Enc': 4.826927531849254, 'L_KL': 2715.2403875177556}
Val: {'mean_cosine': np.float32(0.7652783), 'acc@0.8': np.float64(0.28624), 'acc@0.9': np.float64(0.0028)}


Epoch: 4: 100%|██████████| 440/440 [02:24<00:00,  3.05it/s]
Validation: 100%|██████████| 49/49 [01:38<00:00,  2.01s/it]



Epoch 5/60
Train: {'loss': 5.1312492094256665, 'L_FM': 0.15177169100127436, 'L_Enc': 4.704134620319714, 'L_KL': 2753.429152610085}
Val: {'mean_cosine': np.float32(0.76829845), 'acc@0.8': np.float64(0.3124), 'acc@0.9': np.float64(0.00392)}


Epoch: 5: 100%|██████████| 440/440 [02:28<00:00,  2.96it/s]
Validation: 100%|██████████| 49/49 [01:42<00:00,  2.10s/it]



Epoch 6/60
Train: {'loss': 5.024801966818896, 'L_FM': 0.14530806277285924, 'L_Enc': 4.600688010996039, 'L_KL': 2788.0589976917613}
Val: {'mean_cosine': np.float32(0.7679533), 'acc@0.8': np.float64(0.31496), 'acc@0.9': np.float64(0.00536)}


Epoch: 6:  99%|█████████▉| 436/440 [02:25<00:01,  3.04it/s]

### Training and Hyperparameter Optimization

### Inference

In [None]:

test_data = load_data("./data/test/test.clean.npz")

test_embds = test_data['captions/embeddings']
test_embds = torch.from_numpy(test_embds).float().to(DEVICE)

with torch.no_grad():
    # Encode to z0
    z0, _, _, _ = vae(test_embds)
    # Integrate flow to predict target embedding
    z1_pred = integrate_flow(flow, z0, n_steps=50)

submission = generate_submission(test_data['captions/ids'], z1_pred, './data/crossflow_submission.csv')
MODEL_PATH = "./data/models/crossflow.pth"
print(f"Model saved to: {MODEL_PATH}")

Generating submission file...
✓ Saved submission to ./data/crossflow_submission.csv
Model saved to: ./data/models/crossflow.pth


In [None]:
import torch
MODEL_PATH = "./data/models/crossflow.pth"
torch.save({
    'vae_state_dict': vae.state_dict(),
    'flow_state_dict': flow.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, MODEL_PATH)
print(f"Model saved to: {MODEL_PATH}")