# CloakHat Patch Generation Pipeline

## 1: Conda Setup

Set up the environment

`conda create -n cloakhat python=3.10 -y` <br>
`conda activate cloakhat`

PyTorch with CUDA <br>
`conda install pytorch torchvision pytorch-cuda=11.8 -c pytorch -c nvidia -y`

PyTorch3D for differentiable rendering <br>
`pip install "git+https://github.com/facebookresearch/pytorch3d.git"`

Detection models <br>
`pip install ultralytics`

Remaining Python dependencies <br>
`pip install opencv-python-headless matplotlib tqdm tensorboard pyyaml trimesh`

## 2: Python Setup

Import the libraries used throughout the notebook, then set up logging and select the compute device

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import cv2
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
from datetime import datetime
import logging

from pytorch3d.io import load_obj
from pytorch3d.structures import Meshes
from pytorch3d.renderer import (look_at_view_transform, FoVPerspectiveCameras, RasterizationSettings, MeshRenderer, MeshRasterizer, SoftPhongShader, TexturesUV, PointLights)

from ultralytics import YOLO

#Notebook-wide logging: INFO level, each message prefixed with a timestamp
logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(message)s')
logger = logging.getLogger(__name__)

#Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logger.info(f"Device: {device}")

## 3: Config

Central configuration: paths, model sizes, training hyperparameters, and loss weights

In [None]:
#Single dictionary holding every tunable setting of the pipeline
CONFIG = {
    'dataset_dir': './data/drone_footage', #Directory with the drone footage
    'mesh_path': './assets/hat.obj', #Hat mesh file (.obj)
    'output_dir': './outputs', #Where outputs are written

    #Generator
    'latent_channels': 128, #Channel count of the latent input
    'latent_size': 9, #Spatial size of latent input
    'texture_size': 288, #Output texture size from generator

    #Viewpoint sampling
    'elevation_range': (60, 90), #Degrees from horizontal (90 = overhead)
    'scale_range': (0.3, 1.2), #Altitude proxy

    #Training Stage 1
    'stage1_epochs': 100,
    'stage1_batch_size': 8,
    'stage1_lr': 2e-4,

    #Training Stage 2
    'stage2_iterations': 2000,
    'stage2_lr': 0.01,
    'local_latent_size': 18, #Size of optimizable latent pattern

    #Loss weights
    'lambda_tv': 2.5, #Total variation
    'lambda_nps': 0.01, #Non-printability score
    'lambda_info': 0.1, #Mutual information (Stage 1 only)

    #T-SEA augmentation settings
    'cutout_prob': 0.9,
    'cutout_ratio': 0.4,
    'shakedrop_prob': 0.5,

    #Rendering
    'render_size': 256,

    #Printing (PLACEHOLDER, need details from FABLAB)
    'nps_threshold': 0.7,  #Saturation * brightness threshold

    #Attack config (one of: white, gray, black)
    'attack_mode': 'gray',
}

Path(CONFIG['output_dir']).mkdir(parents=True, exist_ok=True)

## 4: FCN Generator

Generate the texture: a fully convolutional network turns a latent noise tensor into an RGB image

In [None]:
class FCNGenerator(nn.Module):
    """Fully convolutional generator that maps a latent tensor to an RGB texture.

    A 3x3 convolution is followed by five stride-2 transposed convolutions
    (kernel 4, padding 1), each of which doubles the spatial size, and a final
    3x3 convolution to 3 channels with a tanh. A latent of shape
    ``(B, latent_channels, S, S)`` therefore becomes ``(B, 3, 32 * S, 32 * S)``.

    Args:
        latent_channels: Number of channels of the latent input.
        latent_size: Spatial size ``S`` of the latent that ``generate`` samples
            when no latent is passed in. The default of 9 gives 288x288 output.

    Attributes:
        latent_channels: Channel count expected by the first convolution.
        latent_size: Default spatial size of self-sampled latents.
        output_size: Texture side length for a latent of ``latent_size``.
    """

    def __init__(self, latent_channels=128, latent_size=9):
        super().__init__()

        #Remembered so generate() samples latents that match the first conv
        self.latent_channels = latent_channels
        self.latent_size = latent_size

        self.net = nn.Sequential(
            #S -> S (9 -> 9 with the defaults)
            nn.Conv2d(latent_channels, 512, 3, 1, 1, padding_mode='zeros'),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True),
            #9 -> 18
            nn.ConvTranspose2d(512, 512, 4, 2, 1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True),
            #18 -> 36
            nn.ConvTranspose2d(512, 256, 4, 2, 1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            #36 -> 72
            nn.ConvTranspose2d(256, 128, 4, 2, 1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            #72 -> 144
            nn.ConvTranspose2d(128, 64, 4, 2, 1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            #144 -> 288
            nn.ConvTranspose2d(64, 32, 4, 2, 1),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.2, inplace=True),
            #288 -> 288 (to RGB), tanh keeps values in [-1, 1]
            nn.Conv2d(32, 3, 3, 1, 1, padding_mode='zeros'),
            nn.Tanh()
        )
        #Five 2x upsampling stages => overall factor of 32
        self.output_size = latent_size * 32
        self._init_weights()

    def _init_weights(self):
        """Kaiming-normal init for (transposed) convs; identity init for batch norm."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                #a=0.2 matches the LeakyReLU slope used throughout the network
                nn.init.kaiming_normal_(m.weight, a=0.2, nonlinearity='leaky_relu')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, z):
        """Run the network on latent ``z``; the output is in [-1, 1] (tanh)."""
        return self.net(z)

    def generate(self, z=None, batch_size=1):
        """Return textures rescaled to [0, 1].

        Args:
            z: Optional latent tensor. When ``None``, a standard-normal latent of
                shape ``(batch_size, latent_channels, latent_size, latent_size)``
                is sampled on the same device as the model parameters.
            batch_size: Number of latents to sample; ignored when ``z`` is given.

        Returns:
            Tensor with the tanh output mapped from [-1, 1] to [0, 1].
        """
        if z is None:
            z = torch.randn(
                batch_size,
                self.latent_channels,
                self.latent_size,
                self.latent_size,
                device=next(self.parameters()).device,
            )
        return (self.forward(z) + 1) / 2

#Smoke test: build a generator with default settings and sample one texture
generator = FCNGenerator().to(device)
test_out = generator.generate(batch_size=1)
logger.info(f"Generator output: {test_out.shape}")  #Expected shape: (1, 3, 288, 288)

## 5: Auxiliary Network

In [None]:
class AuxiliaryNetwork(nn.Module):
    """Scores a (texture, latent) pair with one scalar per sample.

    The texture and the latent are each encoded to a 256-dimensional vector by
    their own convolutional branch; the two vectors are concatenated and passed
    through a small MLP that outputs a single value.

    Args:
        latent_channels: Number of channels of the latent input ``z``.
    """

    def __init__(self, latent_channels=128):
        super().__init__()

        #Texture branch: three stride-2 convs, then global average pooling
        texture_layers = [
            nn.Conv2d(3, 64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
        ]
        self.tex_enc = nn.Sequential(*texture_layers)

        #Latent branch: one 3x3 conv, then global average pooling
        latent_layers = [
            nn.Conv2d(latent_channels, 256, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.2),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
        ]
        self.lat_enc = nn.Sequential(*latent_layers)

        #Joint head on the concatenated features (256 + 256 = 512 inputs)
        head_layers = [
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 1),
        ]
        self.joint = nn.Sequential(*head_layers)

    def forward(self, texture, z):
        """Return a ``(batch, 1)`` score for each texture/latent pair."""
        texture_features = self.tex_enc(texture)
        latent_features = self.lat_enc(z)
        combined = torch.cat((texture_features, latent_features), dim=1)
        return self.joint(combined)


def compute_mi_loss(aux_net, texture, z):
    """Return the negated softplus-based mutual-information estimate.

    ``aux_net`` scores matched ``(texture, z)`` pairs and mismatched pairs, where
    the mismatched pairs are formed by permuting ``z`` along the batch
    dimension. The estimate is
    ``mean(-softplus(-T_matched)) - mean(softplus(T_mismatched))``
    (a Jensen-Shannon-style estimate), and its negation is returned so that
    minimizing the loss maximizes the estimate.

    Args:
        aux_net: Callable taking ``(texture, z)`` and returning per-sample scores.
        texture: Batch of textures, aligned with ``z`` along dimension 0.
        z: Batch of latents that produced ``texture``.

    Returns:
        Scalar tensor holding the loss.

    Note:
        With a batch of one sample the permutation is the identity, so the
        "mismatched" pair equals the matched pair.
    """
    #Matched pairs
    joint_scores = aux_net(texture, z)
    pos_term = -F.softplus(-joint_scores).mean()

    #Mismatched pairs: permute z along the batch dimension. The permutation is
    #created on z's device so indexing does not mix CPU and GPU tensors.
    perm = torch.randperm(z.size(0), device=z.device)
    marginal_scores = aux_net(texture, z[perm])
    neg_term = F.softplus(marginal_scores).mean()

    mi = pos_term - neg_term
    return -mi  #Negate because we minimize loss but want to maximize MI