In [1]:
import os
import random
import string
from PIL import Image, ImageDraw, ImageFont
# os.environ["CUDA_VISIBLE_DEVICES"] = '0'
import torch
import torchvision  # Add this line
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import torch.optim as optim
from tqdm import tqdm
import torch.nn.functional as F
import shutil
# Debugging information
print(torch.__version__)
print(torchvision.__version__)  
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Current device: {torch.cuda.current_device() if torch.cuda.is_available() else 'CPU'}")
if torch.cuda.is_available():
    print(f"Device name: {torch.cuda.get_device_name(0)}")

2.5.1+cu118
0.20.1+cu118
PyTorch version: 2.5.1+cu118
CUDA available: True
Current device: 0
Device name: NVIDIA GeForce RTX 4070 Laptop GPU


In [1]:
# Paths
BASE_DIR = "./dataset"
NO_WATERMARK_DIR = os.path.join(BASE_DIR, "no_watermark")
WATERMARKED_DIR = os.path.join(BASE_DIR, "watermarked")
MASKS_DIR = os.path.join(BASE_DIR, "masks")

OUTPUT_DIR = "./dataset_split"
TRAIN_DIR = os.path.join(OUTPUT_DIR, "train1")
TEST_DIR = os.path.join(OUTPUT_DIR, "test1")
TEST2_DIR = os.path.join(OUTPUT_DIR, "test2")

# Create output directories
os.makedirs(os.path.join(TRAIN_DIR, "no_watermark"), exist_ok=True)
os.makedirs(os.path.join(TRAIN_DIR, "watermarked"), exist_ok=True)
os.makedirs(os.path.join(TRAIN_DIR, "masks"), exist_ok=True)

os.makedirs(os.path.join(TEST_DIR, "no_watermark"), exist_ok=True)
os.makedirs(os.path.join(TEST_DIR, "watermarked"), exist_ok=True)
os.makedirs(os.path.join(TEST_DIR, "masks"), exist_ok=True)

os.makedirs(os.path.join(TEST2_DIR, "no_watermark"), exist_ok=True)
os.makedirs(os.path.join(TEST2_DIR, "watermarked"), exist_ok=True)

# File names
file_names = sorted(os.listdir(NO_WATERMARK_DIR))
total_files = len(file_names)

# Split sizes
TRAIN_SPLIT = 6000  # 6000 for training
TEST_SPLIT = 10000  # Remaining 50% for testing

train_files = file_names[:TRAIN_SPLIT]
test_files = file_names[TRAIN_SPLIT:TEST_SPLIT]
test2_files = file_names[TEST_SPLIT:]

# Function to copy files
def copy_files(file_list, src_dir, dest_dir):
    for file_name in file_list:
        src_path = os.path.join(src_dir, file_name)
        dest_path = os.path.join(dest_dir, file_name)
        if os.path.exists(src_path):
            shutil.copy(src_path, dest_path)

# Copy training files
print("Copying training files...")
copy_files(train_files, NO_WATERMARK_DIR, os.path.join(TRAIN_DIR, "no_watermark"))
copy_files(train_files, WATERMARKED_DIR, os.path.join(TRAIN_DIR, "watermarked"))
copy_files(train_files, MASKS_DIR, os.path.join(TRAIN_DIR, "masks"))

# Copy testing files (only watermarked and masks)
print("Copying testing files...")
copy_files(test_files, NO_WATERMARK_DIR, os.path.join(TEST_DIR, "no_watermark"))
copy_files(test_files, WATERMARKED_DIR, os.path.join(TEST_DIR, "watermarked"))
copy_files(test_files, MASKS_DIR, os.path.join(TEST_DIR, "masks"))

print("Copying testing files2...")
copy_files(test2_files, NO_WATERMARK_DIR, os.path.join(TEST2_DIR, "no_watermark"))
copy_files(test2_files, WATERMARKED_DIR, os.path.join(TEST2_DIR, "watermarked"))

# Summary
print(f"Dataset split complete.")
print(f"Training files: {len(train_files)}")
print(f"Testing files: {len(test_files)}")
print(f"Testing files2: {len(test2_files)}")

Copying training files...
Copying testing files...
Copying testing files2...
Dataset split complete.
Training files: 6000
Testing files: 4000
Testing files2: 2477


In [3]:
# Set device
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

# Hyperparameters
EPOCHS = 20
LEARNING_RATE = 0.0001
WINDOW_SIZE = 64
STRIDE = 32

# Dataset paths
TRAIN_DIR = "./dataset_split/train1"
TEST_DIR = "./dataset_split/test1"
MASK_OUTPUT_DIR = "./test_mask_results"
os.makedirs(MASK_OUTPUT_DIR, exist_ok=True)

# Data transformations (no resizing to retain original sizes)
transform = transforms.Compose([
    transforms.ToTensor()  # Convert to tensor while retaining original size
])

# Custom Dataset
class WatermarkSlidingWindowDataset(Dataset):
    def __init__(self, watermark_dir, mask_dir, transform=None):
        self.watermark_dir = watermark_dir
        self.mask_dir = mask_dir
        self.images = sorted(os.listdir(watermark_dir))
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        watermark_path = os.path.join(self.watermark_dir, self.images[idx])
        watermark_img = Image.open(watermark_path).convert("RGB")
        
        mask_img = None  # Default to None in case masks are not used
        if self.mask_dir:
            mask_path = os.path.join(self.mask_dir, self.images[idx])
            mask_img = Image.open(mask_path).convert("L")  # Mask is single-channel

        original_size = watermark_img.size  # Save original image size (width, height)
        filename = self.images[idx]  # Save the filename for reference

        if self.transform:
            watermark_img = self.transform(watermark_img)
            if mask_img is not None:
                mask_img = self.transform(mask_img)

        return watermark_img, mask_img, original_size, filename


# Custom collate_fn to handle varying image sizes
def collate_fn(batch):
    return batch

# Load training and testing datasets
train_dataset = WatermarkSlidingWindowDataset(
    watermark_dir=os.path.join(TRAIN_DIR, "watermarked"),
    mask_dir=os.path.join(TRAIN_DIR, "masks"),
    transform=transform
)

test_dataset = WatermarkSlidingWindowDataset(
    watermark_dir=os.path.join(TEST_DIR, "watermarked"),
    mask_dir=os.path.join(TEST_DIR, "masks"),
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

class UNetFeatureExtractor(nn.Module):
    def __init__(self, in_channels=3, out_channels=1):
        super(UNetFeatureExtractor, self).__init__()

        # Encoder: Downsampling layers
        self.encoder1 = self.conv_block(in_channels, 64)
        self.encoder2 = self.conv_block(64, 128)
        self.encoder3 = self.conv_block(128, 256)
        self.encoder4 = self.conv_block(256, 512)
        
        # Middle layer
        self.middle = self.conv_block(512, 1024, is_middle=True)
        
        # Decoder: Upsampling layers
        self.decoder4 = self.upconv_block(1024, 512)
        self.decoder3 = self.upconv_block(512, 256)
        self.decoder2 = self.upconv_block(256, 128)
        self.decoder1 = self.upconv_block(128, 64)

        # Layers to adjust channel dimensions after skip connections
        self.conv4 = nn.Conv2d(1024, 512, kernel_size=1)  # Adjust channels
        self.conv3 = nn.Conv2d(512, 256, kernel_size=1)
        self.conv2 = nn.Conv2d(256, 128, kernel_size=1)
        self.conv1 = nn.Conv2d(128, 64, kernel_size=1)
        
        # Final output layer
        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)
        self.sigmoid = nn.Sigmoid()  # Outputs a mask with values in [0, 1]

    def conv_block(self, in_channels, out_channels, is_middle=False):
        """Helper function to create convolutional blocks, mainly for the encoder and middle layers."""
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]
        if not is_middle:
            layers.append(nn.MaxPool2d(2))  # Downsample
        return nn.Sequential(*layers)

    def upconv_block(self, in_channels, out_channels):
        """Upsampling block in the decoder, using transpose convolution."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        # Encoding phase
        enc1 = self.encoder1(x)  # Output: 64 channels
        enc2 = self.encoder2(enc1)  # Output: 128 channels
        enc3 = self.encoder3(enc2)  # Output: 256 channels
        enc4 = self.encoder4(enc3)  # Output: 512 channels

        # Middle layer
        middle = self.middle(enc4)  # Output: 1024 channels
        
        # Decoding phase with skip connections
        dec4 = self.decoder4(middle)  # Output: 512 channels
        dec4 = F.interpolate(dec4, size=enc4.shape[2:], mode='bilinear', align_corners=False)  # Match size
        dec4 = torch.cat([dec4, enc4], dim=1)  # Skip connection
        dec4 = self.conv4(dec4)  # Adjust channels

        dec3 = self.decoder3(dec4)  # Output: 256 channels
        dec3 = F.interpolate(dec3, size=enc3.shape[2:], mode='bilinear', align_corners=False)  # Match size
        dec3 = torch.cat([dec3, enc3], dim=1)  # Skip connection
        dec3 = self.conv3(dec3)  # Adjust channels

        dec2 = self.decoder2(dec3)  # Output: 128 channels
        dec2 = F.interpolate(dec2, size=enc2.shape[2:], mode='bilinear', align_corners=False)  # Match size
        dec2 = torch.cat([dec2, enc2], dim=1)  # Skip connection
        dec2 = self.conv2(dec2)  # Adjust channels

        dec1 = self.decoder1(dec2)  # Output: 64 channels
        dec1 = F.interpolate(dec1, size=enc1.shape[2:], mode='bilinear', align_corners=False)  # Match size
        dec1 = torch.cat([dec1, enc1], dim=1)  # Skip connection
        dec1 = self.conv1(dec1)  # Adjust channels

        # Final output
        out = self.final_conv(dec1)  # Output: 1 channel
        out = self.sigmoid(out)  # Mask output with values in [0, 1]
        
        return out



# Initialize model, optimizer, and loss function
model = UNetFeatureExtractor(in_channels=3, out_channels=1).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss

# Training loop
print("Starting training...")
for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        watermarked_imgs, masks, original_sizes, filenames = batch[0]
        watermarked_imgs = watermarked_imgs.to(DEVICE).unsqueeze(0)  # Add batch dimension
        masks = masks.to(DEVICE).unsqueeze(0)

        optimizer.zero_grad()
        mask_pred = model(watermarked_imgs)
        # Resize predicted mask to target size
        _, _, target_h, target_w = masks.shape
        mask_pred = torch.nn.functional.interpolate(mask_pred, size=(target_h, target_w), mode='bilinear', align_corners=True)
        loss = criterion(mask_pred, masks)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss / len(train_loader):.4f}")

# Save the model
FEATURE_EXTRACTOR_PATH = "feature_extractor.pth"
torch.save(model.state_dict(), FEATURE_EXTRACTOR_PATH)
print("Feature Extractor saved.")

Using device: cuda
Starting training...


Epoch 1/20: 100%|██████████| 6000/6000 [03:53<00:00, 25.74it/s]


Epoch 1/20, Loss: 0.0790


Epoch 2/20: 100%|██████████| 6000/6000 [04:02<00:00, 24.79it/s]


Epoch 2/20, Loss: 0.0450


Epoch 3/20: 100%|██████████| 6000/6000 [04:09<00:00, 24.07it/s]


Epoch 3/20, Loss: 0.0380


Epoch 4/20: 100%|██████████| 6000/6000 [04:21<00:00, 22.92it/s]


Epoch 4/20, Loss: 0.0347


Epoch 5/20: 100%|██████████| 6000/6000 [04:24<00:00, 22.68it/s]


Epoch 5/20, Loss: 0.0326


Epoch 6/20: 100%|██████████| 6000/6000 [04:13<00:00, 23.70it/s]


Epoch 6/20, Loss: 0.0311


Epoch 7/20: 100%|██████████| 6000/6000 [04:27<00:00, 22.43it/s]


Epoch 7/20, Loss: 0.0298


Epoch 8/20: 100%|██████████| 6000/6000 [04:12<00:00, 23.74it/s]


Epoch 8/20, Loss: 0.0287


Epoch 9/20: 100%|██████████| 6000/6000 [04:05<00:00, 24.47it/s]


Epoch 9/20, Loss: 0.0276


Epoch 10/20: 100%|██████████| 6000/6000 [04:08<00:00, 24.11it/s]


Epoch 10/20, Loss: 0.0268


Epoch 11/20: 100%|██████████| 6000/6000 [04:07<00:00, 24.24it/s]


Epoch 11/20, Loss: 0.0261


Epoch 12/20: 100%|██████████| 6000/6000 [04:11<00:00, 23.89it/s]


Epoch 12/20, Loss: 0.0254


Epoch 13/20: 100%|██████████| 6000/6000 [04:16<00:00, 23.38it/s]


Epoch 13/20, Loss: 0.0247


Epoch 14/20: 100%|██████████| 6000/6000 [04:10<00:00, 23.91it/s]


Epoch 14/20, Loss: 0.0240


Epoch 15/20: 100%|██████████| 6000/6000 [04:06<00:00, 24.30it/s]


Epoch 15/20, Loss: 0.0236


Epoch 16/20: 100%|██████████| 6000/6000 [04:10<00:00, 23.99it/s]


Epoch 16/20, Loss: 0.0231


Epoch 17/20: 100%|██████████| 6000/6000 [04:13<00:00, 23.64it/s]


Epoch 17/20, Loss: 0.0227


Epoch 18/20: 100%|██████████| 6000/6000 [04:13<00:00, 23.64it/s]


Epoch 18/20, Loss: 0.0222


Epoch 19/20: 100%|██████████| 6000/6000 [04:14<00:00, 23.61it/s]


Epoch 19/20, Loss: 0.0219


Epoch 20/20: 100%|██████████| 6000/6000 [04:15<00:00, 23.50it/s]

Epoch 20/20, Loss: 0.0216
Feature Extractor saved.





In [4]:
# torch.save(model, "feature_extractor_model.pth")
# test_dataset = WatermarkSlidingWindowDataset(
#     watermark_dir=os.path.join(TEST_DIR, "watermarked"),
#     mask_dir=os.path.join(TEST_DIR, "masks"),
#     transform=transform
# )

# test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

def test_feature_extractor(test_loader, model_path, mask_path):
    print("Testing feature extractor...")
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
    # model = torch.load("feature_extractor_model.pth", map_location=DEVICE)
    model.eval()

    with torch.no_grad():
        for i, batch in enumerate(tqdm(test_loader)):
            # Extract watermarked image, mask, original size, and filename
            watermarked_imgs, masks, original_sizes, filenames = batch[0]

            # Debug: Print the filename being processed
            # print(f"Processing image: {filenames}")

            watermarked_imgs = watermarked_imgs.to(DEVICE).unsqueeze(0)
            mask_pred = model(watermarked_imgs)

            # Resize predicted mask to original size
            original_size = original_sizes  # (width, height)
            filename = filenames

            # Extract base name and ensure valid file extension
            base_name, ext = os.path.splitext(filename)
            if ext.lower() not in [".jpg", ".jpeg", ".png"]:  # Handle invalid extensions
                ext = ".png"  # Default to PNG if extension is missing or invalid

            mask_pred_img = transforms.ToPILImage()(mask_pred.squeeze(0).cpu())
            mask_pred_resized = mask_pred_img.resize(original_size, Image.BILINEAR)

            # Save predicted mask
            output_file = os.path.join(mask_path, f"{base_name}{ext}")
            mask_pred_resized.save(output_file)

            # print(f"Saved mask: {output_file}")

    print("All masks generated and saved.")

# Test the feature extractor
test_feature_extractor(test_loader, FEATURE_EXTRACTOR_PATH, MASK_OUTPUT_DIR)


  model.load_state_dict(torch.load(model_path, map_location=DEVICE))


Testing feature extractor...


100%|██████████| 4000/4000 [01:05<00:00, 60.82it/s]

All masks generated and saved.





In [6]:
# create the mask for test2

# define paramete
PART2_TEST_DIR = "./dataset_split/test2" # later will redefine
PART2_TEST_MASK_DIR = "./test_model_result/mask" # later will redefine

part2_test_dataset = WatermarkSlidingWindowDataset(
    watermark_dir=os.path.join(PART2_TEST_DIR, "watermarked"),
    mask_dir='',
    transform=transform
)

part2_test_loader = DataLoader(part2_test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

# predict mask
test_feature_extractor(part2_test_loader, FEATURE_EXTRACTOR_PATH, PART2_TEST_MASK_DIR)

  model.load_state_dict(torch.load(model_path, map_location=DEVICE))


Testing feature extractor...


100%|██████████| 2477/2477 [00:29<00:00, 84.83it/s] 

All masks generated and saved.





In [None]:
# create the mask for test2

# define paramete
PART2_TEST_DIR = "./true_web_data" # later will redefine
PART2_TEST_MASK_DIR = "./true_web_data/model1/predicted_mask" # later will redefine

part2_test_dataset = WatermarkSlidingWindowDataset(
    watermark_dir=os.path.join(PART2_TEST_DIR, "watermarked"),
    mask_dir='',
    transform=transform
)

part2_test_loader = DataLoader(part2_test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

# predict mask
test_feature_extractor(part2_test_loader, FEATURE_EXTRACTOR_PATH, PART2_TEST_MASK_DIR)

NameError: name 'WatermarkSlidingWindowDataset' is not defined