In [None]:
import torch
import timm
import numpy as np

# Allow float32 matmuls to use faster, reduced-precision kernels where the
# hardware supports them (see the PyTorch docs for the exact 'high' semantics).
torch.set_float32_matmul_precision('high')
# Device string handed to every `.to(device)` call for the model and batches below.
device="cuda"

In [None]:
def create_default_model():
    """Build the single-logit watermark classifier and move it to ``device``.

    Creates a pretrained ``tf_efficientnetv2_b1`` from timm (drop-path and
    dropout rates of 0.2) and replaces its classifier with a fresh linear layer
    that outputs one logit per image.

    Returns:
        The model, already moved to the module-level ``device``.
    """
    model = timm.create_model('tf_efficientnetv2_b1', pretrained=True,drop_path_rate=0.2,drop_rate=0.2)
    # Size the new head from the head it replaces instead of hard-coding 1280,
    # so this keeps working if the backbone name above is changed.
    model.classifier=torch.nn.Linear(model.classifier.in_features,1)

    # Optional fine-tuning recipes, kept for reference:
    #
    # for param in model.parameters():  #for freezing weights
    #     param.requires_grad = False
    #
    # for blk in [model.classifier, model.global_pool,model.conv_head,model.blocks[-1],model.blocks[-2],model.blocks[-3]]:
    #     for param in blk.parameters():
    #         param.requires_grad = True
    #
    # for module in model.modules():
    #     if isinstance(module, torch.nn.BatchNorm2d):
    #         if hasattr(module, 'weight'):
    #             module.weight.requires_grad_(False)
    #         if hasattr(module, 'bias'):
    #             module.bias.requires_grad_(False)
    #         module.eval()
    model = model.to(device)
    # model=torch.compile(model) #cant save model with this
    return model

In [None]:
# Module-level model instance; train(), train_one_epoch(), test() and the
# inference cells below all read this global rather than taking a model argument.
model = create_default_model()

In [None]:
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import os 
import random

def read_img_file(f):
    """Open an image file and return it as a fully loaded RGB PIL image.

    Args:
        f: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A ``PIL.Image.Image`` in mode ``'RGB'``.
    """
    # Image.open is lazy and keeps the file handle open until the pixel data is
    # read. The context manager closes the handle deterministically, and
    # convert() returns a loaded, independent image (a copy when already RGB).
    with Image.open(f) as img:
        return img.convert('RGB')


# Steps shared by both pipelines: convert to a tensor, then normalise every
# channel with mean 0.5 and std 0.5.
_to_normalized_tensor = [
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]

# Pipeline for images that are used at their existing size.
_transform = transforms.Compose(list(_to_normalized_tensor))

# Same pipeline, preceded by a resize to 224x224.
_transform_w_resize = transforms.Compose(
    [transforms.Resize((224,224))] + _to_normalized_tensor
)

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset over ``(image_path, label)`` pairs.

    Args:
        image_paths_labels: Indexable collection of ``(path, label)`` tuples.
        resize: When true, samples are passed through ``_transform_w_resize``
            (which resizes to 224x224); otherwise through ``_transform``.
    """

    def __init__(self, image_paths_labels,resize=False):
        self.image_paths_labels = image_paths_labels
        self.resize=resize

    def __len__(self):
        """Return the number of ``(path, label)`` pairs."""
        return len(self.image_paths_labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image_tensor, label)``.

        Raises:
            Exception: Any error raised while reading or transforming the image
                is logged together with the offending path and then re-raised.
                Returning ``None`` instead would only surface later as an
                opaque failure when the DataLoader collates the batch.
        """
        img_path = self.image_paths_labels[idx][0]
        label = self.image_paths_labels[idx][1]
        try:
            # Keep the PIL image: transforms.Resize accepts PIL images or tensors,
            # not numpy arrays, and ToTensor handles PIL input directly.
            img = read_img_file(img_path)
            # img = all_transforms[0](image=np.array(img))["image"] #online augmentation
            transform = _transform_w_resize if self.resize else _transform
            img = transform(img)
            return (img, label)
        except Exception as e:
            print(e)
            print(f"error reading {img_path}")
            raise


# Training samples as (path, label) pairs: label 0.0 for ./train/no_watermark/,
# label 1.0 for ./train/watermark/.
image_paths_and_classes = [
    ("./train/no_watermark/"+file_name, 0.0)
    for file_name in os.listdir("./train/no_watermark/")
]
image_paths_and_classes += [
    ("./train/watermark/"+file_name, 1.0)
    for file_name in os.listdir("./train/watermark/")
]

# Alternative training source (generated data), kept for reference:
# image_paths_and_classes=[]
# for file_name in os.listdir("./train_generated/no_watermark/"):
#     image_paths_and_classes.append(("./train_generated/no_watermark/"+file_name, 0.0))

# for file_name in os.listdir("./train_generated/watermark/"):
#     image_paths_and_classes.append(("./train_generated/watermark/"+file_name, 1.0))

# Shuffle in place; `training` is another name for the same list.
random.shuffle(image_paths_and_classes)
training = image_paths_and_classes

# Held-out samples, labelled the same way, read from ./test/.
testing = [
    ("./test/no_watermark/"+file_name, 0.0)
    for file_name in os.listdir("./test/no_watermark/")
]
testing += [
    ("./test/watermark/"+file_name, 1.0)
    for file_name in os.listdir("./test/watermark/")
]

# training.extend(image_paths_and_classes[:int(len(image_paths_and_classes)*0.8)])
# random.shuffle(training)

# testing2 = image_paths_and_classes[int(len(image_paths_and_classes)*0.8):]   #0.2
    

In [None]:
BATCH_SIZE=48

# DataLoader settings shared by the train and test loaders.
_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    prefetch_factor=4,
    pin_memory=True,
)

train_dataset = CustomDataset(training)
test_dataset = CustomDataset(testing)

train_dataloader = torch.utils.data.DataLoader(train_dataset, **_loader_kwargs)
test_dataloader = torch.utils.data.DataLoader(test_dataset, **_loader_kwargs)

In [None]:
from tqdm import tqdm
from transformers import get_linear_schedule_with_warmup
from timeit import default_timer as timer
from statistics import mean
from torch.cuda.amp import GradScaler
from torch import autocast
import optuna
import wandb
import numpy as np
from sklearn.metrics import accuracy_score

# Every wandb call in the training code is guarded by `if wandb:`, so rebinding
# the name to None (uncomment the next line) turns logging off.
#wandb = None  #to disable wandb
# Loss used by train() and test(); it is applied to the raw model outputs
# (logits), with the labels unsqueezed to match the output's trailing dimension.
criterion = torch.nn.BCEWithLogitsLoss()

def train(trial=None,train_params=None):
    """Train the global ``model`` and evaluate it after every epoch.

    Args:
        trial: Optional Optuna trial. Currently unused; the pruning hook that
            would consume it is commented out below.
        train_params: Optional dict of overrides. Recognised keys are ``"lr"``
            (default ``1e-5``) and ``"epochs"`` (default ``3``); missing keys
            fall back to those defaults.

    Returns:
        Tuple ``(loss_train, loss_test, acc_test)`` of lists with one entry per
        epoch.
    """
    # Merge caller overrides over fresh defaults. This avoids a shared mutable
    # default argument and lets callers pass only the keys they want to change.
    params = {"lr":1e-5,"epochs":3}
    if train_params:
        params.update(train_params)
    lr, epochs = params["lr"], params["epochs"]
    # warmup_steps, weight_decay = train_params["warmup_steps"], train_params["weight_decay"]

    if wandb:
        wandb.init(project="effnet_b0_watermark", entity="qwertyforce",reinit=True)
        wandb.config.update({
            "learning_rate": lr,
            "epochs": epochs,
            "batch_size": BATCH_SIZE,
            })

    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    scheduler = None
    # num_warmup_steps = int((len(train_dataloader)) * warmup_steps)
    # num_training_steps = len(train_dataloader)* 5
    # scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=num_training_steps)
    scaler = GradScaler()

    loss_train=[]
    loss_test=[]
    acc_test=[]

    for epoch in tqdm(range(epochs)):
        train_loss = train_one_epoch(optimizer,criterion,scaler,scheduler)
        loss_train.append(train_loss)

        test_loss,test_acc = test(criterion)
        loss_test.append(test_loss)
        acc_test.append(test_acc)

        if wandb:
            # One log call per epoch so the three metrics share a single
            # logging step instead of being spread over three.
            wandb.log({
                "loss_train": loss_train[-1],
                "loss_test": loss_test[-1],
                "acc_test": acc_test[-1],
                "epoch":epoch,
                "lr":optimizer.param_groups[0]['lr'],
            })
        # if trial:
        #     trial.report(loss_test[-1], epoch)
        #     if trial.should_prune():
        #         raise optuna.exceptions.TrialPruned()
    return loss_train,loss_test,acc_test


def train_one_epoch(optimizer,criterion,scaler,scheduler):
    """Run one pass over ``train_dataloader`` with float16 autocast.

    Args:
        optimizer: Optimizer stepped through ``scaler``.
        criterion: Loss applied to the model outputs and the unsqueezed labels.
        scaler: Gradient scaler used for the backward pass and optimizer step.
        scheduler: Optional LR scheduler; may be ``None``.

    Returns:
        Mean of the per-batch training losses for the epoch.
    """
    model.train()
    batch_losses=[]
    epoch_start = timer()
    for batch_idx, (data, labels) in enumerate(train_dataloader):
        # labels = labels.type(torch.int64)
        data = data.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        # Forward pass and loss under autocast.
        with autocast(device_type='cuda', dtype=torch.float16):
            outputs = model.forward(data)
            loss = criterion(outputs,labels.unsqueeze(1)) # remove unsqueeze for CE

        # Full-precision alternative, kept for reference:
        # outputs = model.forward(data)
        # loss = criterion(outputs,labels.unsqueeze(1))
        # loss.backward()
        # optimizer.step()

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scale_before_update = scaler.get_scale()
        scaler.update()

        # Advance the scheduler only when update() did not lower the scale
        # (presumably a lowered scale means the optimizer step was skipped - confirm).
        if scheduler and scale_before_update <= scaler.get_scale():
            scheduler.step()

        batch_losses.append(loss.item())

        # Every 100 batches: running mean of the most recent (up to 100) losses.
        if wandb and batch_idx % 100==0:
            wandb.log({"loss_train_batch_idx": mean(batch_losses[-100:]),"batch_idx":batch_idx})

        # Every 500 batches: partial evaluation; test() switches the model to
        # eval mode, so training mode is restored right after.
        if wandb and batch_idx % 500==0:
            wandb.log({"loss_test_batch_idx": test(criterion,partial=True),"batch_idx":batch_idx})
            model.train()

    elapsed = timer() - epoch_start
    train_loss = mean(batch_losses)
    print(f"Train loss = {train_loss}; epoch_training_time: {elapsed}")
    return train_loss
    

def test(criterion,partial=False):
    """Evaluate the global ``model`` on ``test_dataloader``.

    Args:
        criterion: Loss applied to the model outputs and the unsqueezed labels.
        partial: When true, evaluate at most the first 101 batches and return
            only the mean loss.

    Returns:
        ``mean_loss`` (a float) when ``partial`` is true; otherwise the tuple
        ``(mean_loss, accuracy)``.
    """
    model.eval()
    temp_loss=[]
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch_idx, (data, labels) in enumerate(test_dataloader):
            # labels = labels.type(torch.int64)
            # Store plain Python floats so the metric receives flat scalar lists.
            true_labels.extend(labels.tolist())
            data, labels = data.to(device), labels.to(device)
            outputs = model.forward(data)
            loss = criterion(outputs,labels.unsqueeze(1))  #remove .unsqueeze for CE
            temp_loss.append(loss.item())

            # preds = torch.argmax(outputs,1).cpu().numpy()  # for CE
            preds = np.round(torch.sigmoid(outputs).cpu().numpy()).reshape(-1)
            predictions.extend(preds.tolist())
            if partial and batch_idx == 100:
                break

    test_loss = mean(temp_loss)
    if partial:
        # Always a scalar in partial mode, even when the loader has fewer than
        # 101 batches and the early exit above never fires.
        return test_loss

    accuracy = accuracy_score(true_labels,predictions)
    print(f"Test loss = {test_loss}; Test acc = {accuracy}")
    return test_loss, accuracy

In [None]:
# train() takes its hyper-parameters through the `train_params` dict; the first
# positional argument is the (optional) Optuna trial, not the learning rate.
train(train_params={"lr":0.00005,"epochs":1})

In [None]:
# import optuna
# def objective(trial):
#     global model
#     model = create_default_model()
#     model=model.to("cuda")
#     lr = trial.suggest_loguniform('learning_rate', 1e-7, 6e-5)
#     warmup_steps = trial.suggest_float('warmup_steps', 0.0, 1.5,step=0.1)
#     weight_decay = trial.suggest_float('weight_decay', 0.0, 0.05,step=0.005)

#     train_params = {"epochs":5, "lr":lr,"warmup_steps":warmup_steps, "weight_decay":weight_decay}
#     val_loss = train(trial,train_params)
#     return val_loss

# study_name = "example-study"  # Unique identifier of the study.
# storage_name = "sqlite:///{}.db".format(study_name)

# study = optuna.create_study(study_name=study_name, storage=storage_name,direction='minimize',pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)
# study.optimize(objective, n_trials=64)

In [None]:
# from optuna.visualization import plot_contour
# from optuna.visualization import plot_edf
# from optuna.visualization import plot_intermediate_values
# from optuna.visualization import plot_optimization_history
# from optuna.visualization import plot_parallel_coordinate
# from optuna.visualization import plot_param_importances
# from optuna.visualization import plot_slice

In [None]:
# plot_slice(study)


In [None]:
# import matplotlib.pyplot as plt
# plt.plot(loss_train)
# plt.plot(loss_test)

In [None]:

# test_dataset = CustomDataset(testing2)
# test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32,shuffle=True,num_workers=2, prefetch_factor=8,pin_memory=True)

# Collect per-image sigmoid scores, rounded predictions and ground-truth labels
# over the whole test loader for the classification report below.
model.eval()
predictions, true_labels= [], []
raw_values=[]
import numpy as np
with torch.no_grad():
    for data, labels in test_dataloader:
        # Labels are only consumed on the host, so only the images go to `device`.
        outputs = model.forward(data.to(device))
        # Flatten (batch, 1) scores to (batch,) so every list holds scalars.
        scores = torch.sigmoid(outputs).cpu().numpy().reshape(-1)
        raw_values.extend(scores)
        predictions.extend(np.round(scores))
        true_labels.extend(labels.numpy())
        # loss = criterion(outputs,labels.unsqueeze(1))

In [None]:
from sklearn.metrics import classification_report



In [None]:
# Per-class precision / recall / F1 over the labels and predictions collected by
# the evaluation loop above (label 0.0 = no_watermark, 1.0 = watermark).
print(classification_report(true_labels,predictions))

              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98      2592
         1.0       0.98      0.96      0.97      1959

    accuracy                           0.97      4551
   macro avg       0.97      0.97      0.97      4551
weighted avg       0.97      0.97      0.97      4551



In [None]:
# NOTE(review): this assertion always fails. It looks like a deliberate stop so
# that "Run All" does not continue into the inference cells below - confirm.
assert 1==2

AssertionError: 

In [None]:
# p = "/media/qwertyforce/26fbdf65-ba8c-46bc-bbd9-bc503969e854/scenery_cx/scenery/public/images/"
# Score every non-augmented image in `p` and collect the ones the model flags
# as watermarked (rounded sigmoid score == 1).
p="./no_watermark/"
model.eval()
device="cuda"
model=model.to(device)
# p="./test/no_watermark/"
res=[]          # (file_name, score) for images flagged as watermarked
all_outputs=[]  # score of every processed image
for file_name in tqdm(os.listdir(p)):
    if "_aug_" in file_name:
        continue
    img = read_img_file(os.path.join(p, file_name))
    img=img.resize((512,512),Image.Resampling.HAMMING)
    # Add the batch dimension and move to `device` in one step; no hard-coded
    # .cuda(), so the cell follows whatever `device` is set to above.
    img = _transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model.forward(img)
        outputs = torch.sigmoid(outputs).cpu().numpy()
    score = outputs[0][0]
    all_outputs.append(score)
    if np.round(score) == 1:
        res.append((file_name,score))


100%|██████████| 20328/20328 [03:03<00:00, 110.86it/s]


In [None]:
# Display the flagged (file_name, score) pairs collected by the loop above.
res

[('w8h044.jpg', 0.63842213),
 ('6618.jpg', 0.79962945),
 ('1545.jpg', 0.53775936),
 ('3s5yuq.jpg', 0.61515945),
 ('7sy9xp.jpg', 0.732666),
 ('9ors4k.jpg', 0.5848952),
 ('wvwo95.png', 0.78488404),
 ('72156k.jpg', 0.74039346),
 ('ajw189.jpg', 0.5361958),
 ('2297.jpg', 0.5376779),
 ('3323.jpg', 0.5000562),
 ('die66k.jpg', 0.57742304),
 ('7532.jpg', 0.6067571),
 ('8446.jpg', 0.54459554),
 ('5909.jpg', 0.8027137),
 ('26du47_imgur_N1jwhB1.jpg', 0.78189695),
 ('9392.jpg', 0.7732248),
 ('92z39g.jpg', 0.6010037),
 ('6767.jpg', 0.89132035),
 ('430.jpg', 0.7065881),
 ('1320.jpg', 0.57142985),
 ('3qre3o_imgur_VSiliOm.jpg', 0.56591284),
 ('dy1ikr.jpg', 0.9142127),
 ('34.jpg', 0.7617503),
 ('5y52zn.jpg', 0.9752406),
 ('4086.jpg', 0.6361611),
 ('4567.jpg', 0.87832373),
 ('m596n6.jpg', 0.52296525),
 ('9571.jpg', 0.92655253),
 ('232bdt.jpg', 0.55627775),
 ('8fm8wn.jpg', 0.6622725),
 ('655.jpg', 0.7661105),
 ('6367.jpg', 0.54776454),
 ('2358.jpg', 0.5010291),
 ('a800gf.jpg', 0.7278174),
 ('2752.jpg', 0.

In [None]:
import json

print(len(res))
# Derive the names under a new variable instead of rebinding `res`. Re-running
# this cell then still sees (file_name, score) tuples and cannot end up
# writing the first character of each file name.
flagged_file_names=[el[0] for el in res]
with open("./check_watermarks","w") as file:
    json.dump(flagged_file_names,file)

48

In [None]:
# torch.save(model, './model.pt')

In [None]:
# import onnxruntime as onnx
# model.eval()
# model=model.to("cpu")
# x = torch.randn(1, 3, 512, 512, requires_grad=False).cpu()
# # torch_out = model(x)
# torch.onnx.export(model,                     # model being run
#                   x,                            # model input (or a tuple for multiple inputs)
#                   "model.onnx",              # where to save the model (can be a file or file-like object)
#                   export_params=True,           # store the trained parameter weights inside the model file
#                   opset_version=12,             # the ONNX version to export the model to
#                   do_constant_folding=True,     # whether to execute constant folding for optimization
#                   input_names = ['input'],      # the model's input names
#                   output_names = ['output'],
#                   dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
#                                 'output' : {0 : 'batch_size'}})    # the model's output names

verbose: False, log level: Level.ERROR



In [None]:
# import torch
# import onnxruntime
# import numpy as np

In [None]:
# import torch
# model = torch.load("model.pt")
# Single-image sanity check on CPU: resize to 512x512, run the model and print
# the sigmoid score.
device="cpu"
model=model.to(device)
model.eval()

# inp_arr = torch.randn(32,3,512,512)

img = read_img_file("./watermark.jpg")
# img.save("./orig_wat.jpg")
img=img.resize((512,512),Image.Resampling.LANCZOS)
img.save("./resized_watermark.jpg")
# Build the batch directly on `device`. This cell runs on CPU, so the tensor
# must not be routed through .cuda() (that would require a GPU).
img = _transform(img).unsqueeze(0).to(device)

with torch.no_grad():
    outputs = model.forward(img)
    outputs = torch.sigmoid(outputs).cpu().numpy()
print(outputs)
# with torch.no_grad():
#     out_1 = model(inp_arr)


# sess_options = onnxruntime.SessionOptions()
# sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
# sess_options.enable_cpu_mem_arena=False
# session = onnxruntime.InferenceSession("./model.onnx", sess_options, providers=['CPUExecutionProvider'])
# out_2 = session.run([], {'input':input_arr.numpy()})[0]

[[0.03666316]]
