In [1]:
from PIL import Image, ImageEnhance
import random
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import tensorflow as tf
import json
import argparse
import ipfshttpclient
import hashlib
import faiss

# Expose only CUDA device 1 to this process (TensorFlow will see it as its sole GPU).
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Add the parent directory to the import path so the `demo` package imported below resolves.
sys.path.append("..")

from demo.image_similarity_keras.model import SiameseModel

In [2]:
def cid_to_int(cid):
    """Map a CID string to a deterministic non-negative integer.

    The encoded bytes of ``cid`` are hashed with SHA-256, the digest is read as
    a single big-endian unsigned integer, and the result is reduced modulo
    ``2**31 - 1`` so that it always lies in ``[0, 2**31 - 2]``.
    """
    modulus = (1 << 31) - 1
    digest = hashlib.sha256(cid.encode()).digest()
    return int.from_bytes(digest, byteorder="big") % modulus

In [3]:
# Variant generator: brightness change
def adjust_brightness(image, brightness_factor):
    """Return ``image`` with its brightness enhanced by ``brightness_factor``.

    Thin wrapper that builds an ``ImageEnhance.Brightness`` enhancer for
    ``image`` and returns the result of its ``enhance`` call.
    """
    return ImageEnhance.Brightness(image).enhance(brightness_factor)

# Define image generation with distortion
def apply_distortion(image, distortion_scale):
    """Return a copy of ``image`` with random per-column / per-row pixel jitter.

    A horizontal offset is drawn for every column and a vertical offset for
    every row. Each output pixel is sampled from the input at its own
    coordinates plus those offsets, clipped to the image bounds.

    Args:
        image: PIL image to distort. Any size (square or not) and both
            single-channel and multi-channel modes are accepted.
        distortion_scale: Upper bound, in pixels, for the randomly chosen
            maximum offset along each axis.

    Returns:
        A new PIL image created with ``Image.fromarray`` from the resampled
        pixel array.
    """
    width, height = image.size
    x_scale = np.random.uniform(0, distortion_scale)
    y_scale = np.random.uniform(0, distortion_scale)
    x_distortion = np.random.uniform(-x_scale, x_scale, width)
    y_distortion = np.random.uniform(-y_scale, y_scale, height)
    mesh_x, mesh_y = np.meshgrid(np.arange(width), np.arange(height))
    # Both grids have shape (height, width). The x offsets (length `width`)
    # broadcast across rows as-is; the y offsets (length `height`) need a
    # trailing axis so that they broadcast across columns. Without it the
    # addition fails for non-square images and, for square ones, applies the
    # vertical offsets per column instead of per row.
    mesh_x_distorted = np.clip(mesh_x + x_distortion, 0, width - 1).astype(int)
    mesh_y_distorted = np.clip(
        mesh_y + y_distortion[:, np.newaxis], 0, height - 1
    ).astype(int)

    pixels = np.array(image)
    # Index only the two spatial axes so that (H, W) and (H, W, C) arrays are
    # both handled; any channel axis is carried along unchanged.
    distorted_pixels = pixels[mesh_y_distorted, mesh_x_distorted]
    distorted_image = Image.fromarray(distorted_pixels)

    if image.mode == "P":
        # The array of a palette image holds palette indices; re-attach the
        # palette so the indices keep their original colours.
        palette = image.getpalette()
        if palette is not None:
            distorted_image.putpalette(palette)
    return distorted_image

# Variant generator: resizing
def apply_resize(image, size):
    """Return the result of ``image.resize(size)``.

    ``size`` is forwarded unchanged to the image's own ``resize`` method.
    """
    resized_image = image.resize(size)
    return resized_image

In [6]:
def _make_plagiarism_variant(original_image):
    """Apply one randomly chosen transformation to ``original_image``.

    One of four edits is picked with equal probability: a brightness change
    (factor drawn from [1, 2]), a pixel distortion (scale drawn from [1, 2]),
    a rotation (angle drawn from [-15, 15]) or a resize to 128x128.
    """
    plagiarism_type = random.randint(0, 3)
    if plagiarism_type == 0:
        return adjust_brightness(original_image, random.uniform(1, 2))
    if plagiarism_type == 1:
        return apply_distortion(original_image, random.uniform(1, 2))
    if plagiarism_type == 2:
        return original_image.rotate(random.uniform(-15, 15))
    return apply_resize(original_image, (128, 128))


def set_plagiarism_images(source_path, target_path, model, index, client,
                          num_images=100, image_size=224):
    """Create transformed copies of source images that the index still matches.

    Files in ``source_path`` are visited in random order. For each one a
    randomly transformed variant is produced, embedded with ``model`` and
    looked up in ``index``. The variant is saved to ``target_path`` under the
    original file name only when the top search result equals the integer ID
    derived (via ``cid_to_int``) from the hash that
    ``client.files.stat("/" + path)`` reports for the original file.
    Processing stops once ``num_images`` variants have been saved or the
    directory is exhausted. A running count is printed after every save.

    Args:
        source_path: Directory containing the original images.
        target_path: Directory to write accepted variants to; created if
            missing.
        model: Object whose ``predict`` returns embeddings for a batch of
            images scaled to [0, 1].
        index: Search index exposing ``search(vectors, k)`` and ``ntotal``.
        client: IPFS client exposing ``files.stat``.
        num_images: Maximum number of variants to save (default 100).
        image_size: Side length, in pixels, that variants are resized to
            before embedding (default 224).
    """
    os.makedirs(target_path, exist_ok=True)
    image_list = os.listdir(source_path)
    random.shuffle(image_list)
    check_num = 0

    for image_name in image_list:
        if check_num >= num_images:
            break

        orig_img_path = os.path.join(source_path, image_name)
        # os.listdir also returns sub-directories (e.g. .ipynb_checkpoints),
        # which Image.open cannot read.
        if not os.path.isfile(orig_img_path):
            continue

        # Every transformation returns a new image, so the source file can be
        # released as soon as the variant has been built.
        with Image.open(orig_img_path) as original_image:
            plagiarism_image = _make_plagiarism_variant(original_image)

        test_img = plagiarism_image.convert("RGB").resize((image_size, image_size))
        test_img = tf.keras.preprocessing.image.img_to_array(test_img) / 255.0
        test_img_embs = model.predict(tf.expand_dims(test_img, axis=0))
        # NOTE(review): only the top hit is used below, yet all `ntotal`
        # neighbours are requested. k=1 would be cheaper on an exact index, but
        # may change results on approximate indexes - confirm the index type
        # before lowering k.
        _, indices = index.search(np.array(test_img_embs), index.ntotal)
        cid = cid_to_int(client.files.stat("/" + orig_img_path)['Hash'])
        if cid == indices[0][0]:
            check_num += 1
            print(check_num)
            plagiarism_image.save(os.path.join(target_path, image_name))

In [5]:
# Ask TensorFlow to allocate GPU memory on demand for the first visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except (ValueError, RuntimeError) as err:
        # Invalid device or cannot modify virtual devices once initialized.
        # Best-effort setting: report it and carry on with the default policy.
        print(f"Could not enable GPU memory growth: {err}")

model_path = "../demo/models/ConvNext_Large_64b_100ep_final"
# Keep the path and the loaded dict under separate names so the cell can be
# re-run without `augmentation_config` changing type between runs.
augmentation_config_path = "../demo/configs/default_augmentation.json"

# Load model config
with open(os.path.join(model_path, "configs.json"), "r") as f:
    model_config = json.load(f)

# Namespace view of the model config (attribute access instead of dict keys)
model_config_ns = argparse.Namespace(**model_config)

# Load augmentation config
with open(augmentation_config_path, "r") as f:
    augmentation_config = json.load(f)

# Get the image_size from model_config or use default value if missing
default_image_size = 224
image_size = model_config.get('image_size', default_image_size)

# Initialize model from the keyword arguments stored in the config file
model = SiameseModel(**model_config)

# Build and compile model
model.build(False)

# Load weights
model.model.load_weights(os.path.join(model_path, "weights"))

# IPFS client, used later to look up the hash of each original image
client = ipfshttpclient.connect(timeout=300)

# Pre-built FAISS index that plagiarism candidates are searched against
index = faiss.read_index('base.index')

2023-09-12 00:37:20.164968: I tensorflow/core/platform/cpu_feature_guard.cc:152] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-12 00:37:20.636961: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22066 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:5e:00.0, compute capability: 8.6


Model: "siamese_ConvNext_Large"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 ConvNext_Large (KerasLayer)  (None, 1536)             196230336 
                                                                 
 dense (Dense)               (None, 512)               786944    
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 out_emb (Dense)             (None, 128)               32896     
                                                                 
 l2_norm (Lambda)            (None, 128)               0         
                                                                 
Total params: 197,181,504
Trainable params: 951,168
Non-trainable params: 196,230,336
_________________________________________________________________


In [7]:
# Azuki collection: write index-verified transformed copies of images from
# nft_images/azuki into nft_images/azuki_plagiarism.
set_plagiarism_images("nft_images/azuki", "nft_images/azuki_plagiarism", model, index, client)

2023-09-12 00:37:54.737509: I tensorflow/stream_executor/cuda/cuda_dnn.cc:379] Loaded cuDNN version 8400
2023-09-12 00:37:56.119021: I tensorflow/stream_executor/cuda/cuda_blas.cc:1804] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100


In [8]:
# BAYC collection: write index-verified transformed copies of images from
# nft_images/bayc into nft_images/bayc_plagiarism.
set_plagiarism_images("nft_images/bayc", "nft_images/bayc_plagiarism", model, index, client)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100


In [9]:
# CryptoPunks collection: write index-verified transformed copies of images from
# nft_images/cryptopunks into nft_images/cryptopunks_plagiarism.
set_plagiarism_images("nft_images/cryptopunks", "nft_images/cryptopunks_plagiarism", model, index, client)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
