**Importing Libraries**

In [3]:
# Install gdown and OpenCV
!pip install gdown
!pip install opencv-python


import os
import zipfile
import pathlib
import gdown
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Mount Google Drive
# The dataset folder and the saved-model path used later in this notebook both
# live under /content/drive/MyDrive, so the mount must succeed before any
# following cell is run.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**1. Download and Extract Dataset**

This section handles downloading the dataset from Google Drive and extracting it to the specified directory.

In [4]:

# Folder on the mounted Drive that holds the archive and the extracted dataset.
base_dir = '/content/drive/MyDrive/dk-dataset'
# exist_ok avoids the check-then-create race and is a no-op on re-runs.
os.makedirs(base_dir, exist_ok=True)

zip_file_path = os.path.join(base_dir, 'dk-dataset.zip')

# Google Drive file ID
file_id = '1JWpW6JTdV__L-j18QU3wgGrcIdguHl8l'

# Construct the download URL
download_url = f'https://drive.google.com/uc?id={file_id}'

dataset_path = os.path.join(base_dir, 'dataset')
data_dir = pathlib.Path(dataset_path)

# Download and unzip only when needed so that "Restart & Run All" does not
# fetch and unpack the archive again. Delete the dataset folder (and the zip)
# on Drive to force a fresh copy, e.g. after an interrupted extraction.
if not data_dir.is_dir():
    if not os.path.isfile(zip_file_path):
        # gdown returns None instead of raising when the download fails.
        downloaded_path = gdown.download(download_url, zip_file_path, quiet=False)
        if downloaded_path is None:
            raise RuntimeError(f"Failed to download the dataset from {download_url}")

    # Fail with a clear message if the file on disk is not a real archive.
    if not zipfile.is_zipfile(zip_file_path):
        raise RuntimeError(f"{zip_file_path} is not a valid zip archive; delete it and re-run this cell.")

    # Unzip the dataset
    with zipfile.ZipFile(zip_file_path, 'r') as zf:
        zf.extractall(base_dir)

# Report what was extracted so a wrong archive layout is noticed immediately.
if data_dir.is_dir():
    print("Data directory exists and is a directory.")
    print("Subdirectories in the dataset:")
    for entry in data_dir.iterdir():
        # Hidden folders are not part of the dataset.
        if entry.is_dir() and not entry.name.startswith('.'):
            print(f'  - {entry.name}{os.path.sep}')
else:
    print(f"Error: The directory {data_dir} does not exist or is not a directory.")


Downloading...
From (original): https://drive.google.com/uc?id=1JWpW6JTdV__L-j18QU3wgGrcIdguHl8l
From (redirected): https://drive.google.com/uc?id=1JWpW6JTdV__L-j18QU3wgGrcIdguHl8l&confirm=t&uuid=bf5eda76-b11c-4c52-8ca0-69e9ae1b0c53
To: /content/drive/MyDrive/dk-dataset/dk-dataset.zip
100%|██████████| 210M/210M [00:12<00:00, 16.3MB/s]


Data directory exists and is a directory.
Subdirectories in the dataset:
  - train/
  - test/


In [5]:
# Locations of the labelled training images (positive / negative) and of the
# unlabelled test images, all relative to the extracted dataset folder.
train_pos_dir, train_neg_dir, test_dir = (
    os.path.join(data_dir, relative)
    for relative in ('train/positive', 'train/negative', 'test')
)


**2. Data Preprocessing**

This section preprocesses the images for the watermark detection model. It loads the labelled training images and the unlabelled test images, resizes them to 224×224, scales pixel values to the range [0, 1], one-hot encodes the labels, and splits the training data into training and validation sets.

In [None]:


import os
import pathlib
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical


# Root of the extracted dataset on the mounted Drive.
dataset_path = '/content/drive/MyDrive/dk-dataset/dataset'
data_dir = pathlib.Path(dataset_path)

# Define directories for training and testing
train_pos_dir, train_neg_dir, test_dir = [
    os.path.join(data_dir, subfolder)
    for subfolder in ('train/positive', 'train/negative', 'test')
]


def load_and_preprocess_image(image_path, img_size=(224, 224)):
    """Read one image from disk and turn it into a model-ready array.

    Args:
        image_path: Location of the image file (``str`` or ``os.PathLike``).
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        The resized image as a ``float32`` array in RGB channel order with
        pixel values scaled to ``[0, 1]``.

    Raises:
        ValueError: If OpenCV cannot read the file (``cv2.imread`` returned
            ``None``).
    """
    # Normalise path-like objects to a plain string before handing them to OpenCV.
    image = cv2.imread(os.fspath(image_path))
    if image is None:
        raise ValueError(f"Unable to load image at path: {image_path}")
    image = cv2.resize(image, img_size)
    # OpenCV decodes to BGR; the rest of the notebook works with RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # float32 instead of the default float64 halves the memory needed to keep
    # the whole dataset in RAM.
    return image.astype(np.float32) / 255.0  # Normalize pixel values


def load_training_data(pos_dir=None, neg_dir=None):
    """Load every readable training image together with its class label.

    Args:
        pos_dir: Folder with positive images (label 1). Defaults to the
            notebook-level ``train_pos_dir``.
        neg_dir: Folder with negative images (label 0). Defaults to the
            notebook-level ``train_neg_dir``.

    Returns:
        ``(images, labels)`` as NumPy arrays: all positive images first, then
        all negative ones, each group in sorted filename order. Files that
        cannot be loaded are reported with ``print`` and skipped.
    """
    if pos_dir is None:
        pos_dir = train_pos_dir
    if neg_dir is None:
        neg_dir = train_neg_dir

    images = []
    labels = []

    for directory, label in ((pos_dir, 1), (neg_dir, 0)):
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for filename in sorted(os.listdir(directory)):
            image_path = os.path.join(directory, filename)
            try:
                image = load_and_preprocess_image(image_path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Error processing {image_path}: {e}")
                continue
            images.append(image)
            labels.append(label)

    return np.array(images), np.array(labels)


def load_testing_data(directory=None):
    """Load every readable test image together with its filename.

    Args:
        directory: Folder with the test images. Defaults to the
            notebook-level ``test_dir``.

    Returns:
        ``(images, filenames)`` where ``images`` is a NumPy array and
        ``filenames`` is a list with one name per loaded image, in sorted
        filename order. Files that cannot be loaded are reported with
        ``print`` and skipped.
    """
    if directory is None:
        directory = test_dir

    images = []
    filenames = []

    # Sorted so that the image/filename order is the same on every run.
    for filename in sorted(os.listdir(directory)):
        image_path = os.path.join(directory, filename)
        try:
            image = load_and_preprocess_image(image_path)
        except Exception as e:
            # Best effort: skip files that cannot be read as images.
            print(f"Error processing {image_path}: {e}")
            continue
        images.append(image)
        filenames.append(filename)

    return np.array(images), filenames


# Load all labelled training images and the unlabelled test images.
X_all, y_all = load_training_data()
X_test, test_filenames = load_testing_data()

# Split training data into train and validation sets.
# Stratifying on the integer labels keeps the positive/negative ratio the same
# in both subsets; the fixed random_state makes the split repeatable.
X_train, X_val, y_train_labels, y_val_labels = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

# The split returns copies, so drop the unsplit array to avoid holding the
# whole image set in memory twice.
del X_all

# One-hot encode the labels to match the 2-unit softmax output of the model.
y_train = to_categorical(y_train_labels, num_classes=2)
y_val = to_categorical(y_val_labels, num_classes=2)

print(f"Training data shape: {X_train.shape}")
print(f"Validation data shape: {X_val.shape}")
print(f"Testing data shape: {X_test.shape}")


**3. Define and Compile the VGG16 Model**

This section sets up the VGG16 model for the watermark detection task: it loads the ImageNet-pretrained convolutional base, freezes its layers, adds a custom classification head, and compiles the model.

In [None]:


import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.models import Model


# VGG16 convolutional base with ImageNet weights and without its original
# classifier, for 224x224 RGB inputs.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pretrained layers so that only the new head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# Classification head: flatten -> dense -> dropout -> 2-way softmax.
x = base_model.output
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(2, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is called
# directly instead of being wrapped in print().
model.summary()


**4. Train the Model**

This section trains the model using the prepared training data and evaluates its performance on the validation set. It also plots training and validation accuracy and loss over the epochs.

In [None]:


import matplotlib.pyplot as plt

# X_train, y_train, X_val, y_val and model are created by the preprocessing
# and model cells earlier in this notebook. They are used directly here:
# importing them from `data_preprocessing` / `model` modules fails on a fresh
# kernel because the notebook never creates such modules.

# Train the classification head and track validation metrics after each epoch.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

# Final validation score of the trained model.
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f'Validation Accuracy: {val_accuracy:.4f}')

# Accuracy (left) and loss (right) curves for training vs. validation.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

for ax, metric, label in ((ax_accuracy, 'accuracy', 'Accuracy'), (ax_loss, 'loss', 'Loss')):
    ax.plot(history.history[metric])
    ax.plot(history.history[f'val_{metric}'])
    ax.set_title(f'Model {label}')
    ax.set_ylabel(label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')

plt.show()


**5. Display Sample Images**

This section visualizes sample images from the training dataset to give an overview of the types of images used for training.

In [None]:


import os
import cv2
import matplotlib.pyplot as plt


# Folders with the labelled training images on the mounted Drive:
# "positive" holds the images shown as "with watermark" below,
# "negative" the ones shown as "without watermark".
train_pos_dir = '/content/drive/MyDrive/dk-dataset/dataset/train/positive'
train_neg_dir = '/content/drive/MyDrive/dk-dataset/dataset/train/negative'


def display_sample_images(dir_path, num_samples=10, has_watermark=True):
    """Show up to ``num_samples`` images from ``dir_path`` side by side.

    Args:
        dir_path: Folder to take the images from.
        num_samples: Maximum number of images to draw in the single-row grid.
        has_watermark: Selects the title drawn above every image
            ('Watermark' or 'No Watermark').

    Files that OpenCV cannot read are skipped and do not count towards
    ``num_samples``.
    """
    title = 'Watermark' if has_watermark else 'No Watermark'
    count = 0
    plt.figure(figsize=(12, 12))

    # Sorted so the same samples are shown on every run.
    for filename in sorted(os.listdir(dir_path)):
        if count >= num_samples:
            break

        image = cv2.imread(os.path.join(dir_path, filename))
        if image is None:
            # Not a readable image (e.g. a stray non-image file); passing
            # None on to cvtColor would raise, so skip it.
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        plt.subplot(1, num_samples, count + 1)
        plt.imshow(image)
        plt.axis('off')
        plt.title(title)
        count += 1

    # Render now so the figure appears right after the text printed before
    # the call instead of at the end of the cell.
    plt.show()

# Show ten examples per class: first the positive images (with watermark),
# then the negative images (without watermark), each under its own heading.
for heading, sample_dir, watermark_flag in (
    ("Sample images with watermark:", train_pos_dir, True),
    ("\nSample images without watermark:", train_neg_dir, False),
):
    print(heading)
    display_sample_images(sample_dir, num_samples=10, has_watermark=watermark_flag)


**6. Save the Trained Model**

This section saves the trained model to a file for future use.

In [None]:


# `model` is the network built and trained by the cells earlier in this
# notebook. It is used directly: importing it from a `model` module fails on
# a fresh kernel because the notebook never creates such a module.

# Destination of the saved model on the mounted Drive.
model_save_path = '/content/drive/MyDrive/dk-dataset/vgg16_watermark_detection_model.h5'

model.save(model_save_path)
print(f"Model saved to {model_save_path}")


**7. Load the Model and Make Predictions**

This section demonstrates how to load the saved model and use it to make predictions on new test images.

In [None]:


import os
import cv2
import numpy as np
from tensorflow.keras.models import load_model


# Path of the saved model on the mounted Drive.
model_load_path = '/content/drive/MyDrive/dk-dataset/vgg16_watermark_detection_model.h5'

# Rebinds the notebook-level name `model` to the model read back from disk.
model = load_model(model_load_path)


# Folder with the unlabelled images to classify.
test_dir = '/content/drive/MyDrive/dk-dataset/dataset/test'


def load_and_preprocess_image(image_path, img_size=(224, 224)):
    """Read one image from disk and turn it into a model-ready array.

    Args:
        image_path: Location of the image file (``str`` or ``os.PathLike``).
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        The resized image as a ``float32`` array in RGB channel order with
        pixel values scaled to ``[0, 1]``.

    Raises:
        ValueError: If OpenCV cannot read the file (``cv2.imread`` returned
            ``None``).
    """
    # Normalise path-like objects to a plain string before handing them to OpenCV.
    image = cv2.imread(os.fspath(image_path))
    if image is None:
        raise ValueError(f"Unable to load image at path: {image_path}")
    image = cv2.resize(image, img_size)
    # OpenCV decodes to BGR; convert to RGB before scaling.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # float32 instead of the default float64: half the memory per image.
    return image.astype(np.float32) / 255.0


def predict_images(test_dir, batch_size=32):
    """Classify every readable image in ``test_dir`` with the loaded model.

    Images are sent to the notebook-level ``model`` in batches rather than
    one ``predict`` call per file, which is much faster and avoids printing
    one progress bar per image.

    Args:
        test_dir: Folder with the images to classify.
        batch_size: Maximum number of images per ``model.predict`` call.

    Returns:
        ``(filenames, predictions)``: two parallel lists in sorted filename
        order, where each prediction is 'Watermark' (class index 1) or
        'No Watermark'. Files that cannot be loaded are reported with
        ``print`` and skipped; errors raised by the model itself propagate.
    """
    filenames = []
    predictions = []
    pending_names = []
    pending_images = []

    def _flush():
        # Run the model on the images collected so far and record the labels.
        if not pending_images:
            return
        probabilities = model.predict(np.stack(pending_images), verbose=0)
        for name, class_index in zip(pending_names, np.argmax(probabilities, axis=1)):
            filenames.append(name)
            predictions.append('Watermark' if class_index == 1 else 'No Watermark')
        pending_names.clear()
        pending_images.clear()

    # Sorted so the output order is the same on every run.
    for filename in sorted(os.listdir(test_dir)):
        image_path = os.path.join(test_dir, filename)
        try:
            image = load_and_preprocess_image(image_path)
        except Exception as e:
            # Best effort: skip files that cannot be read as images.
            print(f"Error processing {image_path}: {e}")
            continue
        pending_names.append(filename)
        pending_images.append(image)
        # Bounded batches keep memory use independent of the folder size.
        if len(pending_images) >= batch_size:
            _flush()

    _flush()
    return filenames, predictions


# Classify every image in the test folder.
filenames, predictions = predict_images(test_dir)

# Print predictions, one "<file>: <label>" line per classified image.
for name_and_label in zip(filenames, predictions):
    print('{}: {}'.format(*name_and_label))
