In [2]:
import csv
import hashlib
import os

# Secret watermarking key. Supply it through the WATERMARK_PRIVATE_KEY
# environment variable; the literal is only a backward-compatible fallback so
# that data marked with the original key can still be verified.
# SECURITY: a key stored in the notebook is not secret -- rotate it and drop
# the fallback before using this on real data.
PRIVATE_KEY = os.environ.get("WATERMARK_PRIVATE_KEY") or "WmKz8n5q@3!TgLzX7&bV#Pf9"

# Parameters
# Fraction of tuples to be marked. It is consumed as int(1 / GAMMA), so 0.4
# yields a modulus of 2 (tuples whose hash is even are selected), not 2.5.
GAMMA = 0.4
# Number of least significant bits available for marking
L = 2

def F(value):
    """Return the keyed double SHA-256 of ``value`` as a non-negative integer.

    ``str(value)`` is prefixed with PRIVATE_KEY and hashed; the resulting hex
    digest is prefixed with PRIVATE_KEY and hashed again. The second hex
    digest is parsed as a base-16 integer.
    """
    def keyed_digest(text):
        # One round: SHA-256 over the key followed by the text, as hex.
        return hashlib.sha256((PRIVATE_KEY + text).encode()).hexdigest()

    return int(keyed_digest(keyed_digest(str(value))), base=16)

def mark(primary_key, attribute_value, bit_index):
    """Overwrite one bit of ``attribute_value`` with a key-derived bit.

    The bit written is the parity of ``F(primary_key)`` (even -> 0, odd -> 1).

    Args:
        primary_key: Value hashed by ``F`` to decide the bit to write.
        attribute_value: Integer whose bit is overwritten.
        bit_index: Position of the bit to overwrite (0 is the lowest bit).

    Returns:
        ``attribute_value`` with the bit at ``bit_index`` replaced.
    """
    parity = F(primary_key) % 2
    # Clear the target bit first, then drop the parity bit into its place.
    cleared = attribute_value & ~(1 << bit_index)
    return cleared | (parity << bit_index)

def insert_watermark(input_file, output_file, has_header=True, primary_key_index=0):
    """Embed the watermark into a CSV table and write the result to a new file.

    Each row is split into its primary key (kept as text) and the remaining
    columns, which must all parse as integers. A tuple is selected when
    ``F(primary_key) % int(1 / GAMMA) == 0``; for a selected tuple one
    attribute and one of its ``L`` low bits are chosen from the same hash and
    the bit is overwritten by ``mark``.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.
        has_header: When true, the first row is copied through unchanged.
        primary_key_index: Column position of the primary key.

    Returns:
        None. A one-line summary is printed, plus a second line when rows
        were skipped.

    Notes:
        Rows that are too short to contain the primary key, or that hold a
        non-integer attribute, are not written to ``output_file``.
        NOTE(review): selection, attribute index, bit index and (inside
        ``mark``) the bit value all derive from the same ``F(primary_key)``,
        so they are correlated whenever the moduli share a factor.
    """
    header = None
    database = []
    skipped_rows = 0

    with open(input_file, newline="") as csvfile:
        reader = csv.reader(csvfile)
        if has_header:
            # A default keeps an empty input file from raising StopIteration.
            header = next(reader, None)

        for row in reader:
            try:
                primary_key = row[primary_key_index]
                # Convert all except primary key to integers
                attributes = list(map(int, row[:primary_key_index] + row[primary_key_index + 1:]))
            except (ValueError, IndexError):
                # ValueError: non-integer attribute. IndexError: blank or
                # short row (csv.reader yields [] for an empty line).
                skipped_rows += 1
                continue
            database.append((primary_key, attributes))

    selection_modulus = int(1 / GAMMA)
    marked_count = 0
    total_tuples = len(database)

    for primary_key, attributes in database:
        if not attributes:
            # Nothing to mark; also avoids a modulo by zero below.
            continue
        key_hash = F(primary_key)  # hashed once, reused for every decision
        if key_hash % selection_modulus == 0:  # Decide if tuple should be marked
            attr_index = key_hash % len(attributes)  # Choose an attribute to mark
            bit_index = key_hash % L  # Choose a bit position to mark
            attributes[attr_index] = mark(primary_key, attributes[attr_index], bit_index)
            marked_count += 1

    print(f"Marked {marked_count} out of {total_tuples} tuples.")
    if skipped_rows:
        print(f"Skipped {skipped_rows} malformed row(s).")

    # Write watermarked database to CSV
    with open(output_file, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        if header is not None:
            writer.writerow(header)  # Write header back
        for primary_key, attributes in database:
            # Put the key back in its original column so every row stays
            # aligned with the header.
            writer.writerow(
                attributes[:primary_key_index] + [primary_key] + attributes[primary_key_index:]
            )


# File locations for the demo run; output_csv is read again by the detection call.
input_csv = "input.csv"
output_csv = "watermarked_output.csv"

# Embed the watermark, then report where the result was written.
insert_watermark(
    input_file=input_csv,
    output_file=output_csv,
    has_header=True,
    primary_key_index=0,
)
print(f"Watermarked data saved to {output_csv}")


Marked 4 out of 7 tuples.
Watermarked data saved to watermarked_output.csv


In [4]:

def get_bit(num, bit_index):
    """Return the bit of ``num`` at position ``bit_index`` (0 is the lowest bit)."""
    shifted = num >> bit_index  # move the wanted bit into the lowest position
    return shifted % 2

def detect_watermark(input_file, has_header=True, primary_key_index=0, threshold=0.5):
    """Check whether a CSV table carries the watermark written by insert_watermark.

    Rows are parsed the same way as during insertion (primary key kept as
    text, every other column converted to int). For each tuple selected by
    ``F(primary_key) % int(1 / GAMMA) == 0`` the attribute and bit position
    are recomputed from the hash and the stored bit is compared with the
    parity of the hash.

    Args:
        input_file: Path of the CSV file to inspect.
        has_header: When true, the first row is discarded.
        primary_key_index: Column position of the primary key.
        threshold: Minimum fraction of selected tuples whose bit must match.

    Returns:
        True when ``matching_bits / total_marked >= threshold``; False
        otherwise, including when no tuple is selected.

    Notes:
        Rows that are too short to contain the primary key, hold a
        non-integer attribute, or have no attribute columns are ignored.
    """
    database = []

    with open(input_file, newline="") as csvfile:
        reader = csv.reader(csvfile)
        if has_header:
            # Discard the header; the default tolerates an empty file.
            next(reader, None)
        for row in reader:
            try:
                primary_key = row[primary_key_index]
                # Convert all attributes (except primary key) to integers
                attributes = list(map(int, row[:primary_key_index] + row[primary_key_index + 1:]))
            except (ValueError, IndexError):
                # ValueError: non-integer attribute. IndexError: blank or
                # short row (csv.reader yields [] for an empty line).
                continue
            database.append((primary_key, attributes))

    selection_modulus = int(1 / GAMMA)
    total_marked = 0
    matching_bits = 0

    for primary_key, attributes in database:
        if not attributes:
            # No attribute could have been marked; avoids a modulo by zero.
            continue
        key_hash = F(primary_key)  # hashed once, reused for every decision
        # Check if this tuple was marked (same condition as insertion)
        if key_hash % selection_modulus != 0:
            continue
        total_marked += 1
        # Determine the attribute and bit index used during insertion
        attr_index = key_hash % len(attributes)
        bit_index = key_hash % L
        expected_bit = key_hash % 2
        if get_bit(attributes[attr_index], bit_index) == expected_bit:
            matching_bits += 1

    if total_marked == 0:
        print("No marked tuples found.")
        return False

    match_ratio = matching_bits / total_marked
    print(f"Marked tuples: {total_marked}, Matching bits: {matching_bits}, Ratio: {match_ratio:.2f}")

    if match_ratio >= threshold:
        print("Watermark detected!")
        return True

    print("Watermark not detected.")
    return False

# Run detection on the watermarked file; as the last expression of the cell,
# the boolean result is displayed.
detect_watermark(
    input_file=output_csv,
    has_header=True,
    primary_key_index=0,
    threshold=0.5,
)


Marked tuples: 4, Matching bits: 2, Ratio: 0.50
Watermark detected!


True