<a href="https://colab.research.google.com/github/oriakhan/NewRepo/blob/master/Seamless_Integration_of_DNA_Based_Steganography_and_Cryptographic_Methods_for_Enhanced_Data_Security_pynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup for Google Colab: install Tesseract (system packages)
# and the Python dependencies, then force a runtime restart.
# Lines starting with "!" are notebook shell escapes, not Python statements,
# so this cell only runs inside an IPython/Colab kernel.
#
# System packages: refresh the apt index, then install the Tesseract OCR
# engine and its development library.
!apt-get update
!apt-get install tesseract-ocr
!apt-get install libtesseract-dev
# Python packages. NOTE(review): the code visible in this file only imports
# cryptography, transformers and torch; pytesseract, pycryptodome and
# biopython may be used by cells not shown here - confirm before removing.
!pip install pytesseract
!pip install transformers
!pip install pycryptodome
!pip install torch
!pip install biopython
!pip install --upgrade cryptography
# Restart the runtime after installation (required for pytesseract to work).
# This sends signal 9 to the current kernel process, so nothing after this
# line in the cell will execute; re-run the following cells once the
# runtime has come back up.
import os
os.kill(os.getpid(), 9)

In [None]:
from cryptography.fernet import Fernet
from transformers import BertTokenizer, BertForMaskedLM
import torch

def convert_to_binary(data):
    """Expand an iterable of integers (e.g. ``bytes``) into a flat bit list.

    Each value is rendered as a zero-padded, at-least-8-digit binary string
    (most significant bit first) and every digit becomes one ``int`` entry.

    Args:
        data: Iterable of integers such as a ``bytes`` object, or ``None``.

    Returns:
        A list of 0/1 integers; an empty list when ``data`` is ``None``.
    """
    if data is None:
        return []

    bits = []
    for value in data:
        # "08b" pads to eight binary digits, MSB first.
        bits.extend(int(digit) for digit in format(value, "08b"))
    return bits

def generate_dna_sequence(binary_data):
    """Encode a flat sequence of bits as a DNA string, two bits per nucleotide.

    The mapping is 00 -> A, 01 -> C, 10 -> G, 11 -> T.

    Args:
        binary_data: Sequence of 0/1 values whose length is even.

    Returns:
        The DNA string with one nucleotide per bit pair ("" for empty input).

    Raises:
        ValueError: If the length is odd, or a pair contains a value other
            than 0 or 1. Either case would otherwise lose data silently or
            fail with an unrelated IndexError.
    """
    if len(binary_data) % 2 != 0:
        raise ValueError(
            "binary_data must contain an even number of bits, got "
            f"{len(binary_data)}."
        )

    pair_to_nucleotide = {
        (0, 0): "A",
        (0, 1): "C",
        (1, 0): "G",
        (1, 1): "T",
    }

    nucleotides = []
    for i in range(0, len(binary_data), 2):
        pair = (binary_data[i], binary_data[i + 1])
        try:
            nucleotides.append(pair_to_nucleotide[pair])
        except KeyError:
            raise ValueError(
                f"Invalid bit pair {pair!r} at index {i}; expected only 0s and 1s."
            ) from None

    # Join once instead of growing a string with repeated concatenation.
    return "".join(nucleotides)

def decode_dna_sequence(dna_sequence):
    """Turn a DNA string back into a flat bit list, two bits per nucleotide.

    Uses the mapping A -> 00, C -> 01, G -> 10, T -> 11. Any symbol other
    than these four upper-case letters contributes no bits.

    Args:
        dna_sequence: Iterable of nucleotide characters.

    Returns:
        A list of 0/1 integers.
    """
    nucleotide_bits = {
        "A": (0, 0),
        "C": (0, 1),
        "G": (1, 0),
        "T": (1, 1),
    }

    bits = []
    for symbol in dna_sequence:
        # Unknown symbols map to an empty tuple, i.e. they are skipped.
        bits.extend(nucleotide_bits.get(symbol, ()))
    return bits

def embed_dna_sequence(dna_sequence, text, model_name="bert-base-uncased"):
    """Fill the [MASK] tokens of ``text`` with nucleotides and embed the result.

    The tokenizer and model named by ``model_name`` are loaded, ``text`` is
    tokenized, and each [MASK] position receives the token id of the next
    nucleotide of ``dna_sequence`` (the sequence is repeated cyclically when
    there are more masks than nucleotides, and truncated when there are
    fewer). The filled ids are looked up in the model's word-embedding table
    and averaged over the token dimension.

    Args:
        dna_sequence: Non-empty string of nucleotide characters.
        text: Input text containing at least one [MASK] token.
        model_name: Name or path of the pretrained BERT checkpoint.

    Returns:
        Tensor of word embeddings averaged over ``dim=1``
        (presumably shaped (batch, hidden_size) - TODO confirm).

    Raises:
        ValueError: If ``dna_sequence`` is empty or ``text`` contains no
            [MASK] token.
    """
    if not dna_sequence:
        raise ValueError("dna_sequence must not be empty.")

    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertForMaskedLM.from_pretrained(model_name)

    inputs = tokenizer(text, return_tensors="pt")
    input_ids = inputs["input_ids"]

    # torch.where(condition) on a 2-D tensor yields (row_indices, col_indices).
    # The token positions are the column indices; the row indices only
    # identify the batch entry.
    mask_rows, mask_cols = torch.where(input_ids == tokenizer.mask_token_id)
    num_mask_tokens = len(mask_cols)
    if num_mask_tokens == 0:
        raise ValueError("No [MASK] tokens found in the input text.")

    # Repeat the DNA sequence often enough to cover every [MASK] token
    # (ceiling division), then cut it to exactly one nucleotide per mask.
    repeats = -(-num_mask_tokens // len(dna_sequence))
    nucleotides = (dna_sequence * repeats)[:num_mask_tokens]

    filled_ids = input_ids.clone()
    for nucleotide, row, col in zip(nucleotides, mask_rows, mask_cols):
        token_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(nucleotide))
        filled_ids[row, col] = token_ids[0]

    # Only the static embedding table is consulted; no forward pass is run.
    with torch.no_grad():
        embeddings = model.bert.embeddings.word_embeddings(filled_ids)

    return torch.mean(embeddings, dim=1)

def extract_original_data(binary_data):
    """Pack a flat bit list back into a ``bytes`` object.

    The bits are consumed in chunks of eight (most significant bit first);
    each chunk is rendered as a binary string and parsed as one byte value.
    A shorter trailing chunk is parsed the same way, as a shorter number.

    Args:
        binary_data: Sequence of 0/1 integers.

    Returns:
        The reconstructed ``bytes`` (``b""`` for empty input).
    """
    byte_values = [
        int("".join(str(bit) for bit in binary_data[start:start + 8]), 2)
        for start in range(0, len(binary_data), 8)
    ]
    return bytes(byte_values)


# End-to-end demonstration: encrypt a message, encode the ciphertext as a
# DNA string, embed that string into a masked sentence, then reverse the
# DNA encoding and decrypt.
#
# Step 1: Encryption
# A fresh Fernet key is generated on every run and kept only in memory.
key = Fernet.generate_key()
f = Fernet(key)
message = b"A really secret message. Not for prying eyes."
encrypted_token = f.encrypt(message)
print("encrypted_token:", encrypted_token)
# Step 2: Conversion to Binary
# The encrypted token (bytes) becomes a flat list of bits, 8 per byte.
binary_data = convert_to_binary(encrypted_token)
print("binary_data ", binary_data )
# Step 3: Generating DNA Sequence
# Every pair of bits is mapped to one nucleotide character.
dna_sequence = generate_dna_sequence(binary_data)
print("dna_sequence ", dna_sequence )
# Step 4: Embedding DNA Sequence into Text
# The cover text must contain at least one [MASK] token, otherwise
# embed_dna_sequence raises ValueError.
text = "This is a [MASK] text for DNA embedding."
model_name = "bert-base-uncased"
avg_embedding = embed_dna_sequence(dna_sequence, text, model_name)
print("avg_embedding  ", avg_embedding  )
# Step 5: Decoding DNA Sequence
# NOTE(review): decoding starts from the in-memory dna_sequence, not from
# avg_embedding; the embedding is only printed above and is not used by any
# later step in this cell.
decoded_binary_data = decode_dna_sequence(dna_sequence)
print("decoded_binary_data  ", decoded_binary_data  )
# Step 6: Extracting Original Data
# The bits are regrouped into bytes, reproducing the encrypted token.
extracted_original_data = extract_original_data(decoded_binary_data)
print("extracted_original_data ", extracted_original_data  )
# Step 7: Decryption
# Uses the same Fernet instance (and therefore the same key) as Step 1.
decrypted_message = f.decrypt(extracted_original_data)

# Print results
print("Original Message:", message.decode())
print("Decrypted Message:", decrypted_message.decode())