<a href="https://colab.research.google.com/github/oriakhan/NewRepo/blob/master/Steganography_using_GPT_2_Generated_Cover_Text_and_AES_256_Encryption_in_CBC_Mode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Tesseract and pytesseract on Google Colab
!apt-get update
!apt-get install tesseract-ocr
!apt-get install libtesseract-dev
!pip install pytesseract
!pip install transformers
!pip install pycryptodome
# Restart the runtime after installation (required for pytesseract to work)
import os
os.kill(os.getpid(), 9)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [2]:


import os
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from PIL import Image
import pytesseract
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
import base64

# Function to generate cover text from image using GPT-2
def generate_cover_text_from_image(image_path, max_text_length):
    image = Image.open(image_path)
    extracted_text = pytesseract.image_to_string(image)

    model_name = "gpt2"
    tokenizer = GPT2Tokenizer.from_pretrained(model_name, pad_token="[PAD]")
    model = GPT2LMHeadModel.from_pretrained(model_name)

    # Convert the extracted text to a string representation
    extracted_text = extracted_text.strip().replace('\n', ' ')

    # Tokenize and add special tokens
    input_text = "This is an image: " + extracted_text
    inputs = tokenizer(input_text, add_special_tokens=True, return_tensors="pt")

    # Generate cover text using model.generate with full logits
    outputs = model.generate(inputs.input_ids, max_length=max_text_length + 100, num_return_sequences=1, temperature=0.7)

    cover_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return cover_text

# Function to generate a random 256-bit secret key and 128-bit IV for AES encryption
def generate_random_key_iv():
    key = os.urandom(32)
    iv = os.urandom(16)
    return key, iv

# Function to encrypt a secret message using AES-256 in CBC mode
def encrypt_message(secret_message, key, iv):
    # Create an AES-256 cipher in CBC mode with the key and IV
    cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
    encryptor = cipher.encryptor()

    # Add PKCS7 padding to the secret message
    padder = padding.PKCS7(128).padder()
    padded_data = padder.update(secret_message.encode()) + padder.finalize()

    # Encrypt the padded secret message
    ciphertext = encryptor.update(padded_data) + encryptor.finalize()
    return ciphertext

# Function to encode a secret message into stego text
def encode_secret_message(cover_text, secret_message, position):
    # Generate a random key and IV for encryption
    key, iv = generate_random_key_iv()

    # Encrypt the secret message using AES-256 with CBC mode
    ciphertext = encrypt_message(secret_message, key, iv)

    # Combine the ciphertext, key, and IV into stego_text using Base64 encoding
    ciphertext_text = base64.b64encode(ciphertext).decode()
    key_text = base64.b64encode(key).decode()
    iv_text = base64.b64encode(iv).decode()
    stego_text = (
        cover_text[:position]
        + ciphertext_text
        + key_text
        + iv_text
        + cover_text[position:]
    )

    return stego_text

# Function to extract the hidden secret message from stego text
def extract_secret_message(stego_text, position, secret_message_length):
    # Extract the hidden secret message from the stego text at the given position
    extracted_message = stego_text[position : position + secret_message_length]
    return extracted_message

# Function to decrypt a ciphertext using AES-256 in CBC mode
def decrypt_message(ciphertext, key, iv):
    # Ensure IV size is 16 bytes (128 bits) by padding if needed
    iv = iv + b'\x00' * (16 - len(iv))

    # Create the AES cipher object in CBC mode with the provided key and IV
    cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
    decryptor = cipher.decryptor()

    # Decrypt the ciphertext and remove PKCS7 padding
    decrypted_data = decryptor.update(ciphertext) + decryptor.finalize()

    # Create an unpadder for PKCS7 padding with 128 block size
    unpadder = padding.PKCS7(128).unpadder()
    unpadded_data = unpadder.update(decrypted_data) + unpadder.finalize()

    return unpadded_data
# Example usage
image_path = "/content/drive/MyDrive/basedata1/test/cover_img10.jpg"
max_text_length = 200
secret_message = "The secret message to hide."
position = 15

# Generate cover text from image
cover_text = generate_cover_text_from_image(image_path, max_text_length)

# Encode the secret message into stego text
stego_text = encode_secret_message(cover_text, secret_message, position)
print("Generated Stego Text:", stego_text)

# Extract the secret message from the stego text
extracted_message = extract_secret_message(stego_text, position, len(secret_message))
print("Extracted Secret Message:", extracted_message)
# ... (previous code)

# Decode the Base64 encoded key and IV
ciphertext_text = stego_text[position:position + 44]
key_text_start = position + 44
iv_text_start = key_text_start + 44

# Extract the key_text and iv_text from the stego_text
key_text = stego_text[key_text_start:iv_text_start]
iv_text = stego_text[iv_text_start:iv_text_start + 24]

# Decode the ciphertext, key, and IV from Base64
ciphertext = base64.b64decode(ciphertext_text.encode())
key = base64.b64decode(key_text.encode())
iv = base64.b64decode(iv_text.encode())

# Pad the key and IV to ensure their sizes are correct
key = key + b'\x00' * (32 - len(key))
iv = iv + b'\x00' * (16 - len(iv))

# Decrypt the extracted secret message
decrypted_message = decrypt_message(ciphertext, key, iv)

print("Decrypted Secret Message:", decrypted_message)


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Downloading model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Stego Text: This is an imagneOPX0HXHWFKbq2HET43msKru1ZUzPxgx2QFSH1bRJY=VxX4HiyJWFe8XhvFrR8J1IkwSlH0YxImPWVytYvELO4=nZ1e5A/BPZ2XVYj0SMZ+Og==e:  The image is a small, white rectangle with a small, white border.  The border is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with a small, white border.  The image is a small, white rectangle with