In [None]:
# Install the Kaggle API client quietly (does nothing new if it is already installed)
!pip install -q kaggle

# Open Colab's upload dialog; choose your kaggle.json API token.
# The `mv` below expects the uploaded file to be named kaggle.json and to be
# in the current working directory.
from google.colab import files
files.upload()  # Upload kaggle.json here

# Move kaggle.json into ~/.kaggle and restrict it to owner read/write (mode 600)
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

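# Download the dataset from Kaggle into /content.
# Uncomment the command below for the first run (or to use a different dataset, replace the
# 'owner/dataset-name' slug); the extraction step expects the downloaded zip to be in /content.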
# !kaggle datasets download -d denislukovnikov/ffhq256-images-only -p /content

# Unzip the dataset
import os
import shutil
import zipfile

dataset_zip_path = '/content/ffhq256-images-only.zip'  # Change this to the actual downloaded file name
dataset_extracted_dir = '/content/dataset_images'

if not os.path.exists(dataset_extracted_dir):
    # Fail early with an actionable message instead of a bare path error.
    if not os.path.isfile(dataset_zip_path):
        raise FileNotFoundError(
            f"Dataset archive not found at {dataset_zip_path}. "
            "Run the 'kaggle datasets download' command first."
        )

    print("Extracting dataset...")
    # Extract into a scratch directory and rename it only on success. If the
    # extraction is interrupted, dataset_extracted_dir never exists in a
    # half-populated state that a later run would mistake for a complete dataset.
    partial_dir = dataset_extracted_dir + '.partial'
    shutil.rmtree(partial_dir, ignore_errors=True)  # discard leftovers of a previous failed run
    with zipfile.ZipFile(dataset_zip_path, 'r') as zip_ref:
        zip_ref.extractall(partial_dir)
    os.replace(partial_dir, dataset_extracted_dir)
    print("Dataset extracted.")
else:
    print("Dataset already extracted.")

# Now the dataset is available; continue with image processing code

import PIL
import torch
import matplotlib.pyplot as plt
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

# Load the InstructPix2Pix pipeline in half precision, with the safety checker disabled
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    safety_checker=None,
)
pipe.to("cuda")  # inference runs on the GPU

# Replace the pipeline's scheduler with Euler-ancestral, built from the current scheduler's config
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
    pipe.scheduler.config
)

# Archives that receive the source images and their stylized counterparts
original_zip_path = '/content/original_images.zip'
modified_zip_path = '/content/modified_images.zip'

# Function to load a local image
def load_image_from_local(path):
    """Load an image file as an upright RGB ``PIL.Image.Image``.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        A new RGB image with any EXIF orientation already applied.

    Raises:
        OSError: If the file cannot be opened or is not a readable image
            (``PIL.UnidentifiedImageError`` is a subclass of ``OSError``).
    """
    # A bare ``import PIL`` does not load the Image / ImageOps submodules, so
    # import them explicitly instead of relying on another library having
    # imported them as a side effect.
    from PIL import Image, ImageOps

    # The context manager closes the underlying file deterministically; the
    # returned image is a fully loaded copy, so it stays valid afterwards.
    with Image.open(path) as image:
        upright = ImageOps.exif_transpose(image)  # Handle orientation based on EXIF data
        return upright.convert("RGB")             # Convert to RGB format if needed

# Define prompt for style transfer
prompt = "vincent van gogh style"

# Process and save images
processed_images = []
max_images = 1000  # Adjust max_images if needed
count = 0

# Only files with these extensions are treated as images; anything else found
# in the extracted dataset (metadata, licence files, ...) is skipped rather
# than crashing the run when PIL fails to open it.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.webp')


def _iter_image_paths(root):
    """Yield paths of image files under ``root`` in sorted, repeatable order."""
    for dirname, dirnames, filenames in os.walk(root):
        dirnames.sort()  # sorting in place controls the order os.walk descends in
        for filename in sorted(filenames):
            if filename.lower().endswith(image_extensions):
                yield os.path.join(dirname, filename)


def _unique_png_name(filename, used_names):
    """Return a ``.png`` archive name for ``filename`` not yet in ``used_names``.

    The image data is always written as PNG, so the entry name carries a
    ``.png`` extension. When the same name was already used (same basename in
    another subdirectory), a numeric suffix keeps the zip entries distinct.
    The chosen name is added to ``used_names``.
    """
    stem = os.path.splitext(filename)[0]
    candidate = f"{stem}.png"
    suffix = 1
    while candidate in used_names:
        candidate = f"{stem}_{suffix}.png"
        suffix += 1
    used_names.add(candidate)
    return candidate


used_names = set()

# Open zip files to save images
with zipfile.ZipFile(original_zip_path, 'w') as original_zip, \
        zipfile.ZipFile(modified_zip_path, 'w') as modified_zip:
    for image_path in _iter_image_paths(dataset_extracted_dir):
        if count >= max_images:
            break

        print(f"Number {count}, Processing image: {image_path}")

        # Load the original image; a corrupt file is skipped so it does not
        # abort a long batch run.
        try:
            original_image = load_image_from_local(image_path)
        except OSError as err:
            print(f"Skipping unreadable image {image_path}: {err}")
            continue

        # Apply style transfer before writing anything, so both archives
        # always contain the same set of entries even if the pipeline fails.
        modified_image = pipe(prompt, image=original_image).images[0]

        archive_name = _unique_png_name(os.path.basename(image_path), used_names)

        # Save the original image to the zip file
        with original_zip.open(archive_name, 'w') as img_file:
            original_image.save(img_file, format='PNG')

        # Save the modified image to the modified zip file
        with modified_zip.open(archive_name, 'w') as img_file:
            modified_image.save(img_file, format='PNG')

        # Store the modified image for display
        processed_images.append(modified_image)

        count += 1

print("Processing complete. Zipped files saved at:")
print(f"- Original images: {original_zip_path}")
print(f"- Modified images: {modified_zip_path}")
