In [1]:
import openai
import pandas as pd
import time
import os
import base64
from dotenv import load_dotenv
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

In [2]:
# Set up OpenAI client with your API key
client = openai.OpenAI(api_key=openai_api_key)

# Function to serialize image to base64
def serialize_image(image_path):
    """Convert an image to a base64-encoded string."""
    with open(image_path, "rb") as img_file:
        img_bytes = img_file.read()
        return base64.b64encode(img_bytes).decode("utf-8")  # Convert bytes to UTF-8 string

# Function to send image to OpenAI GPT-4 Turbo with Vision
def analyze_image(image_path):
    """Send base64-encoded image to OpenAI API for vision-based inference."""
    base64_image = serialize_image(image_path)  # Convert image to base64
    try:
        client = openai.OpenAI(api_key=openai_api_key)  # Replace with your API key

        response = client.chat.completions.create(
            model="gpt-4o",  # Vision-enabled GPT-4 model
            messages=[
                {"role": "system", "content": "Describe the image content in detail."},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What is in this image?"},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
                    ],
                },
            ],
            temperature=0.5,
            max_tokens=300
        )

        return response.choices[0].message.content.strip()  # Extract the AI's response

    except Exception as e:
        print(f"Error: {e}")
        return None

# Example Usage
image_path = "ex1.jpg"  # Path to your image file
summary = analyze_image(image_path)
print("Image Analysis Summary:", summary)


Image Analysis Summary: The image depicts a hand applying a force \( F \) to a wrench. The wrench is shown with a pivot point labeled \( O \) and a distance \( d \) from the pivot to where the force is applied. The wrench is being rotated around the pivot point \( O \) in a circular motion, indicated by the blue arrow around the axis labeled \( z \). This setup illustrates the concept of torque, where the force applied at a distance from the pivot causes rotation.


In [4]:
from io import BytesIO
from PIL import Image

# Your Base64 string
base64_image = serialize_image(image_path)

# Decode and open image
image_data = base64.b64decode(base64_image)
image = Image.open(BytesIO(image_data))
image.show()

In [13]:
import pickle
import base64
import openai


def serialize_object(obj):
    """Serialize an object using pickle and encode it as a base64 string."""
    obj_bytes = pickle.dumps(obj)  # Convert object to bytes
    return base64.b64encode(obj_bytes).decode('utf-8')  # Encode as base64 string

# Function to get summary from OpenAI API
def get_summary(text):
    try:
        response = client.chat.completions.create(
            model="gpt-4",  # Use "gpt-3.5-turbo" if needed
            messages=[
                {"role": "system", "content": "I have a tabular content, plz describe the contents."},
                {"role": "user", "content": text}
            ],
            temperature=0.5,
            max_tokens=300
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error: {e}")
        return None

# Example object (could be a dictionary, DataFrame, etc.)
my_dict = {"key1": "value1", "key2": "value2"}

# Step 1: Serialize the object
base64_str = serialize_object(my_dict)

# Step 2: Send the base64 string to GPT-4 for processing
bytes_data = base64.b64decode(base64_str)
obj = pickle.loads(bytes_data)
gpt_response = get_summary(obj)

# Print the response from GPT-4
print("GPT-4's response:", gpt_response)


Error: Error code: 400 - {'error': {'message': "Invalid type for 'messages[1].content': expected one of a string or array of objects, but got an object instead.", 'type': 'invalid_request_error', 'param': 'messages[1].content', 'code': 'invalid_type'}}
GPT-4's response: None


In [20]:
from pdf2image import convert_from_path
from PIL import Image
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter

# Path to your multi-column PDF
pdf_path = './data/QA/chap_1_QA.pdf'

# Convert PDF to images (one per page)
pages = convert_from_path(pdf_path, 300)  # 300 DPI for better accuracy

# List to hold new pages (images)
new_pages = []

# Process each page
for page_number, page in enumerate(pages):
    width, height = page.size
    
    # Define the left and right halves of the page (vertically split)
    left_half = page.crop((0, 0, width // 2, height))  # Left half
    right_half = page.crop((width // 2, 0, width, height))  # Right half
    
    # Save the left and right halves as separate new images
    left_image_path = f"left_page_{page_number + 1}.png"
    right_image_path = f"right_page_{page_number + 1}.png"
    
    left_half.save(left_image_path)
    right_half.save(right_image_path)
    
    # Append image paths to new_pages (for later inclusion in PDF)
    new_pages.append(left_image_path)
    new_pages.append(right_image_path)

# Create a new PDF with the new images (left and right halves)
output_pdf_path = './data/QA/new_chap_1_QA.pdf'
c = canvas.Canvas(output_pdf_path, pagesize=letter)

for image_path in new_pages:
    c.drawImage(image_path, 0, 0, width=400, height=600)  # Adjust the width/height as needed
    c.showPage()  # Move to the next page

c.save()

print(f"New PDF saved at {output_pdf_path}")


New PDF saved at ./data/QA/new_chap_1_QA.pdf
