In [1]:
%pip install langchain openai langchain-openai pydantic pillow python-dotenv 

Note: you may need to restart the kernel to use updated packages.


In [8]:
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from typing import List, Optional
from PIL import Image
import base64
from io import BytesIO

# Load environment variables from .env file
load_dotenv()

class ImageAnalysisResult(BaseModel):
    main_objects: List[str] = Field(description="The primary objects in the image")
    additional_objects: List[str] = Field(description="Secondary or background objects in the image")
    colors: List[str] = Field(description="Dominant colors in the image")
    texture: Optional[str] = Field(description="Texture of the main object(s)")
    material: Optional[str] = Field(description="Material of the main object(s)")
    shape: Optional[str] = Field(description="Shape of the main object(s)")
    size_estimate: Optional[str] = Field(description="Estimated size of the main object(s)")
    condition: Optional[str] = Field(description="Condition of the item(s) in the image")
    brand: Optional[str] = Field(description="Brand name if visible in the image")
    style: Optional[str] = Field(description="Style or design of the main object(s)")
    functionality: Optional[str] = Field(description="Apparent functionality of the main object(s)")
    context: Optional[str] = Field(description="Context or setting of the image")
    image_quality: Optional[str] = Field(description="Quality of the image itself")

class OpenAIVisionProcessor:
    def __init__(self):
        self.chat_model = ChatOpenAI(
            model_name="gpt-4o",
            max_tokens=1000
        )
        self.parser = PydanticOutputParser(pydantic_object=ImageAnalysisResult)

    def encode_image(self, image_path):
        with Image.open(image_path) as img:
            buffered = BytesIO()
            img.save(buffered, format="PNG")
            return base64.b64encode(buffered.getvalue()).decode('utf-8')

    def process_image(self, image_path: str) -> ImageAnalysisResult:
        base64_image = self.encode_image(image_path)
        
        human_message = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": f"Analyze this image in detail. {self.parser.get_format_instructions()}"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}"
                    }
                }
            ]
        )

        ai_message = self.chat_model.invoke([human_message])
        return self.parser.parse(ai_message.content)
    
class ImageProcessingModule:
    def __init__(self, vision_processor):
        self.vision_processor = vision_processor

    def process_image(self, image_path: str) -> ImageAnalysisResult:
        return self.vision_processor.process_image(image_path)

class AnalysisCombiner:
    def __init__(self):
        self.llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
        self.prompt = ChatPromptTemplate.from_template(
            "You are tasked with combining multiple image analyses into a single, coherent summary. "
            "Focus on the most important and consistent information across all analyses. "
            "If there are discrepancies, mention them.\n\n"
            "Image Analyses:\n{analyses}\n\n"
            "Provide a combined summary of these analyses, highlighting the key features "
            "of the item(s) being given away for free."
        )
        self.chain = self.prompt | self.llm

    def combine_analyses(self, analyses):
        analyses_str = "\n\n".join([f"Analysis {i+1}:\n{analysis.json()}" for i, analysis in enumerate(analyses)])
        return self.chain.invoke({"analyses": analyses_str})

class PostGenerationService:
    def __init__(self):
        self.llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.7)
        self.generate_prompt = ChatPromptTemplate.from_template(
            "Given the following details about an item, create a concise and appealing post for giving it away for free:\n"
            "Item details: {item_details}\n"
            "Generate a title and description for the post."
        )
        self.generate_chain = self.generate_prompt | self.llm

        self.incorporate_prompt = ChatPromptTemplate.from_template(
            "Original post: {original_post}\n\n"
            "User input: {user_input}\n\n"
            "Please modify the original post to incorporate the user's input while maintaining "
            "the overall structure and appeal of the post. If the user input contradicts the "
            "original post, prioritize the user's information."
        )
        self.incorporate_chain = self.incorporate_prompt | self.llm

    def generate_post(self, item_details):
        return self.generate_chain.invoke({"item_details": item_details})

    def incorporate_user_input(self, original_post, user_input):
        return self.incorporate_chain.invoke({
            "original_post": original_post,
            "user_input": user_input
        })

class PostOptimizationService:
    def __init__(self):
        self.llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.3)
        self.prompt = ChatPromptTemplate.from_template(
            "Optimize the following post for a free item to maximize engagement and clarity:\n"
            "Original post: {original_post}\n"
            "Provide an optimized version of the title and description."
        )
        self.chain = self.prompt | self.llm

    def optimize_post(self, original_post):
        return self.chain.invoke({
            "original_post": original_post
        })

class Curbd:
    def __init__(self):
        self.image_processor = ImageProcessingModule(OpenAIVisionProcessor())
        self.analysis_combiner = AnalysisCombiner()
        self.post_generator = PostGenerationService()
        self.post_optimizer = PostOptimizationService()

    def process_images_and_generate_post(self, image_paths, user_input=None):
        # Process all images
        image_analyses = [self.image_processor.process_image(path) for path in image_paths]
        
        # Combine image analyses
        combined_analysis = self.analysis_combiner.combine_analyses(image_analyses)
        
        # Generate initial post
        initial_post = self.post_generator.generate_post(combined_analysis)
        
        # Optimize post
        optimized_post = self.post_optimizer.optimize_post(initial_post)
        
        # Incorporate user input if provided
        if user_input:
            final_post = self.post_generator.incorporate_user_input(optimized_post, user_input)
        else:
            final_post = optimized_post
        
        return final_post
 

    
app = Curbd()
image_paths = ["craigslist_dataset/item_0.jpg", "craigslist_dataset/item_1.jpg"]
final_post = app.process_images_and_generate_post(image_paths)
print(final_post)

content="Title: Free Vintage Sofa and Traditional Chair - Perfect for Your Home!\n\nDescription: Don't miss out on this opportunity to elevate your seating area with a beautiful vintage-style sofa and a traditional chair, both in good condition and ready to add charm to your space. The large sofa is ideal for outdoor relaxation with its curved edges and earthy tones, while the medium-sized chair features a classic design with a smooth wood texture and comfortable upholstery. Get your hands on these high-quality items for free and enhance your home decor today!" response_metadata={'token_usage': {'completion_tokens': 104, 'prompt_tokens': 265, 'total_tokens': 369}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-c2591b25-517c-4e5e-8e47-5b9eb617e973-0' usage_metadata={'input_tokens': 265, 'output_tokens': 104, 'total_tokens': 369}
