In [17]:
import os
import pandas as pd
import openai
from google.cloud import vision
import io
import time
from dotenv import load_dotenv

In [18]:

# Read the variables stored in the local 'openai.env' file into the process
# environment so the key never has to appear in the notebook itself.
load_dotenv('openai.env')

# Hand the value stored under OAI_KEY to the OpenAI client library.
# NOTE(review): this is None when OAI_KEY is not defined — the failure would
# then only surface on the first API call.
openai.api_key = os.environ.get("OAI_KEY")


In [19]:

# Set up your Google Cloud Vision API client.
# setdefault keeps a GOOGLE_APPLICATION_CREDENTIALS value that is already
# exported in the environment, so the notebook also runs on machines where the
# credentials live elsewhere; the local key file below is only the fallback.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/Users/njlalwani/Downloads/fiery-rarity-430115-b5-5cad704232f1.json',
)
client = vision.ImageAnnotatorClient()


I0000 00:00:1722076350.795137 5804692 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported


In [20]:
# Extract labels from the Vision API

def get_image_labels(image_path):
    """Return the label descriptions Google Vision detects in an image.

    Args:
        image_path: Path of the image file to read and submit for label
            detection.

    Returns:
        list[str]: The ``description`` of every label annotation in the
        response. An empty list is returned (and the problem is printed) when
        the file cannot be read, the request raises, or the response itself
        reports an error.
    """
    try:
        with open(image_path, 'rb') as image_file:
            content = image_file.read()

        image = vision.Image(content=content)
        response = client.label_detection(image=image)

        # The Vision API can report a failure inside an otherwise successful
        # response; without this check such a failure looks like "no labels".
        if response.error.message:
            raise RuntimeError(response.error.message)

        return [label.description for label in response.label_annotations]
    except Exception as e:
        # Best effort: one unreadable image must not abort a whole batch.
        print(f"Error processing image {image_path}: {e}")
        return []


In [28]:

def generate_prompt_from_labels(labels, code, color):
    """Build the text prompt that asks the model for catalog copy.

    The prompt starts with the product code, the product color and the
    comma-separated image labels, followed by fixed instructions requesting a
    long description, a short description, keywords and tags, each introduced
    by its label and a ``::`` separator.

    Args:
        labels: Iterable of label strings describing the image.
        code: Product code inserted into the prompt header.
        color: Product color inserted into the prompt header.

    Returns:
        str: The assembled prompt.
    """
    joined_labels = ', '.join(labels)

    # Product-specific header
    product_header = (
        f"Product Code: {code}\n"
        f"Product Color: {color}\n"
        f"Image Labels: {joined_labels}\n\n"
    )

    # Fixed output-format instructions; downstream parsing relies on '::'
    output_instructions = (
        "Generate the following:\n"
        "- Long Description:: Information-rich description of the dress (around 50 words, no style codes)\n"
        "- Short Description:: Concise 20-word artsy description for catalog display (no style codes)\n"
        "- Keywords:: Specific terms users might search for to find this product (no style codes)\n"
        "- Tags:: Single-word info for filtering purposes (e.g., color, length, no style codes)\n\n"
        "Stick to this format and avoid ':' in text use :: because it is the seprator"
    )

    return product_header + output_instructions


In [22]:

# Field labels requested by the prompt, in the order they are returned.
_PRODUCT_FIELDS = ('Long Description', 'Short Description', 'Keywords', 'Tags')


def _parse_product_info(response_text):
    """Extract the four ``Label:: value`` fields from the model's reply."""
    parsed = dict.fromkeys(_PRODUCT_FIELDS, "N/A")

    for line in response_text.splitlines():
        label, separator, value = line.partition('::')
        if not separator:
            # Lines without the separator carry no field. In particular they
            # must not reset a field that was already parsed just because
            # they happen to mention "Tags" or "Keywords".
            continue

        label = label.lower()
        # Tolerate a missing space after '::' and markdown bold markers that
        # end up on the value side (e.g. "**Tags::** Navy").
        value = value.strip().strip('*').strip()

        # Only the text before '::' is searched for the label, so label words
        # appearing inside a value cannot redirect the line to another field.
        for field in _PRODUCT_FIELDS:
            if field.lower() in label:
                if value:
                    parsed[field] = value
                break

    return tuple(parsed[field] for field in _PRODUCT_FIELDS)


def generate_product_info(prompt):
    """Ask the chat model for product copy and split the reply into fields.

    The prompt is sent as a single user message to ``gpt-4o-mini`` through
    ``openai.ChatCompletion.create`` and the raw reply is printed for
    debugging before it is parsed.
    NOTE(review): ``openai.ChatCompletion`` looks like the pre-1.0 interface
    of the openai package — confirm the installed version.

    Args:
        prompt: Prompt text; the reply is expected to contain one
            ``Label:: value`` line per field.

    Returns:
        tuple[str, str, str, str]: ``(long_description, short_description,
        keywords, tags)``. A field that is absent from the reply, or present
        with an empty value, is returned as ``"N/A"``.
    """
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=250
    )

    response_text = response.choices[0].message['content'].strip()
    print("API Response:", response_text)  # Debugging line

    return _parse_product_info(response_text)

In [30]:

def _ask_batch_action():
    """Prompt the operator after a batch; return ``'next'`` or ``'reprocess'``."""
    user_input = input("Press Enter to continue to the next batch or type 'pause' to pause: ")
    if user_input.lower() != 'pause':
        return 'next'

    print("Pausing... Type 'resume' to continue or 'reprocess' to reprocess the previous batch.")
    while True:
        response = input().lower()
        if response == 'resume':
            return 'next'
        if response == 'reprocess':
            return 'reprocess'
        print("Invalid input. Type 'resume' to continue or 'reprocess' to reprocess the previous batch.")


def process_images(image_directory, output_csv, batch_size=10):
    """Process images in the directory and save product information to a CSV file.

    Only ``.jpg``/``.png`` files whose name contains ``_1.`` are considered.
    File names are split on ``_`` into code, color and a ``num.ext`` part;
    files that do not fit are reported and skipped. For each remaining file
    the image labels are fetched, a prompt is generated and the resulting
    product copy is collected. After every batch the accumulated table is
    written to ``output_csv`` and the operator is asked whether to continue,
    pause, or reprocess the batch that just finished.

    Rows are keyed by ``Filename``: reprocessing a batch, or re-running
    against an existing CSV, replaces the earlier rows for those files
    instead of appending duplicates.

    Args:
        image_directory: Directory that is listed for image files.
        output_csv: CSV path; loaded first when it already exists and
            rewritten after every batch.
        batch_size: Number of files handled between two saves/prompts.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (the batch loop could
            never advance).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Load existing data if the CSV exists
    if os.path.exists(output_csv):
        df = pd.read_csv(output_csv)
    else:
        df = pd.DataFrame(columns=['Code','Color', 'Long Description', 'Short Description', 'Keywords', 'Tags','Filename'])

    # Get list of image files with num=1 in filename; sorted because
    # os.listdir order is arbitrary and batches should be reproducible.
    image_files = sorted(
        f for f in os.listdir(image_directory)
        if f.endswith(('.jpg', '.png')) and '_1.' in f
    )
    # Ceiling division, so an exact multiple of batch_size is not over-counted.
    total_batches = (len(image_files) + batch_size - 1) // batch_size

    start_index = 0
    while start_index < len(image_files):
        batch_files = image_files[start_index:start_index + batch_size]
        new_data = []

        for image_file in batch_files:
            try:
                image_path = os.path.join(image_directory, image_file)
                name_parts = image_file.split('_')
                code, color, num_ext = name_parts[0], name_parts[1], name_parts[2]
                # The values are unused, but the unpacking rejects names whose
                # third part is not exactly "<num>.<ext>" (raises ValueError).
                _num, _ext = num_ext.split('.')

                # Extract labels
                labels = get_image_labels(image_path)

                # Generate GPT-4 prompt and get product info
                prompt = generate_prompt_from_labels(labels, code, color)
                long_description, short_description, keywords, tags = generate_product_info(prompt)

                # Add to new_data list
                new_data.append({
                    'Code': code,
                    'Color': color,
                    'Long Description': long_description,
                    'Short Description': short_description,
                    'Keywords': keywords,
                    'Tags': tags,
                    'Filename': image_file
                })

            except IndexError:
                # The file is skipped, not retried, so say so.
                print(f"IndexError occurred for file: {image_file}. Skipping...")
                continue

            except Exception as e:
                print(f"Error processing file {image_file}: {str(e)}")
                continue  # Skip this file and continue to the next one

        if new_data:
            new_df = pd.DataFrame(new_data)
            # Drop stale rows for the files just processed so a reprocessed
            # batch (or a re-run over an existing CSV) does not duplicate them.
            if 'Filename' in df.columns:
                df = df[~df['Filename'].isin(new_df['Filename'])]
            df = pd.concat([df, new_df], ignore_index=True)

        # Save intermediate results to CSV
        df.to_csv(output_csv, index=False)

        print(f"Processed batch {start_index // batch_size + 1} / {total_batches}")

        # Pause mechanism (optional). On 'reprocess' the index is left
        # untouched so the batch that just finished runs again — this also
        # works for the very first batch.
        if _ask_batch_action() == 'reprocess':
            continue

        start_index += batch_size

    print("Processing complete!")




In [31]:

# Inputs for this run: the folder holding the product photos, the CSV that
# collects the generated catalog data, and how many images to handle between
# two saves/prompts.
# NOTE(review): image_directory is an absolute, machine-specific path.
image_directory = '/Users/njlalwani/Documents/GitHub/Ecomm-data/photo_scripts/shopify_images'
output_csv = 'product_info.csv'
batch_size = 200

# Kick off the batch run with the settings above.
process_images(image_directory, output_csv, batch_size=batch_size)



API Response: - Long Description:: This elegant navy dress showcases a beautifully designed one-piece silhouette. It hugs the body from neck to waist, subtly accentuating the curves. With an azure tone and superb fit at the joint, arm, and shoulder, it promises a fashion-forward statement.

- Short Description:: Sophisticated navy dress, perfectly fitted, embraces curves, enhancing arm, shoulder, and waist.

- Keywords:: Navy dress, one-piece silhouette, neck to waist fitting, shoulder and arm accentuating, azure tone, fashion-forward dress.

- Tags:: Navy, One-piece, Fitted, Azure, Elegant, Fashion-forward.
API Response: - Long Description:: This is a chic royal azure dress that perfectly covers your body from neck to joint. Styled with a shoulder and arm-baring design, this one-piece garment offers a striking blend of comfort and fashion. Ideal for those who Savor elegance with a hint of whimsy in their wardrobe.

- Short Description:: Elegantly designed royal dress perfect for fashi

In [None]:
# Milano Formals "TYPE" "CODE"