# After using the previous code to test ideas we can draft a better list with a better prompt

We need more information about each sample. Each sample has a label with a Brand e.g. Pollack, Opuzen, ZAK+FOX - a product name e.g. Bedford, Manu, Voyager - a product code e.g. 4185-01, #4214-4001, ZFVOYWN-01 - and sometimes a color and some written notes. Please create a comma-separated file listing in the columns Brand, product name, product code, color, notes for each sample found, group the rows by brand. If you cannot find an item for a column enter unknown. Here are the three images.

<img src="images/250513 LDK Back.jpg" style="height:200px;"> <img src="images/IMG_4795.jpeg" style="height:200px;"> <img src="images/IMG_4797.jpeg" style="height:200px;">

# Open the CSV file that ChatGPT produces using VS Code

# This is helpful but needs editing. Edited version below.

# We can make this better later. Let's move on to find these images.

# Ask ChatGPT to write a program

Write a python script to run in a Jupyter notebook that reads the materials.csv file with Brand, product name, product code, color and hand written notes, looks for and downloads the brand image, looks for and downloads an image of that material (using a combination of brand, product name, product code and color when available and not unknown) saving the material images in a directory called images with names that are a unique combination of brand and product name. Then prints an image of the fabric in the left hand column of a two column table and in the right hand column of that table print the brand image above on one line each the product name, product number, color, and any hand written notes. Output this table in a PDF document.

# Try again

Write a python script to run in a Jupyter notebook that reads the materials.csv file with Brand, product name, product code, color and hand written notes, looks for and downloads the brand image, looks for and downloads an image of that material (using a combination of brand, product name, product code and color when available and not unknown) saving the material images in a directory called images with names that are a unique combination of brand and product name. Then prints an image of the fabric in the left hand column of a two column table and in the right hand column of that table print the brand image above on one line each the product name, product number, color, and any hand written notes. Output this table in a PDF document. Use a web scraping approach using Python BeautifulSoup libraries.

In [None]:
!pip install requests beautifulsoup4 fpdf pillow pandas

The output should be either a list of "Requirement already satisfied" messages or a line similar to:

Successfully installed fpdf-1.7.2 numpy-2.3.1 pandas-2.3.1 pillow-11.3.0 pytz-2025.2 tzdata-2025.2

In [None]:
import pandas as pd
import os
import requests
from PIL import Image
from io import BytesIO
from bs4 import BeautifulSoup
from fpdf import FPDF
import urllib.parse
import time

# === SETTINGS ===
# Header sent with every HTTP request made by this notebook.
# NOTE(review): presumably a browser-like User-Agent is needed because some
# sites reject the default client identifier - confirm.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Input table of materials, and the folder that receives downloaded images.
csv_path = 'data/materials.csv'
image_dir = 'data/images/part3/'

# Create the image folder up front; an already existing folder is not an error.
os.makedirs(image_dir, exist_ok=True)

# === REPLACE NON-LATIN-1 CHARACTERS ===
def sanitize_text(text):
    """Return text restricted to the Latin-1 character set.

    Characters that cannot be encoded as Latin-1 are replaced with "?".
    Anything that is not a string (None, numbers, ...) yields "".
    """
    if isinstance(text, str):
        latin_bytes = text.encode('latin-1', errors='replace')
        return latin_bytes.decode('latin-1')
    return ""

# === SEARCH FUNCTION (GOOGLE IMAGES) ===
def google_image_search(query):
    """Return the URL of the first image found on a Google Images result page.

    The result page for ``query`` is fetched and parsed with BeautifulSoup;
    the first ``<img>`` whose ``src`` starts with "http" is returned.

    Args:
        query: Free-text search terms.

    Returns:
        The image URL as a string, or None when the request fails or no
        suitable ``<img>`` tag is present.
    """
    search_url = f"https://www.google.com/search?tbm=isch&q={urllib.parse.quote_plus(query)}"
    try:
        # A timeout keeps a stalled connection from hanging the notebook cell.
        resp = requests.get(search_url, headers=HEADERS, timeout=10)
        # Do not parse an HTTP error page as if it were a result page.
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')
        for img in soup.find_all('img'):
            src = img.get('src')
            # Skip tags without an absolute http(s) source (e.g. inline data).
            if src and src.startswith('http'):
                return src
    except Exception as e:
        # Best effort: report the problem and let the caller carry on.
        print(f"Error searching for '{query}': {e}")
    return None

# === DOWNLOAD FUNCTION ===
def download_image(url, save_path):
    """Download an image and save it to disk converted to RGB.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path passed to Pillow's ``Image.save``.

    Returns:
        True when the image was fetched, decoded and written; False on any
        failure (the reason is printed).
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=10)
        # Report HTTP failures as such, rather than letting Pillow fail later
        # on the body of an error page.
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert("RGB")
        img.save(save_path)
        return True
    except Exception as e:
        # Best effort: a single failed download must not stop the whole run.
        print(f"Download failed for {url}: {e}")
        return False

# === LOAD CSV ===
# Read every column as text so product codes keep their exact spelling, and
# keep empty cells as "" instead of NaN: str(NaN) would otherwise put the
# literal text "nan" into the search queries and into the PDF.
df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)

# === BRAND LOGO CACHE ===
# Maps brand -> path of its logo file, or None when no logo could be fetched.
brand_logos = {}


def _safe_filename(text):
    """Return text with spaces and slashes replaced for use in a file name."""
    return text.replace(" ", "_").replace("/", "-")


def _is_known(value):
    """Return True when a CSV value is neither empty nor the word 'unknown'."""
    return bool(value) and value.lower() != 'unknown'


# One dict per CSV row, holding everything needed to lay out a PDF page.
records = []
for _, row in df.iterrows():
    brand = str(row['Brand']).strip()
    product_name = str(row['Product Name']).strip()
    product_code = str(row['Product Code']).strip()
    color = str(row['Color']).strip()
    notes = str(row['Notes']).strip()

    # Filenames
    # NOTE: rows sharing brand and product name share one material image,
    # even when their colour or product code differs.
    safe_name = _safe_filename(f"{brand}_{product_name}")
    material_img_path = os.path.join(image_dir, f"{safe_name}_material.jpg")
    brand_logo_path = os.path.join(image_dir, f"{_safe_filename(brand)}_logo.jpg")

    # Brand logo: resolved once per brand, and reused from disk on later runs
    # in the same way as the material images below.
    if brand not in brand_logos:
        if os.path.exists(brand_logo_path):
            brand_logos[brand] = brand_logo_path
        else:
            print(f"Searching logo for {brand}...")
            logo_url = google_image_search(f"{brand} logo")
            if logo_url and download_image(logo_url, brand_logo_path):
                brand_logos[brand] = brand_logo_path
            else:
                brand_logos[brand] = None
            time.sleep(2)  # pause between searches

    # Material image: search only with the values that are actually known.
    search_terms = [
        term
        for term in (brand, product_name, product_code, color)
        if _is_known(term)
    ]
    fabric_query = " ".join(search_terms) + " fabric"

    if search_terms and not os.path.exists(material_img_path):
        print(f"Searching material image for {fabric_query}...")
        img_url = google_image_search(fabric_query)
        if img_url:
            download_image(img_url, material_img_path)
        time.sleep(2)  # pause between searches

    # The path is recorded even when the download failed; the PDF step draws
    # a placeholder for images it cannot load.
    records.append({
        "material_img": material_img_path,
        "brand_logo": brand_logos[brand],
        "brand": brand,
        "product_name": product_name,
        "product_code": product_code,
        "color": color,
        "notes": notes
    })

# === PDF GENERATION ===
# Single document that receives one page per material record.
pdf = FPDF()
# Start a new page automatically when content comes within 15 units of the
# bottom edge.
pdf.set_auto_page_break(True, margin=15)

def insert_image(pdf, path, width, height):
    """Place an image at the current PDF position, or a placeholder box.

    Args:
        pdf: Document object providing ``image``, ``rect``, ``get_x`` and
            ``get_y``.
        path: Path of the image file to insert.
        width: Width of the image (or placeholder).
        height: Height of the image (or placeholder).

    When the image cannot be inserted (for example the file is missing or
    unreadable) the reason is printed and an empty rectangle of the same size
    is drawn instead.
    """
    try:
        pdf.image(path, w=width, h=height)
    except Exception as e:
        # Catch Exception rather than everything, so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop the run.
        print(f"Could not place image {path}: {e}")
        pdf.rect(pdf.get_x(), pdf.get_y(), width, height)  # fallback placeholder

# Page geometry, identical for every page.
margin = 10
page_width = pdf.w - 2 * margin
col_width = page_width / 2
img_height = 70
logo_height = 20
right_x = margin + col_width

for rec in records:
    pdf.add_page()

    # Remember where the row starts. pdf.image() called without an explicit y
    # moves the cursor to the bottom of the image, so each column has to be
    # positioned from this saved value to end up side by side.
    top_y = pdf.get_y()

    # Left: Fabric Image
    pdf.set_xy(margin, top_y)
    insert_image(pdf, rec['material_img'], width=col_width, height=img_height)

    # Right: Brand Logo + Text
    if rec['brand_logo']:
        pdf.set_xy(right_x, top_y)
        insert_image(pdf, rec['brand_logo'], width=col_width, height=logo_height)
        text_y = top_y + logo_height + 2  # small gap under the logo
    else:
        text_y = top_y + 5

    # Position the text explicitly instead of relying on the cursor, which
    # may or may not have been advanced by insert_image.
    pdf.set_xy(right_x, text_y)
    pdf.set_font("Arial", size=10)
    pdf.multi_cell(
        col_width,
        8,
        # The text is restricted to Latin-1 before it is written.
        sanitize_text(f"Product Name: {rec['product_name']}\n"
                      f"Product Code: {rec['product_code']}\n"
                      f"Color: {rec['color']}\n"
                      f"Notes: {rec['notes']}"),
    )

# === SAVE PDF ===
pdf_path = "data/fabric_catalog_part3.pdf"
pdf.output(pdf_path)
# Report the path that was really written.
print(f"✅ PDF saved as {pdf_path}")

# You will need a PDF reader
* Search and install the KDAN PDF Reader - View and Edit PDF
* Or download and install the free Adobe Acrobat Reader