In [52]:

# Install the notebook's Python dependencies with the %pip magic.
# NOTE(review): versions are unpinned, so a re-run may pull different releases — consider pinning.
# NOTE(review): a later cell points pytesseract at a Tesseract executable; that binary is not among the packages listed here.
%pip install pytesseract pillow opencv-python pymupdf torch open_clip_torch sentence-transformers huggingface_hub

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Import required libraries

In [53]:
import os

import cv2
import fitz  # PyMuPDF
import open_clip
import pytesseract
import torch
from huggingface_hub import InferenceClient
from PIL import Image
from sentence_transformers import SentenceTransformer


Set up API keys & Tesseract

In [59]:

# Location of the Tesseract executable used by pytesseract. Set the
# TESSERACT_CMD environment variable to override it; the fallback is the
# Windows install path this notebook was originally written against.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)

# Hugging Face access token, read from the environment so the secret is never
# stored in the notebook. SECURITY: a literal token was previously committed in
# this cell; treat it as leaked and revoke it in the Hugging Face settings.
# The value is None when HF_TOKEN is unset.
HF_token = os.environ.get("HF_TOKEN")

 Load CLIP and text models 

In [55]:
# CLIP image/text encoder (ViT-B/32, OpenAI weights) plus its image preprocessing
# transform; the middle return value is discarded.
clip_model, _, preprocess = open_clip.create_model_and_transforms("ViT-B-32", pretrained="openai")
# The model is only used for inference in this notebook, so switch it to eval
# mode as the open_clip usage example does before encoding.
clip_model.eval()
# Tokenizer matching the CLIP text tower.
tokenizer = open_clip.get_tokenizer("ViT-B-32")
# Sentence-embedding model; loaded here but not referenced by the cells shown below.
text_model = SentenceTransformer("all-MiniLM-L6-v2")



Helper functions: logo OCR, logo color, PDF text extraction, CLIP embeddings, and similarity

In [56]:
def read_logo_text(path):
    """Extract the text printed on a logo image using Tesseract OCR.

    Args:
        path: Filesystem path of the logo image.

    Returns:
        The OCR result with leading and trailing whitespace stripped.

    Raises:
        FileNotFoundError: If OpenCV cannot load an image from ``path``.
    """
    image = cv2.imread(path)
    # cv2.imread reports a missing or undecodable file by returning None rather
    # than raising, so fail here with a message that names the offending path
    # instead of letting the OCR call choke on None.
    if image is None:
        raise FileNotFoundError(f"Could not read image file: {path!r}")
    return pytesseract.image_to_string(image).strip()

def get_logo_color(path):
    """Return a single representative color of the logo as a hex string.

    The image is shrunk to one pixel and that pixel's RGB value is formatted
    as ``#rrggbb``.

    Args:
        path: Filesystem path of the logo image.

    Returns:
        A lowercase ``#rrggbb`` string.
    """
    # The context manager closes the file handle that Image.open leaves open.
    with Image.open(path) as img:
        # Normalize to RGB first: RGBA, greyscale and palette images would
        # otherwise give getpixel a 4-tuple or a bare int and break the
        # three-way unpacking below.
        pixel = img.convert("RGB").resize((1, 1)).getpixel((0, 0))
    r, g, b = pixel
    return f"#{r:02x}{g:02x}{b:02x}"

def read_pdf_text(path):
    """Extract the plain text of every page of a PDF.

    Args:
        path: Filesystem path of the PDF document.

    Returns:
        The text of all pages concatenated in page order, with leading and
        trailing whitespace stripped.
    """
    # Open the document as a context manager so it is closed even when text
    # extraction fails partway through; the original never closed it.
    with fitz.open(path) as pdf:
        # join() avoids rebuilding the accumulated string for every page.
        text = "".join(page.get_text() for page in pdf)
    return text.strip()

def image_embedding(path):
    """Encode an image file into a unit-length CLIP feature vector.

    Args:
        path: Filesystem path of the image.

    Returns:
        The output of ``clip_model.encode_image`` for a one-image batch,
        divided by its L2 norm along the last dimension.
    """
    rgb_image = Image.open(path).convert("RGB")
    # Add a leading batch dimension to the preprocessed image.
    batch = preprocess(rgb_image).unsqueeze(0)
    with torch.no_grad():
        features = clip_model.encode_image(batch)
        unit_features = features / features.norm(dim=-1, keepdim=True)
    return unit_features

def text_embedding(text):
    """Encode a string into a unit-length CLIP text feature vector.

    Args:
        text: The text to embed; it is tokenized as a one-item batch.

    Returns:
        The output of ``clip_model.encode_text`` scaled in place to unit
        L2 norm along the last dimension.
    """
    token_batch = tokenizer([text])
    with torch.no_grad():
        embedding = clip_model.encode_text(token_batch)
        # In-place division, same as the `/=` form: normalizes each row.
        embedding.div_(embedding.norm(dim=-1, keepdim=True))
    return embedding

def compare_similarity(image_vec, text_vec):
    """Return the cosine similarity of two embeddings as a Python float.

    Args:
        image_vec: Tensor holding the image embedding.
        text_vec: Tensor holding the text embedding.

    Returns:
        The scalar similarity; ``.item()`` requires the result to contain
        exactly one element.
    """
    cosine = torch.nn.functional.cosine_similarity
    similarity = cosine(image_vec, text_vec)
    return similarity.item()

Hugging Face text generation

In [71]:
# Shared Hugging Face inference client bound to the Mistral instruct model.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.1",
    token=HF_token,
)

def hf_generate(prompt):
    """Send a prompt to the module-level client and return its generation.

    Args:
        prompt: The text prompt passed to ``client.text_generation``.

    Returns:
        Whatever ``client.text_generation`` returns for the prompt.
    """
    generation_options = {"max_new_tokens": 512, "temperature": 0.5}
    return client.text_generation(prompt, **generation_options)

Main Pipeline

In [76]:
def run_identity_check(logo_img, pdf_file, persona):
    """Build a brand-identity description from a logo, a PDF and a persona.

    Reads the logo text and color, extracts the PDF text, scores how well the
    logo image matches the combined PDF + persona text, and asks the language
    model for a brand identity based on all of that.

    Args:
        logo_img: Path of the logo image.
        pdf_file: Path of the PDF document.
        persona: Free-text description of the target audience.

    Returns:
        The model's response as returned by ``hf_generate`` on success. If the
        model call raises, a hard-coded sample ``dict`` with the keys
        ``brand_colors``, ``tone``, ``style``, ``audience`` and ``theme`` is
        returned instead, so callers must handle both shapes.
    """
    logo_text = read_logo_text(logo_img)
    color = get_logo_color(logo_img)
    pdf_text = read_pdf_text(pdf_file)

    # Combine for similarity
    # NOTE(review): CLIP text encoders have a fixed context length, so with a
    # long PDF the trailing persona is presumably truncated away before
    # encoding — confirm against the tokenizer's truncation behavior.
    combined_text = pdf_text + " " + persona
    image_vec = image_embedding(logo_img)
    text_vec = text_embedding(combined_text)
    score = compare_similarity(image_vec, text_vec)

    # Prepare prompt for the LLM (only the first 400 characters of the PDF
    # text are included).
    prompt = f"""
Generate a brand identity from:
- Logo Text: {logo_text}
- Color: {color}
- PDF Text: {pdf_text[:400]}
- Persona: {persona}
- Similarity: {round(score, 2)}

Return a JSON object with:
- brand_colors
- tone
- style
- audience
- theme
"""
    try:
        identity = hf_generate(prompt)
        print("\n Final Identity Output:\n")
        print(identity)
        return identity
    except Exception as e:
        # The best-effort fallback is deliberate, but the cause is now reported
        # so an expired token or unavailable model is not mistaken for a
        # genuine result.
        print("\n Hugging Face API call failed. Showing sample output instead.\n")
        print(f"Reason: {type(e).__name__}: {e}\n")
        sample_output = {
            "brand_colors": [color, "#ffffff", "#fbbc05"],
            "tone": "Professional, friendly, and modern",
            "style": "Clean, minimalistic, and contemporary",
            "audience": persona,
            "theme": "Innovation and simplicity"
        }
        print(sample_output)
        return sample_output

In [77]:
# Example run: a logo image, a PDF, and a target-audience persona.
run_identity_check(
    logo_img="logo.jpg",
    pdf_file="exp.pdf",
    persona="Young professionals who value clean and modern design.",
)


 Hugging Face API call failed. Showing sample output instead.

{'brand_colors': ['#dde1e4', '#ffffff', '#fbbc05'], 'tone': 'Professional, friendly, and modern', 'style': 'Clean, minimalistic, and contemporary', 'audience': 'Young professionals who value clean and modern design.', 'theme': 'Innovation and simplicity'}


{'brand_colors': ['#dde1e4', '#ffffff', '#fbbc05'],
 'tone': 'Professional, friendly, and modern',
 'style': 'Clean, minimalistic, and contemporary',
 'audience': 'Young professionals who value clean and modern design.',
 'theme': 'Innovation and simplicity'}