## LLM Clip

https://github.com/simonw/llm-clip

requires LLM: https://llm.datasette.io/en/stable/

So: 

```
$ pip install llm
$ llm install llm-clip
```

Then, assuming you are working inside a virtual environment (I create mine with conda), find that environment's site-packages directory and, inside it, the `llm_clip.py` file. `/Users/shawngraham/mambaforge/envs/clip/lib/python3.10/site-packages` is where mine hides.

Change

```
if self._model is None:
   self._model = SentenceTransformer('clip-ViT-B-32')
```
to point to your new model, like so: 

```
    def embed_batch(self, items):
        # Embeds a mix of text strings and binary images
        if self._model is None:
            self._model = SentenceTransformer('/path/to/your/retrained-model')
```

The folder with your model should contain a `pytorch_model.bin` and a `config.json` inside a _subfolder_ called `0_CLIPModel`. You will also need the extra json files and so on from here: https://huggingface.co/sentence-transformers/clip-ViT-B-32/tree/main . You _need_ all those .json files, arranged the same way as in that repository. And since you're not otherwise futzing with the basic CLIP-ness, it should be ok.

Once you create your embeddings, they will be stored in your `~/Library/Application Support/io.datasette.llm` folder.

In [5]:
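# Turn your test images into embeddings: embed every file under testing/
# that matches '*.jpg' into the `photos` collection using the `clip` model.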
!llm embed-multi photos --files testing/ '*.jpg' --binary -m clip

Embedding  [####################################]  100%


In [72]:
# functions to look up images and captions from the results of llm search
import subprocess
import json
import IPython.display as display

def get_similar_ideas(query):
    """Return the stored photos most similar to a text query.

    Runs ``llm similar photos -c <query>`` and parses its standard output,
    which is read as one JSON document per line.

    Args:
        query: Free-text description to compare against the ``photos``
            collection.

    Returns:
        A list holding the parsed JSON value of every non-blank output line.

    Raises:
        subprocess.CalledProcessError: If the ``llm`` command exits non-zero.
        json.JSONDecodeError: If an output line is not valid JSON.
    """
    # Pass an argument list instead of a shell string: the query reaches
    # ``llm`` verbatim, so quotes, ``$`` or backticks inside it can neither
    # break the command line nor be interpreted by a shell.
    cmd = ['llm', 'similar', 'photos', '-c', query]

    # check_output returns bytes; decode once, then parse line by line.
    output_str = subprocess.check_output(cmd).decode()

    return [json.loads(line) for line in output_str.split('\n') if line.strip()]

def get_similar_images(query):
    """Return the stored photos most similar to a given image file.

    Runs ``llm similar photos -i <query> --binary`` and parses its standard
    output, which is read as one JSON document per line.

    Args:
        query: Path of the image file to compare against the ``photos``
            collection.

    Returns:
        A list holding the parsed JSON value of every non-blank output line.

    Raises:
        subprocess.CalledProcessError: If the ``llm`` command exits non-zero.
        json.JSONDecodeError: If an output line is not valid JSON.
    """
    # Pass an argument list instead of a shell string: the path reaches
    # ``llm`` verbatim, so spaces, quotes or ``$`` inside it can neither
    # break the command line nor be interpreted by a shell.
    cmd = ['llm', 'similar', 'photos', '-i', query, '--binary']

    # check_output returns bytes; decode once, then parse line by line.
    output_str = subprocess.check_output(cmd).decode()

    return [json.loads(line) for line in output_str.split('\n') if line.strip()]

def get_image_metadata(image_ids, metadata_path='test.json'):
    """Look up the captions of the given images in a JSON-lines file.

    Each non-blank line of the file is parsed as a JSON object with an
    ``image`` key and a ``caption`` key. Backslashes are stripped from the
    ``image`` value before it is compared with ``image_ids``.

    Args:
        image_ids: Iterable of image identifiers to look up, written the way
            they appear in the file once backslashes are removed.
        metadata_path: Path of the JSON-lines metadata file. Defaults to
            ``'test.json'``.

    Returns:
        A dict mapping each matching (backslash-free) image identifier to
        its caption. Identifiers that are not in the file are omitted.

    Raises:
        OSError: If the metadata file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``image`` key, or a matching record
            lacks the ``caption`` key.
    """
    # Build the lookup set once so each row costs an O(1) membership test.
    wanted = set(image_ids)
    metadata = {}
    with open(metadata_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            data = json.loads(line)
            image = data['image'].replace("\\", "")
            if image in wanted:
                metadata[image] = data['caption']

    return metadata

def _display_similar_results(results):
    """Render search results as HTML: thumbnail, score, caption and record link.

    Args:
        results: List of dicts as returned by ``get_similar_ideas`` /
            ``get_similar_images``; each must have an ``id`` and a ``score``.
    """
    # Local import: ``html`` is not imported at file level.
    from html import escape

    # The result ids lack the "testing/" folder prefix, which is needed both
    # for the caption lookup and for the <img> source path.
    captions = get_image_metadata(["testing/" + item['id'] for item in results])

    blocks = []
    for item in results:
        image_path = "testing/" + item['id']
        score = item['score']
        caption = captions.get(image_path, '')

        # The record URL uses the id without its '.jpg' extension.
        record_id = item['id'].split('.jpg')[0]
        url = "https://opencontext.org/media/" + record_id

        # Escape everything that comes from data files so a caption or id
        # containing '<', '&' or '"' cannot break the generated markup.
        blocks.append(
            f'<div><img src="{escape(image_path)}" width=25% alt="Image not found"> '
            f'<p><strong>Score:</strong> {score}</p>'
            f'<p><strong>Caption:</strong> {escape(str(caption))}</p>'
            f'<p><a href="{escape(url)}">Link to full record</a></p></div>'
        )
    display.display(display.HTML(''.join(blocks)))


def display_similar_ideas(query):
    """Search the photos with a text query and display the matches.

    Args:
        query: Free-text description passed to ``get_similar_ideas``.
    """
    _display_similar_results(get_similar_ideas(query))


def display_similar_images(query):
    """Search the photos with an image file and display the matches.

    Args:
        query: Path of the image file passed to ``get_similar_images``.
    """
    _display_similar_results(get_similar_images(query))

In [73]:
# Text search: display the embedded photos ranked against a free-text description.
query = "terracotta tile from 600 BCE"
display_similar_ideas(query)

In [71]:
# Image search: pass the path of a photo and display the embedded photos
# ranked against it.

query = "testing/b3911efe-1222-42ff-3c1f-3c38b9a096e7.jpg"
display_similar_images(query)