# Image Embedding

- Picture to text
    + https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
- Text to embedding
    + https://platform.openai.com/docs/guides/embeddings/what-are-embeddings?lang=python

In [1]:
import base64
import requests
import json
import os

In [2]:
# Plain-text file whose first line holds the API key.
key_location = '/Users/silvi/Downloads/key-location/genaikey.txt'

# Only the first line is read; surrounding whitespace (including the
# trailing newline) is stripped before the value is stored in `key`.
with open(key_location, 'r') as key_file:
    first_line = key_file.readline()
key = first_line.strip()

In [3]:
# Function to encode the image
def encode_image(image_path):
    """Return the base64 text representation of the file at ``image_path``.

    The file is opened in binary mode and read in full; its bytes are
    base64-encoded and the encoded bytes are decoded as UTF-8, so the
    return value is a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [4]:
# Image locations. Raw string literals keep the Windows backslashes from
# being interpreted as escape sequences.
image_path1 = r"C:\Users\silvi\Proton Drive\silvio.biagioni\My files\stdy\genai\mine\0002_Image_Embeddings\minesweeper.png"
image_path2 = r"C:\Users\silvi\Proton Drive\silvio.biagioni\My files\stdy\genai\mine\0002_Image_Embeddings\dish1.jpg"
image_path3 = r"C:\Users\silvi\Proton Drive\silvio.biagioni\My files\stdy\genai\mine\0002_Image_Embeddings\dish2.jpg"

# Base64-encode each file, in the same order as the paths above.
base64_image1, base64_image2, base64_image3 = map(
    encode_image,
    (image_path1, image_path2, image_path3),
)

In [5]:
import mimetypes

# HTTP headers shared by every request: JSON body, bearer-token auth.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {key}",
}

max_tokens = 25
prompt = "What’s in this image? Make sure your description is below " + str(max_tokens) + " tokens."
temperature = 0 # creativity on a scale of 0 to 2; generally 0.7


def _image_data_url(image_path, base64_image):
    """Return a ``data:`` URL wrapping an already base64-encoded image.

    The MIME type is guessed from the file extension of ``image_path`` so
    that a PNG is not labelled as JPEG. "image/jpeg" (the value that was
    previously hard-coded for every image) is used when the extension is
    not recognised.
    """
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    return f"data:{mime_type};base64,{base64_image}"


def _vision_payload(image_path, base64_image):
    """Build the request body asking the model to describe one image.

    The body contains a single user message made of the shared text
    ``prompt`` followed by the image as a data URL, plus the shared
    ``temperature`` and ``max_tokens`` settings.
    """
    return {
        "model": "gpt-4-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": _image_data_url(image_path, base64_image),
                        },
                    },
                ],
            }
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }


# One payload per image; only the embedded image differs between them.
payload1 = _vision_payload(image_path1, base64_image1)
payload2 = _vision_payload(image_path2, base64_image2)
payload3 = _vision_payload(image_path3, base64_image3)

In [6]:
# Endpoint that receives each of the three vision payloads.
chat_completions_url = "https://api.openai.com/v1/chat/completions"

# requests applies no timeout unless one is given, so a stalled connection
# would block this cell forever; bound the wait (in seconds) explicitly.
request_timeout = 60

# The payloads are posted one after another, in order.
response1, response2, response3 = (
    requests.post(
        chat_completions_url,
        headers=headers,
        json=payload,
        timeout=request_timeout,
    )
    for payload in (payload1, payload2, payload3)
)

In [7]:
# Decode each response body once and reuse the result for both the raw
# dump and the content extraction below.
response_bodies = [
    http_response.json()
    for http_response in (response1, response2, response3)
]

# Raw dump of every decoded body.
for response_body in response_bodies:
    print(response_body)

# Blank line between the raw bodies and the extracted descriptions.
print()

# A body without a "choices" entry cannot yield a description; fail with
# a message that shows what came back instead of a bare KeyError.
for position, response_body in enumerate(response_bodies, start=1):
    if 'choices' not in response_body:
        raise RuntimeError(
            f"response{position} contains no 'choices': "
            f"{response_body.get('error', response_body)}"
        )

# Text of the first choice's message for each image.
content1, content2, content3 = (
    response_body['choices'][0]['message']['content']
    for response_body in response_bodies
)
print(content1)
print(content2)
print(content3)

{'id': 'chatcmpl-9HH58EI3agPE8JrwigAvsAyFireql', 'object': 'chat.completion', 'created': 1713904266, 'model': 'gpt-4-turbo-2024-04-09', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'A screenshot of an online Minesweeper game with a timer and smiley face.'}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 448, 'completion_tokens': 17, 'total_tokens': 465}, 'system_fingerprint': 'fp_67e6987839'}
{'id': 'chatcmpl-9HH5Jo3oYa2t4iY6XUF1ya3FpgeMN', 'object': 'chat.completion', 'created': 1713904277, 'model': 'gpt-4-turbo-2024-04-09', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'Steak with herb butter, mashed potatoes, and sautéed vegetables on a plate.'}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 788, 'completion_tokens': 19, 'total_tokens': 807}, 'system_fingerprint': 'fp_67e6987839'}
{'id': 'chatcmpl-9HH5UJHjr7gbfekZU4MSBuvyW4aL4', 'object': 'chat.completion', 'created': 1713904288, 'model

In [8]:
import openai

# Store the key read from the key file on the openai module; presumably
# this is what authenticates the module-level openai.embeddings.create
# call made later in this notebook (confirm against the installed version).
openai.api_key = key

In [13]:
# Embed the three image descriptions with a single request; the results
# are read back below by the same positions (0, 1, 2).
description_texts = [content1, content2, content3]
response = openai.embeddings.create(
    model="text-embedding-3-small",
    input=description_texts,
)

# print(response)

# Blank line before the preview.
print()

# Preview only the first 10 components of each embedding vector.
for position in range(3):
    print(response.data[position].embedding[:10])


[-0.026293493807315826, -0.001618022215552628, 0.005114874802529812, 0.02388967201113701, -0.013173756189644337, -0.003011530265212059, 0.04151320084929466, -0.011377641931176186, -0.02925100550055504, 0.02637452259659767]
[-0.026464182883501053, -0.03669813275337219, -0.07195545732975006, 0.018306689336895943, -0.02093403786420822, -0.01184425875544548, -0.012310400605201721, 0.02803211845457554, 0.025129321962594986, -0.002804800868034363]
[-0.05506456270813942, -0.013195100240409374, -0.027940167114138603, -0.01879333145916462, -0.02636980637907982, -0.02188306488096714, -0.009529228322207928, -0.005975524429231882, 0.017406519502401352, 0.0028297065291553736]


In [10]:
from scipy import spatial

In [11]:
# Show the three descriptions being compared, one per line.
print(content1, content2, content3, sep='\n')

# Blank separator line.
print()

# Fetch each embedding vector once; every metric below reuses them.
embedding1 = response.data[0].embedding
embedding2 = response.data[1].embedding
embedding3 = response.data[2].embedding

# Euclidean distance for each pair; label and value go on separate lines.
euclidean_distance1 = spatial.distance.euclidean(embedding1, embedding2)
print('euclidean distance(content1, content2): ', euclidean_distance1, sep='\n')

euclidean_distance2 = spatial.distance.euclidean(embedding2, embedding3)
print('euclidean distance(content2, content3): ', euclidean_distance2, sep='\n')

euclidean_distance3 = spatial.distance.euclidean(embedding1, embedding3)
print('euclidean distance(content1, content3): ', euclidean_distance3, sep='\n')

# Blank separator line.
print()

# Cosine similarity, obtained as 1 minus scipy's cosine distance.
cosine_similarity1 = 1 - spatial.distance.cosine(embedding1, embedding2)
print('cosine similarity(content1, content2): ', cosine_similarity1, sep='\n')

cosine_similarity2 = 1 - spatial.distance.cosine(embedding2, embedding3)
print('cosine similarity(content2, content3): ', cosine_similarity2, sep='\n')

cosine_similarity3 = 1 - spatial.distance.cosine(embedding1, embedding3)
print('cosine similarity(content1, content3): ', cosine_similarity3, sep='\n')

# Blank separator line.
print()

# Manhattan (city-block) distance for each pair.
manhattan_distance1 = spatial.distance.cityblock(embedding1, embedding2)
print('manhattan distance(content1, content2): ', manhattan_distance1, sep='\n')

manhattan_distance2 = spatial.distance.cityblock(embedding2, embedding3)
print('manhattan distance(content2, content3): ', manhattan_distance2, sep='\n')

manhattan_distance3 = spatial.distance.cityblock(embedding1, embedding3)
print('manhattan distance(content1, content3): ', manhattan_distance3, sep='\n')






A screenshot of an online Minesweeper game with a timer and smiley face.
Steak with herb butter, mashed potatoes, and sautéed vegetables on a plate.
Swedish meatballs with mashed potatoes, lingonberries, and cucumber salad.

euclidean distance(content1, content2): 
1.2942117705058542
euclidean distance(content2, content3): 
1.1080693530122256
euclidean distance(content1, content3): 
1.3200403548077118

cosine similarity(content1, content2): 
0.1625079439068794
cosine similarity(content2, content3): 
0.38609111736019286
cosine similarity(content1, content3): 
0.12874675126529112

manhattan distance(content1, content2): 
40.09568326443423
manhattan distance(content2, content3): 
34.397873350793816
manhattan distance(content1, content3): 
40.48731136177139
