# Image Embedding

- Picture to text
    + https://platform.openai.com/docs/guides/vision/uploading-base-64-encoded-images
- Text to embedding
    + https://platform.openai.com/docs/guides/embeddings/what-are-embeddings?lang=python

In [1]:
import base64
import requests
import json
import os

In [2]:
# Plain-text file whose first line holds the OpenAI API key.
key_location = '/Users/silvi/Downloads/key-location/genaikey.txt'

# Only the first line is consumed; surrounding whitespace (including the
# trailing newline) is stripped before the key is used.
with open(key_location, 'r') as key_file:
    first_line = key_file.readline()
key = first_line.strip()

In [3]:
# Turn an image file into base64 text
def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode in one go, base64-encoded, and the
    resulting bytes are decoded to ``str`` so they can be embedded in JSON.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [4]:
# Local image files to describe. Raw strings keep the Windows backslashes
# from being interpreted as escape sequences.
image_path1 = r"C:\Users\silvi\Proton Drive\silvio.biagioni\My files\stdy\genai\mine\0002_Image_Embeddings\minesweeper.png"
image_path2 = r"C:\Users\silvi\Proton Drive\silvio.biagioni\My files\stdy\genai\mine\0002_Image_Embeddings\dish1.jpg"
image_path3 = r"C:\Users\silvi\Proton Drive\silvio.biagioni\My files\stdy\genai\mine\0002_Image_Embeddings\dish2.jpg"

# Base64 text for each image, in the same order as the paths above.
base64_image1, base64_image2, base64_image3 = [
    encode_image(one_path)
    for one_path in (image_path1, image_path2, image_path3)
]

In [5]:
# HTTP headers shared by every chat-completions request; `key` is the API key
# loaded from the key file earlier in the notebook.
headers = {
  "Content-Type": "application/json",
  "Authorization": f"Bearer {key}"
}

max_tokens = 25
prompt = "What’s in this image? Make sure your description is below " + str(max_tokens) + " tokens."
temperature = 0 # creativity on a scale of 0 to 2; generally 0.7

# Extension -> MIME type used to label the base64 data URL. An explicit table
# keeps the result independent of the host's MIME registry.
_IMAGE_MIME_BY_EXTENSION = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}


def _image_mime_type(image_path, default="image/jpeg"):
    """Return the MIME type implied by the extension of *image_path*.

    The lookup is case-insensitive. Unknown or missing extensions fall back
    to *default* ("image/jpeg", the label previously used for every image).
    """
    extension = os.path.splitext(image_path)[1].lower()
    return _IMAGE_MIME_BY_EXTENSION.get(extension, default)


def _build_vision_payload(base64_image, mime_type="image/jpeg"):
    """Build the chat-completions request body for describing one image.

    Args:
        base64_image: Base64 text of the image file.
        mime_type: MIME type declared in the data URL; it should match the
            actual image format (e.g. "image/png" for a PNG file).

    Returns:
        A dict with a single user message holding the shared text `prompt`
        and the image as a base64 data URL, plus the shared `temperature`
        and `max_tokens` settings.
    """
    return {
        "model": "gpt-4-turbo",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "temperature": temperature,
        "max_tokens": max_tokens
    }


# One payload per image. The MIME type now follows each file's extension, so
# the PNG screenshot is no longer mislabelled as JPEG.
payload1 = _build_vision_payload(base64_image1, _image_mime_type(image_path1))
payload2 = _build_vision_payload(base64_image2, _image_mime_type(image_path2))
payload3 = _build_vision_payload(base64_image3, _image_mime_type(image_path3))

In [6]:
# Chat-completions endpoint used to obtain a text description of each image.
CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions"

# requests applies no timeout by default, so a stalled connection would block
# this cell forever; fail after a bounded wait instead.
REQUEST_TIMEOUT_SECONDS = 60

# One request per image payload; each response is kept under its own name
# because later cells read response1..response3 individually.
response1 = requests.post(CHAT_COMPLETIONS_URL, headers=headers, json=payload1, timeout=REQUEST_TIMEOUT_SECONDS)
response2 = requests.post(CHAT_COMPLETIONS_URL, headers=headers, json=payload2, timeout=REQUEST_TIMEOUT_SECONDS)
response3 = requests.post(CHAT_COMPLETIONS_URL, headers=headers, json=payload3, timeout=REQUEST_TIMEOUT_SECONDS)

In [7]:
# Dump the raw JSON body of every chat-completions response for inspection.
for raw_response in (response1, response2, response3):
    print(raw_response.json())


def _first_choice_text(raw_response):
    """Return the message content of the first choice in *raw_response*."""
    body = raw_response.json()
    return body['choices'][0]['message']['content']


# The generated description for each image, in the order the images were sent.
content1 = _first_choice_text(response1)
content2 = _first_choice_text(response2)
content3 = _first_choice_text(response3)


for description in (content1, content2, content3):
    print(description)

{'id': 'chatcmpl-9HGrBFAPgRf35AgiSWKBiYvoqfbTw', 'object': 'chat.completion', 'created': 1713903401, 'model': 'gpt-4-turbo-2024-04-09', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'A screenshot of an online Minesweeper game in progress.'}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 448, 'completion_tokens': 12, 'total_tokens': 460}, 'system_fingerprint': 'fp_67e6987839'}
{'id': 'chatcmpl-9HGrM1Btp0wDewGKCpGvUZapH4UHU', 'object': 'chat.completion', 'created': 1713903412, 'model': 'gpt-4-turbo-2024-04-09', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'Steak with herb butter, mashed potatoes, and sautéed vegetables on a plate.'}, 'logprobs': None, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 788, 'completion_tokens': 19, 'total_tokens': 807}, 'system_fingerprint': 'fp_67e6987839'}
{'id': 'chatcmpl-9HGrY12JNWxyD3RA1Gc6AAD2yYbyH', 'object': 'chat.completion', 'created': 1713903424, 'model': 'gpt-4-turbo-2

In [8]:
import openai

# Hand the API key (read from the key file earlier in the notebook) to the
# openai module so its module-level calls, such as openai.embeddings.create,
# can authenticate.
openai.api_key = key

In [9]:
# Embed the three image descriptions with a single API call. The code below
# reads response.data[i] as the embedding of the i-th description.
descriptions = [content1, content2, content3]
response = openai.embeddings.create(
    model="text-embedding-3-small",
    input=descriptions,
)

# Show the full response object first, then each embedding vector on its own.
print(response)
for position in range(3):
    print(response.data[position].embedding)

CreateEmbeddingResponse(data=[Embedding(embedding=[-0.04447605460882187, -0.010080973617732525, 0.02108021266758442, 0.03228573128581047, -0.013620958663523197, -0.0037263003177940845, 0.04293230175971985, 0.0026932500768452883, -0.05333932861685753, 0.037449318915605545, 0.05690592899918556, -0.037076689302921295, 0.045088235288858414, -0.015850083902478218, 0.027015676721930504, 0.018498418852686882, -0.04865483567118645, -0.020295029506087303, 0.010433641262352467, 0.019483227282762527, 0.055149246007204056, -0.0035466393455863, 0.020893897861242294, 0.03444166108965874, 0.006960196886211634, -0.007618953473865986, -0.0010912736179307103, -0.007811922580003738, 0.011278712190687656, 0.026390191167593002, -0.011797732673585415, -0.0326051265001297, -0.01254299283027649, 0.0025718125980347395, 0.012702691368758678, -0.03638466075062752, -0.015836777165532112, 0.004794284701347351, -0.06563612073659897, 0.006580912508070469, -0.03066212870180607, 0.005366537719964981, -0.01533106435090

In [10]:
from scipy import spatial

In [14]:
# Show the three descriptions whose embeddings are compared below.
for description in (content1, content2, content3):
    print(description)

# blank separator line
print()

# Embedding vectors for content1, content2 and content3 respectively.
vector1 = response.data[0].embedding
vector2 = response.data[1].embedding
vector3 = response.data[2].embedding


def _report(label, value):
    """Print *label*, then *value* on the next line, and return *value*."""
    print(label)
    print(value)
    return value


# Euclidean (straight-line) distance for each pair of embeddings.
euclidean_distance1 = _report(
    'euclidean distance(content1, content2): ',
    spatial.distance.euclidean(vector1, vector2),
)
euclidean_distance2 = _report(
    'euclidean distance(content2, content3): ',
    spatial.distance.euclidean(vector2, vector3),
)
euclidean_distance3 = _report(
    'euclidean distance(content1, content3): ',
    spatial.distance.euclidean(vector1, vector3),
)

# blank separator line
print()

# Cosine similarity for each pair: scipy returns the cosine *distance*, so it
# is subtracted from 1 to get the similarity.
cosine_similarity1 = _report(
    'cosine similarity(content1, content2): ',
    1 - spatial.distance.cosine(vector1, vector2),
)
cosine_similarity2 = _report(
    'cosine similarity(content2, content3): ',
    1 - spatial.distance.cosine(vector2, vector3),
)
cosine_similarity3 = _report(
    'cosine similarity(content1, content3): ',
    1 - spatial.distance.cosine(vector1, vector3),
)

# blank separator line
print()

# Manhattan (city-block) distance for each pair of embeddings.
manhattan_distance1 = _report(
    'manhattan distance(content1, content2): ',
    spatial.distance.cityblock(vector1, vector2),
)
manhattan_distance2 = _report(
    'manhattan distance(content2, content3): ',
    spatial.distance.cityblock(vector2, vector3),
)
manhattan_distance3 = _report(
    'manhattan distance(content1, content3): ',
    spatial.distance.cityblock(vector1, vector3),
)






A screenshot of an online Minesweeper game in progress.
Steak with herb butter, mashed potatoes, and sautéed vegetables on a plate.
Swedish meatballs with mashed potatoes, lingonberries, cucumber, and gravy.

euclidean distance(content1, content2): 
1.2757970788983615
euclidean distance(content2, content3): 
1.1065673586457587
euclidean distance(content1, content3): 
1.3280063973663847

cosine similarity(content1, content2): 
0.18617090525029145
cosine similarity(content2, content3): 
0.38775431101214
cosine similarity(content1, content3): 
0.11819953708704412

manhattan distance(content1, content2): 
39.61442664311198
manhattan distance(content2, content3): 
34.43200101237653
manhattan distance(content1, content3): 
40.72957174465046
