# Creating Vector Embeddings

You'll need an OpenAI API key to use their models. Install the OpenAI Python package and `requests` if you haven't already; the example below calls the embeddings REST endpoint directly with `requests`.

In [25]:
!pip install openai
!pip install requests



## Import the required libraries

In [26]:
import json
import os

import requests

## Set Your OpenAI API Key

In [27]:
# Read the key from the OPENAI_API_KEY environment variable so it never has to
# be pasted into (and saved with) the notebook. Falls back to an empty string
# when the variable is not set.
api_key = os.environ.get("OPENAI_API_KEY", "")

## OpenAI's Format to Generate Text Embeddings

This example uses the `text-embedding-ada-002` embedding model.

In [33]:
def generate_embeddings(text, model="text-embedding-ada-002", timeout=30):
    """Request embeddings for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: Input passed through unchanged as the ``input`` field of the
            request body.
        model: Name of the embedding model to request. Defaults to the model
            this notebook was written against.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status (for
            example when the key is missing or invalid), instead of handing
            the error payload back as if it were a result.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "input": text,
        "model": model,
        "encoding_format": "float"
    }

    # `json=` lets requests serialise the payload itself.
    response = requests.post(
        "https://api.openai.com/v1/embeddings",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Fail loudly on an error status rather than returning an error document.
    response.raise_for_status()
    return response.json()

## Text to Convert into Embeddings

In [34]:
# Embed a sample sentence with the generate_embeddings helper and print the
# decoded JSON response it returns.
text = "Pavan is a developer evangelist"
embeddings = generate_embeddings(text)
print(embeddings)

{'object': 'list', 'data': [{'object': 'embedding', 'index': 0, 'embedding': [0.020013444, -0.00961902, 0.021550883, -0.0055682026, -0.018155148, 0.012339619, -0.023877095, -0.019197933, -0.024010785, -0.015240699, 0.028502781, 0.020427885, 0.024304904, -0.018649803, 0.0011129722, -0.0048295637, 0.01951879, -0.01842253, 0.007854308, -0.021336978, -0.017272793, -0.0072994926, -0.013449249, -0.0022777491, -0.01647065, 0.0101404125, 0.007627034, -0.0155615555, 0.0063502914, 0.00040274215, 0.023984047, -0.0065708803, 0.011209935, -0.01078881, -0.0052139233, -0.035080347, -0.01909098, 0.0019351677, 0.011129721, 0.0035628476, 0.024024155, 0.008602973, 0.003348943, -0.023422549, -0.0044552307, 0.010635067, 0.004796141, -0.0051537626, -0.011250042, 0.0061597824, 0.008475968, -0.0022109041, -0.0027974704, 0.0037667253, -0.015320913, -0.011303519, -0.009886401, 0.029599043, -0.013522778, -0.0026738069, -0.0022927893, 0.026002772, -0.0037199338, 0.0086497655, -0.025655176, 0.015788829, 0.01209897

# Creating Embeddings using Cohere

In [39]:
!pip install cohere

Collecting cohere
  Downloading cohere-4.46-py3-none-any.whl.metadata (6.0 kB)
Collecting aiohttp<4.0,>=3.0 (from cohere)
  Downloading aiohttp-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.4 kB)
Collecting fastavro<2.0,>=1.8 (from cohere)
  Downloading fastavro-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting aiosignal>=1.1.2 (from aiohttp<4.0,>=3.0->cohere)
  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)
Collecting frozenlist>=1.1.1 (from aiohttp<4.0,>=3.0->cohere)
  Downloading frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp<4.0,>=3.0->cohere)
  Downloading multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)
Collecting yarl<2.0,>=1.0 (from aiohttp<4.0,>=3.0->cohere)
  Downloading yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux

## Set the Cohere API Key and Generate Embeddings

In [63]:
import cohere

# Read the key from the COHERE_API_KEY environment variable so it never has to
# be pasted into the notebook; the original placeholder string is kept as the
# fallback when the variable is not set.
co = cohere.Client(os.environ.get("COHERE_API_KEY", "add your cohere api key"))

# Request an embedding for a single sentence and print the response object.
response = co.embed(
  texts=['Pavan is a developer evangelist'],
  model='embed-english-v3.0',
  input_type='classification'
)
print(response)

cohere.Embeddings {
	response_type: embeddings_floats
	embeddings: [[-0.02645874, -0.029937744, -0.018249512, -0.04788208, -0.029525757, 0.0019321442, -0.042419434, 0.040222168, 0.015777588, 0.007713318, -0.04574585, 0.036590576, -0.02319336, -0.020614624, 0.022125244, -0.019012451, -0.020050049, 0.014678955, 0.00793457, -0.028137207, 0.048431396, -0.047576904, 0.0012464523, -0.036956787, 0.03463745, 0.026489258, -0.0022621155, 0.001748085, -0.004333496, -0.010414124, 0.013580322, 0.016571045, 0.0236969, -0.06939697, 0.05239868, -0.0021152496, -0.023452759, -0.022903442, -0.002708435, -0.00390625, 0.024963379, -0.008338928, 0.00073337555, -0.0602417, -0.036315918, 0.04849243, -0.04711914, 0.008598328, 0.02507019, -0.07293701, -0.011817932, 0.051727295, 0.013626099, -0.023513794, -0.027114868, -0.0043945312, -0.009811401, -0.053100586, -0.012680054, 0.06112671, 0.040161133, 0.015472412, 0.0074424744, -0.015945435, -0.016143799, 0.009292603, 0.019927979, 0.021957397, 0.03942871, -0.03485

In [231]:
# Reuses the `co` client created in the previous Cohere cell instead of
# re-importing the package and constructing a second client with another
# hard-coded key.
response = co.embed(
  texts=['Rohit is a project manager'],
  model='embed-english-v3.0',
  input_type='classification'
)
print(response)

cohere.Embeddings {
	response_type: embeddings_floats
	embeddings: [[-0.021774292, -0.007987976, 0.012161255, -0.038848877, -0.04309082, 0.039031982, -0.089538574, -0.02355957, -0.06384277, 0.0045814514, 0.04046631, 0.033721924, -0.032562256, -0.046844482, 0.053894043, -0.040527344, -0.015777588, 0.030014038, 0.021057129, -0.024291992, 0.053344727, -0.026138306, -0.032592773, -0.0362854, 0.055541992, 0.005065918, -0.08886719, -0.024459839, -0.004070282, -0.0042648315, -0.00166893, 0.06274414, -0.019454956, -0.009803772, 0.047912598, -0.052124023, 0.062683105, -0.008903503, 0.01777649, 0.009918213, 0.019927979, -0.0061302185, -0.037475586, -0.04345703, -0.037200928, 0.025741577, -0.021392822, 0.0014190674, -0.0010576248, 0.008384705, 0.001739502, -0.01361084, 0.026397705, -0.07318115, -0.01550293, -0.009262085, -0.020584106, 0.026885986, 0.017822266, 0.032409668, -0.011795044, 0.032165527, -0.005432129, -0.024536133, -0.008804321, -0.0418396, 0.043121338, 0.017150879, 0.00093221664, -0.

# Creating Embeddings using HuggingFace

Install the required libraries

In [67]:
!pip install transformers torch

Collecting transformers
  Downloading transformers-4.37.2-py3-none-any.whl.metadata (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch
  Downloading torch-2.2.0-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)
Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)
  Downloading huggingface_hub-0.20.3-py3-none-any.whl.metadata (12 kB)
Collecting tokenizers<0.19,>=0.14 (from transformers)
  Downloading tokenizers-0.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting sympy (from torch)
  Downloading sympy-1.12-py3-none-any.whl (5.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.7/5.7 MB[0m [31m140.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hCollecting networ

## HuggingFace Format to Create Embeddings

In [72]:
from transformers import AutoTokenizer, AutoModel
import torch

# Loaded (tokenizer, model) pairs keyed by model name, so repeated calls do not
# reload the model from disk or the network each time.
_HF_MODEL_CACHE = {}


def _load_hf_model(model_name):
    """Return the (tokenizer, model) pair for ``model_name``, loading it on first use."""
    if model_name not in _HF_MODEL_CACHE:
        _HF_MODEL_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModel.from_pretrained(model_name),
        )
    return _HF_MODEL_CACHE[model_name]


def get_huggingface_embedding(text, model_name='sentence-transformers/all-MiniLM-L6-v2'):
    """Embed ``text`` by mean-pooling the model's last hidden state.

    Args:
        text: Input handed to the tokenizer. With more than one sequence the
            tokenizer pads to a common length (``padding=True``).
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        A NumPy array holding the pooled embedding(s), with size-1 dimensions
        removed by ``squeeze()``.
    """
    tokenizer, model = _load_hf_model(model_name)

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling weighted by the attention mask: padded positions are
    # excluded so they cannot skew the average when inputs are batched. With
    # no padding the mask is all ones and this is a plain mean over tokens.
    hidden = outputs.last_hidden_state
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    summed = (hidden * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1)  # guard against division by zero
    embeddings = (summed / counts).squeeze().numpy()
    return embeddings

# Example usage: embed one sentence with the default model_name and print the
# array returned by get_huggingface_embedding.
text = "Pavan is a developer evangelist."
embedding_huggingface = get_huggingface_embedding(text)
print(embedding_huggingface)

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

[-3.11155856e-01  1.21053746e-02 -7.91178495e-02 -5.49541473e-01
 -2.40571097e-01 -5.81769682e-02 -5.66864461e-02 -1.75549850e-01
 -1.47690214e-02  3.50884229e-01 -9.87588167e-02  3.75869334e-01
 -1.42508462e-01 -3.27809677e-02  6.36595301e-03  2.15077385e-01
 -6.92793787e-01 -1.18672915e-01  4.95419174e-01 -3.03096622e-01
 -1.03327595e-01  5.82211316e-01  2.53503695e-02  8.69146883e-02
  1.15777321e-01  1.97696071e-02  1.20102063e-01 -2.78640628e-01
  3.48292142e-01 -6.49680048e-02  6.08609729e-02 -1.89587653e-01
 -1.91119641e-01  1.08277984e-01 -2.03703225e-01  5.04730940e-01
 -2.84284651e-01  4.40510571e-01  1.11233048e-01  1.17239408e-01
  2.00541154e-01 -4.46545959e-01 -6.28000498e-01 -3.16739559e-01
  2.17059236e-02 -3.46851557e-01 -3.23967069e-01 -3.27198297e-01
 -1.75462276e-01 -3.01104039e-01  1.00595079e-01 -3.74577194e-01
 -2.03733309e-03 -6.60950422e-01 -3.16171423e-02 -3.09775144e-01
 -1.31165698e-01  3.75441194e-01  1.81388453e-01 -1.14097036e-02
  7.14505076e-01  2.39808

In [100]:
# Embed another sentence with the same helper; this rebinds `text` and
# `embedding_huggingface`.
text = "John is a developer."
embedding_huggingface = get_huggingface_embedding(text)
print(embedding_huggingface)

[-3.53741705e-01  7.31846318e-02  3.55827093e-01 -4.35869724e-01
 -3.21012735e-01  8.88969526e-02 -5.36415316e-02  8.19621142e-03
 -1.18343703e-01  5.20700812e-02 -7.05919489e-02  5.86628258e-01
 -2.77895760e-02 -1.85495615e-03 -1.74233645e-01  1.97413817e-01
 -3.40605557e-01  5.95643334e-02  2.43644506e-01 -7.58853912e-01
 -4.40034904e-02  2.25569606e-01 -1.10292688e-01 -4.36388046e-01
  6.60439849e-01  1.01330884e-01  1.42671997e-02  3.02981377e-01
  2.60640860e-01 -2.75635391e-01 -4.41124171e-01  2.25096345e-01
  3.43015730e-01 -1.02790676e-01  9.17843427e-04  8.74177217e-02
 -2.01224014e-01  1.73391223e-01 -8.16869587e-02 -2.56950200e-01
 -1.11163378e-01 -2.58494586e-01  3.03782016e-01  3.38299036e-01
 -3.01333338e-01 -3.64149123e-01 -4.19720709e-02 -4.67647731e-01
 -1.32392421e-01 -1.48101579e-02 -6.14700317e-01 -5.12114704e-01
  2.30360241e-03 -4.93082017e-01  3.71704102e-02  1.16371281e-01
  8.29615593e-02  3.86770666e-01  7.43414834e-02 -2.69728690e-01
 -1.03865646e-01  3.03962

In [101]:
# Embed a different sentence with the same helper and print the resulting array.
text = "Developer are smart."
embedding_huggingface = get_huggingface_embedding(text)
print(embedding_huggingface)

[-3.85963507e-02 -2.97152251e-01  4.50984865e-01 -2.88934354e-02
 -2.05785111e-01 -5.55878639e-01  1.30690530e-01  1.29803732e-01
 -2.76777390e-02  4.22587246e-01 -2.09965602e-01  3.47569704e-01
 -1.64835423e-01 -2.86722630e-01 -1.41215846e-01  4.11229849e-01
 -5.12525976e-01 -6.17220581e-01 -3.41716893e-02 -5.05379379e-01
 -4.13461536e-01 -1.99794531e-01  2.37722043e-02 -1.84097707e-01
  5.34079492e-01  1.50557570e-02 -4.62071933e-02 -2.49347940e-01
  4.86596435e-01  2.30830669e-01 -1.36461437e-01  3.72113377e-01
  1.96092740e-01  1.94022581e-01 -3.61062318e-01  2.84182966e-01
 -3.66158155e-03  2.21103683e-01  1.06840380e-01 -4.48725700e-01
 -3.08008313e-01 -5.97257912e-01 -5.00339508e-01  2.04436243e-01
 -2.67410606e-01 -3.74610871e-01 -7.30463043e-02 -5.92532158e-01
 -4.30089951e-01  5.49809635e-02 -4.10362124e-01 -5.32065332e-01
 -2.45401382e-01 -3.19519073e-01  1.87077805e-01 -2.53512096e-02
  1.19143270e-01 -7.10581318e-02  4.72998261e-01  4.91511852e-01
  8.80949125e-02 -1.02271

In [116]:
# Embed a single word rather than a full sentence and print the resulting array.
text = "Developer"
embedding_huggingface = get_huggingface_embedding(text)
print(embedding_huggingface)

[-3.67827088e-01 -9.28338841e-02  6.64375350e-02  8.24825540e-02
 -1.00981958e-01 -7.01574504e-01  6.00585043e-01  3.36968929e-01
  2.55397763e-02  6.58296570e-02 -4.24138218e-01 -3.39177608e-01
 -2.17380166e-01 -1.45957902e-01 -2.70177245e-01  6.03788793e-01
 -5.54828405e-01 -2.42935851e-01  1.75660048e-02 -8.15645695e-01
 -7.12874234e-01  1.28358886e-01 -2.15946361e-01 -1.26122549e-01
  5.51421583e-01  5.80849499e-02  1.73276857e-01  1.32784843e-01
  1.00601983e+00 -6.64004028e-01 -4.12436634e-01  1.95678651e-01
  4.97394800e-01  3.20329189e-01 -2.38409504e-01  1.40269265e-01
  3.10970377e-02 -2.92164296e-01  1.43453255e-01 -2.25843653e-01
 -5.20592153e-01 -3.67669970e-01 -4.98074055e-01  1.53945282e-01
 -1.93539575e-01 -3.33719611e-01  1.21302955e-01 -1.77818045e-01
 -2.90710241e-01  5.83569892e-02  2.11994097e-01 -7.23151863e-01
 -4.31678802e-01 -3.30952942e-01 -1.25866935e-01 -2.82005578e-01
  2.45360181e-01  1.84061661e-01  4.47753876e-01  1.16807580e-01
  4.26553577e-01 -5.21505