## Create a simple index with a knn_vector field

curl --request PUT "https://localhost:9200/article_index" \
     --header 'Content-Type: application/json' \
     --insecure \
     --user 'admin:admin' \
     --data '
{
   "settings": {
      "index.knn": true
   },
   "mappings": {
      "properties": {
         "vector_field": {
            "type": "knn_vector",
            "dimension": 768,
            "method": {
            "name": "hnsw",
            "space_type": "l2",
            "engine": "lucene"
            }
         },
         "title": {
            "type": "text"
         },
         "news_id": {
            "type": "text"
         },
         "text": {
            "type": "text"
         },
         "url": {
            "type": "text"
         }
      }
   }
}
'

## Load the test data

In [None]:
import pandas as pd

# Read the test articles from the CSV file, then show the first rows as a
# quick sanity check of the loaded columns.
test_data_path = 'data/test_data.csv'
df = pd.read_csv(test_data_path)
df.head()


## Create embeddings for the text

In [None]:
import requests

def get_embeddings(text, url='http://localhost:80/embeddings', timeout=60):
    """Request the embedding vector for ``text`` from the embedding service.

    Sends ``{"text": text}`` as the JSON body of a POST request to ``url``
    and returns the ``emb_vector`` entry of the JSON response.

    Args:
        text: The text to embed.
        url: Endpoint of the embedding service. Defaults to the local
            service used throughout this notebook.
        timeout: Seconds to wait for the service, forwarded to
            ``requests.post``. Pass ``None`` to wait indefinitely.

    Returns:
        The value stored under ``emb_vector`` in the response JSON.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.Timeout: If the service does not answer within ``timeout``.
        KeyError: If the response JSON has no ``emb_vector`` entry.
    """
    # Without a timeout a stalled service would block the whole notebook.
    response = requests.post(url, json={"text": text}, timeout=timeout)
    # Surface HTTP errors directly instead of as a confusing JSON/KeyError
    # failure on the line below.
    response.raise_for_status()
    return response.json()['emb_vector']

# Embed every article text and store the result in a new ``emb`` column.
# Only the ``text`` column is needed, so apply the function to that column
# directly rather than row-wise over the whole frame (``axis=1``), which
# materialises a Series per row just to read one field.
df['emb'] = df['text'].apply(get_embeddings)

In [None]:
# Build one bulk "index" action per row of ``df`` for the ``article_index``
# index. Rows are walked positionally (zip over the columns) instead of via
# label lookups such as ``df.emb[i]``, which only work while the frame still
# has its default 0..n-1 index. The row position is used as the document id.
actions = [
    {
        "_op_type": "index",
        "_index": "article_index",
        "_id": position,
        "_source": {
            "vector_field": emb,
            "title": title,
            "news_id": news_id,
            "text": text,
            "url": url,
        },
    }
    for position, (emb, title, news_id, text, url) in enumerate(
        zip(df["emb"], df["title"], df["news_id"], df["text"], df["url"])
    )
]

## Insert the documents into the OpenSearch index

In [None]:
from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk

# Connection details for the OpenSearch node that holds ``article_index``.
host = 'localhost'
port = 9200
auth = ('admin', 'admin')

# Client settings, collected in one mapping so each option can be annotated.
client_options = {
    'hosts': [{'host': host, 'port': port}],
    'http_compress': True,  # enables gzip compression for request bodies
    'http_auth': auth,
    'use_ssl': True,
    # Certificate and hostname verification are switched off and the related
    # warnings silenced.
    # NOTE(review): presumably because the local node uses a self-signed
    # certificate -- confirm before pointing this at a real cluster.
    'verify_certs': False,
    'ssl_assert_hostname': False,
    'ssl_show_warn': False,
}
client = OpenSearch(**client_options)

# Send every prepared action through the bulk helper; its two-item result is
# unpacked into ``success`` and ``failed``.
success, failed = bulk(client, actions)