In [None]:
from elasticsearch import Elasticsearch
from elasticsearch_llm_cache import ElasticsearchLLMCache, ElasticsearchLLMFilter
from elasticsearch.exceptions import NotFoundError

# common libraries
from dotenv import load_dotenv
import os
from os import environ
import openai
from icecream import ic

# load our environment file
load_dotenv()

# Percent-encode the credentials before embedding them in the URL: a user name
# or password containing characters such as "@", ":" or "/" would otherwise
# change how the URL is split into userinfo / host / port.
from urllib.parse import quote

es_url = (
    f"https://{quote(os.environ['elasticsearch_user'], safe='')}"
    f":{quote(os.environ['elasticsearch_pw'], safe='')}"
    f"@{os.environ['elasticsearch_host']}:{os.environ['elasticsearch_port']}"
)
# es_index= os.environ['elasticsearch_index']

# Read the OpenAI key once (raises KeyError if it is missing) and reuse that
# single value everywhere instead of mixing os.environ[...] and os.getenv(...).
open_api_key = os.environ['openai_api_key']
os.environ['OPENAI_API_KEY'] = open_api_key

# define our API Key
openai.api_key = open_api_key


In [None]:
# Index names used by the cache and filter set up in the following cell, and
# the id of the embedding model they are configured with.
model_id = "sentence-transformers__msmarco-minilm-l-12-v3"
cache_index_name = "llm_cache_test"
filter_index_name = "llm_filter_test"

# Single client shared by the rest of the notebook.
es = Elasticsearch(hosts=[es_url])

In [None]:
import os
import time
#print(os.environ['ELASTIC_CLOUD_ID'])
#time.sleep(10)
from elasticsearch import Elasticsearch

from elasticsearch_llm_cache import (
    ElasticsearchLLMCache,  
    ElasticsearchLLMFilter,
)

from pprint import pprint
import time

# Alias kept because later cells refer to the client under both names.
es_client = es

# Start from a clean slate: drop the cache index if a previous run left it behind.
if es_client.indices.exists(index=cache_index_name):
    es_client.indices.delete(index=cache_index_name)
    ic(f'{cache_index_name} exists, deleting.')

# Same for the filter index.
if es_client.indices.exists(index=filter_index_name):
    es_client.indices.delete(index=filter_index_name)
    ic(f'{filter_index_name} exists, deleting.')

# es_client.indices.create(index=cache_index_name)
# es_client.indices.create(index=filter_index_name)


# Build the cache without auto-creating its index, then create the index
# explicitly with dims=384.
cache = ElasticsearchLLMCache(
    es_client=es_client,
    index_name=cache_index_name,
    es_model_id=model_id,
    create_index=False,
)
cache.create_index(dims=384)

# Build the filter the same way, against its own index.
filter = ElasticsearchLLMFilter(
    es_client=es_client,
    index_name=filter_index_name,
    es_model_id=model_id,
    create_index=False,
)
filter.create_index(dims=384)


In [None]:
# Import the sentence-transformers embedding model from the Hugging Face Hub.
# --start also starts the model deployment after the import, matching the
# text_classification imports in this notebook.
!eland_import_hub_model --url "$es_url" \
      --hub-model-id "sentence-transformers/msmarco-MiniLM-L-12-v3" \
      --task-type "text_embedding" \
      --start

# Third Party fill-mask


# Question and Answer

# Text Embedding

# Text Classification

# Single label classification
You provide some text and get back a "Positive" or "Negative" label, along with the probability of that prediction.

In [None]:
# Import the DistilBERT SST-2 model from the Hugging Face Hub as a
# text_classification model, passing --start to the import tool.
!eland_import_hub_model --url "$es_url" \
      --hub-model-id "distilbert-base-uncased-finetuned-sst-2-english" \
      --task-type "text_classification" \
      --start

In [None]:
from elasticsearch import Elasticsearch
from elasticsearch.client import MlClient


def single_label_classify(text, es, model_id="distilbert-base-uncased-finetuned-sst-2-english"):
    """Run a single-label text classification model on one piece of text.

    Args:
        text: The text to classify.
        es: Elasticsearch client used for every request made by this function.
        model_id: Id of the trained model to run inference with.

    Returns:
        The ``inference_results`` entry of the inference response.
    """
    # List the available trained models as a quick sanity check. Use the
    # client that was passed in rather than a notebook-level global, so the
    # function works with any client and on a fresh kernel.
    models = es.ml.get_trained_models()
    for model in models["trained_model_configs"]:
        print(model["model_id"])

    # Run a query against the model - this is the format the query input must
    # use; you can later map your features into this format through an ingest
    # pipeline. The API takes a list of documents, so wrap the single document.
    docs = [{"text_field": text}]

    result = es.ml.infer_trained_model(model_id=model_id, docs=docs)
    ic(result)
    return result["inference_results"]

# Run the single-label classifier on a sample sentence and display the result.
res = single_label_classify(
    text="This totally, totally, totally sucks",
    es=es,
)
res

# Hate Speech Detection
You provide some text and get back a "HATE" or "NON_HATE" label, along with the probability of that prediction.

In [1]:
# Import the dehatebert hate-speech model from the Hugging Face Hub as a
# text_classification model, passing --start to the import tool.
!eland_import_hub_model --url "$es_url" \
      --hub-model-id "Hate-speech-CNERG/dehatebert-mono-english" \
      --task-type "text_classification" \
      --start

In [None]:
from elasticsearch import Elasticsearch
from elasticsearch.client import MlClient


def hate_speech_classify(text, es, model_id="hate-speech-cnerg__dehatebert-mono-english"):
    """Run the hate-speech text classification model on one piece of text.

    Args:
        text: The text to classify.
        es: Elasticsearch client used for every request made by this function.
        model_id: Id of the trained model to run inference with.

    Returns:
        The ``inference_results`` entry of the inference response.
    """
    # List the available trained models as a quick sanity check. Use the
    # client that was passed in rather than a notebook-level global, so the
    # function works with any client and on a fresh kernel.
    models = es.ml.get_trained_models()
    for model in models["trained_model_configs"]:
        print(model["model_id"])

    # Run a query against the model - this is the format the query input must
    # use; you can later map your features into this format through an ingest
    # pipeline. The API takes a list of documents, so wrap the single document.
    docs = [{"text_field": text}]

    result = es.ml.infer_trained_model(model_id=model_id, docs=docs)
    ic(result)
    return result["inference_results"]

# Run the hate-speech classifier on a sample sentence and display the result.
res = hate_speech_classify(
    text="This totally, totally, totally sucks",
    es=es,
)
res

# Text Similarity

# Zero-shot classification

You provide some text and a list of candidate labels that could describe it.
The result is a list of the labels that matched (class name), each with the probability of the match.
Optionally, you can provide a threshold (default 0.5); labels whose probability is below it are dropped.

In [None]:
# Import the zero-shot classification model from the Hugging Face Hub.
# --start also starts the model deployment after the import, so the inference
# call in the next cell has a running deployment to talk to.
!eland_import_hub_model --url "$es_url" \
      --hub-model-id "valhalla/distilbart-mnli-12-6" \
      --task-type "zero_shot_classification" \
      --start

In [None]:
from elasticsearch import Elasticsearch
from elasticsearch.client import MlClient

def zero_shot_classify(text, labels, es, model_id = "valhalla__distilbart-mnli-12-6", threshold = 0.5):
    """Classify text against caller-supplied labels with a zero-shot model.

    Args:
        text: The text to classify.
        labels: Candidate labels sent to the model.
        es: Elasticsearch client used for the inference request.
        model_id: Id of the trained zero-shot classification model.
        threshold: Minimum ``class_probability`` a class needs to be kept.

    Returns:
        The ``top_classes`` entries of the first inference result whose
        ``class_probability`` is greater than or equal to ``threshold``.
    """
    inference_config = {
        "zero_shot_classification": {
            "labels": labels,
            "multi_label": True,
        }
    }

    # The API takes a list of documents, so wrap the single document.
    result = es.ml.infer_trained_model(
        model_id=model_id,
        docs=[{"text_field": text}],
        inference_config=inference_config,
    )

    top_classes = result["inference_results"][0]["top_classes"]
    return [item for item in top_classes if item["class_probability"] >= threshold]

# Zero-shot classify a sample sentence with the default threshold and display
# the labels that were kept.
res = zero_shot_classify(
    text="Our city councilman is going to be at the event.",
    labels=["sports", "money", "family", "politics"],
    es=es,
)
res

In [None]:
# Zero-shot classify with a stricter threshold. This must call
# zero_shot_classify: in this notebook the name `filter` is bound to the
# ElasticsearchLLMFilter instance, not to a classifier taking these arguments.
res = zero_shot_classify(
    es=es,
    text="Tell me about the plant that blooms and has petals",
    labels=["sports", "money", "family", "flowers", "politics"],
    threshold=0.75,
)
res
