In [22]:
%%capture
!pip install semchunk tiktoken sentence_transformers elasticsearch openai

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [40]:
import time, json
import requests
import functools
from tqdm.auto import tqdm
import semchunk
import tiktoken
from sentence_transformers import SentenceTransformer
from elasticsearch import Elasticsearch
from openai import OpenAI

In [55]:
data_file_path = 'data/The_Adventure_of_the_Speckled_Band.txt'
# Decode explicitly as UTF-8 so the text is read the same way on every
# platform instead of depending on the locale's default encoding.
with open(data_file_path, 'r', encoding='utf-8') as file:
    content = file.read()

# Display the size of the loaded text.
len(content)

52991

In [56]:
# Size limit handed to semchunk; presumably measured in tokens of the
# tokenizer passed below rather than in characters — confirm against semchunk docs.
chunk_size = 100
# Build a chunker that uses tiktoken's encoding for 'gpt-4o'.
chunker = semchunk.chunkerify(tiktoken.encoding_for_model('gpt-4o'), chunk_size)

In [57]:
# Split the whole story into chunks; the trailing expression displays how many were produced.
chunks = chunker(content)
len(chunks)

162

In [66]:
# Size of the first chunk as reported by len() (presumably characters, assuming chunks are strings).
len(chunks[0])

489

In [58]:
# Kept under its own name: `model_name` is assigned again later for the LLM
# and is read by `llm_ollama`, so sharing one global for both models makes the
# notebook depend on cell execution order.
embedding_model_name = 'all-mpnet-base-v2'
embedding_model = SentenceTransformer(embedding_model_name)



In [59]:
# Ask the model for its embedding dimension; it is reused as `dims` in the index mapping.
embedding_size = embedding_model.get_sentence_embedding_dimension()
embedding_size

768

In [60]:
question = "What were the circumstances that led to the death of Julia Stoner?"
# Embed the question with the same model that embeds the story chunks; `v` is the query vector used by the search cells.
v = embedding_model.encode(question)

In [61]:
# One record per chunk: the raw text plus its embedding from `embedding_model`.
docs = [
    {'text': chunk, 'vector': embedding_model.encode(chunk)}
    for chunk in tqdm(chunks)
]
len(docs)

100%|██████████| 162/162 [00:18<00:00,  8.54it/s]


162

In [67]:
url_elasticsearch = 'http://localhost:9200'

def create_elasticsearch_client():
    """Wait until Elasticsearch is reachable, then return a connected client.

    Polls `url_elasticsearch` every 5 seconds until an HTTP request succeeds,
    creates an `Elasticsearch` client for that URL, and prints the cluster
    info as indented JSON.

    Returns:
        The `Elasticsearch` client bound to `url_elasticsearch`.
    """
    while True:
        try:
            # Only reachability matters here, so the response is discarded.
            # The timeout keeps a stalled connection from blocking the loop forever.
            requests.get(url_elasticsearch, timeout=5)
        except (requests.ConnectionError, requests.Timeout):
            time.sleep(5)
        else:
            break
    client = Elasticsearch(url_elasticsearch)
    print(json.dumps(client.info().raw, indent=4))
    return client

In [68]:
# Blocks until the Elasticsearch server answers, then prints the cluster info.
es_client = create_elasticsearch_client()

{
    "name": "3d10f11edd60",
    "cluster_name": "docker-cluster",
    "cluster_uuid": "t-WOVq08TcelO6DwA352WQ",
    "version": {
        "number": "8.9.0",
        "build_flavor": "default",
        "build_type": "docker",
        "build_hash": "8aa461beb06aa0417a231c345a1b8c38fb498a0d",
        "build_date": "2023-07-19T14:43:58.555259655Z",
        "build_snapshot": false,
        "lucene_version": "9.7.0",
        "minimum_wire_compatibility_version": "7.17.0",
        "minimum_index_compatibility_version": "7.0.0"
    },
    "tagline": "You Know, for Search"
}


In [69]:
# Index definition: a single shard with no replicas, a full-text `text` field,
# and an indexed `vector` field compared with cosine similarity.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "vector": {
                "type": "dense_vector",
                "dims": embedding_size,  # dimension reported by the embedding model
                "index": True,
                "similarity": "cosine"
            },
        }
    }
}

index_name = "story_chunks"
# Delete any existing index of the same name before creating it, so re-running
# this cell starts from an empty index; ignore_unavailable=True lets the delete
# pass when the index does not exist yet.
es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'story_chunks'})

In [70]:
# Index every chunk document (text + vector) into `index_name`.
for doc in tqdm(docs):
    es_client.index(index=index_name, document=doc)

# Newly indexed documents only become searchable after a refresh. Force one so
# a search run immediately afterwards (e.g. under "Run All") sees every chunk.
# The trailing semicolon suppresses the response repr in the cell output.
es_client.indices.refresh(index=index_name);

100%|██████████| 162/162 [00:03<00:00, 44.62it/s]


In [71]:
def elasticsearch_knn(field, vector, k=5, num_candidates=10000):
    """Run a kNN search against `index_name` and return the matching sources.

    Args:
        field: Name of the dense-vector field to search.
        vector: Query vector to compare against `field`.
        k: Number of nearest neighbours to return (default 5, as before).
        num_candidates: Value sent as the kNN `num_candidates` option
            (default 10000, as before).

    Returns:
        A list with the `_source` of each hit, restricted to the `text` field,
        in the order Elasticsearch returned the hits.
    """
    search_query = {
        "knn": {
            "field": field,
            "query_vector": vector,
            "k": k,
            "num_candidates": num_candidates,
        },
        # Only the chunk text is needed downstream; the stored vectors are omitted.
        "_source": ["text"],
    }

    es_results = es_client.search(index=index_name, body=search_query)
    return [hit['_source'] for hit in es_results['hits']['hits']]

In [72]:
# Retrieve the chunks nearest to the question vector `v` from the 'vector' field and display them.
results = elasticsearch_knn('vector', v)
results

[{'text': '"You can at least tell me whether my own thought is correct, and if she died from some sudden fright."\n"No, I do not think so. I think that there was probably some more tangible cause. And now, Miss Stoner, we must '},
 {'text': '"I had come to these conclusions before ever I had entered his room. An inspection of his chair showed me that he had been in the habit of standing on it, which of course would be necessary in order that he should reach the ventilator. The sight of the safe, the saucer of milk, and the loop of whipcord were enough to finally dispel any doubts which may have remained. The metallic clang heard by Miss Stoner was obviously caused by her step-father hastily closing the'},
 {'text': '"She died just two years ago, and it is of her death that I wish to speak to you. You can understand that, living the life which I have described, we were little likely to see anyone of our own age and position. We had, however, an aunt, my mother\'s maiden sister. Miss Hon

In [73]:
url_ollama = 'http://localhost:11434'

def create_ollama_client(model):
    """Wait for Ollama, pull `model`, and return an OpenAI-compatible client.

    Args:
        model: Name of the Ollama model to pull (for example 'phi3').

    Returns:
        An `OpenAI` client whose base URL is the `/v1/` endpoint of `url_ollama`.

    Raises:
        requests.HTTPError: If the pull request returns an HTTP error status.
        RuntimeError: If the pull response contains an error message.
    """
    # Poll until the server accepts connections; echo its greeting once it does.
    while True:
        try:
            response = requests.get(url_ollama)
        except requests.ConnectionError:
            time.sleep(5)
        else:
            print(response.content.decode())
            break

    response = requests.post(f'{url_ollama}/api/pull', json={"name": model})
    print(response.status_code)
    print(response)
    response.raise_for_status()
    # The pull endpoint answers with JSON status objects, one per line; a failed
    # pull is reported as an object carrying an "error" key. Surface it here
    # instead of waiting below for a model that will never appear.
    for line in response.text.splitlines():
        try:
            status = json.loads(line)
        except ValueError:
            continue  # blank or non-JSON line: nothing to inspect
        if isinstance(status, dict) and "error" in status:
            raise RuntimeError(f"Ollama could not pull {model!r}: {status['error']}")

    # Wait for the requested model specifically, not just any installed model.
    # Ollama lists a model pulled without a tag under '<name>:latest'.
    wanted = model if ':' in model else f'{model}:latest'
    while True:
        tags = requests.get(f'{url_ollama}/api/tags').json()
        available = {entry.get("name") for entry in tags.get("models", [])}
        if wanted in available:
            print(json.dumps(tags, indent=4))
            break
        time.sleep(5)

    # Placeholder value passed as the client's API key.
    api_key = "ollama"
    base_url = f"{url_ollama}/v1/"
    return OpenAI(api_key=api_key, base_url=base_url)

In [74]:
# `model_name` is also read as a global by `llm_ollama`, so this assignment
# selects the model used for chat completions as well as the one pulled here.
model_name = 'phi3'
ol_client = create_ollama_client(model_name)

Ollama is running
200
<Response [200]>
{
    "models": [
        {
            "name": "phi3:latest",
            "model": "phi3:latest",
            "modified_at": "2024-08-27T04:57:33.027550777Z",
            "size": 2176178913,
            "digest": "4f222292793889a9a40a020799cfd28d53f3e01af25d48e06c5e708610fc47e9",
            "details": {
                "parent_model": "",
                "format": "gguf",
                "family": "phi3",
                "families": [
                    "phi3"
                ],
                "parameter_size": "3.8B",
                "quantization_level": "Q4_0"
            }
        }
    ]
}


In [82]:
def build_prompt(query, search_results):
    """Render the detective prompt for `query` from the retrieved chunks.

    Args:
        query: The question text inserted after ``QUESTION:``.
        search_results: Iterable of mappings that each have a ``'text'`` entry.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    template = """
You are an expert detective analyzing the details of the story "The Adventure of the Speckled Band." Answer the QUESTION using only the relevant information provided in the CONTEXT from the story.

Make sure to stay true to the facts in the CONTEXT when answering the QUESTION. Avoid adding any outside knowledge or assumptions.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # Each retrieved chunk becomes a "text: ..." entry followed by a blank line.
    context_block = "".join(f"text: {hit['text']}\n\n" for hit in search_results)

    return template.format(question=query, context=context_block).strip()

In [83]:
def llm_ollama(client, prompt, model=None):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        client: Object exposing `chat.completions.create(model=..., messages=...)`.
        prompt: The full prompt text, sent as the only message.
        model: Name of the model to query. When omitted, the notebook-level
            `model_name` is used, as before; pass it explicitly to avoid
            depending on which cell assigned that global last.

    Returns:
        The content of the first choice's message in the response.
    """
    # Resolved at call time; the global is only read when no model is given.
    chosen_model = model_name if model is None else model
    response = client.chat.completions.create(
        model=chosen_model,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

In [84]:
def rag(search_func, llm_func, build_prompt_func, query, query_vec):
    """Retrieve context, build a prompt from it, and return the LLM's answer.

    Args:
        search_func: Callable taking `query_vec` and returning search results.
        llm_func: Callable taking the prompt and returning the answer.
        build_prompt_func: Callable taking `(query, search_results)` and
            returning the prompt.
        query: The question passed to `build_prompt_func`.
        query_vec: The value passed to `search_func`.

    Returns:
        Whatever `llm_func` returns for the built prompt.
    """
    retrieved = search_func(query_vec)
    return llm_func(build_prompt_func(query, retrieved))

In [85]:
# Size of the prompt built from the question and the retrieved results, as reported by len().
len(build_prompt(question, results))

2319

In [86]:
# Wire the pipeline together: functools.partial pre-binds the 'vector' field and
# the Ollama client so that `rag` can call each step with a single argument.
rag(
    search_func=functools.partial(elasticsearch_knn, 'vector'),
    llm_func=functools.partial(llm_ollama, ol_client),
    build_prompt_func=build_prompt,
    query=question,
    query_vec=v
)

"- Julia Stoner died two years ago after accidentally inhaling a venomous snake that Dr. Jekyll's step-father drugged the snake with for entertainment purposes to poison her without raising suspicion since they had no servants and their family members were also too young or old, as suggested by Mrs. Stoner’s description of Julia'0–3 being an uncommon age in that setting given social prejudices against youthful females lacking employable experience due to minimal working-age population mentioned implicitly near the beginning/end where no one from their own generation frequently encountered each other, adding layers such as her habitual standing on a chair and routine objects within reach further point towards Dr. Jekyll's manipulative nature exploiting these familial weaknesses intentionally without any mention of personal psychological issues contributing to self-harm or suicide implying external danger leading directly to accidental death, all inferred from the interlocked details sha