In [1]:
import minsearch
import json

In [2]:
# Load the raw FAQ records. The encoding is pinned to UTF-8 (the standard JSON
# encoding) so the file is decoded the same way on every platform instead of
# falling back to the locale default.
with open('faq_crypto.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [3]:
# Shallow-copy the parsed records into the list used by the indexing cells below.
documents = list(docs_raw)

In [4]:
# Peek at the first record to check its schema before indexing.
documents[0]

{'question': 'What is a blockchain?',
 'answer': 'A blockchain is a distributed, cryptographically-secure database structure that allows network participants to establish a trusted and immutable record of transactional data without the need for intermediaries. A blockchain can execute a variety of functions beyond transaction settlement, such as smart contracts. Smart contracts are digital agreements that are embedded in code and that can have limitless formats and conditions. Blockchains have proven themselves as superior solutions for securely coordinating data, but they are capable of much more, including tokenization, incentive design, attack-resistance, and reducing counterparty risk. The very first blockchain was the Bitcoin blockchain, which itself was a culmination of over a century of advancements in cryptography and database technology.'}

In [5]:
# In-memory minsearch index over both FAQ fields, treated as free text;
# no keyword fields are configured.
index = minsearch.Index(keyword_fields=[], text_fields=["question", "answer"])

In [6]:
# Fit the minsearch index on the FAQ records loaded above.
index.fit(documents)

<minsearch.Index at 0x1325b6503d0>

In [7]:
!pip install openai



In [8]:
from openai import OpenAI

In [9]:
# No credentials are passed explicitly here.
# NOTE(review): the client presumably reads the API key from the environment
# (OPENAI_API_KEY) — confirm it is set before running this cell.
client = OpenAI()

In [10]:
# Sample question for the direct LLM call in the next cell (no retrieval involved).
q = 'what is a blockchain?'

In [11]:
# Baseline: send the question straight to the model, without any FAQ context.
response = client.chat.completions.create(
    messages=[dict(role="user", content=q)],
    model="gpt-4o",
)

# Show the text of the first (and only requested) completion.
response.choices[0].message.content

'A blockchain is a decentralized, distributed ledger technology that securely records transactions across many computers so that the record cannot be altered retroactively without the alteration of all subsequent blocks and the consensus of the network. Here are some key aspects to understand:\n\n1. **Decentralization**: Unlike traditional databases controlled by a single entity (like a bank or a corporation), a blockchain is maintained by a network of nodes (computers) that work together in a peer-to-peer manner.\n\n2. **Immutability**: Once data is recorded in a block and added to the blockchain, it is exceedingly difficult to change. This is because each block contains a cryptographic hash of the previous block, a timestamp, and transaction data. Altering a single block would require recalculating the hashes of all subsequent blocks, which is computationally impractical in a large network.\n\n3. **Transparency and Trust**: Since the ledger is distributed among all nodes in the netwo

In [12]:
def search(query, num_results=5):
    """Retrieve the FAQ entries that best match ``query`` from the minsearch index.

    Args:
        query: Free-text user question.
        num_results: Maximum number of entries to request from the index.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        Whatever ``index.search`` returns for the query; the rest of the
        notebook iterates it as dicts with ``question`` and ``answer`` keys.
    """
    # Weight matches in the question text 3x. The index is built with only the
    # ``question`` and ``answer`` text fields, so the boost names only fields
    # that exist (the stale ``section`` entry was dropped); ``answer`` is left
    # unboosted.
    boost = {'question': 3.0}

    return index.search(
        query=query,
        boost_dict=boost,
        num_results=num_results,
    )

In [13]:
def build_prompt(query, search_results):
    """Build the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; inserted verbatim after ``QUESTION:``.
        search_results: Iterable of dicts with ``question`` and ``answer``
            keys, rendered in order under ``CONTEXT:``. May be empty, in
            which case the context section is left blank.

    Returns:
        The prompt string with leading and trailing whitespace stripped.
    """
    prompt_template = """
You're a personal assistant who teaches beginners about investing in cryptocurrency.
Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # Render each entry as its own block and join once, instead of growing a
    # string with repeated concatenation inside a loop.
    context = "".join(
        f"\nquestion: {doc['question']}\nanswer: {doc['answer']}\n\n"
        for doc in search_results
    )

    return prompt_template.format(question=query, context=context).strip()

In [14]:
def llm(prompt, model='gpt-4o'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text to send as the only message in the chat.
        model: Name of the chat model to call. Defaults to ``'gpt-4o'``, the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [15]:
# Sample question used by the rag(query) calls in the following cells.
query = 'what is a blockchain?'

def rag(query, search_fn=None):
    """Answer ``query`` with retrieval-augmented generation.

    Args:
        query: The user's question.
        search_fn: Optional callable taking the query and returning search
            results. When omitted, the minsearch-backed ``search`` is used,
            so ``rag(query)`` behaves exactly as before. Passing a different
            retriever lets the same pipeline run against another backend
            without redefining ``rag``.

    Returns:
        The text returned by ``llm`` for the prompt built from the results.
    """
    if search_fn is None:
        # Looked up at call time, like the original direct call to search().
        search_fn = search
    search_results = search_fn(query)
    prompt = build_prompt(query, search_results)
    return llm(prompt)

In [16]:
# Run the pipeline end to end; at this point rag() retrieves via search().
rag(query)

'A blockchain is a distributed, cryptographically-secure database structure that allows network participants to establish a trusted and immutable record of transactional data without the need for intermediaries. It can execute a variety of functions beyond transaction settlement, such as smart contracts, which are digital agreements embedded in code with limitless formats and conditions. Blockchains can securely coordinate data and offer additional benefits like tokenization, incentive design, attack-resistance, and reducing counterparty risk. The first blockchain was the Bitcoin blockchain, which resulted from over a century of advancements in cryptography and database technology.'

In [17]:
from elasticsearch import Elasticsearch

In [18]:
# Client for the Elasticsearch node addressed at localhost:9200.
es_client = Elasticsearch("http://localhost:9200")

In [19]:
# Index definition: a single shard with no replicas, and both FAQ fields
# mapped as full-text fields.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "question": {"type": "text"},
            "answer": {"type": "text"}
        }
    }
}

index_name = "crypto-questions"

# Drop any index left over from a previous run so this cell can be re-executed
# (and the notebook re-run top to bottom) without failing because the index
# already exists. WARNING: this discards whatever was stored in that index.
es_client.indices.delete(index=index_name, ignore_unavailable=True)

es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'crypto-questions'})

In [20]:
from tqdm.auto import tqdm

In [21]:
# Use each record's list position as its document id, so re-running this cell
# overwrites the same documents instead of indexing a second copy of each one
# under a fresh auto-generated id.
for doc_id, doc in enumerate(tqdm(documents)):
    es_client.index(index=index_name, id=str(doc_id), document=doc)

  0%|          | 0/46 [00:00<?, ?it/s]

In [22]:
def elastic_search(query, num_results=5):
    """Retrieve the FAQ entries that best match ``query`` from Elasticsearch.

    Args:
        query: Free-text user question.
        num_results: Value sent as the ``size`` of the search request.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        A list with the ``_source`` of every hit in the response, in the
        order the hits were returned.
    """
    search_query = {
        "size": num_results,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        # "question^3" mirrors the 3x question boost used by
                        # the minsearch-based search().
                        "fields": ["question^3", "answer"],
                        "type": "best_fields"
                    }
                },
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    # Keep only the stored documents, dropping the per-hit metadata.
    return [hit['_source'] for hit in response['hits']['hits']]

In [23]:
def rag(query, search_fn=None):
    """Answer ``query`` with retrieval-augmented generation over Elasticsearch.

    This definition replaces the earlier minsearch-based ``rag`` from this
    point in the notebook onwards.

    Args:
        query: The user's question.
        search_fn: Optional callable taking the query and returning search
            results. When omitted, ``elastic_search`` is used, so
            ``rag(query)`` behaves exactly as before. Passing another
            retriever allows the backends to be compared without redefining
            ``rag`` again.

    Returns:
        The text returned by ``llm`` for the prompt built from the results.
    """
    if search_fn is None:
        # Looked up at call time, like the original direct call.
        search_fn = elastic_search
    search_results = search_fn(query)
    prompt = build_prompt(query, search_results)
    return llm(prompt)

In [24]:
# Same question as before; rag() now retrieves via elastic_search().
rag(query)

'A blockchain is a distributed, cryptographically-secure database structure that enables network participants to create a trusted and immutable record of transactional data without needing intermediaries. Beyond transaction settlement, blockchains can execute functions such as smart contracts, which are digital agreements embedded in code with various formats and conditions. Blockchains are excellent for securely coordinating data and can also be used for tokenization, incentive design, attack-resistance, and reducing counterparty risk. The first blockchain was the Bitcoin blockchain, which represented a culmination of advancements in cryptography and database technology over more than a century.'