# AskWikidata


A prototype for a Wikidata Question Answering System.


## Quickstart Notebook

In [None]:
# Clone the askwikidata repo from Github.
!git clone https://github.com/rti/askwikidata && cp -r askwikidata/* . && rm -rf askwikidata

In [None]:
# Install required libraries.
!pip install -q langchain annoy sentence_transformers transformers touch pandas tqdm protobuf accelerate bitsandbytes safetensors sentencepiece

In [None]:
# Unzip all cache files provided with the askwikidata repository.
!bunzip2 --force --keep *.bz2

In [None]:
# Generate text representations of Wikidata items.
# The script's standard output is redirected into text_representations.log,
# so it does not appear in the cell output; inspect that file for details.
!python text_representation.py > text_representations.log

In [None]:
# Make the askwikidata sources importable.
# The sources are copied into the current working directory by the clone cell,
# so that directory is used instead of a hard-coded, Colab-specific path.
# The membership check keeps sys.path free of duplicates when the cell is
# re-run.
import os
import sys

source_dir = os.getcwd()
if source_dir not in sys.path:
    sys.path.append(source_dir)

# Set up the actual AskWikidata RAG system.
from askwikidata import AskWikidata

# Keyword arguments forwarded verbatim to the AskWikidata constructor.
# NOTE(review): the meaning of each setting is defined by the AskWikidata
# class, which is not visible here; the names suggest chunking, index,
# retrieval/reranking and model-selection parameters. Confirm against the
# class before tuning.
config = {
    "chunk_size": 1280,
    "chunk_overlap": 0,
    "index_trees": 1024,
    "retrieval_chunks": 16,
    "context_chunks": 5,
    "embedding_model_name": "BAAI/bge-small-en-v1.5",
    "reranker_model_name": "BAAI/bge-reranker-base",
    # Despite the "_url" suffix, the value looks like a model identifier
    # rather than a URL. TODO confirm how AskWikidata interprets it.
    "qa_model_url": "mistralai/Mistral-7B-Instruct-v0.1",
}

# Note: from here on the name `askwikidata` refers to this instance (used by
# the following cells), not to the module of the same name.
askwikidata = AskWikidata(**config)
askwikidata.setup()

In [None]:
# Run one sample question through the system and print its answer.
example_question = "Who is the current mayor of Berlin? Since when do they serve?"
example_answer = askwikidata.ask(example_question)
print(example_answer)

In [None]:
# Prompt the user for a question, pass it to AskWikidata and print the
# answer surrounded by blank lines.
query = input("AskWikidata >> ")
response = askwikidata.ask(query)
print("".join(("\n", response, "\n")))