In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
import os
from jilm.model import JILMLangModel


gptj_model_load: loading model from '/mnt/c/Users/jeanb/models/ggml-gpt4all-j-v1.3-groovy.bin' - please wait ...
gptj_model_load: n_vocab = 50400
gptj_model_load: n_ctx   = 2048
gptj_model_load: n_embd  = 4096
gptj_model_load: n_head  = 16
gptj_model_load: n_layer = 28
gptj_model_load: n_rot   = 64
gptj_model_load: f16     = 2
gptj_model_load: ggml ctx size = 4505.45 MB
gptj_model_load: memory_size =   896.00 MB, n_mem = 57344
gptj_model_load: ................................... done
gptj_model_load: model size =  3609.38 MB / num tensors = 285


In [2]:
from jilm.document_loader import DocumentLoader
from jilm.settings import CHROMA_SETTINGS, PERSIST_DIRECTORY

In [3]:
embeddings_model_name = os.environ.get('EMBEDDINGS_MODEL_NAME')
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)


[2023-05-22 21:51:15,517] {SentenceTransformer.py:66} INFO - Load pretrained SentenceTransformer: all-mpnet-base-v2


  from .autonotebook import tqdm as notebook_tqdm


[2023-05-22 21:51:18,708] {SentenceTransformer.py:105} INFO - Use pytorch device: cuda


In [4]:
doc = DocumentLoader.load_single_document("/home/jbp/projects/jilm/README.md")


[2023-05-22 21:51:19,367] {xml.py:96} INFO - Reading document from string ...
[2023-05-22 21:51:19,369] {html.py:99} INFO - Reading document ...


In [5]:
doc 

Document(page_content='JILM\n\nJilm enhances your AI capabilities.', metadata={'source': '/home/jbp/projects/jilm/README.md'})

In [6]:
doc.__dict__

{'page_content': 'JILM\n\nJilm enhances your AI capabilities.',
 'metadata': {'source': '/home/jbp/projects/jilm/README.md'}}

In [9]:
# Create and store locally vectorstore
db = Chroma.from_documents([doc], embeddings, persist_directory=PERSIST_DIRECTORY, client_settings=CHROMA_SETTINGS)
db.persist()

[2023-05-22 21:52:00,967] {__init__.py:91} INFO - Running Chroma using direct local API.
[2023-05-22 21:52:00,995] {duckdb.py:467} INFO - loaded in 1 embeddings
[2023-05-22 21:52:00,999] {duckdb.py:479} INFO - loaded in 1 collections
[2023-05-22 21:52:01,002] {duckdb.py:92} INFO - collection with name langchain already exists, returning existing collection
['JILM\n\nJilm enhances your AI capabilities.']
[{'source': '/home/jbp/projects/jilm/README.md'}]


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.27it/s]

[2023-05-22 21:52:01,459] {duckdb.py:421} INFO - Persisting DB to disk, putting it in the save folder: tmp/vector-db





In [10]:
retriever = db.as_retriever()

In [18]:
retriever.__dict__

{'vectorstore': <langchain.vectorstores.chroma.Chroma at 0x7fdbadb615a0>,
 'search_type': 'similarity',
 'search_kwargs': {}}

In [17]:
db.__dict__

{'_client_settings': Settings(environment='', chroma_db_impl='duckdb+parquet', chroma_api_impl='local', clickhouse_host=None, clickhouse_port=None, persist_directory='tmp/vector-db', chroma_server_host=None, chroma_server_http_port=None, chroma_server_ssl_enabled=False, chroma_server_grpc_port=None, chroma_server_cors_allow_origins=[], anonymized_telemetry=False),
 '_client': <chromadb.api.local.LocalAPI at 0x7fdbc47897b0>,
 '_embedding_function': HuggingFaceEmbeddings(client=SentenceTransformer(
   (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
   (2): Normalize()
 ), model_name='all-mpnet-base-v2', cache_folder=None, model_kwargs={}),
 '_persist_directory': 'tmp/vector-db',
 '_collection': Collection(name=langchain)}

In [14]:
llm = JILMLangModel(callbacks=[StreamingStdOutCallbackHandler()])

In [None]:
#llm = JILMLangModel(retriever=retriever, embeddings=embeddings, max_tokens=1000, chunk_size=64, chunk_overlap=0)
#llm = JILMLangModel(callbacks=[StreamingStdOutCallbackHandler()], retriever=retriever, embeddings=embeddings, max_tokens=1000, chunk_size=64, chunk_overlap=0)

In [15]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)

In [19]:
qa.__dict__

{'memory': None,
 'callbacks': None,
 'callback_manager': None,
 'verbose': False,
 'combine_documents_chain': StuffDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, prompt=PromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:", template_format='f-string', validate_template=True), llm=JILMLangModel(cache=None, verbose=False, callbacks=[<langchain.callbacks.streaming_stdout.StreamingStdOutCallbackHandler object at 0x7fdbadb61a50>], callback_manager=None, n_ctx=512, n_parts=-1, seed=0, f16_kv=False, logits_all=False, vocab_only=False, use_mlock=False, 

In [16]:
query = "What is the name of the project?"
res = qa(query)    
answer, docs = res['result'], res['source_documents']

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.20it/s]


NameError: name 'chromadb' is not defined

In [None]:
for document in docs:
    print("\n> " + document.metadata["source"] + ":")
    print(document.page_content)