In [None]:
from llama_index import Document, VectorStoreIndex
import os, json
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

#load documents
documents = []
directory = 'uva_articles_sample'
for article in os.listdir(directory):
    article_path = os.path.join(directory,article)
    with open(article_path, "r",encoding = 'utf-8') as f:
        article = json.loads(f.read())
    content = article['text']
    article.pop('text')
    article.pop('url')
    doc = Document(text=content,metadata=article)
    doc.id_ = article_path
    documents.append(doc)


for doc in documents:
    print(doc.metadata)




#things to do
# test metadata extraction with 10 documents, maybe make a custom one
# implement meta data extraction by creating nodes from the documents
# once the nodes are created (could take a brick), turn the nodes into indicies using the different index methods
#vector db to use: FAISS\
#chunk size = 1024
#chunk overlap = 128

: 

: 

In [3]:
#build service context for querying
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts.prompts import SimpleInputPrompt
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LangchainEmbedding
from llama_index import ServiceContext

system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""

# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")


query_llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="StabilityAI/stablelm-tuned-alpha-3b",
    model_name="StabilityAI/stablelm-tuned-alpha-3b",
    device_map="auto",
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    # uncomment this if using CUDA to reduce memory usage
    #model_kwargs={"torch_dtype": torch.float16}
)


embed_model = LangchainEmbedding(
  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs = {'device': 'cpu'})
)

query_service_context = ServiceContext.from_defaults(chunk_size=1024,llm=query_llm,embed_model=embed_model)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
binary_path: c:\Users\Benjamin\Documents\Random Projects\UVA-QA-Model\QA-UVA-Model\.venv\lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll
CUDA SETUP: Loading binary c:\Users\Benjamin\Documents\Random Projects\UVA-QA-Model\QA-UVA-Model\.venv\lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
from llama_index import Document
from llama_index.node_parser import SimpleNodeParser

node_parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=128)

nodes = node_parser.get_nodes_from_documents(documents=documents, show_progress=True)

Parsing documents into nodes:   0%|          | 0/22 [00:00<?, ?it/s]

In [5]:
#load llm and embed models
from llama_index.llms import MockLLM
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LangchainEmbedding
llm = MockLLM()
'''embed_model = LangchainEmbedding(
  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs = {'device': 'cpu'})
)'''


'embed_model = LangchainEmbedding(\n  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs = {\'device\': \'cpu\'})\n)'

In [6]:
#building vector store index
from llama_index import VectorStoreIndex
from llama_index import ServiceContext, StorageContext
import faiss #only have faiss-cpu installed for now, to get gpu:pip install faiss-gpu
from llama_index.vector_stores import FaissVectorStore
service_context = ServiceContext.from_defaults(llm=llm,embed_model=embed_model)

#v_index = VectorStoreIndex(nodes=nodes,service_context=service_context,show_progress=True)

'''d = 768 
faiss_index = faiss.IndexFlatL2(d)
faiss_vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=faiss_vector_store)

v_faiss_index = VectorStoreIndex(nodes=nodes, service_context=query_service_context, storage_context=storage_context,show_progress=True)'''

'd = 768 \nfaiss_index = faiss.IndexFlatL2(d)\nfaiss_vector_store = FaissVectorStore(faiss_index=faiss_index)\nstorage_context = StorageContext.from_defaults(vector_store=faiss_vector_store)\n\nv_faiss_index = VectorStoreIndex(nodes=nodes, service_context=query_service_context, storage_context=storage_context,show_progress=True)'

In [9]:
#building query engine
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine

response_synthesizer = get_response_synthesizer(response_mode='compact', service_context=query_service_context)

retriever = VectorIndexRetriever(
    index=v_faiss_index,
    similarity_top_k=3,
    vector_store_query_mode="default",
    alpha=None,
    doc_ids=None,
) #https://gpt-index.readthedocs.io/en/latest/core_modules/data_modules/index/vector_store_guide.html

# build query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever, response_synthesizer=response_synthesizer
)

query_engine_faiss = v_faiss_index.as_query_engine()

In [10]:
#query
#response = query_engine.query("What did the Dave Matthews band do?")
response = query_engine_faiss.query("What did the Dave Matthews band do?")

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


In [11]:
from IPython.display import Markdown, display
#display(Markdown(f"<b>{response}</b>"))
with open("source_nodes_text.txt","w",encoding='utf-8') as f:
    f.write(response.get_formatted_sources(length=10000))
print(response.response)

The Dave Matthews Band is a student-run radio station in Virginia that plays a variety of unique songs and has been providing music education and community outreach for over two decades. The band has produced numerous notable performances, including the 17 Blockbusters Arts Highlights from 2017. The station has also hosted many prestigious events, including the Bicentennial Launch Celebration and the Concert for Charlottesville. The band has also produced several successful albums and has been recognized with numerous awards and accolades.


In [None]:
#faiss test
#works better than regular vector store
from llama_index import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore
from IPython.display import Markdown, display
documents = SimpleDirectoryReader("./uva_articles_sample").load_data()
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context,service_context=query_service_context)
query_engine = index.as_query_engine()
response = query_engine.query("What did the Dave Matthews band do?")
print(response)

In [None]:
#tree index test
from llama_index.graph_stores import SimpleGraphStore
from llama_index import KeywordTableIndex

graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)
g_store_index = VectorStoreIndex(nodes=nodes, service_context=query_service_context, storage_context=storage_context,show_progress=True)


Extracting keywords from nodes:   0%|          | 0/62 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Benjamin\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


: 

: 

In [15]:
g_query_engine = g_store_index.as_query_engine()
response = g_query_engine.query("What did the Dave Matthews band do?")


Context information is below.
---------------------
title: 2 Alumnae to Continue Studies as Knight-Hennessy Scholars
author: Matt Kelly
date: 5/9/2022 6:19:37 PM
category: University News
description: Two University of Virginia graduates will delve into Black genre literature, and public and business administration as Knight-Hennessy Scholars.

Two University of Virginia alumnae will pursue their graduate studies at Stanford University as Knight-Hennessy scholars., Kristen “Kay” Barrett, a 2020 English graduate, will pursue a doctorate in English literature. Katie Deal, a 2017 political and social thought graduate, will pursue master’s degrees in public administration and business administration., Knight-Hennessy Scholars receive up to three years of funding to pursue graduate study at Stanford, as well as joint- and dual-degrees., The awards are funded by Stanford alumnus Phil Knight, a philanthropist and co-founder of Nike Inc., along with other benefactors. The Knight-Hennessy Schol

In [None]:
print(response)
print(response.source_nodes)