In [1]:
%%capture
!pip install llama-index==0.10.37 cohere==5.5.0 openai==1.30.1 llama-index-embeddings-openai==0.1.9 qdrant-client==1.9.1 llama-index-vector-stores-qdrant==0.2.8 

previously had this (llama-index-llms-cohere) above but removing solved pydantic problem

# Load Variables and Models 

In [2]:
import os
import sys
from getpass import getpass
import nest_asyncio

from IPython.display import Markdown, display

from dotenv import load_dotenv

nest_asyncio.apply()

load_dotenv()

sys.path.append('../helpers')

from utils import setup_llm

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /opt/miniconda3/envs/gaa_rules/lib/python3.9/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
CO_API_KEY = os.environ['CO_API_KEY'] or getpass("Enter CO API KEY: ")

In [4]:
OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] or getpass("Enter your OpenAI API key: ")

In [5]:
QDRANT_URL = os.environ['QDRANT_URL'] or getpass("Enter your Qdrant URL:")

In [6]:
QDRANT_API_KEY = os.environ['QDRANT_API_KEY'] or  getpass("Enter your Qdrant API Key:")

In [7]:
from llama_index.core.settings import Settings
from utils import setup_llm, setup_embed_model, setup_vector_store

COLLECTION_NAME = "rules-of-the-gaa"

setup_llm(
    provider="cohere",
    model="command-r-plus",
    api_key=CO_API_KEY
)

# removed model_name from parameters here, defined in utils
setup_embed_model(
    provider="openai",
    api_key=OPENAI_API_KEY
)

vector_store = setup_vector_store(QDRANT_URL, QDRANT_API_KEY, COLLECTION_NAME)

Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.


In [8]:
Settings.llm

Cohere(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x12c304850>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x129bee820>, completion_to_prompt=<function default_completion_to_prompt at 0x129caa040>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='command-r-plus', temperature=None, max_retries=10, additional_kwargs={}, max_tokens=8192)

In [9]:
Settings.embed_model

OpenAIEmbedding(model_name='text-embedding-3-large', embed_batch_size=100, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x12c304850>, num_workers=None, additional_kwargs={}, api_key='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', api_base='https://api.openai.com/v1', api_version='', max_retries=10, timeout=60.0, default_headers=None, reuse_client=True, dimensions=None)

# Load Docstore

In [10]:
from utils import get_documents_from_docstore

documents = get_documents_from_docstore("../data/rules-of-the-gaa")

In [11]:
documents[0].__dict__

{'id_': '63655c1b-27a2-4cf0-99d3-225ab1d30d4a',
 'embedding': None,
 'metadata': {'page_number': 0,
  'file_name': '../data/gaa_rules_simple.pdf',
  'title': 'OFFICIAL GUIDE - PART 2',
  'author': 'Gaelic Athletic Association'},
 'excluded_embed_metadata_keys': [],
 'excluded_llm_metadata_keys': [],
 'relationships': {},
 'text': 'level, and be 6.5m apart. The inside edge of the endline shall be at a tangent to the front edge of the base of the goalposts (see diagram). (ii) A CROSSBAR shall be fixed to the goalposts at a uniform height of 2.5m above the ground. The crossbar shall have a rectangular or circular cross section. When rectangular, it shall have a depth of 140mm + 10mm and a width not less than 50mm. When circular, it shall have a uniform diameter of 125mm + 5mm. (iii) GOALNETS shall be securely fixed to the back of the crossbar and the back of each goalpost. The roof of the net shall be supported for a horizontal distance of not less than 900mm, at crossbar height, by a met

# Ingest to VectorDatabase


In [12]:
from llama_index.core.constants import DEFAULT_CHUNK_SIZE
from llama_index.core.node_parser.text import SentenceSplitter
from llama_index.core import StorageContext
from utils import ingest

print(f"This is the chunk size: {DEFAULT_CHUNK_SIZE}")

tranforms = [
    SentenceSplitter(chunk_size=DEFAULT_CHUNK_SIZE),
    Settings.embed_model
]

nodes = ingest(
    documents=documents,
    transformations=tranforms,
    vector_store=vector_store,
)

This is the chunk size: 1024


# Build Index over Vector database

In [13]:
from utils import create_index, create_query_engine

storage_context = StorageContext.from_defaults(
    vector_store=vector_store
)

index = create_index(
    from_where="vector_store",
    embed_model=Settings.embed_model,
    vector_store=vector_store,
    # storage_context=storage_context
)

query_engine = create_query_engine(
    index=index,
    mode="query",
    # llm=Settings.llm
)

# Query

In [14]:
from utils import create_query_pipeline

from llama_index.core.query_pipeline import InputComponent

input_component = InputComponent()

chain = [input_component, query_engine]

query_pipeline = create_query_pipeline(chain)

In [20]:
response_1 = query_pipeline.run(input="How many points is a goal worth?")

[1;3;38;2;155;135;227m> Running module 00ef5ddc-89f8-4296-bfca-288a5a3be006 with input: 
input: How many points is a goal worth?

[0m[1;3;38;2;155;135;227m> Running module 1e66c42b-e2a6-481b-8a3b-856df814f85f with input: 
input: How many points is a goal worth?

[0m

In [21]:
print(response_1)

A goal is worth three points.


In [19]:
response_1.source_nodes[0].__dict__

{'node': TextNode(id_='c8b4364a-2401-427e-88c0-b99bb7732022', embedding=None, metadata={'page_number': 35, 'file_name': '../data/gaa_rules_simple.pdf', 'title': 'OFFICIAL GUIDE - PART 2', 'author': 'Gaelic Athletic Association'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='74b8ad3f-6115-4911-a209-4d00a522174f', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_number': 35, 'file_name': '../data/gaa_rules_simple.pdf', 'title': 'OFFICIAL GUIDE - PART 2', 'author': 'Gaelic Athletic Association'}, hash='f0be1d2dda9be2db55f3d44748df4db22614661f0d4834cda6dfaa15c23572c1')}, text='RULE 3 - SCORES 3.1 A goal is scored when the ball is played over the goal-line between the posts and under the crossbar by either team. A point is scored when the ball is played over the crossbar between the posts by either team. A goal is equivalent to three points. The team with the greater final total of points is the 

In [22]:
response_2 = query_pipeline.run(input="Is a goal worth 3 points?")

[1;3;38;2;155;135;227m> Running module 00ef5ddc-89f8-4296-bfca-288a5a3be006 with input: 
input: Is a goal worth 3 points?

[0m[1;3;38;2;155;135;227m> Running module 1e66c42b-e2a6-481b-8a3b-856df814f85f with input: 
input: Is a goal worth 3 points?

[0m

In [23]:
print(response_2)

Yes, a goal is worth three points.


In [25]:
response_2.source_nodes[0].__dict__

{'node': TextNode(id_='f8ef5a1d-d5b9-4d93-8960-3d23d33ee232', embedding=None, metadata={'page_number': 68, 'file_name': '../data/gaa_rules_simple.pdf', 'title': 'OFFICIAL GUIDE - PART 2', 'author': 'Gaelic Athletic Association'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='c4d782ca-b9d6-4e84-9df8-999b9e78a694', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_number': 68, 'file_name': '../data/gaa_rules_simple.pdf', 'title': 'OFFICIAL GUIDE - PART 2', 'author': 'Gaelic Athletic Association'}, hash='ed9bb72fae0f7175c69c1b64ae73eb139c7d4e86e0bad57a7130774eb6a21214')}, text='RULE 3 - SCORES 3.1 A goal is scored when the ball is played over the goal-line between the posts and under the crossbar by either team. A point is scored when the ball is played over the crossbar between the posts by either team. A goal is equivalent to three points. The team with the greater final total of points is the 

In [26]:
len(response_2.source_nodes)

1

In [27]:
response_3 = query_pipeline.run(input="In ladies football how many point is a 45 worth?")
print(response_3)

[1;3;38;2;155;135;227m> Running module 00ef5ddc-89f8-4296-bfca-288a5a3be006 with input: 
input: In ladies football how many point is a 45 worth?

[0m[1;3;38;2;155;135;227m> Running module 1e66c42b-e2a6-481b-8a3b-856df814f85f with input: 
input: In ladies football how many point is a 45 worth?

[0mA 45 can be worth two points if, following a foul, the ball goes directly over the bar or is deflected over by a member of the defending team. If the ball goes over the bar after being played again by the team awarded the 45, then it is worth one point.


In [28]:
response_3.source_nodes[0].__dict__

{'node': TextNode(id_='36d86944-6a0f-4b66-83fc-84a49e223b71', embedding=None, metadata={'page_number': 20, 'file_name': '../data/Playing-Rules-2021-1.pdf', 'title': 'LGFA GUIDE', 'author': 'LGFA'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='35b19edf-e933-45d6-bcd6-f10970e141ca', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_number': 20, 'file_name': '../data/Playing-Rules-2021-1.pdf', 'title': 'LGFA GUIDE', 'author': 'LGFA'}, hash='02a700ca3120ca3ad5f72056ae56c583b06f1527f64a07424f951ebfa14bc87f')}, text='Should the taker of the 45m kick foul the ball e.g ball not stationary, taken from the incorrect position etc., the 45m kick shall be retaken. If a player fouls the ball twice a throw in shall be awarded. Should an opposing player foul the 45m kick, on a regulation sized pitch, the 45m kick shall be brought forward 13 metres in a more advantageous position. A point scored off the groun