In [1]:

from llama_index.readers.google import GoogleSheetsReader
from llama_index.llms.azure_openai import AzureOpenAI
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding

from llama_index.llms.groq import Groq
from llama_index.core import VectorStoreIndex, Settings
import yaml


In [6]:


# --- Credentials -----------------------------------------------------------
# Parse the local YAML credentials file; the handle is only needed for parsing.
with open("creds.yaml", 'r') as f:
    config = yaml.safe_load(f)

# Groq API key, used by the LLM below.
groq_key = config['groq']['key']

# Azure OpenAI settings, used by the embedding model below.
azure_cfg = config['azure-openai']
aoi_api_key = azure_cfg['apikey']
azure_endpoint = azure_cfg['endpoint']
aoi_embed_deployment = azure_cfg['embed-deployment']
aoi_embed_model = azure_cfg['embed-model']

# --- LLM -------------------------------------------------------------------
# Create the Groq-hosted Mixtral model and register it on the global Settings.
llm = Settings.llm = Groq(
    api_key=groq_key,
    model="mixtral-8x7b-32768",
)

# --- Embeddings ------------------------------------------------------------
# Create the Azure OpenAI embedding model and register it on the global Settings.
embed_model = Settings.embed_model = AzureOpenAIEmbedding(
    azure_endpoint=azure_endpoint,
    api_key=aoi_api_key,
    api_version="2024-02-01",
    deployment_name=aoi_embed_deployment,
    model=aoi_embed_model,
)

In [7]:

# Reader used to pull spreadsheet contents from Google Sheets.
sheets = GoogleSheetsReader()

# IDs of the spreadsheets to ingest (one entry per spreadsheet).
list_of_sheets = [
    "1gt6m5CbK1y1eCLCBToJCRRT3dyFdwLSQTxeN0syb7zw",
]

# NOTE: despite the name, `dataframes` holds the reader's documents (they are
# passed straight to VectorStoreIndex.from_documents), not pandas DataFrames.
dataframes = sheets.load_data(list_of_sheets)

In [9]:
# Build a vector index over the documents loaded from the spreadsheet(s).
# NOTE(review): presumably embeds with the model registered on
# Settings.embed_model in the configuration cell — confirm.
index = VectorStoreIndex.from_documents(dataframes)

In [10]:
# Create a query engine over the index. Despite the variable name, this comes
# from as_query_engine(), so it is used through .query() calls.
# NOTE(review): presumably answers with the LLM registered on Settings.llm — confirm.
chat = index.as_query_engine()


In [13]:
# Ask a natural-language question about the sheet. The displayed Response
# carries the answer text together with the source nodes used for it.
response = chat.query("List the datacol1 which don't have datacol2 as D1")
response

Response(response='The values in datacol1 that do not have D1 in datacol2 are B and D.', source_nodes=[NodeWithScore(node=TextNode(id_='e6274d0e-7954-41bb-b4f2-99200d9e9257', embedding=None, metadata={'spreadsheet_id': '1gt6m5CbK1y1eCLCBToJCRRT3dyFdwLSQTxeN0syb7zw'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='1gt6m5CbK1y1eCLCBToJCRRT3dyFdwLSQTxeN0syb7zw', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'spreadsheet_id': '1gt6m5CbK1y1eCLCBToJCRRT3dyFdwLSQTxeN0syb7zw'}, hash='001d367262c59b570fd6261b8a5ed7b8a770913e190e7f5c6299ff7245ccd508')}, text='Sheet1\ndatacol1\tdatacol2\nA\tD1\nB\tD2\nC\tD1\nD\tCX1', start_char_idx=0, end_char_idx=45, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), score=0.8073697323249436)], metadata={'e6274d0e-7954-41bb-b4f2-99200d9e9257': {'spreadsheet_id': '1gt6m5CbK1y1eCLCBToJCRRT3dyFdwLSQTxeN0syb7zw'}})