In [1]:
!pip install llama-index-experimental

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
!pip install -r azure-search-vector-python-llamaindex-sample-requirements.txt --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import logging
import sys
# from IPython.display import Markdown, display

import pandas as pd
from llama_index.experimental.query_engine import PandasQueryEngine


logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [4]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
import os

In [5]:
load_dotenv(override=True) # take environment variables from .env.

# Make sure your .env file has values for the following environment variables
endpoint = os.environ["AZURE_SEARCH_SERVICE_ENDPOINT"]
credential = AzureKeyCredential(os.environ["AZURE_SEARCH_ADMIN_KEY"]) if len(os.environ["AZURE_SEARCH_ADMIN_KEY"]) > 0 else DefaultAzureCredential()
# index_name = os.environ["AZURE_SEARCH_INDEX"]
azure_openai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
# Llama Index does not support RBAC authentication, an API key is required
azure_openai_key = os.environ["AZURE_OPENAI_KEY"]
if len(azure_openai_key) == 0:
    raise Exception("API key required")
azure_openai_embedding_model = os.environ["AZURE_OPENAI_EMBEDDING_MODEL"]
azure_openai_embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
azure_openai_chatgpt_deployment = os.environ["AZURE_OPENAI_CHATGPT_DEPLOYMENT"]
azure_openai_api_version = os.environ["AZURE_OPENAI_API_VERSION"]
# embedding_dimensions = int(os.getenv("AZURE_OPENAI_EMBEDDING_DIMENSIONS", 1536))

In [6]:
from llama_index.llms.azure_openai import AzureOpenAI
llm = AzureOpenAI(
    deployment_name=azure_openai_chatgpt_deployment,
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key
)

In [7]:
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.settings import Settings

Settings.llm = llm

In [8]:
# Test on some sample data
df = pd.DataFrame(
    {
        "city": ["Toronto", "Tokyo", "Berlin"],
        "population": [2930000, 13960000, 3645000],
    }
)

In [9]:
query_engine = PandasQueryEngine(df=df, verbose=True)

In [11]:
response = query_engine.query(
    "What is the city with the highest population?",
)

INFO:httpx:HTTP Request: POST https://genai-azure-openai-eq-swe.openai.azure.com//openai/deployments/gpt-4-turbo/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
HTTP Request: POST https://genai-azure-openai-eq-swe.openai.azure.com//openai/deployments/gpt-4-turbo/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
> Pandas Instructions:
```
df.loc[df['population'].idxmax(), 'city']
```
> Pandas Output: Tokyo


In [12]:
response = query_engine.query(
    "What is the average population of all cities in the dataframe?",
)

INFO:httpx:HTTP Request: POST https://genai-azure-openai-eq-swe.openai.azure.com//openai/deployments/gpt-4-turbo/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
HTTP Request: POST https://genai-azure-openai-eq-swe.openai.azure.com//openai/deployments/gpt-4-turbo/chat/completions?api-version=2024-02-01 "HTTP/1.1 200 OK"
> Pandas Instructions:
```
df['population'].mean()
```
> Pandas Output: 6845000.0
