# Getting Started

Welcome to the AI-model (GPT) API Test Notebook.  

This notebook shows how to run and test the AI-model (GPT) API via custom code. It uses the pytest framework to test the API with multiple use cases.
The notebook contains tests for the following APIs:
1. Data Search - Use GPT model for data enrichment; you can choose whether or not to use data in Sisense.
1. Data Suggest - Use GPT model for column relations recommendations
1. Dashboard Summary - Use GPT model for dashboard summarization
1. Widget summary - Use GPT model for widget summarization

---

#### Jupyter Notebooks
If you are not familiar with Jupyter Notebooks, we suggest reading about it [here](https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/What%20is%20the%20Jupyter%20Notebook.html).

Get to know the Basics of Jupyter Notebooks, including how to add a New Notebook manually to the Jupyter Server [here](https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Notebook%20Basics.html).

---

This Tutorial shows the following basic principles:
1. How to use custom code notebooks as an API within Sisense.
1. How to test custom code notebooks with pytest.
1. How to use the AI-model (GPT) extension as an API within Sisense.


---

## Content
  
1. [Package Installation](#Installation)
1. [Imports](#Imports)
1. [Functions Utils](#Functions-Utils)  
1. [Tests](#Tests)

# Installation

In [None]:
!pip install -U ipytest
!pip install lxml

# Imports

In [None]:
import ipytest
import pytest
import SisenseAPI
import datetime
import json
import urllib.parse
import uuid
import re
import pandas as pd
from bs4 import BeautifulSoup


# Apply ipytest's automatic configuration (presumably what enables the
# %%ipytest cell magic used in the Tests section -- see the ipytest docs).
ipytest.autoconfig()

# Placeholder credential: replace "XXX" with a real value before running the tests.
# It is embedded as the "cookie" additional parameter of every payload built by
# get_payload_for_question and passed to SisenseAPI as
# sisense_user_authentication_token by the get_sisense_connection fixture.
TEST_COOKIA = "XXX"


# Functions-Utils

In [None]:
def get_notebook_result_from_summary_response(response: dict) -> str:
    """
    pull the textual summary out of a summary API response
    @param response: a dict containing the API response; its "values" entry is indexed as a sequence of cell outputs
    return: string, the 'text' field of the first item in the last entry of "values"
    """
    last_cell_output = response["values"][-1]
    return last_cell_output[0]['text']


def validate_summary_result(result: str):
    """
    sanity-check the text returned by the summary API
    1. the text must not contain the substring 'Error'
    2. the text must not be an empty string
    @param result: string, summary text to validate
    return: None (an AssertionError is raised when a check fails)
    """
    has_error_marker = 'Error' in result
    assert not has_error_marker, 'summary result contain Error'

    is_empty = len(result) == 0
    assert not is_empty, 'summary result is empty string'
    
    
def get_payload_for_question(question:str, model_name:str, notebook_name: str, 
                             to_validate_entities_from_question:bool = True,
                             dashboard_name:str = "fake_dashboard",
                             table_name:str = "Temp", relation:str = "") -> dict:
    """
    create payload for AI-model custom code API
    @param question: string, requested question
    @param model_name: string, model name in Sisense
    @param notebook_name: string, notebook API to use
    @param to_validate_entities_from_question: boolean, true if the dashboard (/widget) names should be validated and compared to the data in Sisense, false otherwise.
    @param dashboard_name: string dashboard to use, if relevant to API
    @param table_name: string, table to use, if relevant to API
    @param relation: string, relation column ids in format "first_column_oid:second_column_oid,..", if relevant to API
    return: dict, payload
    """
    # Serialize the notebook parameters with json.dumps instead of manual string
    # concatenation, so quotes, backslashes or newlines inside any value are
    # escaped and the result is always valid JSON. Compact separators and
    # ensure_ascii=False keep the output identical to the hand-built string
    # for values that need no escaping.
    additional_parameters = json.dumps(
        {
            "question": question,
            "to_validate_entities_from_question": to_validate_entities_from_question,
            "model_name": model_name,
            "relation": relation,
            "table_name": table_name,
            # a fresh widget id is generated for every payload
            "widgetId": str(uuid.uuid4()),
            "cookie": TEST_COOKIA,
        },
        separators=(",", ":"),
        ensure_ascii=False,
    )

    payload = {
        "datasource": { "title": 'AITransformation' },
        "metadata": [
            {
                "jaql": {
                    "table": "Empty",
                    "column": "Empty",
                    "dim": "[Empty.Empty]",
                    # the filter value is the current timestamp, so it differs on every call
                    # NOTE(review): presumably this defeats query-result caching -- confirm
                    "filter": { "contains": datetime.datetime.now().isoformat()},
                    "datatype": "text",
                    "merged": True,
                    "title": "Empty"
                },
                "instanceid": "00128-E54A-D6",
                "field": {
                    "id": "[Empty.Empty]",
                    "index": 0
                }
            }
        ],
        "count": 1,
        "offset": 0,
        "transformData": [
            {
                "transformDataFormat": {
                    "CustomCodeTransformerFormatType": "Json"
                },
                "customCode": {
                    "codePath": f"/work/storage_notebooks/custom_code_notebooks/notebooks/{notebook_name}/{notebook_name}.ipynb",
                    "cellsDisable": [
                        0
                    ],
                    "language": "Python",
                    "serverUrl": "customcode:8888",
                    "mode": "Post",
                    "additionalParameters": additional_parameters
                }
            }
        ],
        "isMaskedResult": True,
        "format": "json",
        "widgetType": "fakewidget",
        "by": "widget",
        "dashboard": dashboard_name + ";" + model_name,
        "queryGuid": str(uuid.uuid4())
    }
    return payload


def get_notebook_result_from_data_search_response(response: dict) -> str:
    """
    pull the html result out of a data search API response
    @param response: a dict containing the API response; its "values" entry is indexed as a sequence of cell outputs
    return: string with html textual result, the 'text' field of the first item in the entry at position len(values) - 2
    """
    cell_outputs = response["values"]
    # the result sits one entry before the last one
    result_position = len(cell_outputs) - 2
    return cell_outputs[result_position][0]['text']

def validate_data_search_result(result: str):    
    """
    validate the html result from data search API
    1. check that the 'table id' marker appears before the 'AI Request response' marker
    2. check the result table exist
    3. check that result table headers exist (more than one header cell in the first row)
    @param result: string with html result
    return: None (an AssertionError is raised when a check fails)
    """
    # str.find returns -1 for a missing marker (str.index would raise ValueError),
    # so a missing marker fails the test with a readable assertion message
    table_position = result.find('table id')
    log_position = result.find('AI Request response')
    assert table_position != -1, 'data search result do not contain table'
    assert log_position != -1, 'data search result do not contain dynamic log table'
    # check that result contain a result table before the dynamic log table
    assert table_position < log_position, 'data search result do not contain result table before dynamic log table'

    soup = BeautifulSoup(result, features="html.parser")
    table = soup.find("table")
    # check that table exist
    assert table is not None, 'data search result do not contain table'
    # check that the table contains headers; guard the first row so a table
    # without any <tr> fails the assertion instead of raising AttributeError
    header_row = table.find("tr")
    assert header_row is not None, 'data search result do not contain table with headers'
    headings = [th.get_text() for th in header_row.find_all("th")]
    assert len(headings)>1, 'data search result do not contain table with headers'

    
def get_notebook_result_from_data_suggest_response(response: dict) -> str:
    """
    pull the html result out of a data suggest API response
    @param response: a dict containing the API response
    return: string with html textual result, the 'text' field of the first item in the first entry of "values"
    """
    first_cell_output = response["values"][0]
    first_item = first_cell_output[0]
    return first_item['text']


def validate_data_suggest_result(result:str, table_name:str):
    """
    validate the html result from data suggest API
    1. check the result table exist
    2. check that result table headers exist (exactly 5 header cells in the first row)
    3. check that result table contain at least 1 suggestion (at least 2 rows)
    NOTE: the check that the suggestion contains the name of the requested table
    is currently disabled (see the commented-out code at the end), so
    table_name is not used yet.
    @param result: string, html result
    @param table_name: string, requested table for relation suggestion
    return: None (an AssertionError is raised when a check fails)
    """
    soup = BeautifulSoup(result, features="html.parser")
    tables = soup.find_all('table')
    print(tables)
    # check that table exist; assert before indexing so a result without any
    # table fails with the intended message instead of raising IndexError
    assert tables, 'data suggest result do not contain table'
    table = tables[0]

    # check that the table contains headers; guard the first row so a table
    # without any <tr> fails the assertion instead of raising AttributeError
    header_row = table.find("tr")
    assert header_row is not None, 'data suggest result do not contain table with headers'
    headings = [th.get_text() for th in header_row.find_all("th")]
    assert len(headings)==5, 'data suggest result do not contain table with headers'

    # find rows in table
    rows = table.find_all('tr')
    assert len(rows)>=2, 'data suggest result do not contain table with rows'
    # find that origin table (table1) is identical to the requested table (disabled)
    # cells = rows[1].findChildren('td')
    # assert cells[1].string == table_name, 'data suggest result do not contain the requested table'

    

# Tests

We use the pytest framework to test the custom code API; more information about the pytest testing framework can be found 
[here](https://docs.pytest.org/en/7.2.x/).

### Summarization API Tests
The **test_summary** function is used to test both the dashboard and widget summary APIs. The test function receives multiple use cases, which differ in the payload sent to the API. The payload's parameters are the following:
1. Textual question - the question contains the dashboard name and widget name.
1. Boolean value - true if the dashboard (/widget) names should be validated and compared to the data in Sisense, false otherwise.
1. Model name - the name of the dashboard's data model 
1. Notebook name - which API endpoint to use

The test validates that the API response does not contain an error, and it also checks that the summary is not an empty string.

### Data Search API Tests
The **test_data_search** function is used to test the data search API. The test function receives multiple use cases, which differ in the payload sent to the API. The payload's parameters are the following:
1. Textual question - the question, which may or may not contain a reference to data within Sisense. The reference should be enclosed in square brackets and can refer either to the names of a table and column or to a dashboard, widget and column.
1. Model name - the name of the data model (if the question contains a reference to Sisense data, the data model name should correspond to it. Also, the generated table with the API result will be created in the given data model) 
1. Notebook name - which API endpoint to use

The test validates that the API response does not contain an error, and also checks that the result contains a result table.

### Data Suggest API Tests
The **test_data_suggest** function is used to test the data suggest API. The test function receives multiple use cases, which differ in the payload sent to the API. The payload's parameters are the following:
1. Table name - the table to find relations to. 
1. Model name - the name of the data model (corresponding to the table)
1. Notebook name - which API endpoint to use

The test validates that the API response does not contain an error, and also checks that the result table contains the expected output.


To run the tests you will need to specify in **%%ipytest {---}** which test functions to run.

For example, the following will run only the data suggest tests:
**%%ipytest {test_data_suggest}**

In [None]:
%%ipytest {test_data_search}


@pytest.fixture
def get_sisense_connection(model_name):
    """
    pytest fixture that builds a SisenseAPI client for the given model
    @param model_name: string, passed to SisenseAPI as cube_name (supplied by the parametrized test)
    return: SisenseAPI.SisenseAPI instance authenticated with TEST_COOKIA
    """
    return SisenseAPI.SisenseAPI(
        cube_name=model_name,
        is_token_cookie=False,
        sisense_user_authentication_token=TEST_COOKIA,
    )


@pytest.mark.parametrize(
    'question, to_validate_entities_from_question, model_name, notebook_name',
    [
        ("summarize dashboard sample healthcare ", True, "Sample Healthcare", 'DashboardSummary1'),
        ("model sample ecommerce dashboard sample-ecommerce", True, "Sample Healthcare", 'DashboardSummary1'),
        ("model sample ecommerce dashboard sample-ecommerce widget categories by revenue", True, "Sample Healthcare", 'WidgetSummary1'),
        ("dashboard Sample - Healthcare", False, "Sample Healthcare", 'DashboardSummary1'),
        ("dashboard Sample - Healthcare model Sample Healthcare", True, "Sample ECommerce", 'DashboardSummary1'),
        ("summarize dashboard sample healthcare widget top diagnosis", True, "Sample Healthcare", 'WidgetSummary1'),
        ("dashboard Sample - Healthcare widget TOP 10 DIAGNOSIS", False, "Sample Healthcare", 'WidgetSummary1'),
        ("dashboard sample healthcare widget top diagnosis model sample healthcare", True, "Sample ECommerce", 'WidgetSummary1'),
    ],
)
def test_summary(question, to_validate_entities_from_question, model_name, 
                 notebook_name, get_sisense_connection):
    """
    run one dashboard/widget summary use case against the jaql endpoint
    and validate the returned summary text
    """
    print(f'start test summary for question: {question}\n, test notebook {notebook_name}')
    connection = get_sisense_connection
    request_payload = get_payload_for_question(
        question, model_name, notebook_name, to_validate_entities_from_question
    )
    endpoint = f'/api/datasources/{model_name}/jaql'
    http_response = connection.call_api('POST', endpoint, payload=request_payload)
    response_body = json.loads(http_response.text)
    print(response_body)
    # fail only when the response explicitly reports error == True
    if 'error' in response_body:
        assert not (response_body['error'] == True), "failed to apply summary"
    summary_text = get_notebook_result_from_summary_response(response_body)
    validate_summary_result(summary_text)
    print("test finish successfully")
    
    
@pytest.mark.parametrize(
    'question, model_name, notebook_name',
    [
        (
            "Give me the list of best 5 songs of this year. include the singer name, the date it went out and language of the song",
            "Sample Healthcare",
            'DataSearch1',
        ),
        (
            "Give me the language in the following countries [country.country]",
            "Sample ECommerce",
            'DataSearch1',
        ),
        (
            "Give me the language in the following countries [table country column country]",
            "Sample ECommerce",
            'DataSearch1',
        ),
        (
            "What is the main organ in the body that is affected by each of the following diseases? [dashboard sample healthcare widget TOP 10 DIAGNOSIS column diagnosis]",
            "Sample Healthcare",
            'DataSearch1',
        ),
    ],
)
def test_data_search(question, model_name, notebook_name, get_sisense_connection):  
    """
    run one data search use case against the jaql endpoint
    and validate the returned html result
    """
    connection = get_sisense_connection
    request_payload = get_payload_for_question(question, model_name, notebook_name)
    endpoint = f'/api/datasources/{model_name}/jaql'
    http_response = connection.call_api('POST', endpoint, payload=request_payload)
    response_body = json.loads(http_response.text)
    print(response_body)
    # fail only when the response explicitly reports error == True
    if 'error' in response_body:
        assert not (response_body['error'] == True), "failed to apply data search"
    html_result = get_notebook_result_from_data_search_response(response_body)
    validate_data_search_result(html_result)
    print("test finish successfully")

@pytest.mark.parametrize(
    'table_name, model_name, notebook_name',
    [
        ("Country", "Sample ECommerce", 'DataSuggest1'),
        ("Commerce", "Sample ECommerce", 'DataSuggest1'),
    ],
)
def test_data_suggest(table_name, model_name, notebook_name, get_sisense_connection):  
    """
    run one data suggest use case against the jaql endpoint
    and validate the returned html suggestions table
    """
    connection = get_sisense_connection
    # the question is left empty; only the table name is passed for this API
    request_payload = get_payload_for_question('', model_name, notebook_name, table_name=table_name)
    endpoint = f'/api/datasources/{model_name}/jaql'
    http_response = connection.call_api('POST', endpoint, payload=request_payload)
    response_body = json.loads(http_response.text)
    # the response is printed only when it carries an 'error' key
    if 'error' in response_body:
        print(response_body)
        assert not (response_body['error'] == True), "failed to apply data suggest"
    html_result = get_notebook_result_from_data_suggest_response(response_body)
    validate_data_suggest_result(html_result, table_name)
    print("test finish successfully")

