In [1]:
from gen_ai_hub.proxy.langchain import init_llm

# GPT-4 client (temperature 0.0, at most 2000 tokens) used to do logical reasoning.
llm = init_llm(
    'gpt-4',
    temperature=0.0,
    max_tokens=2000,
)

In [2]:
# Fields to extract for the scenario: each tag name maps to the description
# of that field which is handed to the LLM.
scenario_fields_description = dict(
    scenario="Define the task the user wants to perform, e.g. [train_classifier, train_regressor, predict_classifier, predict_regressor, train_timeseries, predict_timeseries]",
    algorithm="Define the algorithm to be used, e.g. [linear_regression, logistic_regression, random_forest, gradient_boosting, lstm, cnn, transformer, automl]",
    dataset_table="Define the dataset to be used, e.g. [iris, boston, mnist, cifar10, custom_dataset]",
)

In [3]:
from generative_ai_toolkit_for_sap_hana_cloud.agents.scenario_utility import get_fields_by_llm

# Natural-language request from which the scenario fields are extracted.
query = "Train an automl classification model on the iris dataset"

# Ask the LLM for every field described in scenario_fields_description,
# with verbose output and the generated prompt shown.
scenario_details = get_fields_by_llm(
    query,
    scenario_fields_description,
    llm,
    is_wait_for_rate_limit=True,
    verbose=True,
    show_prompt=True,
)
print(scenario_details)

[94m[Prompt] In a machine learning scenario. The user question is "Train an automl classification model on the iris dataset". Please find Define the task the user wants to perform, e.g. [train_classifier, train_regressor, predict_classifier, predict_regressor, train_timeseries, predict_timeseries],Define the algorithm to be used, e.g. [linear_regression, logistic_regression, random_forest, gradient_boosting, lstm, cnn, transformer, automl],Define the dataset to be used, e.g. [iris, boston, mnist, cifar10, custom_dataset] from this user question. If so, return them wrapped by html opening and closing tag with the tag name scenario,algorithm,dataset_table respectively. If the user decides not to use them from the question, then return empty string wrapped by html opening and closing tag with the tag name scenario,algorithm,dataset_table respectively. Otherwise, do not return anything.[0m
[92m[AI] Finding fields...[0m [91m<scenario>train_classifier</scenario>
<algorithm>automl</algor

In [4]:
# Training parameters to extract: each tag name maps to the description of
# that field which is handed to the LLM.
parameters_fields_description = dict(
    key="the key column of the dataset",
    label="the label column of the dataset",
)

In [14]:
# Natural-language statement naming the key and label columns.
query = "the key is ID and the label is SPECIES"

# Ask the LLM for the fields described in parameters_fields_description,
# with verbose output and the generated prompt shown.
parameters_details = get_fields_by_llm(
    query,
    parameters_fields_description,
    llm,
    is_wait_for_rate_limit=True,
    verbose=True,
    show_prompt=True,
)
print(parameters_details)

INFO:httpx:HTTP Request: POST https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d0eb1cc3bcf74d77/chat/completions?api-version=2023-09-01-preview "HTTP/1.1 200 OK"


[94m[Prompt] In a machine learning scenario. The user question is "the key is ID and the label is SPECIES". Please find the key column of the dataset,the label column of the dataset from this user question. If so, return them wrapped by html opening and closing tag with the tag name key,label respectively. If the user decides not to use them from the question, then return empty string wrapped by html opening and closing tag with the tag name key,label respectively. Otherwise, do not return anything.[0m
[92m[AI] Finding fields...[0m [91mFrom the user question, the key column of the dataset is "ID" and the label column of the dataset is "SPECIES". 

So, the return would be:

<key>ID</key>
<label>SPECIES</label>[0m
{'key': 'ID', 'label': 'SPECIES'}


In [6]:
import getpass
import os

from generative_ai_toolkit_for_sap_hana_cloud.agents.scenario_agents import _get_scenario_code_template
from generative_ai_toolkit_for_sap_hana_cloud.vectorstore.embedding_service import GenAIHubEmbeddings
from generative_ai_toolkit_for_sap_hana_cloud.vectorstore.hana_vector_engine import HANAMLinVectorEngine

from hana_ml import dataframe

# Connection settings are read from the environment so that no secret is stored
# in the notebook. Host, port and user fall back to the values this notebook
# used previously; the password has no fallback and is prompted for when
# HANA_PASSWORD is unset.
# NOTE(review): the password that used to be hardcoded here should be rotated.
url = os.environ.get("HANA_URL", "hana-ml-api.hana-ml.c.ap-cn-1.cloud.sap")
port = int(os.environ.get("HANA_PORT", "30015"))
user = os.environ.get("HANA_USER", "PAL_TEST")
pwd = os.environ.get("HANA_PASSWORD") or getpass.getpass(f"HANA password for {user}: ")
connection_context = dataframe.ConnectionContext(url, port, user, pwd)
knowledge_table = "KNOWLEDGE_TABLE"

knowledge_base = HANAMLinVectorEngine(connection_context, knowledge_table)
code_template = _get_scenario_code_template("training")
# Upsert the "training" code template only when one was returned and the
# knowledge table does not exist yet.
if code_template is not None and not connection_context.has_table(knowledge_table):
    knowledge_base.upsert_knowledge(code_template)

  0%|          | 0/6 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d91561008ffafece/embeddings?api-version=2023-09-01-preview "HTTP/1.1 200 OK"
 17%|█▋        | 1/6 [00:01<00:06,  1.36s/it]INFO:httpx:HTTP Request: POST https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d91561008ffafece/embeddings?api-version=2023-09-01-preview "HTTP/1.1 200 OK"
 33%|███▎      | 2/6 [00:01<00:03,  1.13it/s]INFO:httpx:HTTP Request: POST https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d91561008ffafece/embeddings?api-version=2023-09-01-preview "HTTP/1.1 200 OK"
 50%|█████     | 3/6 [00:02<00:02,  1.49it/s]INFO:httpx:HTTP Request: POST https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d91561008ffafece/embeddings?api-version=2023-09-01-preview "HTTP/1.1 200 OK"
 67%|██████▋   | 4/6 [00:02<00:01,  

In [16]:
# Query the knowledge base with the extracted scenario fields and print the
# code template that comes back.
code_template = knowledge_base.query(
    input=f"Fields\n{scenario_details}",
    embedding_function=GenAIHubEmbeddings(),
)
print(code_template)

INFO:httpx:HTTP Request: POST https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d91561008ffafece/embeddings?api-version=2023-09-01-preview "HTTP/1.1 200 OK"



Strictly use the Examples to create AutoML instance.
Do fitting:
>>> from hana_ml.algorithms.pal.auto_ml import AutomaticClassification
>>> model_train = AutomaticClassification(generations=2, population_size=5, offspring_size=5, max_eval_time_mins=10, successive_halving=True)
>>> model.enable_workload_class(workload_class_name=<WORKLOAD_CLASS>)
>>> model_train.fit(data=df, key=<key>, features=<features>, label=<label>)
>>> print({"output table names": model_train._fit_output_table_names, "Python object address": id(model_train)})


In [15]:
from generative_ai_toolkit_for_sap_hana_cloud.agents.hana_dataframe_agent import create_hana_dataframe_agent
from generative_ai_toolkit_for_sap_hana_cloud.agents.scenario_utility import execute_code_with_fields


# Build the agent over the HANA table named by the extracted "dataset_table" field.
agent = create_hana_dataframe_agent(
    llm=llm,
    df=connection_context.table(scenario_details["dataset_table"]),
)

# Request forwarded to the LLM together with the extracted parameters and the
# code template (spelling corrected from "traing").
query = "train the model"
result = execute_code_with_fields(
    agent,
    query,
    parameters_details,
    code_template,
    is_wait_for_rate_limit=True,
    verbose=True,
    show_prompt=True,
)

INFO:httpx:HTTP Request: POST https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/d0eb1cc3bcf74d77/chat/completions?api-version=2023-09-01-preview "HTTP/1.1 200 OK"
INFO:hana_ml.ml_base:Executing SQL: DO
BEGIN
DECLARE param_name VARCHAR(5000) ARRAY;
DECLARE int_value INTEGER ARRAY;
DECLARE double_value DOUBLE ARRAY;
DECLARE string_value VARCHAR(5000) ARRAY;
param_name[1] := N'GENERATIONS';
int_value[1] := 2;
double_value[1] := NULL;
string_value[1] := NULL;
param_name[2] := N'POPULATION_SIZE';
int_value[2] := 5;
double_value[2] := NULL;
string_value[2] := NULL;
param_name[3] := N'OFFSPRING_SIZE';
int_value[3] := 5;
double_value[3] := NULL;
string_value[3] := NULL;
param_name[4] := N'CONFIG_DICT';
int_value[4] := NULL;
double_value[4] := NULL;
string_value[4] := N'{"LabelEncoder": {"IGNORE_UNKNOWN": [1]}, "OneHotEncoder": {"MINIMUM_FRACTION": [0.05, 0.1, 0.15, 0.2, 0.25], "IGNORE_UNKNOWN": [1]}, "Imputer": {"IMPUTATION_TYPE": [1, 2, 3]}, "Polynomia

[94m[Prompt] Fields: key: ID
label: SPECIES
. The user question is "traing the model". Please refer to the code template: 
Strictly use the Examples to create AutoML instance.
Do fitting:
>>> from hana_ml.algorithms.pal.auto_ml import AutomaticClassification
>>> model_train = AutomaticClassification(generations=2, population_size=5, offspring_size=5, max_eval_time_mins=10, successive_halving=True)
>>> model.enable_workload_class(workload_class_name=<WORKLOAD_CLASS>)
>>> model_train.fit(data=df, key=<key>, features=<features>, label=<label>)
>>> print({"output table names": model_train._fit_output_table_names, "Python object address": id(model_train)})[0m
[92m[AI] Executing code...[0m [91m{'output table names': ['#PAL_AUTOML_BEST_PIPELINE_TBL_0_3E6D3466_C1D6_11EF_B714_A84A633567F9', '#PAL_AUTOML_MODEL_TBL_0_3E6D3466_C1D6_11EF_B714_A84A633567F9', '#PAL_AUTOML_INFO_TBL_0_3E6D3466_C1D6_11EF_B714_A84A633567F9'], 'Python object address': 2335890900240}[0m


In [17]:
# Print the value that execute_code_with_fields returned and that was stored in `result`.
print(result)

{'output table names': ['#PAL_AUTOML_BEST_PIPELINE_TBL_0_3E6D3466_C1D6_11EF_B714_A84A633567F9', '#PAL_AUTOML_MODEL_TBL_0_3E6D3466_C1D6_11EF_B714_A84A633567F9', '#PAL_AUTOML_INFO_TBL_0_3E6D3466_C1D6_11EF_B714_A84A633567F9'], 'Python object address': 2335890900240}
