In [None]:
import pandas as pd

In [None]:
# Read the validation split from disk and preview its first five rows.
dataset = pd.read_csv("validation.csv")
dataset.head(n=5)

### <a id="client">Instantiating the client</a>

In [None]:
import os

import openlayer

# Point the client at the locally hosted, on-prem Openlayer deployment.
openlayer.api.OPENLAYER_ENDPOINT = "http://localhost:8080/v1"
openlayer.api.STORAGE = openlayer.api.StorageType.ONPREM

# Never commit credentials to a notebook: read the API key from the environment.
# Set it before starting Jupyter, e.g. `export OPENLAYER_API_KEY=<your key>`.
# NOTE(review): any key that was previously hardcoded in this cell should be rotated.
api_key = os.environ.get("OPENLAYER_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENLAYER_API_KEY environment variable before running this cell."
    )

client = openlayer.OpenlayerClient(api_key)


### <a id="project">Creating a project on the platform</a>

In [None]:
from openlayer.tasks import TaskType

# Create the project on the first run and load it on later runs, so the notebook
# works from a fresh platform and is still safe to re-run. `load_project` only
# fetches a project that already exists.
project = client.create_or_load_project(
    name="Stripe Docs QA",
    task_type=TaskType.LLM,
)

### <a id="dataset">Uploading datasets</a>

Before adding the datasets to a project, we need to prepare a `dataset_config`.

This is a Python dictionary that contains all the information needed by the Openlayer platform to utilize the dataset. It should include the column names, the input variable names, etc. For details on the `dataset_config` items, see the [API reference](https://reference.openlayer.com/reference/api/openlayer.OpenlayerClient.add_dataset.html#openlayer.OpenlayerClient.add_dataset).

Let's prepare the `dataset_config` for our validation set:

In [None]:
# Column names that will go into the `dataset_config`
question_column_name = "user_question"
context_column_name = "context"
ground_truth_column_name = "ideal_json"
output_column_name = "model_output_json"

# Input variables are the question and the context, in that order
input_variable_names = [question_column_name, context_column_name]

In [None]:
# Dataset configuration for the validation set (keys are camelCase).
validation_dataset_config = dict(
    contextColumnName=context_column_name,
    groundTruthColumnName=ground_truth_column_name,
    inputVariableNames=input_variable_names,
    label="validation",
    outputColumnName=output_column_name,
    questionColumnName=question_column_name,
)

In [None]:
# Stage the validation dataframe, together with its config, on the project.
project.add_dataframe(dataset_df=dataset, dataset_config=validation_dataset_config)

We can confirm that the validation set is now staged using the `project.status()` method. 

In [None]:
# Show what is currently staged (the validation set, at this point).
project.status()

In [None]:
# Template for the user turn; the `{{ ... }}` placeholders are `user_question`
# and `context`.
prompt_template = """
You are provided a user question and relevant context from the documentation.
You must answer the question taking into account the given context. Be polite and friendly. 
Your answer must be in JSON format with the fields:

- answer: your answer to the user query
- url: the url of the documentation page relevant to answer the question (given with the context)

question: {{ user_question }}
context: {{ context }}
"""

# Chat-style prompt: a fixed system message followed by the templated user message.
prompt = [
    dict(
        role="system",
        content="You are a helpful and polite assistant helping users understand documentation.",
    ),
    dict(role="user", content=prompt_template),
]

In [None]:
# Model configuration -- note that the keys are camelCase.
model_config = dict(
    prompt=prompt,
    inputVariableNames=["user_question", "context"],
    modelProvider="OpenAI",
    modelType="api",
    model="gpt-3.5-turbo",
    modelParameters=dict(temperature=0),
)

In [None]:
# Stage the model, described by its config, on the project.
project.add_model(model_config=model_config)

We can confirm that both the model and the validation set are now staged using the `project.status()` method. 

In [None]:
# Show what is currently staged (now both the model and the validation set).
project.status()

### <a id="commit"> Committing and pushing to the platform </a>

Finally, we can commit the first project version to the platform. 

In [None]:
# Commit the first project version with a short message.
project.commit("Initial commit!")

In [None]:
# Check the project status again after committing.
project.status()

In [None]:
# Push the committed version to the platform.
project.push()