# Setup
Install openai and set up the API key.

This notebook works both in a Colab environment and on a local machine.

Colab:
- API keys must be saved in Colab secrets as OPENAI_API_KEY and LANGCHAIN_API_KEY

Local:
- API key must be defined in the .env file (refer to example.env)

In [None]:
# Detect the runtime: `google.colab` is only importable inside Google Colab,
# so a failed import means the notebook is running locally.
try:
    import google.colab  # noqa: F401  (imported only to probe availability)
    IN_COLAB = True
except ImportError:
    # Catch only the "module not available" case; a bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated failures.
    IN_COLAB = False
IN_COLAB

In [None]:
if IN_COLAB:
    %pip install openai
    %pip install requests
    %pip install -U instructor
    %pip install anthropic
    %pip install langsmith
    import os
    from google.colab import userdata
    os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
    os.environ["LANGCHAIN_API_KEY"] = userdata.get("LANGCHAIN_API_KEY")
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
    os.environ["LANGCHAIN_PROJECT"] = "GHPT_Instructor"

### Load files into Colab and install package

NOTE: you may need to refresh your Colab files directory to see changes

In [None]:
if IN_COLAB:
    # remove the existing directory
    import shutil
    shutil.rmtree('/content/GHPT-experiments/', ignore_errors=True)
    !git clone "https://github.com/samgregson/GHPT-experiments"
    %pip install -e /content/GHPT-experiments/
    # add the modules to the search path
    import site
    site.main()

Load the correct SSL certificates

In [None]:
if not IN_COLAB:
    import os
    import ssl

    from dotenv import load_dotenv

    # Load variables from the local .env file into the environment.
    load_dotenv()

    # Path of the CA bundle to trust; None when REQUESTS_CA_BUNDLE is unset.
    ca_bundle_path = os.environ.get("REQUESTS_CA_BUNDLE")
    context = ssl.create_default_context(cafile=ca_bundle_path)
    # Show which bundle is in use.
    print(ca_bundle_path)

Test the SSL certificate: this should return `<Response [200]>`. If not, try restarting the Jupyter kernel.

In [None]:
if not IN_COLAB:
    import requests
    # requests.get("https://fastapi-production-e161.up.railway.app/docs")
    # An expression nested inside an `if` block is not echoed by the notebook,
    # so the response is printed explicitly. The timeout stops the cell from
    # hanging indefinitely when the endpoint is unreachable.
    response = requests.get("https://api.smith.langchain.com/docs", timeout=10)
    print(response)

### OpenAI

In [None]:
from openai import OpenAI, AsyncOpenAI
from langsmith.wrappers import wrap_openai

# Start from a plain asynchronous OpenAI client.
base_client = AsyncOpenAI()

# Outside Colab, apply the custom VSCode patch. This has to be the first
# wrapper applied to the client.
if not IN_COLAB:
    from patch_openai.patch_openai import patch_openai
    base_client = patch_openai(base_client)

# Finally, wrap the OpenAI client with LangSmith.
client = wrap_openai(base_client)

In [None]:
import instructor

# Build the instructor client on top of the wrapped OpenAI client, in TOOLS mode.
client_instructor = instructor.from_openai(
    client,
    mode=instructor.Mode.TOOLS,
)

In [None]:
from data.components import get_components_with_embeddings

# Bare last expression of the cell: the notebook displays whatever this call returns.
# NOTE(review): presumably loads the component list together with its
# embeddings — confirm against data.components.
get_components_with_embeddings()

# Evals

In [None]:
from langsmith import Client
from validation_set import inputs, outputs

ls_client = Client()

# Define the dataset holding the test cases. It is created only when no
# dataset with this name exists yet, so re-running the cell does not add
# duplicate examples.
# NOTE(review): `outputs` is imported above but not attached to the examples —
# confirm that is intended.
dataset_name = "Simple Example Dataset"
existing_datasets = list(ls_client.list_datasets(dataset_name=dataset_name))
if not existing_datasets:
    dataset = ls_client.create_dataset(dataset_name)
    # One example per validation prompt, stored under the "question" key.
    example_inputs = [{"question": question} for question in inputs]
    ls_client.create_examples(inputs=example_inputs, dataset_id=dataset.id)

In [None]:
from pipeline.pipeline import run_pipeline
import asyncio
import nest_asyncio
nest_asyncio.apply()

index = 1

async def run_all():
    tasks = []
    for i, input in enumerate(inputs):
        if i < index: continue # start index
        if i > index: break # finish index
        tasks.append(run_pipeline(client=client_instructor, user_prompt=input))
    return await asyncio.gather(*tasks)

all_output = await run_all()

print(all_output)

In [None]:
from langsmith.evaluation import evaluate

# experiment_results = evaluate(
#     lambda inputs: asyncio.run(
#         run_pipeline(client=client_instructor, user_prompt=inputs["user_prompt"])
#     ),
#     data=ls_client.list_examples(example_ids=['e6564fb7-e626-4d25-ac9e-48d5219b6c1f']),
#     # data=dataset_name,
#     evaluators=[],
#     experiment_prefix="simple-experiment",
#     metadata={
#       "description": "'instructor' baseline"
#     },
# )