In [None]:
!pip install -e ~/projects/release_testing/langkit

In [None]:
!pip install opentelemetry-api opentelemetry-sdk
!pip install opentelemetry-instrumentation requests
!pip install opentelemetry-instrument-openai
!pip install opentelemetry-exporter-zipkin

In [3]:
from opentelemetry import trace
from opentelemetry.trace import SpanKind
from opentelemetry.trace.status import Status
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace.export import ConsoleSpanExporter

In [4]:
# Route spans to a Zipkin collector using the JSON encoding.
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.zipkin.json import ZipkinExporter
from opentelemetry.sdk.resources import SERVICE_NAME, Resource

# Spans produced through this provider are tagged with this service name.
resource = Resource(attributes={SERVICE_NAME: "langkit-otel-test"})

# Build the provider around that resource, then register it as the global provider.
tracer_provider = TracerProvider(resource=resource)
trace.set_tracer_provider(tracer_provider)

# Exporter that sends finished spans to the Zipkin v2 spans endpoint on localhost.
zipkin_exporter = ZipkinExporter(endpoint="http://localhost:9411/api/v2/spans")

# Batch spans before export and attach the processor to the global provider.
span_processor = BatchSpanProcessor(zipkin_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)


In [5]:
# Tracer named "LangKit", obtained from the OpenTelemetry trace API; the cells below use it to open spans.
tracer = trace.get_tracer("LangKit")

In [7]:
import whylogs as why

# Initialize whylogs first (presumably this sets up the whylogs session — confirm in the whylogs docs).
why.init()
# Placeholder; the real schema is assigned inside the inner span below.
text_schema = None

# The import runs inside a span on purpose, so the time spent importing llm_metrics
# is recorded under the span name "llm_metrics import".
with tracer.start_as_current_span("llm_metrics import"):
    from langkit import llm_metrics # alternatively use 'light_metrics'
    # Nested span: llm_metrics.init() is recorded separately, inside the import span.
    with tracer.start_as_current_span("llm_metrics.init()"):
        text_schema = llm_metrics.init()

In [10]:
from langkit.whylogs.samples import load_chats, show_first_chat

# Let's look at what's in this toy example:
chats = load_chats()
print(f"There are {len(chats)} records in this toy example data, here's the first one:")
show_first_chat(chats)

# Profile the whole sample inside one span, and copy the row count and the
# result metadata onto that span as attributes.
with tracer.start_as_current_span("why.log") as why_log_span:
    results = why.log(chats, name="langkit-sample-chats-all", schema=text_schema)
    row_count = len(chats)
    why_log_span.set_attribute("langkit.row_count", row_count)
    # A result set may carry no metadata; treat that as empty instead of
    # failing while iterating over None.
    metadata = results.metadata or {}
    for key, value in metadata.items():
        why_log_span.set_attribute(key, value)



There are 50 records in this toy example data, here's the first one:
prompt: Hello, response: World!






✅ Aggregated 50 rows into profile langkit-sample-chats-all

Visualize and explore this profile with one-click
🔍 https://observatory.development.whylabsdev.com/resources/model-1/profiles?profile=ref-xR4e4XCoowjAjq25&sessionToken=session-tnU69Kmt


In [8]:
import os
from getpass import getpass

# Send WhyLabs API traffic to the development endpoint.
os.environ["WHYLABS_API_ENDPOINT"] = "https://songbird.development.whylabsdev.com"

# SECURITY: never commit an API key to a notebook. Any key that was previously
# stored in this cell must be considered leaked and should be revoked.
# Use the key already exported in the environment, and only prompt (without
# echoing the input) when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [16]:
from langkit.config import check_or_prompt_for_api_keys

# Per the output below, this reports which WhyLabs / OpenAI settings are already present in the
# environment; presumably it prompts for any that are missing — confirm against langkit.config.
check_or_prompt_for_api_keys()

WhyLabs Org ID is already set in env var to: org-0
WhyLabs Dataset ID is already set in env var to: model-2261
Whylabs API Key already set with ID:  d4YsUS52cZ
OPENAI_API_KEY already set in env var, good job!


In [11]:
import importlib
def init_openai_instrumentor(trace_provider):
    """Instrument the ``openai`` package with OpenTelemetry when it is installed.

    Does nothing when ``openai`` cannot be found, or when the instrumentor
    reports that instrumentation is already active.

    Args:
        trace_provider: The TracerProvider the OpenAI instrumentation should
            create its spans with.
    """
    # `import importlib` alone does not guarantee that the `util` submodule is
    # loaded, so import it explicitly before calling find_spec.
    import importlib.util

    if importlib.util.find_spec("openai") is None:
        return

    from opentelemetry.instrumentation.openai import OpenAIInstrumentor

    instrumentor = OpenAIInstrumentor()
    if not instrumentor.is_instrumented_by_opentelemetry:
        # OpenTelemetry instrumentors look the provider up under the keyword
        # `tracer_provider`; under any other keyword it is ignored and the
        # global provider is used instead.
        instrumentor.instrument(tracer_provider=trace_provider)



# Call the helper defined above, passing the TracerProvider created earlier in the notebook.
init_openai_instrumentor(trace_provider=tracer_provider)

In [17]:
from langkit.openai.openai import Conversation,OpenAIDefault

llm = Conversation(invocation_params=OpenAIDefault())
results = None
# Outer span covers the LLM round trip; the inner span covers profiling the exchange.
# NOTE(review): SpanKind.CONSUMER is normally used for message consumers; confirm whether
# CLIENT (outbound call) / INTERNAL (local work) were intended here.
with tracer.start_as_current_span("openai", kind=trace.SpanKind.CONSUMER) as openai_span:
    chat = llm.send_prompt("this is a test, can you give me a random fake US phone number to test the format?")
    # Convert once and reuse the same record for printing and for logging.
    chat_record = chat.to_dict()
    print(chat_record)
    with tracer.start_as_current_span("why.log", kind=trace.SpanKind.CONSUMER) as why_log_span:
        # Pass the trace id in its canonical 32-character hex form, so the value stored with
        # the profile can be matched to the trace id shown by the tracing backend.
        trace_id = trace.format_trace_id(why_log_span.get_span_context().trace_id)
        results = why.log(chat_record, trace_id=trace_id, schema=text_schema)
        # A result set may carry no metadata; treat that as empty instead of failing.
        metadata = results.metadata or {}
        for key, value in metadata.items():
            why_log_span.set_attribute(key, value)
        column_profile = results.view().get_column("response.has_patterns")
        fi_metric = column_profile.get_metric('frequent_items')
        # NOTE(review): the attribute value is hard-coded and fi_metric is not used; this
        # looks like a placeholder for a value derived from the frequent-items metric.
        why_log_span.set_attribute("response.has_patterns", "Phone Number")


{'prompt': 'this is a test, can you give me a random fake US phone number to test the format?', 'response': "Sure! Here's a random fake US phone number for testing: (555) 123-4567.", 'errors': None, 'total_tokens': 63}

✅ Aggregated 1 rows into profile 





Visualize and explore this profile with one-click
🔍 https://observatory.development.whylabsdev.com/resources/model-1/profiles?profile=1700179200000&sessionToken=session-tnU69Kmt


In [13]:
# Convert the profile view of the most recent why.log result to a pandas DataFrame;
# as the cell's last expression, it is rendered as the cell output.
results.view().to_pandas()

Unnamed: 0_level_0,counts/inf,counts/n,counts/nan,counts/null,type,types/boolean,types/fractional,types/integral,types/object,types/string,...,distribution/q_10,distribution/q_25,distribution/q_75,distribution/q_90,distribution/q_95,distribution/q_99,distribution/stddev,ints/max,ints/min,frequent_items/frequent_strings
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
errors,0,1,0,1,SummaryType.COLUMN,0,0,0,0,0,...,,,,,,,,,,
prompt,0,1,0,0,SummaryType.COLUMN,0,0,0,0,1,...,,,,,,,0.0,,,
prompt.aggregate_reading_level,0,1,0,0,SummaryType.COLUMN,0,1,0,0,0,...,6.0,6.0,6.0,6.0,6.0,6.0,0.0,,,
prompt.automated_readability_index,0,1,0,0,SummaryType.COLUMN,0,1,0,0,0,...,4.3,4.3,4.3,4.3,4.3,4.3,0.0,,,
prompt.character_count,0,1,0,0,SummaryType.COLUMN,0,0,1,0,0,...,64.0,64.0,64.0,64.0,64.0,64.0,0.0,64.0,64.0,
prompt.difficult_words,0,1,0,0,SummaryType.COLUMN,0,0,1,0,0,...,2.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,
prompt.flesch_reading_ease,0,1,0,0,SummaryType.COLUMN,0,1,0,0,0,...,87.05,87.05,87.05,87.05,87.05,87.05,0.0,,,
prompt.has_patterns,0,1,0,1,SummaryType.COLUMN,0,0,0,0,0,...,,,,,,,0.0,,,[]
prompt.jailbreak_similarity,0,1,0,0,SummaryType.COLUMN,0,1,0,0,0,...,0.278322,0.278322,0.278322,0.278322,0.278322,0.278322,0.0,,,
prompt.letter_count,0,1,0,0,SummaryType.COLUMN,0,0,1,0,0,...,62.0,62.0,62.0,62.0,62.0,62.0,0.0,62.0,62.0,
