In [None]:
pip install Ollama

In [None]:
pip install trulens trulens-apps-langchain trulens-providers-litellm litellm langchain

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import ChatPromptTemplate
from langchain.prompts.chat import HumanMessagePromptTemplate
from trulens.core import Feedback
from trulens.core import TruSession
from trulens.apps.langchain import TruChain

session = TruSession()
session.reset_database()

In [None]:
from langchain_ollama import OllamaLLM

# Initialize Ollama with the correct class name
ollama = OllamaLLM(base_url="http://localhost:11434", model="llama2")

# Query the model
response = ollama("Why is the sky blue?")
print(response)

In [None]:
pip install langchain_ollama

In [None]:
import requests

# Ollama's local server URL (adjust as needed)
ollama_url = "http://localhost:11434/models"

# Send a GET request to fetch models
response = requests.get(ollama_url)

# Check the response status and print the available models
if response.status_code == 200:
    models = response.json()
    print("Available models:", models)
else:
    print(f"Error fetching models: {response.status_code}")

In [None]:
import torch
from transformers import pipeline
from trulens_eval import Tru, TruChain
from trulens_eval.keys import openai
from trulens_eval.feedback import Feedback
from trulens_eval.instruments import Instrument
from trulens_eval.schema import FeedbackResult
import numpy as np

# Disable the OpenAI dependency warning
openai.api_key = "not_required"

# Initialize TruLens tracking
tru = Tru()

# Create a Hugging Face text generation pipeline
generator = pipeline(
    'text-generation', 
    model='gpt2', 
    device=0 if torch.cuda.is_available() else -1
)

# Define custom feedback functions
def coherence_feedback(output: str) -> float:
    """
    Simple coherence check: Measure sentence variation 
    and complexity of generated text
    """
    sentences = output.split('.')
    if len(sentences) <= 1:
        return 0.0
    
    sentence_lengths = [len(sentence.split()) for sentence in sentences if sentence.strip()]
    
    if not sentence_lengths:
        return 0.0
    
    length_variation = np.std(sentence_lengths)
    avg_length = np.mean(sentence_lengths)
    
    # Normalize score between 0 and 1
    coherence_score = min(1.0, max(0.0, 1 - (length_variation / avg_length)))
    return coherence_score

def toxicity_feedback(output: str) -> float:
    """
    Basic toxicity estimation
    Returns 1.0 if toxic, 0.0 if not toxic
    """
    toxic_words = ['hate', 'stupid', 'idiot', 'terrible', 'worst']
    output_lower = output.lower()
    
    toxicity_count = sum(1 for word in toxic_words if word in output_lower)
    return min(1.0, toxicity_count * 0.2)

# Create feedback functions
coherence_fn = Feedback(coherence_feedback)
toxicity_fn = Feedback(toxicity_feedback)

# Wrapper function for LLM call
def generate_text(prompt: str, max_length: int = 100) -> str:
    """
    Generate text using Hugging Face pipeline with error handling
    """
    try:
        result = generator(
            prompt, 
            max_length=max_length, 
            num_return_sequences=1,
            do_sample=True
        )
        return result[0]['generated_text']
    except Exception as e:
        print(f"Generation error: {e}")
        return "Error in text generation"

# Instrument the text generation function
instrumented_generate = Instrument(generate_text)

# Create a TruChain to track the generation
with tru.chain(
    instrumented_generate, 
    feedbacks=[coherence_fn, toxicity_fn]
) as chain:
    # Example usage
    prompt = "Tell me a short story about adventure"
    output = chain(prompt)
    print("Generated Text:", output)

# Record the record
tru.run_dashboard()

# Optional: Print out results
tru.get_records_and_feedback()

In [None]:
pip install openai

In [None]:
pip install trulens-eval transformers torch

In [None]:
pip install trulens-eval transformers torch numpy

In [None]:
import os
import torch
from transformers import pipeline
from trulens_eval import Tru, Feedback
from trulens_eval.experimental.instrumentation import Instrument
import numpy as np

# Initialize TruLens tracking
tru = Tru()

# Create a Hugging Face text generation pipeline
generator = pipeline(
    'text-generation', 
    model='gpt2', 
    device=0 if torch.cuda.is_available() else -1
)

# Define custom feedback functions
def coherence_feedback(output: str) -> float:
    """
    Simple coherence check: Measure sentence variation 
    and complexity of generated text
    """
    sentences = output.split('.')
    if len(sentences) <= 1:
        return 0.0
    
    sentence_lengths = [len(sentence.split()) for sentence in sentences if sentence.strip()]
    
    if not sentence_lengths:
        return 0.0
    
    length_variation = np.std(sentence_lengths)
    avg_length = np.mean(sentence_lengths)
    
    # Normalize score between 0 and 1
    coherence_score = min(1.0, max(0.0, 1 - (length_variation / avg_length)))
    return coherence_score

def toxicity_feedback(output: str) -> float:
    """
    Basic toxicity estimation
    Returns 1.0 if toxic, 0.0 if not toxic
    """
    toxic_words = ['hate', 'stupid', 'idiot', 'terrible', 'worst']
    output_lower = output.lower()
    
    toxicity_count = sum(1 for word in toxic_words if word in output_lower)
    return min(1.0, toxicity_count * 0.2)

# Create feedback functions using Feedback class
coherence_fn = Feedback(coherence_feedback)
toxicity_fn = Feedback(toxicity_feedback)

# Wrapper function for LLM call
def generate_text(prompt: str, max_length: int = 100) -> str:
    """
    Generate text using Hugging Face pipeline with error handling
    """
    try:
        result = generator(
            prompt, 
            max_length=max_length, 
            num_return_sequences=1,
            do_sample=True
        )
        return result[0]['generated_text']
    except Exception as e:
        print(f"Generation error: {e}")
        return "Error in text generation"

# Instrument the text generation function
@Instrument
def instrumented_generate(prompt: str) -> str:
    return generate_text(prompt)

# Configure TruLens logging directory
tru_logging_dir = os.path.join(os.getcwd(), 'trulens_logs')
os.makedirs(tru_logging_dir, exist_ok=True)
tru.config.database_path = tru_logging_dir

# Record the record with feedbacks
def main():
    with tru.chain(
        instrumented_generate, 
        feedbacks=[coherence_fn, toxicity_fn]
    ) as chain:
        # Example usage
        prompt = "Tell me a short story about adventure"
        output = chain(prompt)
        print("Generated Text:", output)

    # Generate and print records
    records = tru.get_records_and_feedback()
    print("\nRecords and Feedback:")
    for record in records:
        print(f"Prompt: {record.prompt}")
        print(f"Response: {record.response}")
        print("Feedback Scores:")
        for feedback in record.feedback:
            print(f"  {feedback.name}: {feedback.result}")
        print("-" * 50)

    # Optional: Run dashboard
    tru.run_dashboard()

if __name__ == "__main__":
    main()

In [None]:
pip install trulens_eval.experimental

In [None]:
import os
import torch
from transformers import pipeline
from trulens_eval import Tru, Feedback
from trulens_eval.utils.containers import Context
import numpy as np

# Initialize TruLens tracking
tru = Tru()

# Create a Hugging Face text generation pipeline
generator = pipeline(
    'text-generation', 
    model='gpt2', 
    device=0 if torch.cuda.is_available() else -1
)

# Define custom feedback functions
def coherence_feedback(context: Context) -> float:
    """
    Simple coherence check: Measure sentence variation 
    and complexity of generated text
    """
    output = context.get_response()
    sentences = output.split('.')
    if len(sentences) <= 1:
        return 0.0
    
    sentence_lengths = [len(sentence.split()) for sentence in sentences if sentence.strip()]
    
    if not sentence_lengths:
        return 0.0
    
    length_variation = np.std(sentence_lengths)
    avg_length = np.mean(sentence_lengths)
    
    # Normalize score between 0 and 1
    coherence_score = min(1.0, max(0.0, 1 - (length_variation / avg_length)))
    return coherence_score

def toxicity_feedback(context: Context) -> float:
    """
    Basic toxicity estimation
    Returns 1.0 if toxic, 0.0 if not toxic
    """
    output = context.get_response()
    toxic_words = ['hate', 'stupid', 'idiot', 'terrible', 'worst']
    output_lower = output.lower()
    
    toxicity_count = sum(1 for word in toxic_words if word in output_lower)
    return min(1.0, toxicity_count * 0.2)

# Create feedback functions
coherence_fn = Feedback(coherence_feedback)
toxicity_fn = Feedback(toxicity_feedback)

# Wrapper function for LLM call
def generate_text(prompt: str, max_length: int = 100) -> str:
    """
    Generate text using Hugging Face pipeline with error handling
    """
    try:
        result = generator(
            prompt, 
            max_length=max_length, 
            num_return_sequences=1,
            do_sample=True
        )
        return result[0]['generated_text']
    except Exception as e:
        print(f"Generation error: {e}")
        return "Error in text generation"

# Configure TruLens logging directory
tru_logging_dir = os.path.join(os.getcwd(), 'trulens_logs')
os.makedirs(tru_logging_dir, exist_ok=True)
tru.config.database_path = tru_logging_dir

def main():
    # Record the record with feedbacks
    with tru.track(
        generate_text, 
        feedbacks=[coherence_fn, toxicity_fn]
    ) as chain:
        # Example usage
        prompt = "Tell me a short story about adventure"
        output = chain(prompt)
        print("Generated Text:", output)

    # Generate and print records
    records = tru.get_records_and_feedback()
    print("\nRecords and Feedback:")
    for record in records:
        print(f"Prompt: {record.get_input()}")
        print(f"Response: {record.get_response()}")
        print("Feedback Scores:")
        for feedback in record.feedbacks:
            print(f"  {feedback.name}: {feedback.result}")
        print("-" * 50)

    # Optional: Run dashboard
    tru.run_dashboard()

if __name__ == "__main__":
    main()

In [None]:
pip install trulens-eval

In [None]:
import os
from typing import Any, Dict
import torch
from transformers import pipeline
from trulens_eval import Tru, TruChain
from trulens_eval.feedback import Feedback, Groundedness
from trulens_eval.feedback.provider.openai import OpenAI

# Initialize TruLens and OpenAI feedback provider
tru = Tru()
openai = OpenAI()

# Create a Hugging Face text generation pipeline
generator = pipeline(
    'text-generation', 
    model='gpt2', 
    device=0 if torch.cuda.is_available() else -1
)

def generate_text(prompt: str, max_length: int = 100) -> str:
    """
    Generate text using Hugging Face pipeline with error handling
    """
    try:
        result = generator(
            prompt, 
            max_length=max_length, 
            num_return_sequences=1,
            do_sample=True
        )
        return result[0]['generated_text']
    except Exception as e:
        print(f"Generation error: {e}")
        return "Error in text generation"

def main():
    # Configure logging directory
    tru_logging_dir = os.path.join(os.getcwd(), 'trulens_logs')
    os.makedirs(tru_logging_dir, exist_ok=True)
    tru.config.database_path = tru_logging_dir

    # Define feedback functions using OpenAI
    # Groundedness check
    grounded = Groundedness(groundedness_provider=openai)
    groundedness_feedback = Feedback(grounded.groundedness_measure_with_cot_reasons).on_output()

    # Relevance feedback
    relevance_feedback = Feedback(openai.relevance).on_input_output()

    # Quality feedback
    quality_feedback = Feedback(openai.quality).on_output()

    # Track the text generation with TruChain
    chain = TruChain(
        generate_text, 
        app_id = 'GPT2_Text_Generation_App',
        feedbacks = [
            groundedness_feedback, 
            relevance_feedback, 
            quality_feedback
        ]
    )

    # Example prompts
    prompts = [
        "Tell me a short story about adventure",
        "Describe a peaceful mountain landscape",
        "Write a brief poem about friendship"
    ]

    # Generate and track responses
    for prompt in prompts:
        print(f"\nPrompt: {prompt}")
        # Use the chain to generate and record
        response = chain.run(prompt)
        print(f"Response: {response}")

    # Retrieve and display records
    records = tru.get_records_and_feedback(app_ids=['GPT2_Text_Generation_App'])
    print("\n--- Feedback Records ---")
    for record in records:
        print(f"Prompt: {record.input}")
        print(f"Response: {record.output}")
        print("Feedback:")
        for feedback in record.feedback:
            print(f"  {feedback.name}: {feedback.result}")
        print("-" * 50)

    # Open the dashboard
    tru.run_dashboard()

if __name__ == "__main__":
    main()

In [None]:
pip install arize-phoenix transformers torch pandas numpy

In [None]:
import os
import phoenix as px
import openai
from transformers import pipeline
import pandas as pd
import numpy as np

# Configuration and setup
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Configure OpenAI (optional, for more advanced uses)
# openai.api_key = 'your-api-key'

def generate_text(prompt: str, max_length: int = 100) -> str:
    """
    Generate text using Hugging Face pipeline with robust error handling
    """
    try:
        # Use GPT-2 for text generation
        generator = pipeline('text-generation', model='gpt2')
        
        # Generate text with safety checks
        result = generator(
            prompt, 
            max_length=max_length, 
            num_return_sequences=1,
            do_sample=True
        )
        
        # Extract and clean generated text
        generated_text = result[0]['generated_text'].strip()
        return generated_text
    
    except Exception as e:
        print(f"Text generation error: {e}")
        return f"Generation error: {str(e)}"

def main():
    # Initialize Phoenix session
    session = px.launch_app()

    # Prepare example prompts
    prompts = [
        "Tell me a short story about adventure",
        "Describe a peaceful mountain landscape",
        "Write a brief poem about friendship"
    ]

    # Lists to collect data for DataFrame
    generations = []

    # Generate texts and collect data
    for prompt in prompts:
        response = generate_text(prompt)
        
        # Collect generation data
        generation_data = {
            'prompt': prompt,
            'response': response,
            'response_length': len(response),
            'word_count': len(response.split()),
        }
        generations.append(generation_data)

    # Create DataFrame for tracking
    generations_df = pd.DataFrame(generations)

    # Basic custom evaluations
    def calculate_complexity(text):
        """Calculate text complexity based on word length and sentence structure"""
        words = text.split()
        avg_word_length = np.mean([len(word) for word in words])
        unique_word_ratio = len(set(words)) / len(words)
        return avg_word_length * unique_word_ratio

    generations_df['complexity_score'] = generations_df['response'].apply(calculate_complexity)

    # Log DataFrame to Phoenix
    px.log_dataframe(generations_df, 'text_generations')

    # Print out generations for verification
    print("\nGeneration Results:")
    print(generations_df)

    # Keep the Phoenix session open
    session.wait()

if __name__ == "__main__":
    main()

In [1]:
pip install arize-phoenix transformers torch pandas numpy

Note: you may need to restart the kernel to use updated packages.


In [3]:
import os
import phoenix as px
import pandas as pd
import numpy as np
import torch
from transformers import pipeline

def generate_text(prompt: str, max_length: int = 100) -> str:
    """
    Generate text using Hugging Face pipeline with GPU support
    """
    try:
        # Use GPU if available
        device = 0 if torch.cuda.is_available() else -1
        
        # Generate text with explicit truncation
        generator = pipeline(
            'text-generation', 
            model='gpt2', 
            device=device,
            truncation=True
        )
        
        result = generator(
            prompt, 
            max_length=max_length, 
            num_return_sequences=1,
            do_sample=True,
            truncation=True
        )
        
        generated_text = result[0]['generated_text'].strip()
        return generated_text
    
    except Exception as e:
        print(f"Text generation error: {e}")
        return f"Generation error: {str(e)}"

def calculate_complexity(text):
    """Calculate text complexity based on word length and sentence structure"""
    try:
        words = text.split()
        if not words:
            return 0
        
        avg_word_length = np.mean([len(word) for word in words])
        unique_word_ratio = len(set(words)) / len(words)
        return avg_word_length * unique_word_ratio
    except Exception:
        return 0

def main():
    # Explicitly start Phoenix session
    px.launch_app()

    # Prepare example prompts
    prompts = [
        "Tell me a short story about adventure",
        "Describe a peaceful mountain landscape",
        "Write a brief poem about friendship"
    ]

    # Lists to collect data for DataFrame
    generations = []

    # Generate texts and collect data
    for prompt in prompts:
        response = generate_text(prompt)
        
        # Collect generation data
        generation_data = {
            'prompt': prompt,
            'response': response,
            'response_length': len(response),
            'word_count': len(response.split()),
            'complexity_score': calculate_complexity(response)
        }
        generations.append(generation_data)

    # Create DataFrame for tracking
    generations_df = pd.DataFrame(generations)

    # Print out generations for verification
    print("\nGeneration Results:")
    print(generations_df)

    # Use Phoenix's ingest method instead of log_dataframe
    px.collect_llm_generations(
        dataframe=generations_df, 
        prompt_column='prompt', 
        response_column='response'
    )

if __name__ == "__main__":
    main()

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Generation Results:
                                   prompt  \
0   Tell me a short story about adventure   
1  Describe a peaceful mountain landscape   
2     Write a brief poem about friendship   

                                            response  response_length  \
0  Tell me a short story about adventure. I just ...              487   
1  Describe a peaceful mountain landscape, especi...              447   
2  Write a brief poem about friendship without be...              478   

   word_count  complexity_score  
0          90          2.849877  
1          81          3.226642  
2          91          2.796764  


AttributeError: module 'phoenix' has no attribute 'collect_llm_generations'

In [12]:
import os
import phoenix as px
import pandas as pd
import numpy as np
import torch
from transformers import pipeline

def generate_text(prompt: str, max_length: int = 100) -> str:
    """
    Generate text using Hugging Face pipeline with GPU support
    """
    try:
        # Use GPU if available
        device = 0 if torch.cuda.is_available() else -1
        
        # Generate text with explicit truncation
        generator = pipeline(
            'text-generation', 
            model='gpt2', 
            device=device,
            truncation=True
        )
        
        result = generator(
            prompt, 
            max_length=max_length, 
            num_return_sequences=1,
            do_sample=True,
            truncation=True
        )
        
        generated_text = result[0]['generated_text'].strip()
        return generated_text
    
    except Exception as e:
        print(f"Text generation error: {e}")
        return f"Generation error: {str(e)}"

def calculate_complexity(text):
    """Calculate text complexity based on word length and sentence structure"""
    try:
        words = text.split()
        if not words:
            return 0
        
        avg_word_length = np.mean([len(word) for word in words])
        unique_word_ratio = len(set(words)) / len(words)
        return avg_word_length * unique_word_ratio
    except Exception:
        return 0

def main():
    # Explicitly start Phoenix session
    session = px.launch_app()

    # Prepare example prompts
    prompts = [
        "Tell me a short story about adventure",
        "Describe a peaceful mountain landscape",
        "Write a brief poem about friendship"
    ]

    # Lists to collect data for DataFrame
    generations = []

    # Generate texts and collect data
    for prompt in prompts:
        response = generate_text(prompt)
        
        # Collect generation data
        generation_data = {
            'prompt': prompt,
            'response': response,
            'response_length': len(response),
            'word_count': len(response.split()),
            'complexity_score': calculate_complexity(response)
        }
        generations.append(generation_data)

    # Create DataFrame for tracking
    generations_df = pd.DataFrame(generations)

    # Print out generations for verification
    print("\nGeneration Results:")
    print(generations_df)

    # Use Phoenix's data tracking
    predictions = px.Predictions(
        dataframe=generations_df, 
        input_column='prompt', 
        output_column='response'
    )

    # Explicitly collect the predictions
    px.save(predictions)

    # Keep the session open
    session.wait()

if __name__ == "__main__":
    main()

Existing running Phoenix instance detected! Shutting it down and starting a new instance...


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



Generation Results:
                                   prompt  \
0   Tell me a short story about adventure   
1  Describe a peaceful mountain landscape   
2     Write a brief poem about friendship   

                                            response  response_length  \
0  Tell me a short story about adventure (with a ...              422   
1  Describe a peaceful mountain landscape in Indi...              527   
2  Write a brief poem about friendship and an ide...               63   

   word_count  complexity_score  
0          84          3.065760  
1          87          3.537984  
2          12          4.333333  


AttributeError: module 'phoenix' has no attribute 'Predictions'

In [10]:
import os
import phoenix as px
import pandas as pd
import torch
from transformers import pipeline

def generate_text(prompt: str, max_length: int = 100) -> str:
    """
    Generate text using Hugging Face pipeline with GPU support
    """
    try:
        # Use GPU if available
        device = 0 if torch.cuda.is_available() else -1
        
        # Generate text with explicit truncation
        generator = pipeline(
            'text-generation', 
            model='gpt2', 
            device=device
        )
        
        result = generator(
            prompt, 
            max_length=max_length, 
            num_return_sequences=1,
            do_sample=True
        )
        
        generated_text = result[0]['generated_text'].strip()
        return generated_text
    
    except Exception as e:
        print(f"Text generation error: {e}")
        return f"Generation error: {str(e)}"

def main():
    # Launch Phoenix application
    session = px.launch_app()

    # Create sample data for LLM traces
    traces = []
    prompts = [
        "Tell me a short story about adventure",
        "Describe a peaceful mountain landscape",
        "Write a brief poem about friendship"
    ]

    # Generate traces
    for prompt in prompts:
        response = generate_text(prompt)
        
        # Create a trace for each generation
        trace = px.evals(
            name="text_generation",
            kind="LLM",
            inputs={"prompt": prompt},
            outputs={"response": response}
        )
        traces.append(trace)

    # Log traces to Phoenix
    px.log_traces(traces)

    # Keep the session open
    session.wait()

if __name__ == "__main__":
    main()

Existing running Phoenix instance detected! Shutting it down and starting a new instance...


🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


TypeError: 'module' object is not callable

In [14]:
#pip install requests opentelemetry-sdk opentelemetry-exporter-otlp arize-phoenix
import requests
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.trace import set_tracer_provider
from arize.pheonix import ArizePhoenix

# Set up OpenTelemetry tracing
def setup_tracing():
    tracer_provider = TracerProvider()
    span_processor = BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
    tracer_provider.add_span_processor(span_processor)
    set_tracer_provider(tracer_provider)
    return trace.get_tracer(__name__)

# Function that queries the model
"""
def query_ollama(prompt, model="model_name", base_url="localhost:xyz"):
    url = f"{base_url}/api/models/{model}/generate"
    headers = {"Content-Type": "application/json"}
    data = {"prompt": prompt}
    
    response = requests.post(url, json=data, timeout=10)
    response.raise_for_status()
    return response.json().get("generated_text", "")
"""

# Trace a single request and log the interaction
def trace_and_log_request(tracer, prompt, model="llama2", base_url="http://localhost:11434"):
    with tracer.start_as_current_span("ollama-query") as span:
        span.set_attribute("prompt", prompt)
        span.set_attribute("model", model)
        
        response = query_ollama(prompt, model, base_url)
        
        span.set_attribute("response", response)
        return response

# Prepare dataset and evaluate in Phoenix
def evaluate_in_phoenix(prompts, responses, dataset_path="evaluation.jsonl"):
    # Write the dataset in JSONL format
    with open(dataset_path, "w") as f:
        for prompt, response in zip(prompts, responses):
            record = {"prompt": prompt, "response": response}
            f.write(f"{json.dumps(record)}\n")
    
    # Evaluate using Phoenix
    phoenix = ArizePhoenix()
    phoenix.load(dataset_path)

# Main function
def main():
    # Setup tracing
    tracer = setup_tracing()

    # Example prompts to query the model
    prompts = [
        "Why is the sky blue?",
        "Explain quantum entanglement in simple terms.",
        "What is the capital of France?"
    ]
    
    # Trace each request and collect responses
    responses = []
    for prompt in prompts:
        try:
            response = trace_and_log_request(tracer, prompt)
            responses.append(response)
        except Exception as e:
            print(f"Error querying Ollama for prompt '{prompt}': {e}")
            responses.append("Error")
    
    # Evaluate results in Phoenix
    evaluate_in_phoenix(prompts, responses)

if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'arize'