In [1]:
from pylangdb.client import LangDb
from utils import UserTrendsAnalyzer
import os
from dotenv import load_dotenv
from datetime import datetime

In [2]:
# Load environment variables (load_dotenv reads a local .env file when present)
load_dotenv()

# Read each credential exactly once so the client and the analyzer are
# guaranteed to be configured with the same values.
langdb_api_key = os.getenv("LANGDB_API_KEY")
langdb_project_id = os.getenv("LANGDB_PROJECT_ID")

# Fail fast with an actionable message rather than passing None (or an empty
# string) on to the constructors below.
missing_vars = [
    name
    for name, value in (
        ("LANGDB_API_KEY", langdb_api_key),
        ("LANGDB_PROJECT_ID", langdb_project_id),
    )
    if not value
]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_vars)
    )

# Initialize client and analyzer
client = LangDb(api_key=langdb_api_key, project_id=langdb_project_id)
analyzer = UserTrendsAnalyzer(api_key=langdb_api_key, project_id=langdb_project_id)

In [8]:
# Prompts used to seed the conversations, kept in two named groups and then
# combined in order into the single `questions` list the rest of the notebook uses.
core_questions = [
    "What are the main differences between Python and JavaScript?",
    "How does garbage collection work in Python?",
    "Explain the concept of decorators in Python with an example.",
    "What is the Global Interpreter Lock (GIL) in Python?",
    "How do context managers work in Python and when should you use them?",
]

# Additional questions for better trend analysis
trend_questions = [
    "What's the best way to handle errors in Python?",
    "How to optimize database queries in Django?",
    "Explain the concept of async/await in Python.",
    "Best practices for writing unit tests in Python?",
    "How to implement caching in a Python web application?",
]

questions = [*core_questions, *trend_questions]

In [3]:
def _build_messages(prompt):
    """Return a fresh system + user message pair for a single question."""
    return [
        {"role": "system", "content": "You are a helpful programming tutor."},
        {"role": "user", "content": prompt},
    ]


# Send each question as its own completion request and record the thread id
# reported in the response, in the same order as `questions`.
thread_ids = []
for prompt in questions:
    reply = client.completion(
        model="gpt-4o-mini",
        messages=_build_messages(prompt),
        temperature=0.7,
        max_tokens=1000,
    )
    thread_ids.append(reply["thread_id"])

## Analyzing Conversation Data
Let's examine the conversation data in detail. The `create_evaluation_df` method returns a DataFrame containing every message from the threads we pass in, along with metadata such as timestamps and costs.

In [9]:
# Fetching conversation data for every thread id collected so far
df = client.create_evaluation_df(thread_ids)

# Check up front for the columns the cost analysis reads, so a schema change
# is reported here with a clear message instead of as a KeyError further down.
required_columns = {"thread_id", "thread_total_cost"}
missing_columns = required_columns - set(df.columns)
assert not missing_columns, (
    f"evaluation DataFrame is missing expected columns: {sorted(missing_columns)}"
)

In [10]:
# Cell: Cost Analysis
# Summarises cost per thread and per message from `df`, prints the summary,
# and writes a timestamped plain-text copy of it to the working directory.
#
# NOTE(review): the column name `thread_total_cost` suggests a per-thread
# total. If that value is repeated on every message row of a thread, the sums
# below count it once per message and overstate the cost — confirm against
# what `create_evaluation_df` returns before relying on these figures.

# Compute every aggregate once and reuse it for both console and file output.
total_cost = df["thread_total_cost"].sum()
thread_costs = df.groupby("thread_id")["thread_total_cost"].sum()
messages_per_thread = df.groupby("thread_id").size()

# Guard the division so an empty DataFrame reports 0 instead of NaN / an error.
message_count = len(df)
avg_cost = total_cost / message_count if message_count else 0.0

# Format the per-thread lines once; they are shared by the console and the file.
thread_cost_lines = [
    f"Thread {thread_id}: ${cost:.4f}" for thread_id, cost in thread_costs.items()
]

print("\nCost Analysis:")
print(f"Total cost across all threads: ${total_cost:.4f}")

# Cost breakdown per thread
print("\nCost per thread:")
for line in thread_cost_lines:
    print(line)

# Messages per thread
print("\nMessages per thread:")
print(messages_per_thread)

# Average cost per message
print(f"\nAverage cost per message: ${avg_cost:.4f}")

# Save analysis with timestamp (optional)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
analysis_file = f"cost_analysis_{timestamp}.txt"

report_lines = [
    f"=== Cost Analysis Report ({timestamp}) ===",
    f"Total cost: ${total_cost:.4f}",
    "",
    "Cost per thread:",
    *thread_cost_lines,
    "",
    f"Average cost per message: ${avg_cost:.4f}",
]

# Explicit encoding keeps the report independent of the platform default.
with open(analysis_file, "w", encoding="utf-8") as report_file:
    report_file.write("\n".join(report_lines) + "\n")

print(f"\nDetailed analysis saved to {analysis_file}")


Cost Analysis:
Total cost across all threads: $0.0161

Cost per thread:
Thread 0c8e2d92-493d-4916-83b5-3a5084134cb5: $0.0011
Thread 4a5ba631-7947-4d5a-9e26-f5c51d1c7c4d: $0.0021
Thread 5a4ba778-4af0-469b-84a3-d98743324c99: $0.0015
Thread 715d0936-3ad4-4b29-be42-f47fadcb047a: $0.0019
Thread 82af12cd-cd2b-4ba4-a3d3-d1a1d216df09: $0.0015
Thread 8d78fd23-e207-495e-96c5-1f771dc92714: $0.0015
Thread e365ce82-cca9-4148-bdf3-bb0b16510511: $0.0017
Thread e81f1766-3482-434a-83be-63943bfea08c: $0.0016
Thread ead791ce-b557-4026-9f3e-00c8c46c6fba: $0.0015
Thread f2242c4c-c276-489c-9f53-12f524d78a32: $0.0018

Messages per thread:
thread_id
0c8e2d92-493d-4916-83b5-3a5084134cb5    3
4a5ba631-7947-4d5a-9e26-f5c51d1c7c4d    3
5a4ba778-4af0-469b-84a3-d98743324c99    3
715d0936-3ad4-4b29-be42-f47fadcb047a    3
82af12cd-cd2b-4ba4-a3d3-d1a1d216df09    3
8d78fd23-e207-495e-96c5-1f771dc92714    3
e365ce82-cca9-4148-bdf3-bb0b16510511    3
e81f1766-3482-434a-83be-63943bfea08c    3
ead791ce-b557-4026-9f3e-00c8c

## Trend Analysis Across Multiple Topics
Now we'll analyze trends across the conversation threads created above, each of which covers a different Python topic. Together with the earlier cells, this demonstrates how to:
1. Create multiple threads programmatically
2. Use the `UserTrendsAnalyzer` to identify patterns
3. Extract insights from multiple conversations

In [5]:
# Run the analyzer's trend analysis over all collected threads and print the
# raw result under a heading.
trends = analyzer.analyze_thread_trends(thread_ids)
print("Trend Analysis Results:", trends, sep="\n")

Trend Analysis Results:
{'analysis': '```json\n{\n  "common_topics_or_themes": [\n    "Python programming concepts",\n    "Error handling in Python",\n    "Optimizing performance in Python applications",\n    "Best practices in Python development",\n    "JavaScript comparison"\n  ],\n  "types_of_questions_being_asked": [\n    "Comparative questions",\n    "Explanatory questions",\n    "How-to questions",\n    "Best practices inquiries",\n    "Conceptual understanding requests"\n  ],\n  "user_pain_points_or_challenges": [\n    "Understanding complex Python features (e.g., decorators, GIL, async/await)",\n    "Error handling and optimization in Python applications",\n    "Performance issues related to database queries in Django",\n    "Need for best practices in unit testing and caching"\n  ],\n  "feature_requests_or_suggestions": [\n    "More detailed explanations and examples for complex concepts",\n    "Guidance on error handling strategies",\n    "Resources or tools for optimizing da

In [6]:
# Ask the analyzer for the topic distribution of the collected threads and
# print the raw result under a heading (preceded by a blank line).
topics = analyzer.get_topic_distribution(thread_ids)
print("\nTopic Distribution Results:", topics, sep="\n")


Topic Distribution Results:
{'topic_distribution': '```json\n{\n    "Programming Languages": 5,\n    "Python Concepts": 6,\n    "Web Development": 2,\n    "Error Handling": 1,\n    "Testing": 1,\n    "Optimization": 1\n}\n```', 'total_messages': 10}
