# Top Sheldon Users Analysis
## Users ranked 7-16 by conversation count

This notebook analyzes the top Sheldon users to understand:
- What questions they ask
- How Sheldon helps them
- Usage patterns and engagement

In [1]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sqlalchemy import create_engine, text
from pymongo import MongoClient
from bson.objectid import ObjectId
from dotenv import load_dotenv
import os
from datetime import datetime
import anthropic

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 100)

# Set plot style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

In [2]:
# Load environment variables
load_dotenv()

# Get MySQL connection URL and modify for synchronous connection
mysql_url = os.getenv('MYSQL_DATABASE_URL')
mysql_url_sync = mysql_url.replace('aiomysql', 'pymysql')

# Create MySQL engine
engine = create_engine(mysql_url_sync)

print("MySQL connection established")

MySQL connection established


In [3]:
# Connect to MongoDB
mongo_uri = os.getenv('MONGO_DATABASE_URI')
mongo_client = MongoClient(mongo_uri)
db = mongo_client['UnifiedCare']
employees_collection = db['employees']
role_assignments_collection = db['role_assignments']

print("MongoDB connection established")

MongoDB connection established


In [4]:
# Initialize Claude API
claude_key = os.getenv('CLAUDE_KEY')
claude_client = anthropic.Anthropic(api_key=claude_key)

print("Claude API client initialized")

Claude API client initialized


## Part 1: Define User IDs (Top Users Ranked 7-16)

In [5]:
# Top Sheldon users ranked 7-16 by conversation count
target_user_ids = [
    '6564f6d5f7a6bd00139fb3ff',  # Rank 7: laiba.arshad - 51 convs
    '6734f6dde456de2bab99e5ea',  # Rank 8: katrina.dang - 50 convs
    '66983f87193aa2047421b2be',  # Rank 9: narry.hoeung - 49 convs
    '68700e24ec46de7e7f67119d',  # Rank 10: giang.mai - 48 convs
    '6578afc0f7a6bd00131b2561',  # Rank 11: valerie.deleon - 46 convs
    '5f68f04489ae140013d81f15',  # Rank 12: anna.sramek - 42 convs
    '665645883b6d8b0714b44b9d',  # Rank 13: amena.mushtaq - 40 convs
    '64a5fa93446be000136e274d',  # Rank 14: valeriia.kashyna - 36 convs
    '60c1536caee4e80013799929',  # Rank 15: jonathan.gonzalez - 35 convs
    '641deca45608f5001370ac00',  # Rank 16: sara.burr - 33 convs
]

print(f"Analyzing {len(target_user_ids)} top users")

Analyzing 10 top users


## Part 2: Get User Details

In [6]:
# Function to get user details from MongoDB
def get_user_details(user_id_str):
    try:
        user_obj_id = ObjectId(user_id_str)
        user_doc = employees_collection.find_one({"_id": user_obj_id})
        
        if user_doc:
            # Get roles from role_assignments collection
            role_docs = list(role_assignments_collection.find({"memberId": user_obj_id}))
            unique_roles = list(set([doc.get('roleType') for doc in role_docs if doc.get('roleType')]))
            roles_str = ', '.join(sorted(unique_roles)) if unique_roles else 'N/A'
            
            return {
                'User ID': user_id_str,
                'Name': f"{user_doc.get('firstName', '')} {user_doc.get('lastName', '')}".strip() or 'N/A',
                'Email': user_doc.get('email', 'N/A'),
                'Roles': roles_str,
                'Status': user_doc.get('status', 'N/A')
            }
        else:
            return {
                'User ID': user_id_str,
                'Name': 'Not Found',
                'Email': 'N/A',
                'Roles': 'N/A',
                'Status': 'N/A'
            }
    except Exception as e:
        return {
            'User ID': user_id_str,
            'Name': f'Error: {str(e)}',
            'Email': 'N/A',
            'Roles': 'N/A',
            'Status': 'N/A'
        }

# Get details for all users
print("User Information:")
print("="*80)
all_user_info = []
for user_id in target_user_ids:
    user_info = get_user_details(user_id)
    all_user_info.append(user_info)
    print(f"\nUser ID: {user_info['User ID']}")
    print(f"  Name: {user_info['Name']}")
    print(f"  Email: {user_info['Email']}")
    print(f"  Roles: {user_info['Roles']}")
    print(f"  Status: {user_info['Status']}")

# Create summary DataFrame
df_users = pd.DataFrame(all_user_info)
print("\n" + "="*80)
print("Users Summary:")
display(df_users)

User Information:

User ID: 6564f6d5f7a6bd00139fb3ff
  Name: N/A
  Email: laiba.arshad@ihealthlabs.com
  Roles: CA
  Status: ACTIVATED

User ID: 6734f6dde456de2bab99e5ea
  Name: N/A
  Email: katrina.dang@ihealthlabs.com
  Roles: CA
  Status: ACTIVATED

User ID: 66983f87193aa2047421b2be
  Name: N/A
  Email: narry.hoeung@ihealthlabs.com
  Roles: CA
  Status: ACTIVATED

User ID: 68700e24ec46de7e7f67119d
  Name: N/A
  Email: giang.mai@ihealthlabs.com
  Roles: CA
  Status: ACTIVATED

User ID: 6578afc0f7a6bd00131b2561
  Name: N/A
  Email: valerie.deleon@ihealthlabs.com
  Roles: HC
  Status: ACTIVATED

User ID: 5f68f04489ae140013d81f15
  Name: N/A
  Email: anna.sramek@ihealthlabs.com
  Roles: RD
  Status: ACTIVATED

User ID: 665645883b6d8b0714b44b9d
  Name: N/A
  Email: amena.mushtaq@ihealthlabs.com
  Roles: CA
  Status: ACTIVATED

User ID: 64a5fa93446be000136e274d
  Name: N/A
  Email: valeriia.kashyna@ihealthlabs.com
  Roles: CA
  Status: ACTIVATED

User ID: 60c1536caee4e80013799929
  Name: 

Unnamed: 0,User ID,Name,Email,Roles,Status
0,6564f6d5f7a6bd00139fb3ff,,laiba.arshad@ihealthlabs.com,CA,ACTIVATED
1,6734f6dde456de2bab99e5ea,,katrina.dang@ihealthlabs.com,CA,ACTIVATED
2,66983f87193aa2047421b2be,,narry.hoeung@ihealthlabs.com,CA,ACTIVATED
3,68700e24ec46de7e7f67119d,,giang.mai@ihealthlabs.com,CA,ACTIVATED
4,6578afc0f7a6bd00131b2561,,valerie.deleon@ihealthlabs.com,HC,ACTIVATED
5,5f68f04489ae140013d81f15,,anna.sramek@ihealthlabs.com,RD,ACTIVATED
6,665645883b6d8b0714b44b9d,,amena.mushtaq@ihealthlabs.com,CA,ACTIVATED
7,64a5fa93446be000136e274d,,valeriia.kashyna@ihealthlabs.com,CA,ACTIVATED
8,60c1536caee4e80013799929,,jonathan.gonzalez@ihealthlabs.com,RD,ACTIVATED
9,641deca45608f5001370ac00,,sara.burr@ihealthlabs.com,"CA, HC",ACTIVATED


## Part 3: Query All Conversations

In [7]:
# Query all Sheldon conversations for all target users
query = text("""
    SELECT
        id,
        user_id,
        timestamp,
        session_id,
        agent_id,
        question,
        answer,
        reaction,
        reaction_timestamp,
        comment,
        prompt_tokens,
        completion_tokens,
        total_tokens,
        llm_process_time,
        retrieval_results,
        llm_answer,
        action_type,
        patient_id
    FROM ai.chat_history
    WHERE user_id IN :user_ids
        AND action_type = 'Sheldon'
    ORDER BY user_id, timestamp ASC
""")

with engine.connect() as conn:
    df_conversations = pd.read_sql(query, conn, params={'user_ids': tuple(target_user_ids)})

print(f"\nTotal Sheldon conversations across all users: {len(df_conversations)}")
print(f"Date range: {df_conversations['timestamp'].min()} to {df_conversations['timestamp'].max()}")
print(f"Users with conversations: {df_conversations['user_id'].nunique()}")

# Per-user summary
user_summary = df_conversations.groupby('user_id').agg({
    'id': 'count',
    'session_id': 'nunique',
    'timestamp': ['min', 'max'],
    'total_tokens': 'sum',
    'reaction': lambda x: x.notna().sum()
}).round(2)

user_summary.columns = ['Total_Conversations', 'Unique_Sessions', 'First_Use', 'Last_Use', 'Total_Tokens', 'Reactions_Given']
print("\n" + "="*80)
print("Per-User Summary:")
display(user_summary)


Total Sheldon conversations across all users: 430
Date range: 2024-09-07 02:40:35 to 2025-11-04 22:11:47
Users with conversations: 10

Per-User Summary:


Unnamed: 0_level_0,Total_Conversations,Unique_Sessions,First_Use,Last_Use,Total_Tokens,Reactions_Given
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5f68f04489ae140013d81f15,42,32,2024-10-02 00:15:39,2025-10-30 18:24:19,90647.0,0
60c1536caee4e80013799929,35,28,2024-09-07 02:40:35,2025-10-03 14:37:38,64787.0,0
641deca45608f5001370ac00,33,20,2024-09-11 18:54:08,2025-08-12 23:48:03,88926.0,1
64a5fa93446be000136e274d,36,23,2024-09-12 16:42:34,2025-11-04 21:46:43,91363.0,0
6564f6d5f7a6bd00139fb3ff,51,25,2024-10-02 21:31:45,2025-11-03 17:38:51,122344.0,0
6578afc0f7a6bd00131b2561,46,36,2024-10-01 19:32:58,2025-09-10 21:39:21,81045.0,0
665645883b6d8b0714b44b9d,40,19,2024-09-09 23:43:15,2025-06-04 20:53:48,89842.0,1
66983f87193aa2047421b2be,49,16,2024-10-22 18:37:15,2025-10-13 20:37:19,181530.0,3
6734f6dde456de2bab99e5ea,50,31,2024-11-20 22:22:28,2025-06-17 16:40:21,104726.0,0
68700e24ec46de7e7f67119d,48,22,2025-08-01 14:30:17,2025-11-04 22:11:47,71149.0,0


## Part 4: Export Individual User Conversations

In [8]:
# Export conversations for each user separately
# Only save: user_id, session_id, question, answer

for user_id in target_user_ids:
    df_user = df_conversations[df_conversations['user_id'] == user_id]
    if len(df_user) == 0:
        continue
    
    # Select only the required columns
    df_export = df_user[['user_id', 'session_id', 'question', 'answer']].copy()
    
    # Get user info for filename
    user_info = get_user_details(user_id)
    email_prefix = user_info['Email'].split('@')[0] if user_info['Email'] != 'N/A' else user_id[:8]
    
    # Export to CSV
    output_file = f'top_user_{email_prefix}_{user_id[:8]}_conversations.csv'
    df_export.to_csv(output_file, index=False)
    print(f"Exported {len(df_export)} conversations for {email_prefix} to: {output_file}")

print("\n" + "="*80)
print("All user conversations exported successfully!")

Exported 51 conversations for laiba.arshad to: top_user_laiba.arshad_6564f6d5_conversations.csv
Exported 50 conversations for katrina.dang to: top_user_katrina.dang_6734f6dd_conversations.csv
Exported 49 conversations for narry.hoeung to: top_user_narry.hoeung_66983f87_conversations.csv
Exported 48 conversations for giang.mai to: top_user_giang.mai_68700e24_conversations.csv
Exported 46 conversations for valerie.deleon to: top_user_valerie.deleon_6578afc0_conversations.csv
Exported 42 conversations for anna.sramek to: top_user_anna.sramek_5f68f044_conversations.csv
Exported 40 conversations for amena.mushtaq to: top_user_amena.mushtaq_66564588_conversations.csv
Exported 36 conversations for valeriia.kashyna to: top_user_valeriia.kashyna_64a5fa93_conversations.csv
Exported 35 conversations for jonathan.gonzalez to: top_user_jonathan.gonzalez_60c1536c_conversations.csv
Exported 33 conversations for sara.burr to: top_user_sara.burr_641deca4_conversations.csv

All user conversations export

## Part 5: AI Analysis with Claude

In [13]:
# Function to analyze user conversations with Claude
def analyze_user_conversations(user_id, df_user_convs):
    """
    Use Claude API to analyze a user's Sheldon conversations
    """
    # Prepare conversation summary
    conv_text = f"User ID: {user_id}\n"
    conv_text += f"Total Conversations: {len(df_user_convs)}\n\n"
    conv_text += "Conversations:\n"
    conv_text += "="*80 + "\n\n"
    
    for idx, row in df_user_convs.iterrows():
        conv_text += f"[Conversation {idx+1}]\n"
        conv_text += f"Time: {row['timestamp']}\n"
        conv_text += f"Session: {row['session_id']}\n"
        conv_text += f"Question: {row['question']}\n"
        conv_text += f"Answer: {row['answer']}\n"
        conv_text += "-"*80 + "\n\n"
    
    # Create prompt for Claude
    prompt = f"""Please analyze the following Sheldon AI assistant conversations for a healthcare user.

{conv_text}

Please provide a comprehensive analysis covering:

1. **Question Patterns**: What types of questions is this user asking? (e.g., clinical questions, workflow questions, technical issues, patient care, etc.)

2. **Topic Analysis**: What are the main topics or themes across all conversations?

3. **Complexity Assessment**: Are the questions simple/routine or complex/specialized?

4. **Sheldon's Helpfulness**: Based on the answers provided, does Sheldon appear to be genuinely helpful to this user? Provide specific examples.

5. **User Engagement**: Does the user seem satisfied based on conversation patterns? (e.g., follow-up questions, diverse topics, repeated usage)

6. **Key Insights**: Any notable patterns, concerns, or recommendations?

Please provide a structured analysis with specific examples from the conversations."""

    try:
        # Call Claude API
        message = claude_client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=2000,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        
        return message.content[0].text
    
    except Exception as e:
        return f"Error analyzing conversations: {str(e)}"

print("Analysis function defined")

Analysis function defined


In [14]:
# Analyze each user's conversations with Claude
all_analyses = []

for user_id in target_user_ids:
    df_user = df_conversations[df_conversations['user_id'] == user_id].copy()
    
    if len(df_user) == 0:
        print(f"\nSkipping {user_id} - No conversations found")
        continue
    
    user_info = get_user_details(user_id)
    
    print(f"\n{'='*80}")
    print(f"Analyzing: {user_info['Name']} ({user_info['Email']})")
    print(f"User ID: {user_id}")
    print(f"Total Conversations: {len(df_user)}")
    print(f"{'='*80}")
    print("Calling Claude API...")
    
    # Get analysis from Claude
    analysis = analyze_user_conversations(user_id, df_user)
    
    print("\nANALYSIS:")
    print(analysis)
    print("\n")
    
    # Store analysis
    all_analyses.append({
        'user_id': user_id,
        'name': user_info['Name'],
        'email': user_info['Email'],
        'roles': user_info['Roles'],
        'total_conversations': len(df_user),
        'analysis': analysis
    })

print("\n" + "="*80)
print(f"Completed analysis for {len(all_analyses)} users")
print("="*80)


Analyzing: Error: Cannot use MongoClient after close (N/A)
User ID: 6564f6d5f7a6bd00139fb3ff
Total Conversations: 51
Calling Claude API...

ANALYSIS:
# Comprehensive Analysis of Sheldon AI Assistant Conversations

## User Profile
- **User ID**: 6564f6d5f7a6bd00139fb3ff
- **Total Conversations**: 51
- **Date Range**: October 2024 - November 2025
- **User Type**: Healthcare provider/care team member

---

## 1. Question Patterns

### Primary Question Categories:

**A. Nutritional/Dietary Questions (65%)**
- Meal planning for specific diets (South Asian, Pakistani)
- Healthy food options (pancakes, granola, breakfast foods)
- Nutritional information (calories, metabolism)

**B. Clinical/Patient Care Questions (20%)**
- Vital sign parameters (O2 levels, blood glucose readings)
- Symptom inquiries (facial swelling, metabolism signs)
- Patient monitoring (insulin resistance)

**C. Workflow/Technical Questions (10%)**
- Patient management (pinned patients, watchlist)
- System navigation
- Se

In [11]:
# Export all analyses to a CSV file
df_analyses = pd.DataFrame(all_analyses)
analysis_output_file = 'top_users_claude_analysis.csv'
df_analyses.to_csv(analysis_output_file, index=False)

print(f"Claude analyses exported to: {analysis_output_file}")
print(f"\nTotal users analyzed: {len(df_analyses)}")

# Display summary
print("\n" + "="*80)
print("Analysis Summary:")
display(df_analyses[['user_id', 'name', 'email', 'roles', 'total_conversations']])

Claude analyses exported to: top_users_claude_analysis.csv

Total users analyzed: 10

Analysis Summary:


Unnamed: 0,user_id,name,email,roles,total_conversations
0,6564f6d5f7a6bd00139fb3ff,,laiba.arshad@ihealthlabs.com,CA,51
1,6734f6dde456de2bab99e5ea,,katrina.dang@ihealthlabs.com,CA,50
2,66983f87193aa2047421b2be,,narry.hoeung@ihealthlabs.com,CA,49
3,68700e24ec46de7e7f67119d,,giang.mai@ihealthlabs.com,CA,48
4,6578afc0f7a6bd00131b2561,,valerie.deleon@ihealthlabs.com,HC,46
5,5f68f04489ae140013d81f15,,anna.sramek@ihealthlabs.com,RD,42
6,665645883b6d8b0714b44b9d,,amena.mushtaq@ihealthlabs.com,CA,40
7,64a5fa93446be000136e274d,,valeriia.kashyna@ihealthlabs.com,CA,36
8,60c1536caee4e80013799929,,jonathan.gonzalez@ihealthlabs.com,RD,35
9,641deca45608f5001370ac00,,sara.burr@ihealthlabs.com,"CA, HC",33


## Part 6: Close Connections

In [12]:
# Close connections
engine.dispose()
mongo_client.close()
print("\nConnections closed")


Connections closed
