In [24]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.prebuilt import create_react_agent
from dotenv import load_dotenv

load_dotenv()

True

In [25]:
questions = [
    "What are the different types of contraception available?",
    "How effective is the pill?",
    "Where can I get contraception near me?",
    "Are condoms 100 effective?",
    "Are condoms 100 effective?",
    "How do I use a condom correctly?",
    "What's the safest contraception method?",
    "What's the safest contraception method?",
    "What are the side effects of birth control pills?",
    "How does an IUD work?",
    "Can I get pregnant while on birth control?",
    "How effective is the pull-out method?",
    "How effective is the pull-out method?",
    "How effective is the pull-out method?",
    "Can I use emergency contraception as regular birth control?",
]

In [26]:
QUESTIONS_PROMPTS = """
You are an expert analyst specializing in Sexual and Reproductive Health and Rights (SRHR) topics. Your task is to analyze user questions from a database and organize them into a clear, structured list.

INSTRUCTIONS:
1. Review all user messages/questions provided to you
2. Remove duplicates and very similar questions
3. Organize the remaining questions into a numbered list
4. Present the questions in a clear, concise format


OUTPUT FORMAT:
Provide a numbered list of unique SRHR questions in this format:
1. [Question 1] (frequency: [10])
2. [Question 2] (frequency: [1])
3. [Question 3] (frequency: [2])
...and so on

Do not include any introductory text, explanations, or categorizations - just the numbered list of questions.
"""

In [27]:
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")


sentiment_agent = create_react_agent(
    llm,
    tools=[],
    prompt=QUESTIONS_PROMPTS
)


In [34]:
import re 
import json
from typing import List, Dict

def extract_json_from_response(response_text: str):
    """
    Extract and parse JSON from a text response that might contain markdown code blocks
    """
    # Try to extract JSON from markdown code blocks first
    json_code_block_pattern = r'```(?:json)?\n(.*?)\n```'
    code_block_match = re.search(json_code_block_pattern, response_text, re.DOTALL)
    
    if code_block_match:
        # Found a code block, try to parse its content as JSON
        try:
            json_str = code_block_match.group(1).strip()
            return json.loads(json_str)
        except json.JSONDecodeError:
            pass
    
    # If no code block or parsing failed, try to find any JSON-like structure
    json_pattern = r'\{.*\}'
    json_match = re.search(json_pattern, response_text, re.DOTALL)
    
    if json_match:
        try:
            json_str = json_match.group(0)
            return json.loads(json_str)
        except json.JSONDecodeError:
            pass
    
    # If all parsing attempts fail, return None
    return None



def extract_questions_with_counts(ai_response: str) -> List[Dict[str, any]]:
    """
    Extract questions and their counts from the AI response text
    """
    # Pattern to match numbered list items with count (e.g., "1. what is LGBTQ (asked 5 times)")
    pattern = r'\d+\.\s+(.*?)\s+\(asked\s+(\d+)\s+times?\)'
    
    # Find all matches
    matches = re.findall(pattern, ai_response, re.DOTALL)
    
    # Format the results as a list of dictionaries
    questions_with_counts = [
        {
            "question": question.strip(),
            "count": int(count)
        }
        for question, count in matches
    ]
    
    return questions_with_counts


In [35]:
filtered_questions = sentiment_agent.invoke({"messages": questions})

In [38]:
ai_response = filtered_questions['messages'][-1].content

In [39]:
sentiment_data = extract_questions_with_counts(ai_response)


In [40]:
print(sentiment_data)

[]
