<a href="https://colab.research.google.com/github/pastrop/kaggle/blob/master/ThinkingAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
!pip install anthropic

In [8]:
import pandas as pd
import json
#from anthropic import Anthropic
import anthropic
from typing import Dict, List, Any, Optional

In [3]:
from google.colab import userdata
#api_key_openAI = userdata.get('OpenAI')
api_key_anthropic = userdata.get('Antropic')
#api_key_gemini = userdata.get('google')

In [20]:
csv_file = 'frog_ferry.csv'

In [21]:
# Load the dataset named by `csv_file` into a DataFrame.
df = pd.read_csv(csv_file)
#Text cleanup
def text_input(file = 'Mejuri_texts.csv'):
    """Read a CSV file and return its cleaned 'Text' entries.

    Args:
        file: Path of the CSV file to read; it must contain a 'Text' column.

    Returns:
        A list holding every string value of the 'Text' column, in file
        order, with each tab character replaced by a single space.
        Non-string entries (for example missing values) are skipped.
    """
    frame = pd.read_csv(file)
    return [
        entry.replace("\t", " ")
        for entry in frame['Text']
        if isinstance(entry, str)
    ]

In [None]:
# Build a single text corpus from the dataset:
# - texts_cleaned: list of string entries returned by text_input()
# - corpus: all entries joined with single spaces
# - test1: the first 20000 whitespace-separated tokens of the corpus
texts_cleaned = text_input(csv_file)
corpus = ' '.join(texts_cleaned)
test1 = ' '.join(corpus.split()[:20000])

In [22]:
# Keep only the rows whose 'Text' value is a string, then convert the
# DataFrame into a list of dicts (one dict per row, keyed by column name).
df_clean = df[df['Text'].apply(lambda x: isinstance(x, str))]
records = df_clean.to_dict(orient='records')

In [23]:
records[1]

{'Text': 'We have GREATLY appreciated the addition of speed bumps and cross walks in our area by Roosevelt. More traffic calming features and accessible curbs would always be appreciated. A swing set at George park or some other fun addition to the play area there (basketball court, garden, or fenced area for off leash dogs) would be amazing! The FROG FERRY would be SO GREAT for our community, having the option to take a ferry downtown would be so fun for tourists and a great way for locals to spend the day and obviously commuters would benefit so much. I think the addition of a ferry would be ICONIC.',
 'string_Concepts': 'walk | porch',
 'number_Connection': 4,
 'number_Energy Score': 6,
 'string_Question': 'Change for the Better',
 'number_Recommend': 5.0,
 'score_Satisfaction/Feeling Score': 6,
 'Unnamed: 7': nan,
 'Unnamed: 8': nan,
 'Unnamed: 9': nan,
 'Unnamed: 10': nan}

# Thinking Agent

In [9]:
class ThinkingModule:
    """Custom module that leverages Claude's capabilities for reflective thinking."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize the thinking module with the Anthropic API client.

        Args:
            api_key: Anthropic API key handed to ``anthropic.Anthropic``.
        """
        self.api_key = api_key #or os.environ.get("ANTHROPIC_API_KEY")
        self.client = anthropic.Anthropic(api_key=self.api_key)
        self.model = "claude-3-7-sonnet-20250219"  # Using Claude 3.7 Sonnet

    @staticmethod
    def _parse_json(content: str) -> Any:
        """Decode the JSON payload contained in a model reply.

        The payload is looked for, in order, inside a ```json fence, inside
        any ``` fence, and finally in the raw text. If that candidate does
        not decode, the span from the first ``{`` to the last ``}`` is tried
        so that JSON surrounded by prose is still accepted.

        Raises:
            json.JSONDecodeError: If no candidate decodes as JSON.
        """
        candidate = content
        if "```json" in content:
            candidate = content.split("```json")[1].split("```")[0].strip()
        elif "```" in content:
            candidate = content.split("```")[1].split("```")[0].strip()

        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            start, end = candidate.find("{"), candidate.rfind("}")
            if start == -1 or end <= start:
                raise
            return json.loads(candidate[start:end + 1])

    def analyze(self, task: str, context: str, reflection_depth: int = 1) -> Dict[str, Any]:
        """
        Perform reflective thinking using Claude.

        Args:
            task: The specific thinking task to perform
            context: Relevant context for the thinking task
            reflection_depth: How many levels of reflection to perform (1-3)

        Returns:
            Dict containing the analysis results. When the reply cannot be
            turned into a JSON object, a fallback dict is returned with the
            raw reply under "reasoning", a fixed "conclusion", a confidence
            of 0.5 and the parsing error under "error".
        """
        # Build the prompt for Claude
        prompt = f"""<thinking>
Task: {task}

Context:
{context}

Please think through this step-by-step with {reflection_depth} level(s) of reflection.
Provide your analysis in JSON format with these fields:
- reasoning: Your step-by-step reasoning process
- conclusion: A concise summary of your conclusion
- confidence: A number from 0-1 indicating your confidence
- additional_fields: Any task-specific outputs needed
</thinking>"""

        # Call Claude API (API errors are intentionally left to propagate)
        response = self.client.messages.create(
            model=self.model,
            max_tokens=2000,
            temperature=0.2,  # Low temperature for more deterministic thinking
            system="You are an expert analytical assistant. When asked to think about a problem, you break it down methodically and provide clear, structured analysis. Your output should always be valid JSON when requested.",
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        # Bound before the try block so the fallback below can always use it,
        # even when reading the reply text itself is what fails.
        content = ""
        try:
            content = response.content[0].text

            print(f"Claude's response - analyze function: {content}")

            result = self._parse_json(content)
            if not isinstance(result, dict):
                # Callers rely on dict access (.get), so any other JSON
                # value is treated as a parsing failure.
                raise ValueError(
                    f"expected a JSON object, got {type(result).__name__}"
                )

            print(f"Parsed JSON - analyze function: {result}")

            return result
        except (AttributeError, IndexError, TypeError, ValueError) as e:
            # Fallback if the reply is missing, malformed or not a JSON object
            # (json.JSONDecodeError is a ValueError subclass).
            return {
                "reasoning": content,
                "conclusion": "Failed to parse structured output",
                "confidence": 0.5,
                "error": str(e)
            }


class MetadataAnalysisTool:
    """Placeholder for the metadata analysis tool; performs no real filtering."""

    def __init__(self):
        """Create the tool. The stub keeps no state."""
        # A real implementation would set up its resources here.

    def analyze(self, params: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Report the requested parameters and return a canned result.

        Args:
            params: Parameters for filtering and analyzing metadata
                (only printed by this stub)

        Returns:
            A one-element list containing a fixed example review
        """
        print(f"Metadata tool called with parameters: {params}")
        # Stand-in for the reviews a real implementation would select.
        example_review = {"id": 1, "text": "Example filtered review", "rating": 5}
        return [example_review]


In [15]:
class ReviewAnalysisAgent:
    """Agent that processes customer queries about review data."""

    # Upper bound on how many review texts are embedded in one prompt.
    # Override on an instance or subclass to send a larger sample.
    MAX_REVIEWS_IN_PROMPT = 5

    def __init__(self, review_corpus: List[Dict], api_key: Optional[str] = None):
        """
        Initialize the review analysis agent.

        Args:
            review_corpus: Collection of customer reviews with metadata
            api_key: Anthropic API key, forwarded to the thinking module
        """
        self.review_corpus = review_corpus
        self.metadata_tool = MetadataAnalysisTool()
        self.thinking_module = ThinkingModule(api_key=api_key)

    @staticmethod
    def _as_dict(value: Any) -> Dict[str, Any]:
        """Return *value* when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _is_affirmative(value: Any) -> bool:
        """Interpret a model-produced flag; strings like "false" count as False."""
        if isinstance(value, str):
            return value.strip().lower() in ("true", "yes", "y", "1")
        return bool(value)

    def process_query(self, query: str) -> str:
        """
        Process a customer query and return a response.

        Args:
            query: Natural language query about the review data

        Returns:
            The "conclusion" of the final synthesis step, or a fixed
            apology string when that step produced no conclusion
        """
        # Step 1: Understand the query through the thinking module
        print('#####################calling thinking module form the process_query step 1')

        query_analysis = self.thinking_module.analyze(
            task="Analyze the user query to extract: (1) primary information need, "
                 "(2) any filtering criteria, (3) type of analysis requested, "
                 "(4) whether numerical/metadata analysis is likely needed",
            context=f"User query: {query}",
            reflection_depth=2
        )

        # Step 2: Decide whether to use metadata tool
        print('#####################calling thinking module form the process_query step 2')

        tool_decision = self.thinking_module.analyze(
            task="Determine if metadata analysis is required or beneficial for this query",
            context=f"Query analysis: {query_analysis}\n"
                   f"Available tools: text corpus analysis, metadata analysis tool for numerical data",
            reflection_depth=2
        )

        # Step 3: Execute appropriate analysis
        print('#####################calling thinking module form the process_query step 3')

        # The model's JSON is free-form: "conclusion" may not be a string and
        # "additional_fields" may not be a dict, so both are normalised first.
        decision = self._as_dict(tool_decision)
        conclusion = str(decision.get("conclusion", ""))
        flag = self._as_dict(decision.get("additional_fields")).get("use_metadata_tool", False)
        use_metadata = conclusion.lower().startswith("yes") or self._is_affirmative(flag)

        if use_metadata:
            # Define parameters for metadata tool
            metadata_params = self.thinking_module.analyze(
                task="Determine optimal parameters for metadata tool based on the query",
                context=f"Query analysis: {query_analysis}\n"
                       f"Metadata tool capabilities: filter by ratings, aggregate statistics, etc.",
                reflection_depth=1
            )

            # Use metadata tool to get filtered set of reviews
            extra_fields = self._as_dict(self._as_dict(metadata_params).get("additional_fields"))
            tool_params = extra_fields.get("tool_parameters", {})
            filtered_reviews = self.metadata_tool.analyze(tool_params)
            text_analysis = self._analyze_text_corpus(filtered_reviews, query_analysis)
        else:
            # Just analyze the full text corpus
            text_analysis = self._analyze_text_corpus(self.review_corpus, query_analysis)

        # Step 4: Generate final response
        print('#####################calling thinking module form the process_query step 4')

        response = self.thinking_module.analyze(
            task="Synthesize findings into a comprehensive response to the user query",
            context=f"Query: {query}\n"
                   f"Analysis results: {text_analysis}\n"
                   f"Was metadata used: {'Yes' if use_metadata else 'No'}\n"
                   f"Thinking process: {self._as_dict(query_analysis).get('reasoning', '')}\n"
                   f"Tool decision reasoning: {decision.get('reasoning', '')}",
            reflection_depth=2
        )

        # Return the final response text
        return self._as_dict(response).get("conclusion", "I couldn't generate a proper response.")

    def _analyze_text_corpus(self, reviews: List[Dict], query_analysis: Dict) -> Dict[str, Any]:
        """
        Analyze the text content of reviews.

        Only the first ``MAX_REVIEWS_IN_PROMPT`` review texts are embedded in
        the prompt, so the prompt's "showing first N only" note is accurate.

        Args:
            reviews: List of review objects to analyze; the text is read from
                the "Text" key, falling back to "text"
            query_analysis: Analysis of the user query to guide text analysis

        Returns:
            Results of the text analysis as returned by the thinking module
        """
        # In a real implementation, this would use NLP techniques
        # appropriate for the query type (sentiment analysis, topic modeling, etc.)

        # Dataset records use "Text"; the metadata stub returns "text".
        review_texts = [
            review.get("Text", review.get("text", ""))
            for review in reviews
        ]
        shown_texts = review_texts[:self.MAX_REVIEWS_IN_PROMPT]

        # Use the thinking module to analyze the reviews based on the query
        print('!!!!!!!!!!!!!!!!!!!!!!!!!!calling thinking module from inside the analyze_text_corpus')

        analysis_result = self.thinking_module.analyze(
            task="Analyze review texts to answer the user query",
            context=f"Query analysis: {query_analysis}\n"
                   f"Reviews to analyze: {shown_texts} (showing first {len(shown_texts)} only)",
            reflection_depth=2
        )

        return analysis_result

Queries (St. Johns):
Where are pedestrian safety improvements needed?
What can police be doing to make the neighborhood safer?
What can city council prioritize to help St Johns?
What new businesses are needed in St Johns and where?
What issues would Frog Ferry solve?

In [18]:
len(records_stjohn[1000:5400])

4400

In [24]:
# Example usage: build an agent over `records` and run the selected queries.
# Every process_query() call issues several requests through the thinking module.

# Set your API key
api_key = api_key_anthropic

# Sample review corpus (in a real scenario, this would be much larger).
# NOTE: not passed to the agent below, which is built from `records`.
sample_reviews = [
    {"id": 1, "text": "Love this product! Battery life is amazing.", "rating": 5, "verified": True},
    {"id": 2, "text": "Decent product but overpriced for what you get.", "rating": 3, "verified": True},
    {"id": 3, "text": "Terrible quality, broke after one week.", "rating": 1, "verified": True},
    # In reality, you'd have thousands more reviews here
]

# Initialize the agent with the list-of-dicts form of the dataset

agent = ReviewAnalysisAgent(review_corpus=records, api_key=api_key)

# Example queries for the sample product reviews.
# NOTE: not iterated by the loop below, which uses `queries_stjohn`.
queries = [
    "What do customers think about the battery life?"
    #"Are verified purchasers happier with the product than non-verified ones?",
    #"What are the most common complaints in 1-star reviews?"
]

# Queries for the St Johns dataset; uncomment entries to run more of them.
queries_stjohn = [
    #"Where are pedestrian safety improvements needed?",
    #"What can police be doing to make the neighborhood safer?",
    #"What can city council prioritize to help St Johns?",
    #"What new businesses are needed in St Johns and where?",
    "What issues would Frog Ferry solve?"
]

# Process each query and print the agent's final answer
for query in queries_stjohn:
    print(f"\nQuery: {query}")
    response = agent.process_query(query)
    print(f"Response: {response}")


Query: What issues would Frog Ferry solve?
Claude's response - analyze function: I'll analyze this query step-by-step:

```json
{
  "reasoning": {
    "step1": {
      "analysis": "Let me break down the query 'What issues would Frog Ferry solve?'",
      "observations": [
        "The query is asking about 'Frog Ferry', which appears to be a specific transportation service or project",
        "The user wants to know what problems or issues this ferry service would address or solve",
        "The query is open-ended, seeking information about benefits or purposes of this service",
        "There are no explicit filtering criteria mentioned"
      ]
    },
    "step2": {
      "analysis": "Let me determine the type of information needed and analysis required",
      "observations": [
        "The primary information need is explanatory - understanding the purpose and benefits of Frog Ferry",
        "This likely requires descriptive information about transportation problems and how Fro

In [None]:
# Install tenacity for retries
!pip install tenacity

import json
import anthropic
from typing import Dict, List, Any, Optional
from tenacity import retry, stop_after_attempt, wait_exponential

class ThinkingModule:
    """Custom module that leverages Claude's capabilities for reflective thinking."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize the thinking module with the Anthropic API client."""
        self.api_key = api_key  # or os.environ.get("ANTHROPIC_API_KEY")
        self.client = anthropic.Anthropic(api_key=self.api_key)
        self.model = "claude-3-7-sonnet-20250219"  # Using Claude 3.7 Sonnet

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def analyze(self, task: str, context: str, reflection_depth: int = 1) -> Dict[str, Any]:
        """
        Perform reflective thinking using Claude with retries for OverloadedError.

        Args:
            task: The specific thinking task to perform
            context: Relevant context for the thinking task
            reflection_depth: How many levels of reflection to perform (1-3)

        Returns:
            Dict containing the analysis results
        """
        # Build the prompt for Claude
        prompt = f"""<thinking>
Task: {task}

Context:
{context}

Please think through this step-by-step with {reflection_depth} level(s) of reflection.
Provide your analysis in JSON format with these fields:
- reasoning: Your step-by-step reasoning process
- conclusion: A concise summary of your conclusion
- confidence: A number from 0-1 indicating your confidence
- additional_fields: Any task-specific outputs needed
</thinking>"""

        # Call Claude API
        response = self.client.messages.create(
            model=self.model,
            max_tokens=2000,
            temperature=0.2,  # Low temperature for more deterministic thinking
            system="You are an expert analytical assistant. When asked to think about a problem, you break it down methodically and provide clear, structured analysis. Your output should always be valid JSON when requested.",
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        # Extract and parse JSON response
        try:
            # Find JSON in the response content
            content = response.content[0].text

            print(f"Claude's response - analyze function: {content}")

            # Extract JSON part (assuming it's properly formatted)
            json_str = content
            if "