In [9]:
from anthropic import Anthropic
from anthropic import Anthropic
import json
import datetime
import pymupdf4llm
from typing import Dict, List, Any
import time
from pathlib import Path

import os 
load_dotenv()

class PaperAnalyzer:
    def __init__(self):
        api_key = os.getenv("CLAUDE_AI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Please set it in the .env file.")
        self.client = Anthropic(api_key=api_key)
        self.model = "claude-3-5-sonnet-20241022"
        self.paper_text = None
        self.execution_times = {
        "claims_analysis": 0,
        "evidence_analysis": 0,
        "conclusions_analysis": 0,
        "total_time": 0
        }


    def extract_text_from_pdf(self, filename: str) -> str:
        """Extract text from PDF file using PyMuPDF"""
        try:
            self.paper_text = pymupdf4llm.to_markdown(filename)
            return self.paper_text
        except Exception as e:
            print(f"Error extracting text from PDF: {e}")
            return ""

    def _get_claude_response(self, prompt: str) -> str:
        """Helper method to get response from Claude"""
        # time.sleep(45)  # Rate limiting
        message = self.client.messages.create(
            model=self.model,
            system="You are a helpful assistant specialized in analyzing research papers.",
            max_tokens=8192,
            messages=[
                {"role": "user", "content": prompt}
                
            ]
        )
        return message.content[0].text

    def get_claims(self, filename: str) -> Dict:
        """Extract all claims from the paper"""
        if not self.paper_text:
            text = self.extract_text_from_pdf(filename)
        else:
            text = self.paper_text
        
        if not self.paper_text:
            raise Exception("Failed to extract text from PDF")
        start_time = time.time()

        claims_prompt = f"""
        Analyze this research paper and extract ALL possible claims made by the authors.
        Paper text: {text}
        
        Your task is to identify all statements in the text that meet the following criteria for a claim:
        1. Makes a specific, testable assertion about results, methods, or contributions
        2. Represents a novel finding, improvement, or advancement
        3. Presents a clear position or conclusion

        Make sure to:
        1. Include both major and minor claims
        2. Don't miss any claims
        3. Present each claim as a separate item
        
        Return ONLY the following JSON structure:
        {{
            "claims": [
                {{
                    "claim_id": 1,
                    "claim_text": "statement of the claim",
                    "location": "section/paragraph where this claim appears",
                    "claim_type": "Nature of the claim",
                    "exact_quote": "complete verbatim text containing the claim"
                }}
            ]
        }}
        """

        response = self._get_claude_response(claims_prompt)
        self.execution_times["claims_analysis"] = time.time() - start_time

        return self._parse_json_response(response)

    def analyze_evidence(self, filename: str, claims: Dict) -> List[Dict]:
        """Find evidence for each claim"""
        if not self.paper_text:
            text = self.extract_text_from_pdf(filename)
        else:
            text = self.paper_text
        start_time = time.time()

        evidence_results = []
        
        for claim in claims['claims']:
            evidence_prompt = f"""
            Paper text: {text}
            
            For the following claim from the paper:
            "{claim['claim_text']}"
            
            Please identify relevant evidence that:
            1. Directly supports or contradicts the claim's specific assertion
            2. Is presented with experimental results, data, or concrete examples
            3. Can be traced to specific methods, results, or discussion sections
            4. Is not from the abstract or introduction

            If NO evidence is found for the given Claim, return:
            {{
                "claim_id": {claim['claim_id']},
                "evidence": [],
                "no_evidence_reason": "Explain why no evidence was found (e.g., 'Claim is unsupported', 'Claim is theoretical without empirical evidence', etc.)"
            }}
            ELSE:
            Return ONLY the following JSON structure:
            {{
                "claim_id": {claim['claim_id']},
                "evidence": [
                    {{
                        "evidence_id": 1,
                        "evidence_text": "specific experimental result/data point",
                        "evidence_type": "primary/secondary",
                        "strength": "strong/moderate/weak",
                        "limitations": "stated limitations or assumptions",
                        "location": "specific section & paragraph",
                        "exact_quote": "verbatim text from paper"
                    }}
                ]
            }}
            """

            response = self._get_claude_response(evidence_prompt)
            self.execution_times["evidence_analysis"] = time.time() - start_time

            result = self._parse_json_response(response)
            if result:
                evidence_results.append(result)
                
        return evidence_results

    def analyze_conclusions(self, filename: str, claims: Dict, evidence_results: List[Dict]) -> Dict:
        """Analyze conclusions by processing each claim-evidence pair individually"""
        if not self.paper_text:
            text = self.extract_text_from_pdf(filename)
        else:
            text = self.paper_text

        start_time = time.time()
        all_conclusions = []
        claims_list = claims.get('claims', [])

        def build_evidence_summary(claim_id):
            """Helper function to build evidence summary for a single claim"""
            claim_evidence = next((e['evidence'] for e in evidence_results if e.get('claim_id') == claim_id), [])
            evidence_text = []
            for idx, evidence in enumerate(claim_evidence, 1):
                evidence_text.append(
                    f"  Evidence {idx}:\n"
                    f"    - Text: {evidence.get('evidence_text', 'No text provided')}\n"
                    f"    - Strength: {evidence.get('strength', 'Not specified')}\n"
                    f"    - Limitations: {evidence.get('limitations', 'None specified')}\n"
                    f"    - Location: {evidence.get('location', 'Location not specified')}"
                )
            return "\n".join(evidence_text)

        # Process each claim individually
        for claim in claims_list:
            claim_id = claim.get('claim_id')
            print(f"\nAnalyzing conclusion for Claim {claim_id}...")

            # Build analysis for single claim
            single_claim_analysis = f"""
            Claim {claim_id}:
            Statement: {claim.get('claim_text', 'No text provided')}
            Location: {claim.get('location', 'Location not specified')}
            
            Evidence Summary:
            {build_evidence_summary(claim_id)}
            """

            # Create prompt for single claim
            single_conclusion_prompt = f"""
            Paper text: {text}
            
            Analyze the following claim and its supporting evidence:
            {single_claim_analysis}

            Provide a comprehensive conclusion analysis following these guidelines:

            1. Evidence Assessment:
            - Evaluate the strength and quality of ALL evidence presented
            - Consider both supporting and contradicting evidence
            - Assess the methodology and reliability of evidence

            2. Conclusion Analysis:
            - Determine what the authors concluded about this specific claim
            - Evaluate if the conclusion is justified by the evidence
            - Consider the relationship between evidence quality and conclusion strength

            3. Robustness Evaluation:
            - Assess how well the evidence supports the conclusion
            - Consider methodological strengths and weaknesses
            - Evaluate the consistency of evidence

            4. Limitations Analysis:
            - Identify specific limitations in both evidence and conclusion
            - Consider gaps in methodology or data
            - Note any potential biases or confounding factors

            Return ONLY the following JSON structure:
            {{
                "conclusions": [
                    {{
                        "claim_id": {claim_id},
                        "author_conclusion": "detailed description of authors' conclusion based on evidence",
                        "conclusion_justified": true/false,
                        "justification_explanation": "detailed explanation of why conclusion is/isn't justified",
                        "robustness_analysis": "comprehensive analysis of evidence strength and reliability",
                        "limitations": "specific limitations and caveats",
                        "location": "section/paragraph where conclusion appears",
                        "evidence_alignment": "analysis of how well evidence aligns with conclusion",
                        "confidence_level": "high/medium/low based on evidence quality"
                    }}
                ]
            }}
            """

            try:
                # Get response for this claim
                response = self._get_claude_response(single_conclusion_prompt)
                result = self._parse_json_response(response)

                if result and isinstance(result, dict) and 'conclusions' in result and result['conclusions']:
                    conclusion = result['conclusions'][0]
                    # Verify claim_id matches
                    if conclusion.get('claim_id') == claim_id:
                        all_conclusions.append(conclusion)
                    else:
                        raise ValueError(f"Mismatched claim_id in response for claim {claim_id}")
                else:
                    raise ValueError(f"Invalid response format for claim {claim_id}")

            except Exception as e:
                print(f"Error analyzing conclusion for claim {claim_id}: {str(e)}")
                # Add default conclusion on error
                all_conclusions.append({
                    "claim_id": claim_id,
                    "author_conclusion": "No conclusion available",
                    "conclusion_justified": False,
                    "justification_explanation": "Analysis not available",
                    "robustness_analysis": "No robustness analysis available",
                    "limitations": "No limitations analysis available",
                    "location": "Location not specified",
                    "evidence_alignment": "No alignment analysis available",
                    "confidence_level": "low"
                })

        self.execution_times["conclusions_analysis"] = time.time() - start_time

        return {
            "conclusions": all_conclusions,
            "analysis_metadata": {
                "total_claims_analyzed": len(claims_list),
                "claims_with_conclusions": len(all_conclusions),
                "analysis_timestamp": str(datetime.datetime.now())
            }
        }

    # def analyze_conclusions(self, filename: str, claims: Dict, evidence_results: List[Dict]) -> Dict:
    #     """Analyze conclusions considering claims and evidence"""
    #     if not self.paper_text:
    #         text = self.extract_text_from_pdf(filename)
    #     else:
    #         text = self.paper_text
 
    #     def build_evidence_summary(claim_id):
    #         claim_evidence = next((e['evidence'] for e in evidence_results if e.get('claim_id') == claim_id), [])
    #         evidence_text = []
    #         for idx, evidence in enumerate(claim_evidence, 1):
    #             evidence_text.append(
    #                 f"  Evidence {idx}:\n"
    #                 f"    - Text: {evidence.get('evidence_text', 'No text provided')}\n"
    #                 f"    - Strength: {evidence.get('strength', 'Not specified')}\n"
    #                 f"    - Limitations: {evidence.get('limitations', 'None specified')}\n"
    #                 f"    - Location: {evidence.get('location', 'Location not specified')}"
    #             )
    #         return "\n".join(evidence_text)

    #     analysis_sections = []
    #     for claim in claims.get('claims', []):
    #         claim_id = claim.get('claim_id')
    #         claim_section = (
    #             f"\nClaim {claim_id}:\n"
    #             f"Statement: {claim.get('claim_text', 'No text provided')}\n"
    #             f"Location: {claim.get('location', 'Location not specified')}\n"
    #             f"\nEvidence Summary:\n{build_evidence_summary(claim_id)}"
    #         )
    #         analysis_sections.append(claim_section)

    #     full_analysis = "\n".join(analysis_sections)

    #     conclusions_prompt = f"""
    #     Paper text: {text}
        
    #     Analyze the following claims and their supporting evidence:
    #     {full_analysis}

    #     For each claim, provide a comprehensive conclusion analysis following these guidelines:

    #     1. Evidence Assessment:
    #     - Evaluate the strength and quality of ALL evidence presented
    #     - Consider both supporting and contradicting evidence
    #     - Assess the methodology and reliability of evidence

    #     2. Conclusion Analysis:
    #     - Determine what the authors concluded about each claim
    #     - Evaluate if conclusions are justified by the evidence
    #     - Consider the relationship between evidence quality and conclusion strength

    #     3. Robustness Evaluation:
    #     - Assess how well the evidence supports the conclusions
    #     - Consider methodological strengths and weaknesses
    #     - Evaluate the consistency of evidence across different sources

    #     4. Limitations Analysis:
    #     - Identify specific limitations in both evidence and conclusions
    #     - Consider gaps in methodology or data
    #     - Note any potential biases or confounding factors

    #     Return ONLY the following JSON structure:
    #     {{
    #         "conclusions": [
    #             {{
    #                 "claim_id": number,
    #                 "author_conclusion": "detailed description of authors' conclusion based on evidence",
    #                 "conclusion_justified": true/false,
    #                 "justification_explanation": "detailed explanation of why conclusion is/isn't justified",
    #                 "robustness_analysis": "comprehensive analysis of evidence strength and reliability",
    #                 "limitations": "specific limitations and caveats",
    #                 "location": "section/paragraph where conclusion appears",
    #                 "evidence_alignment": "analysis of how well evidence aligns with conclusion",
    #                 "confidence_level": "high/medium/low based on evidence quality",
    #             }}
    #         ]
    #     }}
    #     """

    #     response = self._get_claude_response(conclusions_prompt)
    #     result = self._parse_json_response(response)

    #     if not result or not isinstance(result, dict) or 'conclusions' not in result:
    #         return {"conclusions": []}

    #     claims_ids = set(claim['claim_id'] for claim in claims.get('claims', []))
    #     all_conclusions = result.get('conclusions', [])
    #     start_time = time.time()

    #     complete_conclusions = []
    #     for claim_id in claims_ids:
    #         existing_conclusion = next(
    #             (c for c in all_conclusions if c.get('claim_id') == claim_id),
    #             None
    #         )
            
    #         if existing_conclusion:
    #             complete_conclusions.append(existing_conclusion)
    #         else:
    #             complete_conclusions.append({
    #                 "claim_id": claim_id,
    #                 "author_conclusion": "No conclusion available",
    #                 "conclusion_justified": False,
    #                 "justification_explanation": "Analysis not available",
    #                 "robustness_analysis": "No robustness analysis available",
    #                 "limitations": "No limitations analysis available",
    #                 "location": "Location not specified",
    #                 "evidence_alignment": "No alignment analysis available",
    #                 "confidence_level": "low"
    #             })
    #     self.execution_times["conclusions_analysis"] = time.time() - start_time

    #     return {
    #         "conclusions": complete_conclusions,
    #         "analysis_metadata": {
    #             "total_claims_analyzed": len(claims_ids),
    #             "claims_with_conclusions": len(all_conclusions),
    #             "analysis_timestamp": str(datetime.datetime.now())
    #         }
    #     }



    def _parse_json_response(self, response: str) -> Dict:
        """Parse JSON response and handle errors"""
        try:
            start_idx = response.find('{')
            end_idx = response.rfind('}') + 1
            if start_idx == -1 or end_idx == 0:
                raise ValueError("No JSON content found in response")
                
            json_str = response[start_idx:end_idx]
            return json.loads(json_str)
        except Exception as e:
            print(f"Error parsing response: {e}")
            print("Raw response:", response)
            return None

    def combine_results(self, claims: Dict, evidence_results: List[Dict], conclusions: Dict) -> Dict:
        """Combine all analysis results into a final structured format"""
        final_results = {
            "paper_analysis": []
        }
        
        conclusions_dict = {
            c['claim_id']: c 
            for c in conclusions.get('conclusions', [])
        } if conclusions else {}
        
        evidence_dict = {
            e['claim_id']: e.get('evidence', [])
            for e in evidence_results if isinstance(e, dict)
        }
        
        for claim in claims.get('claims', []):
            claim_id = claim['claim_id']
            conclusion = conclusions_dict.get(claim_id, {})
            evidence = evidence_dict.get(claim_id, [])
            
            analysis = {
                "claim_id": claim_id,
                "claim": claim.get('claim_text', ''),
                "claim_location": claim.get('location', 'Location not specified'),
                "evidence": evidence,
                "evidence_locations": [ev.get('location', 'Location not specified') for ev in evidence],
                "conclusion": {
                    "author_conclusion": conclusion.get('author_conclusion', 'No conclusion available'),
                    "conclusion_justified": conclusion.get('conclusion_justified', False),
                    "robustness_analysis": conclusion.get('robustness_analysis', 'No robustness analysis available'),
                    "limitations": conclusion.get('limitations', 'No limitations analysis available'),
                    "conclusion_location": conclusion.get('location', 'Location not specified')
                }
            }
            
            final_results['paper_analysis'].append(analysis)

            # Add timing information
        final_results["execution_times"] = {
            "claims_analysis_time": f"{self.execution_times['claims_analysis']:.2f} seconds",
            "evidence_analysis_time": f"{self.execution_times['evidence_analysis']:.2f} seconds",
            "conclusions_analysis_time": f"{self.execution_times['conclusions_analysis']:.2f} seconds",
            "total_execution_time": f"{self.execution_times['total_time']:.2f} seconds"
        }
        
        return final_results

    def print_analysis_results(self, final_results: Dict):
        """Print the analysis results in a readable format"""
        print("\n=== Complete Paper Analysis ===\n")
        
        for analysis in final_results['paper_analysis']:
            print(f"Claim {analysis['claim_id']}:")
            print(f"Statement: {analysis['claim']}")
            print("\nEvidence:")
            for evidence in analysis['evidence']:
                print(f"- {evidence['evidence_text']}")
                print(f"  Strength: {evidence['strength']}")
                print(f"  Limitations: {evidence['limitations']}")
            
            print("\nConclusion:")
            print(f"Author's Conclusion: {analysis['conclusion']['author_conclusion']}")
            print(f"Justified by Evidence: {'Yes' if analysis['conclusion']['conclusion_justified'] else 'No'}")
            print(f"Robustness: {analysis['conclusion']['robustness_analysis']}")
            print(f"Limitations: {analysis['conclusion']['limitations']}")
            print("\n" + "-"*50 + "\n")

def results_exist(basefile_name: str, output_folder: str) -> bool:
    """Check if results already exist for the given file."""
    detailed_analysis_path = f'{output_folder}/{basefile_name}_analysis.json'
    intermediate_results_path = f'{output_folder}/{basefile_name}_intermediate.json'
    
    # Check if both detailed and intermediate results files exist
    return os.path.exists(detailed_analysis_path) and os.path.exists(intermediate_results_path)

def main():
    analyzer = PaperAnalyzer()
    
    input_folder = 'shashi_1_papers'
    output_folder = 'claude_one_by_one_shashi'
    os.makedirs(output_folder, exist_ok=True)  # Ensure the output directory exists

    pdf_files = [f for f in os.listdir(input_folder) if f.endswith('.pdf')]

    for filename in pdf_files:
        basefile_name = Path(filename).stem
        
        if results_exist(basefile_name, output_folder):
            print(f"Skipping {filename}, results already exist.")
            continue
        
        filename_with_path = f"{input_folder}/{filename}"
        try:
            print(f"Starting analysis of {filename_with_path}")
            
            # Analyze the paper
            analyzer.extract_text_from_pdf(filename_with_path)
            claims = analyzer.get_claims(filename_with_path)
            evidence_results = analyzer.analyze_evidence(filename_with_path, claims)
            conclusions = analyzer.analyze_conclusions(filename_with_path, claims, evidence_results)
            final_results = analyzer.combine_results(claims, evidence_results, conclusions)

            # Save final and intermediate results
            with open(f'{output_folder}/{basefile_name}_analysis.json', 'w') as f:
                json.dump(final_results, f, indent=4)
            with open(f'{output_folder}/{basefile_name}_intermediate.json', 'w') as f:
                json.dump({
                    "claims": claims,
                    "evidence": evidence_results,
                    "conclusions": conclusions,
                    "execution_times": final_results["execution_times"]
                }, f, indent=4)

            print(f"Analysis completed successfully for {filename}")

        except Exception as e:
            print(f"Error analyzing {filename}: {e}")

if __name__ == "__main__":
    main()

Starting analysis of shashi_1_papers/2502.12568v2.pdf
Processing shashi_1_papers/2502.12568v2.pdf...

Analyzing conclusion for Claim 1...

Analyzing conclusion for Claim 2...
Error analyzing 2502.12568v2.pdf: 'evidence'
Starting analysis of shashi_1_papers/2409.15915v1.pdf
Processing shashi_1_papers/2409.15915v1.pdf...

Analyzing conclusion for Claim 1...

Analyzing conclusion for Claim 2...

Analyzing conclusion for Claim 3...

Analyzing conclusion for Claim 4...

Analyzing conclusion for Claim 5...

Analyzing conclusion for Claim 6...

Analyzing conclusion for Claim 7...
Analysis completed successfully for 2409.15915v1.pdf
Starting analysis of shashi_1_papers/2405.04215v1.pdf
Processing shashi_1_papers/2405.04215v1.pdf...

Analyzing conclusion for Claim 1...
Error parsing response: Expecting property name enclosed in double quotes: line 13 column 9 (char 1993)
Raw response: {
    "conclusions": [
        {
            "claim_id": 1,
            "author_conclusion": "The authors concl

In [10]:
from anthropic import Anthropic
import json
from pathlib import Path
import pymupdf4llm
import time
import datetime
from typing import Dict, List, Any
import os 

load_dotenv()


class SinglePassPaperAnalyzer:
    def __init__(self):

        api_key = os.getenv("CLAUDE_AI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Please set it in the .env file.")
        self.client = Anthropic(api_key=api_key)
        self.model = "claude-3-5-sonnet-20241022"
        self.paper_text = None
        self.execution_times = {
        "single_pass_analysis": 0,
        "total_time": 0
        }


        
    def extract_text_from_pdf(self, filename: str) -> str:
        """Extract text from PDF file using PyMuPDF"""
        try:
            self.paper_text = pymupdf4llm.to_markdown(filename)
            return self.paper_text
        except Exception as e:
            print(f"Error extracting text from PDF: {e}")
            return ""

    def analyze_paper(self, filename):
        """Perform comprehensive single-pass analysis of the paper"""
        if not self.paper_text:
            text = self.extract_text_from_pdf(filename)
        else:
            text = self.paper_text
            
        if not text:
            raise Exception("Failed to extract text from PDF")
        start_time = time.time()

        comprehensive_prompt = f"""
        Analyze this research paper and provide a comprehensive evaluation.
        Paper text: {text}

        Follow these guidelines:

        1. Identify ALL claims in the paper where each claim:
           - Makes a specific, verifiable assertion
           - Is supported by concrete evidence
           - Represents findings, contributions, or methodological advantages
           - Can be from any section except abstract

        2. For each identified claim:
           - Extract ALL supporting or contradicting evidence (experimental results, data, or methodology)
           - Evaluate the evidence strength and limitations
           - Assess how well conclusions align with evidence

        Return ONLY the following JSON structure:
        {{
            "analysis": [
                {{
                    "claim_id": number,
                    "claim": {{
                        "text": "statement of the claim",
                        "type": "methodology/result/contribution/performance",
                        "location": "section/paragraph",
                        "exact_quote": "verbatim text from paper"
                    }},
                    "evidence": [
                        {{
                            "evidence_text": "specific experimental result/data",
                            "strength": "strong/moderate/weak",
                            "limitations": "specific limitations",
                            "location": "section/paragraph",
                            "exact_quote": "verbatim text from paper"
                        }}
                    ],
                    "evaluation": {{
                        "conclusion_justified": true/false,
                        "robustness": "high/medium/low",
                        "justification": "explanation of evidence-conclusion alignment",
                        "key_limitations": "critical limitations affecting validity",
                        "confidence_level": "high/medium/low"
                    }}
                }}
            ]
        }}

        Ensure:
        - ALL substantive claims are captured
        - Evaluations are objective and well-reasoned
        - All locations and quotes are precise
        - Multiple pieces of evidence per claim are included when present
        """
        
        # Add rate limiting
        # time.sleep(45)
        
        # Get response from Claude
        response = self.client.messages.create(
            model=self.model,
            system="You are a helpful assistant specialized in analyzing research papers.",
            max_tokens=8192,
            messages=[
                {"role": "user", "content": comprehensive_prompt}
            ]
        )
        self.execution_times["single_pass_analysis"] = time.time() - start_time

        return self._parse_json_response(response.content[0].text)

    def _parse_json_response(self, response: str) -> Dict:
        """Parse JSON response and handle errors"""
        try:
            start_idx = response.find('{')
            end_idx = response.rfind('}') + 1
            if start_idx == -1 or end_idx == 0:
                raise ValueError("No JSON content found in response")
            json_str = response[start_idx:end_idx]
            return json.loads(json_str)
        except Exception as e:
            print(f"Error parsing response: {e}")
            print("Raw response:", response)
            return None

    def combine_results(self, analysis_results: Dict) -> tuple:
        """Restructure the single-pass analysis results into the desired format"""
        claims = {
            "claims": [
                {
                    "claim_id": item["claim_id"],
                    "claim_text": item["claim"]["text"],
                    "location": item["claim"]["location"],
                    "claim_type": item["claim"]["type"],
                    "exact_quote": item["claim"]["exact_quote"]
                }
                for item in analysis_results["analysis"]
            ]
        }
        
        evidence_results = [
            {
                "claim_id": item["claim_id"],
                "evidence": [
                    {
                        "evidence_id": idx + 1,
                        "evidence_text": ev["evidence_text"],
                        "evidence_type": "primary",
                        "strength": ev["strength"],
                        "limitations": ev["limitations"],
                        "location": ev["location"],
                        "exact_quote": ev["exact_quote"]
                    }
                    for idx, ev in enumerate(item["evidence"])
                ]
            }
            for item in analysis_results["analysis"]
        ]
        
        conclusions = {
            "conclusions": [
                {
                    "claim_id": item["claim_id"],
                    "author_conclusion": item["evaluation"]["justification"],
                    "conclusion_justified": item["evaluation"]["conclusion_justified"],
                    "robustness_analysis": item["evaluation"]["robustness"],
                    "limitations": item["evaluation"]["key_limitations"],
                    "evidence_alignment": item["evaluation"]["justification"],
                    "confidence_level": item["evaluation"]["confidence_level"]
                }
                for item in analysis_results["analysis"]
            ],
            "analysis_metadata": {
                "total_claims_analyzed": len(analysis_results["analysis"]),
                "claims_with_conclusions": len(analysis_results["analysis"]),
                "analysis_timestamp": str(datetime.datetime.now())
            }
        }
        
        final_results = {
            "paper_analysis": []
        }
        
        for item in analysis_results["analysis"]:
            claim_id = item["claim_id"]
            analysis = {
                "claim_id": claim_id,
                "claim": item["claim"]["text"],
                "claim_location": item["claim"]["location"],
                "evidence": item["evidence"],
                "evidence_locations": [ev["location"] for ev in item["evidence"]],
                "conclusion": {
                    "author_conclusion": item["evaluation"]["justification"],
                    "conclusion_justified": item["evaluation"]["conclusion_justified"],
                    "robustness_analysis": item["evaluation"]["robustness"],
                    "limitations": item["evaluation"]["key_limitations"],
                    "conclusion_location": item["claim"]["location"]
                }
            }
            final_results["paper_analysis"].append(analysis)
        final_results["execution_times"] = {
        "single_pass_analysis_time": f"{self.execution_times['single_pass_analysis']:.2f} seconds",
        "total_execution_time": f"{self.execution_times['total_time']:.2f} seconds"
        }

        
        return claims, evidence_results, conclusions, final_results

    def print_analysis_results(self, final_results: Dict):
        """Print the analysis results in a readable format"""
        print("\n=== Complete Paper Analysis ===\n")
        
        for analysis in final_results['paper_analysis']:
            print(f"Claim {analysis['claim_id']}:")
            print(f"Statement: {analysis['claim']}")
            print("\nEvidence:")
            for evidence in analysis['evidence']:
                print(f"- {evidence['evidence_text']}")
                print(f"  Strength: {evidence['strength']}")
                print(f"  Limitations: {evidence['limitations']}")
            
            print("\nConclusion:")
            print(f"Author's Conclusion: {analysis['conclusion']['author_conclusion']}")
            print(f"Justified by Evidence: {'Yes' if analysis['conclusion']['conclusion_justified'] else 'No'}")
            print(f"Robustness: {analysis['conclusion']['robustness_analysis']}")
            print(f"Limitations: {analysis['conclusion']['limitations']}")
            print("\n" + "-"*50 + "\n")

    def save_results(self, results: Dict, base_filename: str):
        """Save analysis results to files"""
        output_dir = Path('claude_all_at_once_shashi')
        output_dir.mkdir(exist_ok=True)
        


        results["execution_times"] = {
        "single_pass_analysis_time": f"{self.execution_times['single_pass_analysis']:.2f} seconds",
        "total_execution_time": f"{self.execution_times['total_time']:.2f} seconds"
        
        }
        # Save full JSON results
        json_path = output_dir / f'{base_filename}_analysis.json'
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=4)
        
        # Save readable text summary
        text_path = output_dir / f'{base_filename}_summary.txt'
        with open(text_path, 'w', encoding='utf-8') as f:
            for analysis in results['analysis']:
                f.write(f"Claim {analysis['claim_id']}:\n")
                f.write(f"Type: {analysis['claim']['type']}\n")
                f.write(f"Statement: {analysis['claim']['text']}\n")
                f.write(f"Location: {analysis['claim']['location']}\n")
                f.write(f"Exact Quote: {analysis['claim']['exact_quote']}\n\n")
                
                f.write("Evidence:\n")
                for evidence in analysis['evidence']:
                    f.write(f"- Evidence Text: {evidence['evidence_text']}\n")
                    f.write(f"  Strength: {evidence['strength']}\n")
                    f.write(f"  Location: {evidence['location']}\n")
                    f.write(f"  Limitations: {evidence['limitations']}\n")
                    f.write(f"  Exact Quote: {evidence['exact_quote']}\n\n")
                
                eval_data = analysis['evaluation']
                f.write("Evaluation:\n")
                f.write(f"Conclusion Justified: {'Yes' if eval_data['conclusion_justified'] else 'No'}\n")
                f.write(f"Robustness: {eval_data['robustness']}\n")
                f.write(f"Confidence Level: {eval_data['confidence_level']}\n")
                f.write(f"Justification: {eval_data['justification']}\n")
                f.write(f"Key Limitations: {eval_data['key_limitations']}\n")
                
                f.write("\n" + "-"*50 + "\n\n")
        
        # Generate summary statistics
        stats_path = output_dir / f'{base_filename}_statistics.txt'
        with open(stats_path, 'w', encoding='utf-8') as f:
            total_claims = len(results['analysis'])
            justified_claims = sum(1 for a in results['analysis'] 
                                 if a['evaluation']['conclusion_justified'])
            
            f.write("Analysis Statistics:\n")
            f.write(f"Total Claims Analyzed: {total_claims}\n")
            f.write(f"Justified Claims: {justified_claims}\n")
            
            # Evidence strength distribution
            strength_levels = {}
            for analysis in results['analysis']:
                for evidence in analysis['evidence']:
                    strength = evidence['strength']
                    strength_levels[strength] = strength_levels.get(strength, 0) + 1
            
            f.write("\nEvidence Strength Distribution:\n")
            total_evidence = sum(strength_levels.values())
            for strength, count in strength_levels.items():
                f.write(f"{strength}: {count} pieces ({count/total_evidence*100:.1f}%)\n")

def main():
    # Initialize analyzer
    analyzer = SinglePassPaperAnalyzer()
    
    # Analyze paper
    # filename = "Ax_Hao_Hang_2.pdf"

    input_folder = 'shashi_1_papers'

    pdf_files = [f for f in os.listdir(input_folder) if f.endswith('.pdf')]

    for filename in pdf_files:
        basefile_name = Path(filename).stem
        try:
            filename = f"{input_folder}/{filename}"
            total_start_time = time.time()

            # Extract text from PDF
            print("Extracting text from PDF...")
            analyzer.extract_text_from_pdf(filename)
            
            # Perform single-pass analysis
            print("Analyzing paper...")
            analysis_results = analyzer.analyze_paper(filename)
            analyzer.execution_times["total_time"] = time.time() - total_start_time

            # Restructure results into desired format
            claims, evidence_results, conclusions, final_results = analyzer.combine_results(analysis_results)
            
            # Print results
            analyzer.print_analysis_results(final_results)
            
            # Save detailed results
            analyzer.save_results(analysis_results, basefile_name)
            
        except Exception as e:
            print(f"Error analyzing paper: {str(e)}")
    # try:

    #     total_start_time = time.time()

    #     # Extract text once at the beginning
    #     print("Extracting text from PDF...")
    #     analyzer.extract_text_from_pdf(filename)
        
    #     # Perform single-pass analysis
    #     print("Analyzing paper...")
    #     analysis_results = analyzer.analyze_paper(filename)

    #     analyzer.execution_times["total_time"] = time.time() - total_start_time

        
    #     # Restructure results into desired format
    #     claims, evidence_results, conclusions, final_results = analyzer.combine_results(analysis_results)
        
    #     # Print results
    #     analyzer.print_analysis_results(final_results)
        
    #     # Save detailed results
    #     # with open('detailed_analysis_results.json', 'w') as f:
    #     #     json.dump(final_results, f, indent=4)
    #     # print("Results saved to 'detailed_analysis_results.json'")
        
    #     # Save intermediate results
    #     intermediate_results = {
    #         "claims": claims,
    #         "evidence": evidence_results,
    #         "conclusions": conclusions,
    #         "execution_times": final_results["execution_times"]

    #     }
    #     # with open('intermediate_results.json', 'w') as f:
    #     #     json.dump(intermediate_results, f, indent=4)
    #     # print("Intermediate results saved to 'intermediate_results.json'")
        
    #     # Save additional analysis outputs
    #     base_filename = Path(filename).stem
    #     analyzer.save_results(analysis_results, base_filename)
            
    # except Exception as e:
    #     print(f"Error analyzing paper: {str(e)}")

if __name__ == "__main__":
    main()

Extracting text from PDF...
Processing shashi_1_papers/2502.12568v2.pdf...
Analyzing paper...

=== Complete Paper Analysis ===

Claim 1:
Statement: CogWriter surpasses GPT-4o by 22% in complex instruction completion accuracy while generating texts exceeding 10,000 words

Evidence:
- When using Qwen-2.5-14B as backbone, achieved higher accuracy than GPT-4o
  Strength: strong
  Limitations: Specific breakdown of the 22% improvement across different metrics not fully detailed

Conclusion:
Author's Conclusion: Results tables show clear performance improvements, though the exact 22% figure could be better detailed
Justified by Evidence: Yes
Robustness: medium
Limitations: Limited discussion of statistical significance; performance variability not fully addressed

--------------------------------------------------

Claim 2:
Statement: CogWriter reduces generation time by approximately 50% compared to baseline model

Evidence:
- Experimental comparison using LLaMA-3.3-70B on 4 NVIDIA A100 GPU

In [11]:
from anthropic import Anthropic
import json
import datetime
import pymupdf4llm
import time
from pathlib import Path
import os
import traceback
from typing import Dict, List, Any


load_dotenv()

class PaperAnalyzer:
    def __init__(self):


        api_key = os.getenv("CLAUDE_AI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Please set it in the .env file.")
        self.client = Anthropic()
        self.model = "claude-3-5-sonnet-20241022"
        self.paper_text = None


        self.execution_times = {
        "claims_analysis": 0,
        "evidence_analysis": 0,
        "conclusions_analysis": 0,
        "total_time": 0
       }
    def extract_text_from_pdf(self, filename: str) -> str:
        """Extract text from PDF file using PyMuPDF"""
        try:
            self.paper_text = pymupdf4llm.to_markdown(filename)
            return self.paper_text
        except Exception as e:
            print(f"Error extracting text from PDF: {e}")
            return ""

    def get_all_claims(self, filename: str) -> Dict:
        """Get all claims in one pass"""
        try:
            if not self.paper_text:
                text = self.extract_text_from_pdf(filename)
            else:
                text = self.paper_text

            print(f"Processing file: {filename}")
            start_time = time.time()

            claims_prompt = f"""
            paper text: {text}
            task is to identify all statements in the text that meet the following criteria for a claim:
            1. Makes a specific, testable assertion about results, methods, or contributions
            2. Represents a novel finding, improvement, or advancement
            3. Presents a clear position or conclusion

            Make sure to:
            1. Include both major and minor claims
            2. Don't miss any claims
            3. Present each claim as a separate item
            
            Return ONLY the following JSON structure:
            {{
                "claims": [
                    {{
                        "claim_id": 1,
                        "claim_text": "statement of the claim",
                        "location": "section/paragraph where this claim appears",
                        "claim_type": "Nature of the claim",
                        "exact_quote": "complete verbatim text containing the claim"
                    }}
                ]
            }}
            """


            
            # time.sleep(45)  # Rate limiting
            response = self.client.messages.create(
                model=self.model,
                system="You are a helpful assistant specialized in analyzing research papers.",
                max_tokens=8192,
                messages=[
                    {"role": "user", "content": claims_prompt}
                ]
            )
            
            result = self._parse_json_response(response.content[0].text)
            self.execution_times["claims_analysis"] = time.time() - start_time

            print("Claims extraction completed")
            return result
        except Exception as e:
            print(f"Error in get_all_claims: {str(e)}")
            raise

    def get_all_evidence(self, filename: str, claims: Dict) -> Dict:
        """Get evidence for all claims in one pass"""
        try:
            start_time = time.time()

            if not self.paper_text:
                text = self.extract_text_from_pdf(filename)
            else:
                text = self.paper_text
            
            claims_text = "\n".join([f"Claim {c['claim_id']}: {c['claim_text']}" 
                                   for c in claims['claims']])
            print("Processing evidence for claims:", claims_text)
            
            evidence_prompt = f"""
            Paper text: {text}

            For these claims:
            {claims_text}

             Please identify relevant evidence that:
            1. Directly supports or contradicts the claim's specific assertion
            2. Is presented with experimental results, data, or concrete examples
            3. Can be traced to specific methods, results, or discussion sections
            4. Is not from the abstract or introduction

            Return ONLY the following JSON:
            {{
                "evidence_sets": [
                    {{
                        "claim_id": number,
                        "evidence": [
                            {{
                                "evidence_id": number,
                                "evidence_text": "specific evidence",
                                "strength": "strong/moderate/weak",
                                "limitations": "key limitations",
                                "location": "section/paragraph",
                                "exact_quote": "verbatim text"
                            }}
                        ]
                    }}
                ]
            }}
            """

            
            
            # time.sleep(45)  # Rate limiting
            response = self.client.messages.create(
                model=self.model,
                system="You are a helpful assistant specialized in analyzing research papers.",
                max_tokens=8192,
                messages=[
                    {"role": "user", "content": evidence_prompt}
                ]
            )
            
            result = self._parse_json_response(response.content[0].text)
            self.execution_times["evidence_analysis"] = time.time() - start_time

            print("Evidence extraction completed")
            return result
        except Exception as e:
            print(f"Error in get_all_evidence: {str(e)}")
            raise


    def get_all_conclusions(self, filename: str, claims: Dict, evidence_sets: Dict) -> Dict:
            """Analyze conclusions for all claims and evidence in one pass"""
            try:
                if not self.paper_text:
                    text = self.extract_text_from_pdf(filename)
                else:
                    text = self.paper_text
                start_time = time.time()
                # Create summary of claims and evidence for the prompt
                analysis_summary = []
                for claim in claims['claims']:
                    claim_id = claim['claim_id']
                    claim_evidence = next((e['evidence'] for e in evidence_sets['evidence_sets'] 
                                        if e['claim_id'] == claim_id), [])
                    
                    summary = f"\nClaim {claim_id}: {claim['claim_text']}\n"
                    summary += "Evidence:\n"
                    for evidence in claim_evidence:
                        summary += f"- {evidence['evidence_text']}\n"
                    analysis_summary.append(summary)
                
                analysis_text = "\n".join(analysis_summary)
                
                conclusions_prompt = f"""
                Paper text: {text}

                Analyze these claims and their evidence:
                {analysis_text}

                For each claim-evidence pair, evaluate:
                1. Whether the evidence justifies the claim
                2. The overall strength of support
                3. Any important limitations

        
                Return ONLY the following JSON:
                {{
                    "conclusions": [
                        {{
                            "claim_id": number,
                            "conclusion_justified": true/false,
                            "robustness": "high/medium/low",
                            "key_limitations": "specific limitations",
                            "confidence_level": "high/medium/low"
                        }}
                    ]
                }}
                """
                
                # time.sleep(45)  # Rate limiting
                response = self.client.messages.create(
                    model=self.model,
                    system="You are a helpful assistant specialized in analyzing research papers.",
                    max_tokens=8192,
                    messages=[
                        {"role": "user", "content": conclusions_prompt}
                    ]
                )
                
                result = self._parse_json_response(response.content[0].text)
                self.execution_times["conclusions_analysis"] = time.time() - start_time
                print("Conclusions analysis completed")
                return result
                
            except Exception as e:
                print(f"Error in get_all_conclusions: {str(e)}")
                raise

    def _parse_json_response(self, response: str) -> Dict:
        """Parse JSON response with better error handling"""
        try:
            print("Parsing response...")
            print("Raw response:", response)
            
            start_idx = response.find('{')
            end_idx = response.rfind('}') + 1
            
            if start_idx == -1 or end_idx == 0:
                raise ValueError("No JSON content found in response")
                
            json_str = response[start_idx:end_idx]
            result = json.loads(json_str)
            
            print("Successfully parsed JSON response")
            return result
            
        except Exception as e:
            print(f"Error parsing response: {str(e)}")
            print("Raw response:", response)
            raise

    def analyze_paper(self, filename: str) -> Dict:
        """Complete paper analysis using three-prompt approach"""
        try:
            total_start_time = time.time()

            # Extract text once at the beginning
            print("Extracting text from PDF...")
            self.extract_text_from_pdf(filename)

            # Get all claims
            print("Extracting claims...")
            claims = self.get_all_claims(filename)
            if not claims:
                raise Exception("Failed to extract claims")

            # Get evidence for all claims
            print("Extracting evidence...")
            evidence_sets = self.get_all_evidence(filename, claims)
            if not evidence_sets:
                raise Exception("Failed to extract evidence")

            # Get conclusions for all claim-evidence pairs
            print("Analyzing conclusions...")
            conclusions = self.get_all_conclusions(filename, claims, evidence_sets)
            if not conclusions:
                raise Exception("Failed to generate conclusions")


            self.execution_times["total_time"] = time.time() - total_start_time

            # Structure final results
            final_results = {
                "paper_analysis": [] }
        
            


            for claim in claims['claims']:
                claim_id = claim['claim_id']
                
                # Get evidence for this claim
                evidence = next((e['evidence'] for e in evidence_sets['evidence_sets'] 
                            if e['claim_id'] == claim_id), [])
                
                # Get conclusion for this claim
                conclusion = next((c for c in conclusions['conclusions'] 
                                if c['claim_id'] == claim_id), {})

                analysis_item = {
                    "claim_id": claim_id,
                    "claim": {
                        "text": claim['claim_text'],
                        "location": claim['location'],
                        "type": claim['claim_type'],
                        "exact_quote": claim['exact_quote']
                    },
                    "evidence": evidence,
                    "conclusion": {
                        "conclusion_justified": conclusion.get('conclusion_justified', False),
                        "robustness": conclusion.get('robustness', 'Not evaluated'),
                        "limitations": conclusion.get('key_limitations', 'Not specified'),
                        "confidence_level": conclusion.get('confidence_level', 'low')
                    }
                }
                
                final_results['paper_analysis'].append(analysis_item)
            final_results["execution_times"] = {
            "claims_analysis_time": f"{self.execution_times['claims_analysis']:.2f} seconds",
            "evidence_analysis_time": f"{self.execution_times['evidence_analysis']:.2f} seconds",
            "conclusions_analysis_time": f"{self.execution_times['conclusions_analysis']:.2f} seconds",
            "total_execution_time": f"{self.execution_times['total_time']:.2f} seconds"
                }

            return final_results

        except Exception as e:
            print(f"Error in paper analysis: {str(e)}")
            return None

    def save_results(self, results: Dict, filename: str):
        """Save analysis results in multiple formats"""
        try:
            base_filename = Path(filename).stem
            
            # Create output directory
            os.makedirs('claude_3_prompts_shashi', exist_ok=True)
            output_dir = "claude_3_prompts_shashi"
            # Save detailed JSON results
            json_filename = f'{output_dir}/{base_filename}_analysis.json'
            with open(json_filename, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=4)

            # Save human-readable summary
            summary_filename = f'{output_dir}/{base_filename}_summary.txt'
            with open(summary_filename, 'w', encoding='utf-8') as f:
                f.write("=== Paper Analysis Summary ===\n\n")
                
                for analysis in results['paper_analysis']:
                    f.write(f"Claim {analysis['claim_id']}:\n")
                    f.write(f"Statement: {analysis['claim']['text']}\n")
                    f.write(f"Location: {analysis['claim']['location']}\n")
                    f.write(f"Type: {analysis['claim']['type']}\n")
                    f.write(f"Quote: {analysis['claim']['exact_quote']}\n\n")
                    
                    f.write("Evidence:\n")
                    for evidence in analysis['evidence']:
                        f.write(f"- {evidence['evidence_text']}\n")
                        f.write(f"  Strength: {evidence['strength']}\n")
                        f.write(f"  Location: {evidence['location']}\n")
                        f.write(f"  Limitations: {evidence['limitations']}\n")
                        f.write(f"  Quote: {evidence['exact_quote']}\n\n")
                    
                    f.write("Conclusion:\n")
                    f.write(f"Justified: {analysis['conclusion']['conclusion_justified']}\n")
                    f.write(f"Robustness: {analysis['conclusion']['robustness']}\n")
                    f.write(f"Limitations: {analysis['conclusion']['limitations']}\n")
                    f.write(f"Confidence: {analysis['conclusion']['confidence_level']}\n")
                    f.write("\n" + "="*50 + "\n\n")

            # Save statistics
            stats_filename = f'{output_dir}/{base_filename}_stats.txt'
            with open(stats_filename, 'w', encoding='utf-8') as f:
                f.write("Analysis Statistics:\n\n")
                f.write(f"Total Claims Analyzed: {len(results['paper_analysis'])}\n")
                
                # Evidence statistics
                total_evidence = sum(len(analysis['evidence']) for analysis in results['paper_analysis'])
                f.write(f"Total Evidence Pieces: {total_evidence}\n")
                
                # Confidence distribution
                confidence_levels = {}
                for analysis in results['paper_analysis']:
                    level = analysis['conclusion']['confidence_level']
                    confidence_levels[level] = confidence_levels.get(level, 0) + 1
                
                f.write("\nConfidence Level Distribution:\n")
                for level, count in confidence_levels.items():
                    f.write(f"{level}: {count} claims\n")

                f.write("\nExecution Times:\n")
                f.write(f"Claims Analysis: {self.execution_times['claims_analysis']:.2f} seconds\n")
                f.write(f"Evidence Analysis: {self.execution_times['evidence_analysis']:.2f} seconds\n")
                f.write(f"Conclusions Analysis: {self.execution_times['conclusions_analysis']:.2f} seconds\n")
                f.write(f"Total Execution Time: {self.execution_times['total_time']:.2f} seconds\n")


            print(f"Results saved to analysis_outputs/:")
            print(f"- Detailed analysis: {json_filename}")
            print(f"- Summary: {summary_filename}")
            print(f"- Statistics: {stats_filename}")


        except Exception as e:
            print(f"Error saving results: {str(e)}")

def main():



    input_folder = 'shashi_1_papers'


    pdf_files = [f for f in os.listdir(input_folder) if f.endswith('.pdf')]

    for filename in pdf_files:
        basefile_name = Path(filename).stem
        try:
            filename = f"{input_folder}/{filename}"
            total_start_time = time.time()

            # Initialize analyzer
            analyzer = PaperAnalyzer()
            
            # Analyze paper
            print(f"Starting analysis of {filename}")
            results = analyzer.analyze_paper(filename)
            
            if results:
                # Save results in structured format
                analyzer.save_results(results, filename)
                print("Analysis completed successfully")
            else:
                print("Analysis failed to produce results")
            
        except Exception as e:
            print(f"Error in main execution: {str(e)}")
            traceback.print_exc()
    # try:
    #     analyzer = PaperAnalyzer(api_key)
        
    #     filename = "Ax_Hao_Hang_2.pdf"
    #     print(f"Starting analysis of {filename}")
        
    #     # Analyze paper
    #     results = analyzer.analyze_paper(filename)
        
    #     if results:
    #         # Save results in structured format
    #         analyzer.save_results(results, filename)
    #         print("Analysis completed successfully")
    #     else:
    #         print("Analysis failed to produce results")
            
    # except Exception as e:
    #     print(f"Error in main execution: {str(e)}")
    #     traceback.print_exc()

if __name__ == "__main__":
    main()



Starting analysis of shashi_1_papers/2502.12568v2.pdf
Extracting text from PDF...
Processing shashi_1_papers/2502.12568v2.pdf...
Extracting claims...
Processing file: shashi_1_papers/2502.12568v2.pdf
Parsing response...
Raw response: {
    "claims": [
        {
            "claim_id": 1,
            "claim_text": "CogWriter surpasses GPT-4o by 22% in complex instruction completion accuracy while reliably generating texts exceeding 10,000 words",
            "location": "Abstract",
            "claim_type": "Performance improvement",
            "exact_quote": "Even when using Qwen-2.5-14B as its backbone, CogWriter surpasses GPT-4o by 22% in complex instruction completion accuracy while reliably generating texts exceeding 10,000 words."
        },
        {
            "claim_id": 2,
            "claim_text": "CogWriter achieves higher instruction completion accuracy than GPT-4o-mini while using fewer computational resources",
            "location": "Results section (Table 1)",
      