# Claim once then for each claims get Evidence one by one and then for each pair get the Conclusion one by one

In [11]:
from openai import OpenAI
from openai.types.beta.threads.message_create_params import (
    Attachment,
    AttachmentToolFileSearch,
)
import os
import json
import openai
import datetime
from pathlib import Path
import time
from dotenv import load_dotenv

load_dotenv()
## openreview scrape


class PaperAnalyzer:
    def __init__(self, api_key):
        self.client = OpenAI(api_key=api_key)
        self.assistant = None
        self.execution_times = {
            "claims_analysis": 0,
            "evidence_analysis": 0,
            "conclusions_analysis": 0,
            "total_time": 0
        }
        # self.thread = None
        
    def create_assistant(self):
        self.assistant = self.client.beta.assistants.create(
            model="gpt-4-turbo-preview",
            description="An assistant to analyze research papers and extract claims, evidence, and conclusions.",
            tools=[{"type": "file_search"}],
            name="Research Paper Analyzer"
        )

    def get_claims(self, filename):
        """Extract all claims from the paper"""

        start_time = time.time()

        # self.thread = self.client.beta.threads.create()
        file = self.client.files.create(file=open(filename, "rb"), purpose="assistants")

        #constraint the claim types: to work on. 
        # one-shot prompting
        
        claims_prompt = """ 
        Please analyze the research paper and extract ALL possible claims made by the authors.
        Your task is to identify all statements in the text that meet the following criteria for a claim:
        1. Makes a specific, testable assertion about results, methods, or contributions
        2. Represents a novel finding, improvement, or advancement
        3. Presents a clear position or conclusion.

        Make sure to:
        1. Include both major and minor claims
        2. Don't miss any claims
        3. Present each claim as a separate item
        
        Return ONLY the following JSON structure:
        ```{
            "claims": [
                {
                    "claim_id": 1,
                    "claim_text": "statement of the claim"
                    "location": "section/paragraph where this claim appears"
                    "claim_type: "Nature of the claim" 
                    "exact_quote": "complete verbatim text containing the claim"
                    
                }
            ]
        }```
        """
                            # "Exact_claim_text": "Exact text from the document as it is"
# 
        r = self._execute_analysis(None, file.id, claims_prompt)
        self.execution_times["claims_analysis"] = time.time() - start_time
        return r


    def analyze_evidence(self, filename, claims):
        """Find evidence for each claim"""
        start_time = time.time()

        # self.thread = self.client.beta.threads.create()
        file = self.client.files.create(file=open(filename, "rb"), purpose="assistants")
        
        evidence_results = []
        for claim in claims['claims']:
            evidence_prompt = f"""
            For the following claim from the paper:
            "{claim['claim_text']}"
            
            Please:

            For the given claim, identify relevant evidence that:
            1. Directly supports or contradicts the claim's specific assertion
            2. Is presented with experimental results, data, or concrete examples
            3. Can be traced to specific methods, results, or discussion sections
            4. Is not from the abstract or introduction

            If NO evidence is found for the given Claim, return:
            ```{{
                "claim_id": {claim['claim_id']},
                "evidence": [],
                "no_evidence_reason": "Explain why no evidence was found (e.g., 'Claim is unsupported', 'Claim is theoretical without empirical evidence', etc.)"
            }}```
                ELSE:
            Return ONLY the following JSON structure:
            ```{{
                "claim_id": {claim['claim_id']},
                "evidence": [
                    {{  
                            "evidence_id": 1,
                            "evidence_text": "specific experimental result/data point",
                            "evidence_type": "primary/secondary",
                            "strength": "strong/moderate/weak",
                            "limitations": "stated limitations or assumptions",
                            "location": "specific section & paragraph",
                            "exact_quote": "verbatim text from paper"

                    }}
                ]
            }}```
            """


                                    # "Exact_evidence_text": "Exact text from the document as it is"
            result = self._execute_analysis(None, file.id, evidence_prompt)
            if result:
                evidence_results.append(result)
        self.execution_times["evidence_analysis"] = time.time() - start_time

        return evidence_results
        
    def analyze_conclusions(self, filename, claims, evidence_results):
        """
        Analyze conclusions by processing each claim and its evidence individually
        while maintaining the same return structure.
        """
        start_time = time.time()
        file = self.client.files.create(file=open(filename, "rb"), purpose="assistants")
        
        all_conclusions = []
        claims_list = claims.get('claims', [])
        
        # Process each claim individually
        for claim in claims_list:
            claim_id = claim.get('claim_id')
            
            # Get evidence for this specific claim
            claim_evidence = next((e['evidence'] for e in evidence_results if e.get('claim_id') == claim_id), [])
            
            # Build evidence summary for this claim only
            evidence_text = []
            for idx, evidence in enumerate(claim_evidence, 1):
                evidence_text.append(
                    f"  Evidence {idx}:\n"
                    f"    - Text: {evidence.get('evidence_text', 'No text provided')}\n"
                    f"    - Strength: {evidence.get('strength', 'Not specified')}\n"
                    f"    - Limitations: {evidence.get('limitations', 'None specified')}\n"
                    f"    - Location: {evidence.get('location', 'Location not specified')}"
                )
            
            # Create analysis for single claim
            single_analysis = f"""
            Claim {claim_id}:
            Statement: {claim.get('claim_text', 'No text provided')}
            Location: {claim.get('location', 'Location not specified')}
            
            Evidence Summary:
            {{'\n'.join(evidence_text)}}
            """
            
            # Create prompt for single claim-evidence pair
            single_conclusion_prompt = f"""
            Analyze the following claim and its supporting evidence from the research paper:

            {single_analysis}

            Provide a comprehensive conclusion analysis following these guidelines:

            1. Evidence Assessment:
            - Evaluate the strength and quality of ALL evidence presented
            - Consider both supporting and contradicting evidence
            - Assess the methodology and reliability of evidence

            2. Conclusion Analysis:
            - Determine what the authors concluded about this specific claim
            - Evaluate if the conclusion is justified by the evidence
            - Consider the relationship between evidence quality and conclusion strength

            3. Robustness Evaluation:
            - Assess how well the evidence supports the conclusion
            - Consider methodological strengths and weaknesses
            - Evaluate the consistency of evidence

            4. Limitations Analysis:
            - Identify specific limitations in both evidence and conclusion
            - Consider gaps in methodology or data
            - Note any potential biases or confounding factors

            Return ONLY the following JSON structure:
            {{
                "conclusions": [
                    {{
                        "claim_id": {claim_id},
                        "author_conclusion": "detailed description of authors' conclusion based on evidence",
                        "conclusion_justified": true/false,
                        "justification_explanation": "detailed explanation of why conclusion is/isn't justified",
                        "robustness_analysis": "comprehensive analysis of evidence strength and reliability",
                        "limitations": "specific limitations and caveats",
                        "location": "section/paragraph where conclusion appears",
                        "evidence_alignment": "analysis of how well evidence aligns with conclusion",
                        "confidence_level": "high/medium/low based on evidence quality"
                    }}
                ]
            }}
            """
            
            # Execute analysis for this claim
            try:
                result = self._execute_analysis(None, file.id, single_conclusion_prompt)
                if result and isinstance(result, dict) and 'conclusions' in result:
                    conclusion = result['conclusions'][0]
                    all_conclusions.append(conclusion)
                else:
                    # Add default conclusion if analysis fails
                    all_conclusions.append({
                        "claim_id": claim_id,
                        "author_conclusion": "No conclusion available",
                        "conclusion_justified": False,
                        "justification_explanation": "Analysis not available",
                        "robustness_analysis": "No robustness analysis available",
                        "limitations": "No limitations analysis available",
                        "location": "Location not specified",
                        "evidence_alignment": "No alignment analysis available",
                        "confidence_level": "low",
                        "distance_between_claim_and_evidence": []
                    })
            except Exception as e:
                print(f"Error analyzing conclusion for claim {claim_id}: {str(e)}")
                # Add default conclusion on error
                all_conclusions.append({
                    "claim_id": claim_id,
                    "author_conclusion": "Error in analysis",
                    "conclusion_justified": False,
                    "justification_explanation": "Analysis failed",
                    "robustness_analysis": "Analysis failed",
                    "limitations": "Analysis failed",
                    "location": "Location not specified",
                    "evidence_alignment": "Analysis failed",
                    "confidence_level": "low",
                    "distance_between_claim_and_evidence": []
                })

        self.execution_times["conclusions_analysis"] = time.time() - start_time

        # Return in the same structure as before
        return {
            "conclusions": all_conclusions,
            "analysis_metadata": {
                "total_claims_analyzed": len(claims_list),
                "claims_with_conclusions": len(all_conclusions),
                "analysis_timestamp": str(datetime.datetime.now())
            }
        }

    # def analyze_conclusions(self, filename, claims, evidence_results):
    #     """
    #     Analyze final decisions and conclusions by considering both claims and their evidence
        
    #     Args:
    #         filename: PDF file to analyze
    #         claims: Dictionary containing claims data
    #         evidence_results: List of dictionaries containing evidence for each claim
        
    #     Returns:
    #         Dictionary containing structured conclusions


    #     """

    #     start_time = time.time()

    #     # self.thread = self.client.beta.threads.create()
    #     file = self.client.files.create(file=open(filename, "rb"), purpose="assistants")
        
    #     # Build comprehensive analysis summary
    #     def build_evidence_summary(claim_id):
    #         """Helper function to build evidence summary for a claim"""
    #         claim_evidence = next((e['evidence'] for e in evidence_results if e.get('claim_id') == claim_id), [])
    #         evidence_text = []
    #         for idx, evidence in enumerate(claim_evidence, 1):
    #             evidence_text.append(
    #                 f"  Evidence {idx}:\n"
    #                 f"    - Text: {evidence.get('evidence_text', 'No text provided')}\n"
    #                 f"    - Strength: {evidence.get('strength', 'Not specified')}\n"
    #                 f"    - Limitations: {evidence.get('limitations', 'None specified')}\n"
    #                 f"    - Location: {evidence.get('location', 'Location not specified')}"
    #             )
    #         return "\n".join(evidence_text)

    #     # Create comprehensive analysis summary
    #     analysis_sections = []
    #     for claim in claims.get('claims', []):
    #         claim_id = claim.get('claim_id')
    #         claim_section = (
    #             f"\nClaim {claim_id}:\n"
    #             f"Statement: {claim.get('claim_text', 'No text provided')}\n"
    #             f"Location: {claim.get('location', 'Location not specified')}\n"
    #             f"\nEvidence Summary:\n{build_evidence_summary(claim_id)}"
    #         )
    #         analysis_sections.append(claim_section)

    #     full_analysis = "\n".join(analysis_sections)

    #     # Create detailed prompt incorporating claims and evidence
    #     conclusions_prompt = f"""
    #     Analyze the following claims and their supporting evidence from the research paper:

    #     {full_analysis}

    #     For each claim, provide a comprehensive conclusion analysis following these guidelines:

    #     1. Evidence Assessment:
    #     - Evaluate the strength and quality of ALL evidence presented
    #     - Consider both supporting and contradicting evidence
    #     - Assess the methodology and reliability of evidence

    #     2. Conclusion Analysis:
    #     - Determine what the authors concluded about each claim
    #     - Evaluate if conclusions are justified by the evidence
    #     - Consider the relationship between evidence quality and conclusion strength

    #     3. Robustness Evaluation:
    #     - Assess how well the evidence supports the conclusions
    #     - Consider methodological strengths and weaknesses
    #     - Evaluate the consistency of evidence across different sources

    #     4. Limitations Analysis:
    #     - Identify specific limitations in both evidence and conclusions
    #     - Consider gaps in methodology or data
    #     - Note any potential biases or confounding factors

    #     Return ONLY the following JSON structure:
    #     {{
    #         "conclusions": [
    #             {{
    #                 "claim_id": number,
    #                 "author_conclusion": "detailed description of authors' conclusion based on evidence",
    #                 "conclusion_justified": true/false,
    #                 "justification_explanation": "detailed explanation of why conclusion is/isn't justified",
    #                 "robustness_analysis": "comprehensive analysis of evidence strength and reliability",
    #                 "limitations": "specific limitations and caveats",
    #                 "location": "section/paragraph where conclusion appears",
    #                 "evidence_alignment": "analysis of how well evidence aligns with conclusion",
    #                 "confidence_level": "high/medium/low based on evidence quality",
    #             }}
    #         ]
    #     }}
    #     """

    #     # Execute analysis
    #     result = self._execute_analysis(None, file.id, conclusions_prompt)

    #     # Validate and process results
    #     if not result or not isinstance(result, dict) or 'conclusions' not in result:
    #         print("Warning: Invalid conclusions format received")
    #         return {"conclusions": []}

    #     # Ensure complete coverage of all claims
    #     all_conclusions = result.get('conclusions', [])
    #     claims_ids = set(claim['claim_id'] for claim in claims.get('claims', []))
        
    #     # Create complete conclusions list with defaults for missing entries
    #     complete_conclusions = []
    #     for claim_id in claims_ids:
    #         existing_conclusion = next(
    #             (c for c in all_conclusions if c.get('claim_id') == claim_id),
    #             None
    #         )
            
    #         if existing_conclusion:
    #             complete_conclusions.append(existing_conclusion)
    #         else:
    #             # Default structure for missing conclusions
    #             complete_conclusions.append({
    #                 "claim_id": claim_id,
    #                 "author_conclusion": "No conclusion available",
    #                 "conclusion_justified": False,
    #                 "justification_explanation": "Analysis not available",
    #                 "robustness_analysis": "No robustness analysis available",
    #                 "limitations": "No limitations analysis available",
    #                 "location": "Location not specified",
    #                 "evidence_alignment": "No alignment analysis available",
    #                 "confidence_level": "low",
    #                 "distance_between_claim_and_evidence": []
    #             })
    #     self.execution_times["conclusions_analysis"] = time.time() - start_time


    #     return {
    #         "conclusions": complete_conclusions,
    #         "analysis_metadata": {
    #             "total_claims_analyzed": len(claims_ids),
    #             "claims_with_conclusions": len(all_conclusions),
    #             "analysis_timestamp": str(datetime.datetime.now())
    #         }
    #     }


    def _execute_analysis(self, thread_id, file_id, prompt):
        """Execute analysis with given prompt and return results"""
        # Create a new thread for each analysis
        thread = self.client.beta.threads.create()
        
        # Create message
        self.client.beta.threads.messages.create(
            thread_id=thread.id,
            role="user",
            attachments=[
                Attachment(
                    file_id=file_id,
                    tools=[AttachmentToolFileSearch(type="file_search")]
                )
            ],
            content=prompt
        )

        # Run analysis
        run = self.client.beta.threads.runs.create_and_poll(
            thread_id=thread.id,
            assistant_id=self.assistant.id,
            timeout=5000
        )

        if run.status != "completed":
            raise Exception("Analysis failed:", run.status)

        # Get messages
        messages = list(self.client.beta.threads.messages.list(thread_id=thread.id))
        print(messages)
        
        # Clean up
        try:
            self.client.beta.threads.delete(thread.id)
        except Exception as e:
            print(f"Error deleting thread: {e}")
            
        return self._parse_json_response(messages[0].content[0].text.value)

    def _parse_json_response(self, response):
        """Parse JSON response and handle errors"""
        try:
            # Look for JSON content between curly braces
            start_idx = response.find('{')
            end_idx = response.rfind('}') + 1
            if start_idx == -1 or end_idx == 0:
                raise ValueError("No JSON content found in response")
                
            json_str = response[start_idx:end_idx]
            return json.loads(json_str)
        except Exception as e:
            print(f"Error parsing response: {e}")
            print("Raw response:", response)
            return None


    def combine_results(self, claims, evidence_results, conclusions):
        """Combine all analysis results into a final structured format"""
        final_results = {
            "paper_analysis": []
        }
        
        # Get conclusions dict
        conclusions_dict = {
            c['claim_id']: c 
            for c in conclusions.get('conclusions', [])
        } if conclusions else {}
        
        # Get evidence dict
        evidence_dict = {
            e['claim_id']: e.get('evidence', [])
            for e in evidence_results if isinstance(e, dict)
        }
        
        for claim in claims.get('claims', []):
            claim_id = claim['claim_id']
            conclusion = conclusions_dict.get(claim_id, {})
            evidence = evidence_dict.get(claim_id, [])
            
            analysis = {
                "claim_id": claim_id,
                "claim": claim.get('claim_text', ''),
                "claim_location": claim.get('location', 'Location not specified'),  # Add claim location
                "evidence": evidence,
                "evidence_locations": [ev.get('location', 'Location not specified') for ev in evidence],  # Add evidence locations
                "conclusion": {
                    "author_conclusion": conclusion.get('author_conclusion', 'No conclusion available'),
                    "conclusion_justified": conclusion.get('conclusion_justified', False),
                    "robustness_analysis": conclusion.get('robustness_analysis', 'No robustness analysis available'),
                    "limitations": conclusion.get('limitations', 'No limitations analysis available'),
                    "conclusion_location": conclusion.get('location', 'Location not specified')  # Add conclusion location
                }
            }
            
            final_results['paper_analysis'].append(analysis)
        
        return final_results

    def print_analysis_results(self, final_results):
        """Print the analysis results in a readable format"""
        print("\n=== Complete Paper Analysis ===\n")
        
        for analysis in final_results['paper_analysis']:
            print(f"Claim {analysis['claim_id']}:")
            print(f"Statement: {analysis['claim']}")
            print("\nEvidence:")
            for evidence in analysis['evidence']:
                print(f"- {evidence['evidence_text']}")
                print(f"  Strength: {evidence['strength']}")
                print(f"  Limitations: {evidence['limitations']}")
            
            print("\nConclusion:")
            print(f"Author's Conclusion: {analysis['conclusion']['author_conclusion']}")
            print(f"Justified by Evidence: {'Yes' if analysis['conclusion']['conclusion_justified'] else 'No'}")
            print(f"Robustness: {analysis['conclusion']['robustness_analysis']}")
            print(f"Limitations: {analysis['conclusion']['limitations']}")
            print("\n" + "-"*50 + "\n")

def main():
    # Initialize analyzer
    openai.api_key = os.getenv("OPEN_AI_API_KEY")
    analyzer = PaperAnalyzer(openai.api_key)
    analyzer.create_assistant()
    
    # Analyze paper
    # filename = "Ax_Hao_Hang_2.pdf"
    # input_folder = 'all_papers_trimmed'
    input_folder = 'shashi_1_papers'

    pdf_files = [f for f in os.listdir(input_folder) if f.endswith('.pdf')]

    for filename in pdf_files:    
        basefile_name = Path(filename).stem
        try:
            filename = f"{input_folder}/{filename}"
            start_time = time.time()
            # Step 1: Extract claims
            print("Extracting claims...")
            claims = analyzer.get_claims(filename)

            #noise-addition code using some model or human.
            #without noise-addition.
            
            # Step 2: Analyze evidence for each claim
            print("Analyzing evidence...")
            evidence_results = analyzer.analyze_evidence(filename, claims)
            print(evidence_results)
            
            #noise-addition code using some model or human.
            #without noise-addition.

            # Step 3: Analyze conclusions
            print("Analyzing conclusions...")
            conclusions = analyzer.analyze_conclusions(filename, claims, evidence_results)
            

            total_time = time.time() - start_time
            # Combine all results
            final_results = analyzer.combine_results(claims, evidence_results, conclusions)
            


            final_results["execution_times"] = {
                "claims_analysis_time": f"{analyzer.execution_times['claims_analysis']:.2f} seconds",
                "evidence_analysis_time": f"{analyzer.execution_times['evidence_analysis']:.2f} seconds",
                "conclusions_analysis_time": f"{analyzer.execution_times['conclusions_analysis']:.2f} seconds",
                "total_execution_time": f"{analyzer.execution_times['total_time']:.2f} seconds"
            }
            
            # Print results
            analyzer.print_analysis_results(final_results)

            #check if the folder exists or not and create the folder if it does not exist
            if not os.path.exists('GPT_one_by_one_shashi'):
                os.makedirs('GPT_one_by_one_shashi')
            
            # Save results to file
            with open(f'GPT_one_by_one_shashi/{basefile_name}_analysis.json', 'w') as f:
                json.dump(final_results, f, indent=4)
            print("Results have been saved to 'detailed_analysis_results.json'")
            
            # Save intermediate results for reference
            intermediate_results = {
                "claims": claims,
                "evidence": evidence_results,
                "conclusions": conclusions,
                "execution_times": final_results["execution_times"]

            }
            with open(f'GPT_one_by_one_shashi/{basefile_name}_intermediate.json', 'w') as f:
                json.dump(intermediate_results, f, indent=4)
            print("Intermediate results saved to 'intermediate_results.json'")
                
        except Exception as e:
            print(f"Error analyzing paper: {str(e)}")

if __name__ == "__main__":
    main()   

Extracting claims...
[Message(id='msg_InJsmQbvn88687zL4YVIDDIW', assistant_id='asst_zAbgbUYDI8vEQBU41HkG1lVM', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='```json\n{\n    "claims": [\n        {\n            "claim_id": 1,\n            "claim_text": "CogWriter achieves a 22% higher instruction completion accuracy rate compared to GPT-4o",\n            "location": "Abstract",\n            "claim_type": "Performance improvement",\n            "exact_quote": "CogWriter surpasses GPT-4o by 22% in complex instruction completion accuracy while reliably generating texts exceeding 10,000 words."\n        },\n        {\n            "claim_id": 2,\n            "claim_text": "CogWriter incorporates human writing strategies into LLMs without requiring additional training",\n            "location": "Contribution",\n            "claim_type": "Methodological advancement",\n            "exact_quote": "We demonstrate that CogWriter remarkably enhances LL

# Claim, Evidence and Conclusion all at Once

In [9]:
from openai import OpenAI
from openai.types.beta.threads.message_create_params import (
    Attachment,
    AttachmentToolFileSearch,
)
import json
from pathlib import Path
import time
import os
from dotenv import load_dotenv

class SinglePassPaperAnalyzer:
    def __init__(self, api_key):
        self.client = OpenAI(api_key=api_key)
        self.assistant = None
        self.execution_times = {
            "single_pass_analysis": 0,
            "total_time": 0
        }




    def create_assistant(self):
        self.assistant = self.client.beta.assistants.create(
            model="gpt-4-turbo-preview",
            description="Assistant for comprehensive research paper analysis",
            tools=[{"type": "file_search"}],
            name="Research Paper Analyzer"
        )

    def analyze_paper(self, filename):

        start_time = time.time()

        # thread = self.client.beta.threads.create()
        file = self.client.files.create(file=open(filename, "rb"), purpose="assistants")
        
        comprehensive_prompt = """
        Analyze the research paper and provide a comprehensive evaluation following these guidelines:

        1. Identify ALL claims in the paper where each claim:
           - Makes a specific, verifiable assertion
           - Is supported by concrete evidence
           - Represents findings, contributions, or methodological advantages
           - Can be from any section except abstract

        2. For each identified claim:
           - Extract ALL supporting or contradicting evidence (experimental results, data, or methodology)
           - Evaluate the evidence strength and limitations
           - Assess how well conclusions align with evidence

        Return ONLY the following JSON structure:
        {
            "analysis": [
                {
                    "claim_id": number,
                    "claim": {
                        "text": "statement of the claim",
                        "type": "methodology/result/contribution/performance",
                        "location": "section/paragraph",
                        "exact_quote": "verbatim text from paper"
                    },
                    "evidence": [
                        {
                            "evidence_text": "specific experimental result/data",
                            "strength": "strong/moderate/weak",
                            "limitations": "specific limitations",
                            "location": "section/paragraph",
                            "exact_quote": "verbatim text from paper"
                        }
                    ],
                    "evaluation": {
                        "conclusion_justified": true/false,
                        "robustness": "high/medium/low",
                        "justification": "explanation of evidence-conclusion alignment",
                        "key_limitations": "critical limitations affecting validity",
                        "confidence_level": "high/medium/low"
                    }
                }
            ]
        }

        Ensure:
        - ALL substantive claims are captured
        - Evaluations are objective and well-reasoned
        - All locations and quotes are precise
        - Multiple pieces of evidence per claim are included when present
        """
        
        result = self._execute_analysis(None, file.id, comprehensive_prompt)
        self.execution_times["single_pass_analysis"] = time.time() - start_time
        return result


    def _execute_analysis(self, thread_id, file_id, prompt):
        """Execute analysis with enhanced error handling"""
        try:
            # Create a new thread
            thread = self.client.beta.threads.create()
            thread_id = thread.id  # Get the thread ID
            
            print("Creating message...")
            message = self.client.beta.threads.messages.create(
                thread_id=thread_id,  # Use the created thread ID
                role="user",
                attachments=[
                    Attachment(
                        file_id=file_id,
                        tools=[AttachmentToolFileSearch(type="file_search")]
                    )
                ],
                content=prompt
            )
            print("Message created successfully")

            print("Starting analysis run...")
            run = self.client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=self.assistant.id
            )

            # Poll for completion with timeout
            timeout = 300  # 5 minutes timeout
            start_time = time.time()
            while True:
                if time.time() - start_time > timeout:
                    raise Exception("Analysis timed out")

                run_status = self.client.beta.threads.runs.retrieve(
                    thread_id=thread_id,
                    run_id=run.id
                )
                
                print(f"Run status: {run_status.status}")
                
                if run_status.status == 'completed':
                    break
                elif run_status.status in ['failed', 'cancelled', 'expired']:
                    raise Exception(f"Run failed with status: {run_status.status}")
                
                time.sleep(5)  # Wait 5 seconds before checking again

            print("Retrieving messages...")
            messages = list(self.client.beta.threads.messages.list(thread_id=thread_id))
            if not messages:
                raise Exception("No messages received")

            # Clean up the thread
            try:
                self.client.beta.threads.delete(thread_id)
            except Exception as e:
                print(f"Error deleting thread: {e}")

            return self._parse_json_response(messages[0].content[0].text.value)
        except Exception as e:
            print(f"Error in _execute_analysis: {str(e)}")
            print(f"Thread ID: {thread_id}")
            print(f"File ID: {file_id}")
            raise


    def _parse_json_response(self, response):
        try:
            start_idx = response.find('{')
            end_idx = response.rfind('}') + 1
            if start_idx == -1 or end_idx == 0:
                raise ValueError("No JSON content found in response")
            json_str = response[start_idx:end_idx]
            return json.loads(json_str)
        except Exception as e:
            print(f"Error parsing response: {e}")
            print("Raw response:", response)
            return None

    def print_analysis_results(self, results):
        if not results or 'analysis' not in results:
            print("No valid analysis results to display")
            return

        print("\n=== Paper Analysis Results ===\n")
        
        for analysis in results['analysis']:
            print(f"Claim {analysis['claim_id']}:")
            print(f"Type: {analysis['claim']['type']}")
            print(f"Statement: {analysis['claim']['text']}")
            print(f"Location: {analysis['claim']['location']}")
            print(f"Exact Quote: {analysis['claim']['exact_quote']}")
            
            print("\nEvidence:")
            for evidence in analysis['evidence']:
                print(f"- Evidence Text: {evidence['evidence_text']}")
                print(f"  Strength: {evidence['strength']}")
                print(f"  Location: {evidence['location']}")
                print(f"  Limitations: {evidence['limitations']}")
                print(f"  Exact Quote: {evidence['exact_quote']}")
            
            eval_data = analysis['evaluation']
            print("\nEvaluation:")
            print(f"Conclusion Justified: {'Yes' if eval_data['conclusion_justified'] else 'No'}")
            print(f"Robustness: {eval_data['robustness']}")
            print(f"Confidence Level: {eval_data['confidence_level']}")
            print(f"Justification: {eval_data['justification']}")
            print(f"Key Limitations: {eval_data['key_limitations']}")
            
            print("\n" + "-"*50 + "\n")

    def save_results(self, results, base_filename):
        output_dir = Path('GPT_all_at_once_shashi')
        output_dir.mkdir(exist_ok=True)
        

        results["execution_times"] = {
        "single_pass_analysis_time": f"{self.execution_times['single_pass_analysis']:.2f} seconds",
        "total_execution_time": f"{self.execution_times['total_time']:.2f} seconds"
        }
        # Save full JSON results
        json_path = output_dir / f'{base_filename}_analysis.json'
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=4)
        
        # Save readable text summary
        text_path = output_dir / f'{base_filename}_summary.txt'
        with open(text_path, 'w', encoding='utf-8') as f:
            for analysis in results['analysis']:
                f.write(f"Claim {analysis['claim_id']}:\n")
                f.write(f"Type: {analysis['claim']['type']}\n")
                f.write(f"Statement: {analysis['claim']['text']}\n")
                f.write(f"Location: {analysis['claim']['location']}\n")
                f.write(f"Exact Quote: {analysis['claim']['exact_quote']}\n\n")
                
                f.write("Evidence:\n")
                for evidence in analysis['evidence']:
                    f.write(f"- Evidence Text: {evidence['evidence_text']}\n")
                    f.write(f"  Strength: {evidence['strength']}\n")
                    f.write(f"  Location: {evidence['location']}\n")
                    f.write(f"  Limitations: {evidence['limitations']}\n")
                    f.write(f"  Exact Quote: {evidence['exact_quote']}\n\n")
                
                eval_data = analysis['evaluation']
                f.write("Evaluation:\n")
                f.write(f"Conclusion Justified: {'Yes' if eval_data['conclusion_justified'] else 'No'}\n")
                f.write(f"Robustness: {eval_data['robustness']}\n")
                f.write(f"Confidence Level: {eval_data['confidence_level']}\n")
                f.write(f"Justification: {eval_data['justification']}\n")
                f.write(f"Key Limitations: {eval_data['key_limitations']}\n")
                
                f.write("\n" + "-"*50 + "\n\n")
        
        # Generate summary statistics
        stats_path = output_dir / f'{base_filename}_statistics.txt'
        with open(stats_path, 'w', encoding='utf-8') as f:
            total_claims = len(results['analysis'])
            justified_claims = sum(1 for a in results['analysis'] 
                                 if a['evaluation']['conclusion_justified'])
            
            f.write("Analysis Statistics:\n")
            f.write(f"Total Claims Analyzed: {total_claims}\n")
            f.write(f"Justified Claims: {justified_claims}\n")
            
            # Evidence strength distribution
            strength_levels = {}
            for analysis in results['analysis']:
                for evidence in analysis['evidence']:
                    strength = evidence['strength']
                    strength_levels[strength] = strength_levels.get(strength, 0) + 1
            
            f.write("\nEvidence Strength Distribution:\n")
            total_evidence = sum(strength_levels.values())
            for strength, count in strength_levels.items():
                f.write(f"{strength}: {count} pieces ({count/total_evidence*100:.1f}%)\n")

        print(f"Analysis results saved to {output_dir}:")
        print(f"- Full analysis: {json_path}")
        print(f"- Summary: {text_path}")
        print(f"- Statistics: {stats_path}")

def main():

    
    load_dotenv()
    api_key = os.getenv("OPEN_AI_API_KEY")
    
    if not api_key:
        raise ValueError("OpenAI API key not found. Please set OPENAI_API_KEY in your environment variables.")


    input_folder = 'shashi_1_papers'

    pdf_files = [f for f in os.listdir(input_folder) if f.endswith('.pdf')]

    for filename in pdf_files:    
        basefile_name = Path(filename).stem
        try:
            filename = f"{input_folder}/{filename}"
    # try:
            analyzer = SinglePassPaperAnalyzer(api_key)
            analyzer.create_assistant()
            
            input_file = filename
            if not os.path.exists(input_file):
                raise FileNotFoundError(f"File not found: {input_file}")
            
            # base_filename = Path(input_file).stem
            
            total_start_time = time.time()


            print("\nAnalyzing paper...")
            results = analyzer.analyze_paper(input_file)
            
            analyzer.execution_times["total_time"] = time.time() - total_start_time

            results["execution_times"] = {
                "single_pass_analysis_time": f"{analyzer.execution_times['single_pass_analysis']:.2f} seconds",
                "total_execution_time": f"{analyzer.execution_times['total_time']:.2f} seconds"
            }

            analyzer.print_analysis_results(results)
            analyzer.save_results(results, basefile_name)
            
        except Exception as e:
            print(f"Error during analysis: {str(e)}")

if __name__ == "__main__":
    main()


Analyzing paper...
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...

=== Paper Analysis Results ===

Claim 1:
Type: performance
Statement: CogWriter significantly enhances the instruction completion accuracy and text generation length of LLMs, with specific improvements over GPT-4 and others.
Location: Introduction/Main Results
Exact Quote: CogWriter achieves a 22% higher instruction completion accuracy rate compared to GPT-4o, while reliably generating texts exceeding 10,000 words.

Evidence:
- Evidence Text: Empirical demonstration of CogWriter's performance on LongGenBench, showing superior instruction completion accuracy and generation length across various LLMs.
  Strength: strong
  Location: Main Results Table 1
  Limitations: Limited comparative bas

# Claim once, Evidence Once, Conclusion Once

In [10]:
import traceback
import time
import os 
import json 
from dotenv import load_dotenv

load_dotenv()


class PaperAnalyzer:
    def __init__(self):
        
        api_key = os.getenv("OPEN_AI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Please set it in the .env file.")
        self.client = OpenAI(api_key=api_key)
        self.assistant = None
        self.execution_times = {
        "claims_analysis": 0,
        "evidence_analysis": 0,
        "conclusions_analysis": 0,
        "total_time": 0
        }
        
    def create_assistant(self):
        try:
            self.assistant = self.client.beta.assistants.create(
                model="gpt-4-turbo-preview",
                description="Assistant for analyzing research papers",
                tools=[{"type": "file_search"}],
                name="Research Paper Analyzer"
            )
            print("Assistant created successfully")
        except Exception as e:
            print(f"Error creating assistant: {str(e)}")
            raise

    def get_all_claims(self, filename):
        """Get all claims in one pass"""
        try:
            # thread = self.client.beta.threads.create()
            start_time = time.time()

            file = self.client.files.create(file=open(filename, "rb"), purpose="assistants")
            print(f"Processing file: {filename}")
            
            claims_prompt = f"""
            task is to identify all statements in the text that meet the following criteria for a claim:
            1. Makes a specific, testable assertion about results, methods, or contributions
            2. Represents a novel finding, improvement, or advancement
            3. Presents a clear position or conclusion

            Make sure to:
            1. Include both major and minor claims
            2. Don't miss any claims
            3. Present each claim as a separate item
            
            Return ONLY the following JSON structure:
            {{
                "claims": [
                    {{
                        "claim_id": 1,
                        "claim_text": "statement of the claim",
                        "location": "section/paragraph where this claim appears",
                        "claim_type": "Nature of the claim",
                        "exact_quote": "complete verbatim text containing the claim"
                    }}
                ]
            }}
            """
            
            result = self._execute_analysis(None, file.id, claims_prompt)
            self.execution_times["claims_analysis"] = time.time() - start_time

            print("Claims extraction completed")
            return result
        except Exception as e:
            print(f"Error in get_all_claims: {str(e)}")
            raise

    def get_all_evidence(self, filename, claims):
        """Get evidence for all claims in one pass"""
        try:
            start_time = time.time()

            # thread = self.client.beta.threads.create()
            file = self.client.files.create(file=open(filename, "rb"), purpose="assistants")
            
            # Format claims for prompt
            claims_text = "\n".join([f"Claim {c['claim_id']}: {c['claim_text']}" for c in claims['claims']])
            print("Processing evidence for claims:", claims_text)
            
            evidence_prompt = f"""
            For these claims:
            {claims_text}

            Find the strongest supporting evidence for each claim. Evidence should:
            1. Directly supports or contradicts the claim's specific assertion
            2. Include specific results or data
            3. Come from the paper's results or evaluation
            4. Each claim can have multiple evidence, give each evidence as a seperate item
            5. Is not from the abstract or introduction


            Return ONLY the following JSON:
            {{
                "evidence_sets": [
                    {{
                        "claim_id": number,
                        "evidence": [
                            {{
                                "evidence_id": number,
                                "evidence_text": "specific evidence",
                                "strength": "strong/moderate/weak",
                                "limitations": "key limitations",
                                "location": "section/paragraph",
                                "exact_quote": "verbatim text"
                            }}
                        ]
                    }}
                ]
            }}
            """
            
            result = self._execute_analysis(None, file.id, evidence_prompt)
            self.execution_times["evidence_analysis"] = time.time() - start_time

            print("Evidence extraction completed")
            return result
        except Exception as e:
            print(f"Error in get_all_evidence: {str(e)}")
            raise

    def get_all_conclusions(self, filename, claims, evidence_sets):
        """Analyze conclusions for all claims and evidence in one pass"""
        try:
            # thread = self.client.beta.threads.create()
            start_time = time.time()

            file = self.client.files.create(file=open(filename, "rb"), purpose="assistants")
            
            # Create summary of claims and evidence for the prompt
            analysis_summary = []
            for claim in claims['claims']:
                claim_id = claim['claim_id']
                claim_evidence = next((e['evidence'] for e in evidence_sets['evidence_sets'] 
                                    if e['claim_id'] == claim_id), [])
                
                summary = f"\nClaim {claim_id}: {claim['claim_text']}\n"
                summary += "Evidence:\n"
                for evidence in claim_evidence:
                    summary += f"- {evidence['evidence_text']}\n"
                analysis_summary.append(summary)
            
            analysis_text = "\n".join(analysis_summary)
            
            conclusions_prompt = f"""
            Analyze these claims and their evidence:
            {analysis_text}

            For each claim-evidence pair, evaluate:
            1. Whether the evidence justifies the claim
            2. The overall strength of support
            3. Any important limitations

            Return ONLY the following JSON:
            {{
                "conclusions": [
                    {{
                        "claim_id": number,
                        "conclusion_justified": true/false,
                        "robustness": "high/medium/low",
                        "key_limitations": "specific limitations",
                        "confidence_level": "high/medium/low"
                    }}
                ]
            }}
            """
            
            result = self._execute_analysis(None, file.id, conclusions_prompt)
            self.execution_times["conclusions_analysis"] = time.time() - start_time

            print("Conclusions analysis completed")
            return result
            
        except Exception as e:
            print(f"Error in get_all_conclusions: {str(e)}")
            raise


    def _execute_analysis(self, thread_id, file_id, prompt):
        """Execute analysis with enhanced error handling"""
        try:
            # Create a new thread
            # total_start_time = time.time()

            thread = self.client.beta.threads.create()
            thread_id = thread.id  # Get the thread ID
            
            print("Creating message...")
            message = self.client.beta.threads.messages.create(
                thread_id=thread_id,  # Use the created thread ID
                role="user",
                attachments=[
                    Attachment(
                        file_id=file_id,
                        tools=[AttachmentToolFileSearch(type="file_search")]
                    )
                ],
                content=prompt
            )
            print("Message created successfully")

            print("Starting analysis run...")
            run = self.client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=self.assistant.id
            )

            # Poll for completion with timeout
            timeout = 300  # 5 minutes timeout
            start_time = time.time()
            while True:
                if time.time() - start_time > timeout:
                    raise Exception("Analysis timed out")

                run_status = self.client.beta.threads.runs.retrieve(
                    thread_id=thread_id,
                    run_id=run.id
                )
                
                print(f"Run status: {run_status.status}")
                
                if run_status.status == 'completed':
                    break
                elif run_status.status in ['failed', 'cancelled', 'expired']:
                    raise Exception(f"Run failed with status: {run_status.status}")
                
                time.sleep(5)  # Wait 5 seconds before checking again

            print("Retrieving messages...")
            messages = list(self.client.beta.threads.messages.list(thread_id=thread_id))
            if not messages:
                raise Exception("No messages received")

            # Clean up the thread
            try:
                self.client.beta.threads.delete(thread_id)
            except Exception as e:
                print(f"Error deleting thread: {e}")

            return self._parse_json_response(messages[0].content[0].text.value)
        except Exception as e:
            print(f"Error in _execute_analysis: {str(e)}")
            print(f"Thread ID: {thread_id}")
            print(f"File ID: {file_id}")
            raise

    def _parse_json_response(self, response):
        """Parse JSON response with better error handling"""
        try:
            print("Parsing response...")
            print("Raw response:", response)
            
            start_idx = response.find('{')
            end_idx = response.rfind('}') + 1
            
            if start_idx == -1 or end_idx == 0:
                raise ValueError("No JSON content found in response")
                
            json_str = response[start_idx:end_idx]
            result = json.loads(json_str)
            
            print("Successfully parsed JSON response")
            return result
            
        except Exception as e:
            print(f"Error parsing response: {str(e)}")
            print("Raw response:", response)
            raise

    def analyze_paper(self, filename):
        """Complete paper analysis using three-prompt approach"""
        try:
            # Get all claims

            total_start_time = time.time()

            print("Extracting claims...")
            claims = self.get_all_claims(filename)
            if not claims:
                raise Exception("Failed to extract claims")

            # Get evidence for all claims
            print("Extracting evidence...")
            evidence_sets = self.get_all_evidence(filename, claims)
            if not evidence_sets:
                raise Exception("Failed to extract evidence")

            # Get conclusions for all claim-evidence pairs
            print("Analyzing conclusions...")
            conclusions = self.get_all_conclusions(filename, claims, evidence_sets)
            if not conclusions:
                raise Exception("Failed to generate conclusions")
            self.execution_times["total_time"] = time.time() - total_start_time

            # Structure final results
            final_results = {
                "paper_analysis": []
            }

            for claim in claims['claims']:
                claim_id = claim['claim_id']
                
                # Get evidence for this claim
                evidence = next((e['evidence'] for e in evidence_sets['evidence_sets'] 
                            if e['claim_id'] == claim_id), [])
                
                # Get conclusion for this claim
                conclusion = next((c for c in conclusions['conclusions'] 
                                if c['claim_id'] == claim_id), {})

                analysis_item = {
                    "claim_id": claim_id,
                    "claim": {
                        "text": claim['claim_text'],
                        "location": claim['location'],
                        "type": claim['claim_type'],
                        "exact_quote": claim['exact_quote']
                    },
                    "evidence": evidence,
                    "conclusion": {
                        "conclusion_justified": conclusion.get('conclusion_justified', False),
                        "robustness": conclusion.get('robustness', 'Not evaluated'),
                        "limitations": conclusion.get('key_limitations', 'Not specified'),
                        "confidence_level": conclusion.get('confidence_level', 'low')
                    }
                }
                
                final_results['paper_analysis'].append(analysis_item)
            final_results["execution_times"] = {
            "claims_analysis_time": f"{self.execution_times['claims_analysis']:.2f} seconds",
            "evidence_analysis_time": f"{self.execution_times['evidence_analysis']:.2f} seconds",
            "conclusions_analysis_time": f"{self.execution_times['conclusions_analysis']:.2f} seconds",
            "total_execution_time": f"{self.execution_times['total_time']:.2f} seconds"
            }

            return final_results

        except Exception as e:
            print(f"Error in paper analysis: {str(e)}")
            return None

    def save_results(self, results, base_filename):
        output_dir = Path('GPT_3_prompts_shashi')
        output_dir.mkdir(exist_ok=True)
        
        # Save full JSON results
        json_path = output_dir / f'{base_filename}_analysis.json'
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=4)
        
        # Save readable text summary
        text_path = output_dir / f'{base_filename}_summary.txt'
        with open(text_path, 'w', encoding='utf-8') as f:
            for analysis in results['analysis']:
                f.write(f"Claim {analysis['claim_id']}:\n")
                f.write(f"Type: {analysis['claim']['type']}\n")
                f.write(f"Statement: {analysis['claim']['text']}\n")
                f.write(f"Location: {analysis['claim']['location']}\n")
                f.write(f"Exact Quote: {analysis['claim']['exact_quote']}\n\n")
                




                f.write("Evidence:\n")
                for evidence in analysis['evidence']:
                    f.write(f"- Evidence Text: {evidence['evidence_text']}\n")
                    f.write(f"  Strength: {evidence['strength']}\n")
                    f.write(f"  Location: {evidence['location']}\n")
                    f.write(f"  Limitations: {evidence['limitations']}\n")
                    f.write(f"  Exact Quote: {evidence['exact_quote']}\n\n")
                
                eval_data = analysis['evaluation']
                f.write("Evaluation:\n")
                f.write(f"Conclusion Justified: {'Yes' if eval_data['conclusion_justified'] else 'No'}\n")
                f.write(f"Robustness: {eval_data['robustness']}\n")
                f.write(f"Confidence Level: {eval_data['confidence_level']}\n")
                f.write(f"Justification: {eval_data['justification']}\n")
                f.write(f"Key Limitations: {eval_data['key_limitations']}\n")


                
                
                f.write("\n" + "-"*50 + "\n\n")
            f.write("\nExecution Times:\n")
            f.write(f"Claims Analysis: {self.execution_times['claims_analysis']:.2f} seconds\n")
            f.write(f"Evidence Analysis: {self.execution_times['evidence_analysis']:.2f} seconds\n")
            f.write(f"Conclusions Analysis: {self.execution_times['conclusions_analysis']:.2f} seconds\n")
            f.write(f"Total Execution Time: {self.execution_times['total_time']:.2f} seconds\n")
        # Generate summary statistics
        stats_path = output_dir / f'{base_filename}_statistics.txt'
        with open(stats_path, 'w', encoding='utf-8') as f:
            total_claims = len(results['analysis'])
            justified_claims = sum(1 for a in results['analysis'] 
                                 if a['evaluation']['conclusion_justified'])
            
            f.write("Analysis Statistics:\n")
            f.write(f"Total Claims Analyzed: {total_claims}\n")
            f.write(f"Justified Claims: {justified_claims}\n")
            
            # Evidence strength distribution
            strength_levels = {}
            for analysis in results['analysis']:
                for evidence in analysis['evidence']:
                    strength = evidence['strength']
                    strength_levels[strength] = strength_levels.get(strength, 0) + 1
            
            f.write("\nEvidence Strength Distribution:\n")
            total_evidence = sum(strength_levels.values())
            for strength, count in strength_levels.items():
                f.write(f"{strength}: {count} pieces ({count/total_evidence*100:.1f}%)\n")

        print(f"Analysis results saved to {output_dir}:")
        print(f"- Full analysis: {json_path}")
        print(f"- Summary: {text_path}")
        print(f"- Statistics: {stats_path}")


def results_exist(base_filename: str, output_folder: str) -> bool:
    """Check if results already exist for the given file."""
    output_dir = Path(output_folder)
    analysis_path = output_dir / f'{base_filename}_analysis.json'
    summary_path = output_dir / f'{base_filename}_summary.txt'
    statistics_path = output_dir / f'{base_filename}_statistics.txt'
    
    # Check if all expected output files exist
    return all(file.exists() for file in [analysis_path, summary_path, statistics_path])

def main():
    input_folder = 'shashi_1_papers'
    output_folder = 'GPT_3_prompts_shashi'
    Path(output_folder).mkdir(exist_ok=True)  # Ensure the output directory exists

    analyzer = PaperAnalyzer()
    analyzer.create_assistant()
    
    pdf_files = [f for f in os.listdir(input_folder) if f.endswith('.pdf')]

    for filename in pdf_files:
        base_filename = Path(filename).stem
        
        if results_exist(base_filename, output_folder):
            print(f"Skipping {filename}, results already exist.")
            continue
        
        try:
            filename_with_path = f"{input_folder}/{filename}"
            print(f"Starting analysis of {filename_with_path}")
            
            results = analyzer.analyze_paper(filename_with_path)
            if results:
                analyzer.save_results(results, base_filename)
                print("Analysis completed successfully for", filename)
            else:
                print("Analysis failed to produce results for", filename)
                
        except Exception as e:
            print(f"Error analyzing {filename}: {str(e)}")
            traceback.print_exc()

if __name__ == "__main__":
    main()

Assistant created successfully
Starting analysis of shashi_1_papers/2502.12568v2.pdf
Extracting claims...
Processing file: shashi_1_papers/2502.12568v2.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
    "claims": [
        {
            "claim_id": 1,
            "claim_text": "CogWriter transforms LLM constrained long-form text generation into a systematic cognitive writing paradigm.",
            "location": "Abstract",
            "claim_type": "Innovation",
            "exact_quote": "we aim to equip LLMs with human-like cognitive writing capabilities through CogWriter, a novel training-free framework that transforms LLM constrained long-form text generation into a systematic cognitive writing paradigm."
        },
        {
  

Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'


Processing file: shashi_1_papers/2409.15915v1.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
  "claims": [
    {
      "claim_id": 1,
      "claim_text": "Semantic equivalence across different representations holds true in our context.",
      "location": "Experiments",
      "claim_type": "Testable Hypothesis",
      "exact_quote": "(H1) Semantic equivalence across different representations, as discussed by Weaver, holds true in our context."
    },
    {
      "claim_id": 2,
      "claim_text": "Ambiguity in natural language descriptions leads to multiple interpretations.",
      "location": "Experiments",
      "claim_type": "Testable Hypothesis",
      "exact_quote": "(H2) Ambiguity in natural language descriptions leads to multiple interpretations."


Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'


Processing file: shashi_1_papers/2405.04215v1.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
    "claims": [
        {
            "claim_id": 1,
            "claim_text": "NL2Plan shows much higher robustness than Zero-Shot CoT, successfully solving 10 of the 15 tasks.",
            "location": "NL2Plan Results",
            "claim_type": "Results Assertion",
            "exact_quote": "NL2Plan shows much higher robustness than Zero-Shot CoT. It successfully solves 10 of the 15 tasks, a superset of those solved by Zero-Shot CoT (including those with questionable plans)."
        },
        {
            "claim_id": 2,
            "claim_text": "NL2Plan's primary cause of failure is incorrect task modeling."

Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'


Processing file: shashi_1_papers/2501.18817v1.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
  "claims": [
    {
      "claim_id": 1,
      "claim_text": "Generalised strategies and iterative error correction can substantially enhance the reasoning ability of weaker LLMs, achieving performance comparable to more resource-intensive models at lower costs.",
      "location": "Abstract",
      "claim_type": "Improvement",
      "exact_quote": "Our empirical results from planning and mathematical reasoning tasks demonstrate that these methods improve the performance of less resource-intensive LLMs to levels comparable with their more resource-intensive counterparts, at a fraction of the cost."
    },
    {
     

Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'


Processing file: shashi_1_papers/2409.08642v2.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
    "claims": [
        {
            "claim_id": 1,
            "claim_text": "CPL significantly outperforms the DeepSeekMath-7B-Base model in in-domain and out-of-domain reasoning tasks.",
            "location": "Abstract",
            "claim_type": "Improvement & Result",
            "exact_quote": "trained exclusively on GSM8K and MATH, not only significantly improves performance on GSM8K (+10.5%) and MATH (+6.5%), but also enhances out-of-domain reasoning benchmarks, such as HumanEval (+12.2%), GPQA (+8.6%), ARC-C (+4.0%), MMLU-STEM (+2.2%), and BBH (+1.8%)"
        },
        {
            "claim_id": 2,
            "claim_text": "Plan-based learning offers

Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'


Processing file: shashi_1_papers/2502.12130v1.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
  "claims": [
    {
      "claim_id": 1,
      "claim_text": "The ARMAP framework can automatically learn a reward model from the environment without human annotations, used to evaluate action trajectories of LLM agents for task planning.",
      "location": "Abstract",
      "claim_type": "Methodology",
      "exact_quote": "we propose a framework that can automatically learn a reward model from the environment without human annotations. This model can be used to evaluate the action trajectories of LLM agents and provide heuristics for task planning."
    },
    {
      "claim_id": 2,
      "claim_text": "ARMAP involves employing one LLM-b

Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'


Processing file: shashi_1_papers/2410.14255v2.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
  "claims": [
    {
      "claim_id": 1,
      "claim_text": "LLMs can generate ideas more novel than those written by human experts.",
      "location": "2 Related work/2.1 LLM-based Scientific Innovation",
      "claim_type": "Novelty in Idea Generation",
      "exact_quote": "Concurrent with our research, Si et al. (2024) introduce AI-Researcher, which, for the first time, demonstrates that LLMs can generate ideas deemed more novel than those written by human experts."
    },
    {
      "claim_id": 2,
      "claim_text": "An idea ranking method based on pairwise comparison achieves 71.4% accuracy in distinguishin

Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'


Processing file: shashi_1_papers/2402.02716v1.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
    "claims": [
        {
            "claim_id": 1,
            "claim_text": "This survey provides the first systematic view of LLM-based agents planning.",
            "location": "Abstract",
            "claim_type": "Novelty",
            "exact_quote": "This survey provides the first systematic view of LLM-based agents planning, covering recent works aiming to improve planning ability."
        },
        {
            "claim_id": 2,
            "claim_text": "The taxonomy categorizes LLM-Agent planning into Task Decomposition, Plan Selection, External Module, Reflection, and Memory.",
            "location": "Abstract",
            "claim_type": "Contributi

Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'


Processing file: shashi_1_papers/2305.16653v1.pdf
Creating message...
Message created successfully
Starting analysis run...
Run status: queued
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: in_progress
Run status: completed
Retrieving messages...
Parsing response...
Raw response: ```json
{
    "claims": [
        {
            "claim_id": 1,
            "claim_text": "AdaPlanner adapts its plan from feedback with in-plan and out-of-plan refinement strategies.",
            "location": "Abstract",
            "claim_type": "Methodology/Approach",
            "exact_quote": "AdaPlanner, which allows the LLM agent to refine its self-generated plan adaptively in response to environmental feedback. In AdaPlanner, the LLM agent adaptively refines its plan from feedback with both in-plan and out-of-plan refinement strategies."
        },
        {
            "claim_id": 2,
            "claim_text": "AdaPlanner outperforms state-of-

Traceback (most recent call last):
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 460, in main
    analyzer.save_results(results, base_filename)
  File "/var/folders/jd/w0m1lwt10fz9fhspxkbcd32c0000gq/T/ipykernel_22293/3745754536.py", line 357, in save_results
    for analysis in results['analysis']:
KeyError: 'analysis'
