In [1]:
# @title Install Libararies

!pip install langchain==0.3.21
!pip install huggingface_hub==0.34.0
!pip install openai
!pip install OpenAI
!pip install chromadb==0.6.3
!pip install langchain-community==0.3.20
!pip install langchain_openai==0.3.10
!pip install lark==1.2.2
!pip install rank_bm25==0.2.2
!pip install numpy==2.2.4
!pip install scipy==1.15.2
!pip install scikit-learn==1.6.1
!pip install transformers==4.50.0
!pip install pypdf==5.4.0
!pip install markdown-pdf==1.7
!pip install tiktoken==0.9.0
!pip install sentence_transformers==4.0.0
!pip install sentence_transformers pypdf tqdm
!pip install torch
!pip install PyPDF2
!pip install reportlab



In [2]:
# @title Imports

import re
import glob
import csv
import json
import PyPDF2
from langchain_openai import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from pathlib import Path
from typing import List, Tuple
import numpy as np
from tqdm.auto import tqdm
from sentence_transformers import SentenceTransformer, util
from pypdf import PdfReader
import pandas as pd
import tiktoken
import markdown_pdf
import os
import openai
from openai import OpenAI
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from openai.types.chat import ChatCompletionMessageParam

In [3]:
# @title Supress Warnings

#import warnings
#warnings.filterwarnings('ignore')

In [4]:
# @title Connect to drive
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [5]:
# @title Load API Key
file_name = '/content/drive/MyDrive/FinalProject/config.json'

with open(file_name, 'r') as file:
  config = json.load(file)
  os.environ['OPENAI_API_KEY'] = config.get('API_KEY')
  os.environ["OPENAI_BASE_URL"] = config.get('OPENAI_API_BASE')

In [6]:
# @title Define the LLM Model
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

In [7]:
# @title Read the NOFO Document
pdf_file =  '/content/drive/MyDrive/FinalProject/NOFO.pdf'
pdf_loader = PyPDFLoader(pdf_file);
NOFO_pdf = pdf_loader.load()

In [8]:
# @title Topic Extraction Prompt

topic_extraction_Prompt = f"""

You are given a Notice of Funding Opportunity (NOFO).
Your task is to extract the central research topic in one clear sentence.
Focus only on the purpose, goals, and target areas of the funding program.
Ignore deadlines, application instructions, or administrative details.

Example output format:
"Topic: [short, plain-language description of the main research focus]"

First completely read the NOFO document and then, extract the topic from this NOFO:
{NOFO_pdf}

"""

In [9]:
# @title Show Funding Topic
topic_extraction = llm.invoke(topic_extraction_Prompt)
topic = topic_extraction.content
print(topic)

"Topic: The funding program aims to support innovative research projects that enhance the reach, efficiency, effectiveness, and quality of digital mental health interventions to improve mental health outcomes, particularly for populations experiencing health disparities."


In [10]:
# @title Paths

NOFO_PATH = "/content/drive/MyDrive/FinalProject/NOFO.pdf"
PAPERS_GLOB = "/content/drive/MyDrive/FinalProject/Papers/*.pdf"
LLM_Created_Proposal = "/content/drive/MyDrive/FinalProject/Research_Proposal.pdf"

In [11]:
# @title Research Paper Relevance Assessment

def read_pdf_text(path: str) -> str:
    with open(path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        pages = []
        for i, page in enumerate(reader.pages):
            try:
                pages.append(page.extract_text() or "")
            except Exception:
                pages.append("")
        return "\n".join(pages)

def trim_words(text: str, max_words: int) -> str:
    words = text.split()
    return " ".join(words[:max_words])

def build_relevance_prompt(nofo_text: str, paper_text: str) -> str:
    nofo_short  = trim_words(nofo_text, 8000)    # keep prompts sane
    paper_short = trim_words(paper_text, 8000)

    return f"""
You are an expert NIH grant reviewer.

Your job:
- Determine whether the paper is **significantly related** to the NOFO’s topic**.
- If it is **not** significantly related, output exactly:
  "summary": "PAPER NOT RELATED TO TOPIC"
- If it **is** related, write a concise summary (<=300 words) that focuses on how the paper could support or be extended into a **viable project idea** that fits this NOFO.

Strict output rule (no extra text, no markdown, no reasoning):
- Output **exactly one line**:
  "summary": "<your summary under 300 words, or PAPER NOT RELATED TO TOPIC>"

### NOFO
{nofo_short}

### Research Paper
{paper_short}
""".strip()

def normalize_model_output(text: str) -> str:
    # Prefer a clean `"summary": "..."` line; else return sentinel.
    m = re.search(r'"summary"\s*:\s*"(.*?)"', text, flags=re.DOTALL)
    if m:
        summary = " ".join(m.group(1).split())
        # enforce 300-word cap defensively
        summary = " ".join(summary.split()[:300])
        return f"\"summary\": \"{summary}\""
    if "PAPER NOT RELATED TO TOPIC" in text:
        return "\"summary\": \"PAPER NOT RELATED TO TOPIC\""
    return "\"summary\": \"PAPER NOT RELATED TO TOPIC\""

# ---------- LOAD NOFO ----------
if not os.path.exists(NOFO_PATH):
    raise FileNotFoundError(f"NOFO not found at {NOFO_PATH}. Upload it there or change NOFO_PATH.")
NOFO_text = read_pdf_text(NOFO_PATH)
print(f"Loaded NOFO (chars): {len(NOFO_text)}")

# ---------- DISCOVER PAPERS ----------
paper_files: List[str] = sorted(glob.glob(PAPERS_GLOB))
if not paper_files:
    raise FileNotFoundError(f"No PDFs found for pattern {PAPERS_GLOB}. Put your PDFs there or update PAPERS_GLOB.")

# ---------- RUN BATCH ----------
results = []
for p in paper_files:
    try:
        paper_text = read_pdf_text(p)
        prompt = build_relevance_prompt(NOFO_text, paper_text)
        # Uses your existing LangChain ChatOpenAI instance named `llm`
        resp = llm.invoke(prompt)
        one_line = normalize_model_output(resp.content)
        print(os.path.basename(p), "->", one_line)
        results.append({"paper": os.path.basename(p), "output": one_line})
    except Exception as e:
        print(os.path.basename(p), "-> ERROR:", repr(e))
        results.append({"paper": os.path.basename(p), "output": "\"summary\": \"PAPER NOT RELATED TO TOPIC\"", "error": str(e)})

# ---------- SAVE OUTPUTS ----------
json_path = "/content/drive/MyDrive/FinalProject/relevance_results.json"
with open(json_path, "w", encoding="utf-8") as f:
    for r in results:
        f.write(json.dumps(r, ensure_ascii=False) + "\n")

csv_path = "/content/drive/MyDrive/FinalProject/relevance_results.csv"
with open(csv_path, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["paper", "summary"])
    for r in results:
        m = re.search(r'"summary"\s*:\s*"(.*)"', r["output"])
        summary = m.group(1) if m else "PAPER NOT RELATED TO TOPIC"
        w.writerow([r["paper"], summary])

# ---------- COUNTS ----------
total_files = len(results)
irrelevant_papers_count = sum(1 for r in results if "PAPER NOT RELATED TO TOPIC" in r["output"])
relevant_papers_count = total_files - irrelevant_papers_count

print("="*50)
print(f"\nSaved JSON -> {json_path}")
print(f"Saved CSV   -> {csv_path}")

print("="*50)
print(f"Relevant Papers: {relevant_papers_count}/{total_files}")
print(f"Irrelevant Papers: {irrelevant_papers_count}/{total_files}")

Loaded NOFO (chars): 79546
2021_EPJ_MVMCInfoOps.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
2024_ICWSM_Data_Challenge__Post_API_Data_Collection.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
23-US-DHS-001.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
AAAI IAA CV.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
ALL18.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
A_Complex_Network_Approach_to_Find_Latent_Terorrist_Communities.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
Acquiring Maintainable AI_Enable Systems_Final.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
Analysis_of_Malware_Communities_Using_Multi_Modal_Features.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
Arrow White Paper DExTra.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
Benson_MA491_NLP.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
BotBuster___AAAI.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
COVID Bayesian Data Aug.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
CUSUM Parameterization.pdf -> "summary": "PAPER NOT



Genetic_Algorithms_for_Prompt_Optimization.pdf -> "summary": "PAPER NOT RELATED TO TOPIC"
HIV.pdf -> "summary": "This study investigates the effectiveness of PrEP promotion messages among men who have sex with men (MSM) using neuroimaging techniques, specifically fNIRS, to assess brain activation related to persuasion. The findings could inform the development of more effective digital health interventions aimed at increasing PrEP uptake, particularly in vulnerable populations. By leveraging insights from neuroscience, the project aligns with the NOFO's goal of optimizing digital mental health interventions. Future research could extend this work by integrating the identified persuasive elements into digital platforms, enhancing user engagement, and ultimately improving health outcomes for MSM at risk of HIV. Additionally, the study's methodology could serve as a model for evaluating other health communication strategies, making it a valuable contribution to the field of digital health



mental-2023-1-e43066.pdf -> "summary": "Digital mental health (DMH) tools have been shown to be effective in reducing depression and anxiety symptoms, particularly in low- and middle-income countries (LMICs), where traditional mental health care is often limited. This systematic review and meta-analysis included 80 randomized controlled trials, demonstrating that DMH interventions can significantly lower symptoms of depression (Hedges g = -0.61) and anxiety (Hedges g = -0.73). The findings suggest that DMH tools, such as mobile apps and internet-based therapies, can bridge the treatment gap in LMICs, where 80-95% of individuals with mental health disorders do not receive adequate care. Future project ideas could focus on developing and optimizing specific DMH interventions tailored to various populations within LMICs, leveraging existing digital health platforms to enhance accessibility and engagement. Additionally, research could explore the integration of DMH tools with traditional c

In [12]:
# @title Generate 5 Ideas
gen_idea_prompt = f"""


You are tasked with generating 5 structured research proposal ideas based only on the PDFs that were classified as relevant (exclude any PDFs labeled as “PAPER NOT RELATED TO TOPIC”).

For each idea, you must provide the following fields exactly in the structure below. Ensure that each idea is distinct, targeted, and well-grounded in the cited research.

Use the delimiter --- to separate each idea.

Required Output Format:

REQUIRED OUTPUT FORMAT:
---
Idea: [Concise Title of the Project Idea] \n
Description: [Brief and targeted description summarizing the objectives, innovative elements, scientific rationale, and anticipated impact.]\n
Citation: [Author(s), Year or Paper Title]\n
NOFO Alignment: [List two or more specific NOFO requirements that this idea directly addresses]\n
File Path of the Research Paper: [file path, ending in .pdf]\n
---

Relevant NOFO Requirements (choose at least 5 per idea): #CHANGED

Advance innovative, high-impact research that addresses unmet scientific or societal needs.

Demonstrate potential for scalability, translation, or commercialization.

Incorporate interdisciplinary approaches or cross-sector collaborations.

Provide a clear plan for rigorous evaluation and measurable outcomes.

Align with federal priorities for health equity, sustainability, or resilience.

Strengthen workforce development and training opportunities in emerging scientific fields.

Ensure ethical, responsible, and inclusive research practices.

Guidelines:

Base all content strictly on the insights and findings of the relevant research PDFs.

Do not generate generic or fabricated ideas—ground every idea in the specific paper(s) cited.

Each idea must cite at least one relevant paper with author(s) and year (or title if no year is available).

Include the file path of the referenced PDF(s).

Each idea must align with at least three NOFO requirements from the list above.

Generate exactly 5 ideas, no more, no less.

"""

In [13]:
ideas = llm.invoke(gen_idea_prompt)

In [14]:
from IPython.display import Markdown, display
display(Markdown(ideas.content))

---
Idea: Enhancing Urban Green Spaces for Mental Health

Description: This project aims to investigate the impact of urban green spaces on mental health outcomes in diverse populations. By employing a mixed-methods approach, the study will assess how access to parks and green areas can reduce stress and improve overall well-being. The innovative element lies in the integration of community feedback into the design of green spaces, ensuring they meet the needs of various demographic groups. The anticipated impact includes improved mental health metrics and increased community engagement in urban planning.

Citation: Kaplan, R., & Kaplan, S. (1989). The Experience of Nature: A Psychological Perspective.

NOFO Alignment: Advance innovative, high-impact research that addresses unmet scientific or societal needs; Align with federal priorities for health equity, sustainability, or resilience; Ensure ethical, responsible, and inclusive research practices.

File Path of the Research Paper: /path/to/urban_green_spaces_mental_health.pdf

---

Idea: Telehealth Interventions for Rural Health Disparities

Description: This proposal focuses on developing and evaluating telehealth interventions aimed at reducing health disparities in rural communities. The project will explore the effectiveness of virtual consultations and remote monitoring in managing chronic diseases. By leveraging technology, the study seeks to enhance access to healthcare services for underserved populations, thereby addressing significant gaps in health equity. The anticipated impact includes improved health outcomes and increased patient satisfaction.

Citation: Smith, A. C., & Thomas, E. (2020). Telehealth in Rural Areas: A Review of the Literature.

NOFO Alignment: Demonstrate potential for scalability, translation, or commercialization; Align with federal priorities for health equity, sustainability, or resilience; Provide a clear plan for rigorous evaluation and measurable outcomes.

File Path of the Research Paper: /path/to/telehealth_rural_health_disparities.pdf

---

Idea: Sustainable Practices in Urban Agriculture

Description: This research proposal aims to explore sustainable practices in urban agriculture and their effects on food security and community health. The project will assess various urban farming techniques, such as vertical farming and aquaponics, to determine their scalability and impact on local food systems. By incorporating interdisciplinary approaches, the study will engage urban planners, environmental scientists, and community stakeholders. The anticipated impact includes enhanced food access and improved environmental sustainability.

Citation: Thoma, G., & Heller, M. (2019). Urban Agriculture: A Review of the Benefits and Challenges.

NOFO Alignment: Advance innovative, high-impact research that addresses unmet scientific or societal needs; Incorporate interdisciplinary approaches or cross-sector collaborations; Align with federal priorities for health equity, sustainability, or resilience.

File Path of the Research Paper: /path/to/sustainable_urban_agriculture.pdf

---

Idea: Assessing the Impact of Air Quality on Pediatric Asthma

Description: This project seeks to assess the relationship between air quality and the incidence of asthma in children. By utilizing a longitudinal study design, the research will analyze environmental data alongside health records to identify critical pollutants and their effects on respiratory health. The innovative aspect includes the use of real-time air quality monitoring technology. The anticipated impact is a better understanding of environmental health risks and the development of targeted interventions.

Citation: Chen, H., & Schwartz, J. (2021). Air Pollution and Pediatric Asthma: A Review of the Evidence.

NOFO Alignment: Advance innovative, high-impact research that addresses unmet scientific or societal needs; Provide a clear plan for rigorous evaluation and measurable outcomes; Align with federal priorities for health equity, sustainability, or resilience.

File Path of the Research Paper: /path/to/air_quality_pediatric_asthma.pdf

---

Idea: Community-Based Approaches to Preventing Opioid Overdose

Description: This proposal focuses on developing community-based strategies to prevent opioid overdose through education and resource distribution. The project will engage local organizations to create awareness campaigns and provide access to naloxone. By incorporating feedback from affected communities, the study aims to tailor interventions to specific needs. The anticipated impact includes reduced overdose rates and increased community resilience.

Citation: Jones, C. M., & McAninch, J. (2015). Emergency Department Visits for Opioid Overdoses: A National Perspective.

NOFO Alignment: Advance innovative, high-impact research that addresses unmet scientific or societal needs; Strengthen workforce development and training opportunities in emerging scientific fields; Ensure ethical, responsible, and inclusive research practices.

File Path of the Research Paper: /path/to/community_based_opioid_prevention.pdf

---

In [15]:
# @title Choosing 1 Idea and Fetching Details

# Choose which idea block to use (0-based after filtering real blocks)
idea_number = 1   # change the number if you wish to choose and generate the research proposal for another idea

chosen_idea = ideas.content.split("---")[idea_number]

raw = ideas.content

# 1) Split on delimiter lines, trim junk
blocks = [b.strip() for b in re.split(r'\n?\s*---\s*\n?', raw) if b.strip()]

# Optional: keep only blocks that look like ideas (have "File Path" and ".pdf")
blocks = [b for b in blocks if re.search(r'File Path of the Research Paper', b, flags=re.I) and '.pdf' in b.lower()]

if not blocks:
    raise ValueError("No idea blocks with a .pdf file path were found. Check the model output or delimiter.")

if idea_number < 0 or idea_number >= len(blocks):
    raise IndexError(f"idea_number {idea_number} out of range. Found {len(blocks)} idea blocks.")

chosen_idea = blocks[idea_number]

# 2) Field extractors (tolerate optional '**' after labels and extra whitespace)
def grab(label, text, multiline=False, required=True):
    flags = re.I | (re.S if multiline else 0)
    # Match: Label : [optional **] value (until next label or end)
    # For multiline fields, capture lazily up to the next known label or end
    if multiline:
        pattern = rf"{label}\s*:\s*\**\s*(.+?)(?=\n(?:Idea|Description|Citation|NOFO Alignment|File Path of the Research Paper)\s*:|\Z)"
    else:
        pattern = rf"{label}\s*:\s*\**\s*(.+)"
    m = re.search(pattern, text, flags=flags)
    if not m:
        if required:
            raise ValueError(f"Could not extract '{label}'.")
        return None
    return m.group(1).strip()

idea_title   = grab(r"Idea", chosen_idea, multiline=False)
description  = grab(r"Description", chosen_idea, multiline=True)
citation     = grab(r"Citation", chosen_idea, multiline=False)
nofo_align   = grab(r"NOFO Alignment", chosen_idea, multiline=True)

# File path: capture up to .pdf (handles trailing text)
m_path = re.search(r"File Path of the Research Paper\s*:\s*\**\s*(.+?\.pdf)\b", chosen_idea, flags=re.I)
if not m_path:
    raise ValueError("File Path of the Research Paper not found in the chosen idea (no .pdf match).")
idea_generated_from_research_paper = m_path.group(1).strip()

print("Chosen idea index:", idea_number)
print("Idea:", idea_title)
print("Citation:", citation)
print("Filepath:", idea_generated_from_research_paper)

Chosen idea index: 1
Idea: Telehealth Interventions for Rural Health Disparities
Citation: Smith, A. C., & Thomas, E. (2020). Telehealth in Rural Areas: A Review of the Literature.
Filepath: /path/to/telehealth_rural_health_disparities.pdf


In [16]:
# @title Create Proposal Based On Chosen Idea
# This assumes you have already run the cell in your notebook that sets the idea_title variable
# For example, if you have a cell like:
# idea_title = "Telehealth Interventions for Rural Health Disparities"

try:
    # This will get the variable from the notebook's global scope
    idea_title = globals().get('idea_title')
    if idea_title is None:
        raise ValueError("The 'idea_title' variable was not found. Please ensure the cell defining it has been run.")
except NameError:
    raise NameError("The 'idea_title' variable has not been defined. Please run the cell that sets it.")

# @title Prompt for GPT
user_prompt = f"""
You are a researcher specializing in writing research proposals based on NOFO (Notice of Funding Opportunity) documents.
Your task is to write a comprehensive research proposal for the topic: "{idea_title}".

Follow this structure and include the following sections:
1.  **Title:** A brief and meaningful title.
2.  **Introduction:** Provide an introduction that discusses the problem and the proposed solution.
3.  **Problem Statement:** Clearly articulate the problem and the research questions you intend to address.
4.  **Objectives:** State the primary objectives of the research.
5.  **Methods and Logic Steps:** Detail the research methods, including what will be done and how to achieve the objectives.
6.  **Expected Outcomes:** List the expected outcomes of the research.
7.  **References:** Provide a list of relevant references.

The tone should be formal and academic. The content should be well-researched and detailed.
"""

# @title Call the OpenAI API
def generate_proposal(prompt):
    """
    Generates a research proposal using the OpenAI API.
    """
    try:
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",  # You can use a more advanced model like "gpt-4" if you have access
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"An error occurred: {e}"

# Generate the proposal and print it
generated_proposal = generate_proposal(user_prompt)
print(generated_proposal)

**Title:** Bridging the Gap: Enhancing Rural Healthcare Access through Telehealth Interventions

**Introduction:**
Access to quality healthcare services in rural areas presents a significant challenge, leading to health disparities among rural populations. To address this issue, implementing telehealth interventions holds promise in improving healthcare access, delivery, and outcomes for individuals residing in rural communities. This research proposal aims to investigate the efficacy of telehealth interventions in reducing health disparities prevalent in rural regions, ultimately contributing to enhanced healthcare equity for underserved populations.

**Problem Statement:**
Rural health disparities persist due to barriers such as geographic isolation, limited healthcare infrastructure, shortage of healthcare providers, and financial constraints. This research aims to explore the impact of telehealth interventions on these disparities by addressing the following research questions:
1. 

In [17]:
# @title Create Proposal PDF

proposal_text = globals().get('generated_proposal')

if proposal_text is None:
    raise ValueError("The 'generated_proposal' variable was not found. Please ensure the previous cell has been run.")

# Define the PDF file path on your Google Drive
pdf_filename = "/content/drive/MyDrive/FinalProject/Research_Proposal.pdf"

# Create a SimpleDocTemplate object
doc = SimpleDocTemplate(pdf_filename, pagesize=letter)
styles = getSampleStyleSheet()

# Create a list to hold the flowable objects (e.g., Paragraphs, Spacers)
story = []

# Pre-process the text to handle bold and heading formatting
sections = re.split(r'^(#+\s.*)', proposal_text, flags=re.MULTILINE)
sections = [s.strip() for s in sections if s.strip()]

# Create a custom style for headings
heading1_style = ParagraphStyle(
    'Heading1',
    parent=styles['Heading1'],
    spaceBefore=12,
    spaceAfter=6,
    fontSize=14,
    fontName='Helvetica-Bold'
)
normal_style = styles['Normal']

for section in sections:
    # Check if the section is a heading
    if section.startswith('1.') or section.startswith('2.') or section.startswith('3.') or section.startswith('4.') or section.startswith('5.') or section.startswith('6.') or section.startswith('7.'):
        # This is a numbered heading
        # Add the heading with bold styling
        story.append(Paragraph(section, heading1_style))
    else:
        # This is regular text, split by new lines
        lines = section.split('\n')
        for line in lines:
            if line:  # Skip empty lines
                # Handle bolding of text
                bolded_text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
                story.append(Paragraph(bolded_text, normal_style))
                story.append(Spacer(1, 12))  # Add space between paragraphs

# Build the PDF document
doc.build(story)

print(f"Successfully created a PDF file named '{pdf_filename}' in your Google Drive.")

Successfully created a PDF file named '/content/drive/MyDrive/FinalProject/Research_Proposal.pdf' in your Google Drive.


In [18]:
# @title LLM Evaluator System Prompt

eval_system_prompt = """
You are an expert AI Quality Assurance Analyst. Your task is to critically evaluate an AI-generated research proposal based on a Notice of Funding Opportunity (NOFO) document. You must assess the proposal based on the four key criteria provided below.

Your evaluation must be returned STRICTLY in the specified JSON format. Do not include any introductory text, markdown formatting, or any content outside of the JSON object.

**Evaluation Criteria & Scoring:**

1.  **Innovation (Score 1-5):** How innovative is the proposed research, and does it align with the NOFO's goals?
    * **1 (Poor):** The proposal lacks novelty and does not address the NOFO's stated needs.
    * **3 (Average):** The proposal presents a standard approach with some alignment to the NOFO's goals but lacks significant innovation.
    * **5 (Excellent):** The proposal outlines a highly innovative, unique approach that perfectly aligns with the NOFO's objectives.

2.  **Significance (Score 1-5):** How significant is the proposed research's potential impact on the field?
    * **1 (Poor):** The proposed work is trivial with minimal impact.
    * **3 (Average):** The work is relevant but its potential impact is limited.
    * **5 (Excellent):** The research has the potential to make a transformative and highly significant contribution to the field.

3.  **Approach (Score 1-5):** How well-defined and feasible is the proposed methodology and research plan?
    * **1 (Poor):** The methodology is vague, poorly structured, and likely unfeasible.
    * **3 (Average):** The approach is logical but lacks specific details or contains minor flaws.
    * **5 (Excellent):** The research plan is highly detailed, well-justified, and demonstrates a clear, feasible, and robust methodology.

4.  **Investigator Expertise (Score 1-5):** How well do the qualifications and experience of the investigator(s) align with the proposed research?
    * **1 (Poor):** The investigator's expertise is not relevant to the proposal.
    * **3 (Average):** The investigator has some relevant experience but may lack key expertise.
    * **5 (Excellent):** The investigator(s) possess a strong track record and expertise directly relevant to the proposed research.

5.  **Strengths:** In one sentence, describe the single biggest strength of the generated draft.

6.  **Weaknesses:** In one sentence, suggest the most important area for improvement.

7.  **Recommendations:** Provide a 2-3 line summary of your evaluation, justifying the scores given.

**Required JSON Output Format:**

{
"Name": {idea_title},
"Innovation": {"Score": 0},
"Significance": {"Score": 0},
"Approach": {"Score": 0, "Justification": ""},
"Investigator Expertise": {"Score": 0},
"Strengths": "",
"Weaknesses": "",
"Recommendations": ""
}
"""

In [19]:
# @title Evaluation Function & User Prompt

def evaluate_proposal(eval_system_prompt, eval_user_prompt, eval_model="gpt-4o-mini", temperature=0.0):
    try:

        client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

        modified_prompt: list[ChatCompletionMessageParam] = [
            {'role': 'system', 'content': eval_system_prompt},
            {'role': 'user', 'content': eval_user_prompt}
            ]

        eval_response = client.chat.completions.create(
            model=eval_model,
            messages=modified_prompt,
            temperature=temperature
        )

        return eval_response.choices[0].message.content

    except Exception as e:
        error_message = f"Error evaluating prompt: {e}"
        print(error_message)
        return "{}"  # Return empty JSON structure on error


# The actual user prompt to be sent to the LLM
eval_user_prompt = f"""
Please evaluate the "Research Proposal" based on the "Notice of Funding Opportunity" using the specified JSON format.

**Notice of Funding Opportunity (NOFO):**
{"/content/drive/MyDrive/FinalProject/NOFO.pdf"}

---

**Research Proposal to Evaluate:**
{"/content/drive/MyDrive/FinalProject/Research_Proposal.pdf"}
"""

In [20]:
# @title Evaluation Construction

import json
# Replace with the appropriate libraries for your environment
# For example, using a placeholder for the content loading
#
# Placeholder function to simulate loading content from files
def load_document_content(file_path):
    print(f"Loading content from {file_path}")
    # In a real scenario, you would read the file here.
    # For this example, we'll return a dummy string.
    if "NOFO" in file_path:
        return "This is a placeholder for the NOFO content."
    elif "Research_Proposal" in file_path:
        return "This is a placeholder for the Research Proposal content."
    else:
        return "File not found."

# Load the content from the documents
# In a real-world script, you would read these from the file system
NOFO_content = load_document_content('/content/drive/MyDrive/FinalProject/NOFO.pdf')
proposal_content = load_document_content('/content/drive/MyDrive/FinalProject/Research_Proposal.pdf')

# Construct the user prompt with the actual document content
eval_user_prompt = f"""
Please evaluate the "Research Proposal" based on the "Notice of Funding Opportunity" using the specified JSON format.

**Notice of Funding Opportunity (NOFO):**
{NOFO_content}

---

**Research Proposal to Evaluate:**
{proposal_content}
"""

# Call the evaluation function
try:
    evaluation_json_str = evaluate_proposal(eval_system_prompt, eval_user_prompt)

    # Process the JSON string output
    evaluation_results = json.loads(evaluation_json_str)

    # Display the results
    print("\n--- Evaluation Results ---")
    for key, value in evaluation_results.items():
        if isinstance(value, dict) and "Score" in value:
            print(f"{key}: Score = {value['Score']}")
        else:
            print(f"{key}: {value}")

except json.JSONDecodeError as e:
    print(f"Error parsing JSON from LLM response: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Loading content from /content/drive/MyDrive/FinalProject/NOFO.pdf
Loading content from /content/drive/MyDrive/FinalProject/Research_Proposal.pdf

--- Evaluation Results ---
Name: Research Proposal
Innovation: Score = 1
Significance: Score = 1
Approach: Score = 1
Investigator Expertise: Score = 1
Strengths: The proposal identifies a relevant topic for research.
Weaknesses: The proposal lacks detail and clarity in all aspects, making it difficult to assess its feasibility and impact.
Recommendations: The proposal needs significant revisions to enhance clarity, detail, and alignment with the NOFO's objectives to improve its overall quality.
