### Research question and objective
The user will be prompted to input their research question and objective, from where we would get the necessary keywords and is also used to screen abstracts

## SEARCH PART

Search strategy - Included all relevant papers with keyword extraction, and all the papers are downloaded. Once the papers are retrieved, they go through a screening process using abstracts, rank with respect to relevance.

In [5]:
!pip install python-dotenv



In [6]:
from google.colab import files
uploaded = files.upload()

Saving Semantic_key.env to Semantic_key (1).env


In [7]:
import requests
import os, time
import concurrent.futures
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from dotenv import load_dotenv

# Function to retreive fulltext url / doi url from DOAJ API response
def get_fulltext_from_bibjson(bibjson):
    # First try to get fulltext link from DOAJ metadata
    for link in bibjson.get("link", []):
        if link.get("type") == "fulltext":
            return link.get("url")

    # If full text url not available, build fulltext landing page from DOI (redirects to publisher)
    for id_obj in bibjson.get("identifier", []):
        if id_obj.get("type") == "doi":
            return f"https://doi.org/{id_obj.get('id')}"

    return None

# Function to search DOAJ DB
def doaj_search(query, page_size=10):
    url = f"https://doaj.org/api/v2/search/articles/{query}?pageSize={page_size}"
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)

    print(f"[DOAJ] Response status: {response.status_code}")
    papers = []

    if response.status_code == 200:
        data = response.json()
        for result in data.get("results", []):
            bibjson = result.get("bibjson", {})
            title = bibjson.get("title", "N/A")
            abstract = bibjson.get("abstract", "")
            full_text_url = get_fulltext_from_bibjson(bibjson)

            papers.append({
                "source": "DOAJ",
                "title": title,
                "abstract": abstract,
                "full_text_url": full_text_url
            })
    return papers

# Function to search EuropePMC DB
def europe_pmc_search(query, max_results=30):
    params = {"query": query + " + OPEN_ACCESS:Y", "format": "json", "pageSize": max_results}
    url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
    response = requests.get(url, params=params)
    print(f"[Europe PMC] Response status: {response.status_code}")
    papers = []
    if response.status_code == 200:
        data = response.json().get("resultList", {}).get("result", [])
        for item in data:
            papers.append({
                "source": "Europe PMC",
                "title": item.get('title', 'N/A'),
                "abstract": item.get('abstractText', ''),
                "link": f"https://europepmc.org/article/{item.get('source', '')}/{item.get('id', '')}",
                "full_text_url": f"https://europepmc.org/backend/ptpmcrender.fcgi?accid={item.get('pmcid', '')}&blobtype=pdf"
            })
    return papers

# Function to search semantic scholar DB
load_dotenv()
def search_semantic_scholar(query, max_results=25):
    url = "https://api.semanticscholar.org/graph/v1/paper/search"

    # Load the API key from the environment variable
    API_KEY = os.getenv("API_KEY")

    headers = {
        "Authorization": f"Bearer {API_KEY}"
    }

    params = {
        "query": query,
        "fields": "title,abstract,url,openAccessPdf",  # Fields you want to retrieve
        "limit": max_results  # Number of results to return
    }

    # Retry logic for rate-limiting (HTTP status 429)
    retries = 10
    for attempt in range(retries):
        response = requests.get(url, headers=headers, params=params)

        # If the request is successful (status 200), process the response
        if response.status_code == 200:
            print(f"[Semantic Scholar] Status: {response.status_code}")
            open_access_papers = []
            results = response.json().get("data", [])
            for paper in results:
                if paper.get("openAccessPdf") and paper["openAccessPdf"].get("url"):
                    open_access_papers.append({
                        "source": "Semantic Scholar",
                        "title": paper.get("title"),
                        "abstract": paper.get("abstract", ""),
                        "full_text_url": paper["openAccessPdf"]["url"]
                    })
            return open_access_papers

        # If rate-limited, back off and retry after a delay
        elif response.status_code == 429:
            print(f"[Semantic Scholar] Rate-limited. Retrying in {2 ** attempt} seconds...")
            time.sleep(2 ** attempt)  # Exponential backoff
        else:
            print(f"[Semantic Scholar] Error: {response.status_code}")
            break  # Exit on other errors like 4xx, 5xx

    # If we exhausted retries and still getting rate-limited
    return []


# Function to perform parallel search on EuropePMC, DOAJ & Semantic Scholar APIs
def parallel_search(query, max_results=30):
    results = {"EUROPEPMC": [], "DOAJ": [], "SEMANTIC": []}

    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_pubmed = executor.submit(europe_pmc_search, query, max_results)
        future_doaj = executor.submit(doaj_search, query, max_results)
        future_semantic = executor.submit(search_semantic_scholar, query, max_results)

        results["EUROPEPMC"] = future_pubmed.result()
        results["DOAJ"] = future_doaj.result()
        results["SEMANTIC"] = future_semantic.result()

    # Display how many papers were retrieved from each API
    print(f"Papers retrieved from each source:")
    print(f"Europe PMC: {len(results['EUROPEPMC'])} papers")
    print(f"DOAJ: {len(results['DOAJ'])} papers")
    print(f"Semantic Scholar: {len(results['SEMANTIC'])} papers")

    # Combine all results and limit the total number of papers
    #all_papers = results["EUROPEPMC"] + results["DOAJ"] + results["SEMANTIC"]

    # Trim the results to the max_results
    return results


# Function to check if full text url is a direct pdf link
def is_direct_pdf_link(url):
    try:
        response = requests.head(url, headers={"User-Agent": "Mozilla/5.0"}, allow_redirects=True, timeout=10)
        return 'application/pdf' in response.headers.get("Content-Type", "").lower()
    except:
        return False

# Function used to get pdf link from unpaywall if only doi is available
def get_pdf_from_unpaywall(doi, email="your_email@example.com"):
    api_url = f"https://api.unpaywall.org/v2/{doi}?email={email}"
    try:
        response = requests.get(api_url)
        if response.status_code == 200:
            data = response.json()
            oa_location = data.get("best_oa_location")
            if oa_location and oa_location.get("url_for_pdf"):
                return oa_location["url_for_pdf"]
    except:
        pass
    return None

# Function to find pdf link if the full text url is pointing to a webpage instead of direct link
def extract_pdf_link_from_html_page(page_url):
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        if response.status_code != 200:
            return None
        soup = BeautifulSoup(response.text, "html.parser")
        for link in soup.find_all("a", href=True):
            classes = link.get("class", [])
            href = link["href"]

            # Match if any class contains 'pdf' or class list includes 'pdf'
            if "pdf" in href.lower() or any("pdf" in cls.lower() for cls in classes):
                full_link = urljoin(page_url, href)
                if full_link.endswith(".pdf") or "view" in full_link:  # customize as needed
                    return full_link
    except:
        pass
    return None

# Function to download the pdf to local if the url is a direct link
def download_pdf_from_url(pdf_url, save_path="paper.pdf"):
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(pdf_url, headers=headers, stream=True, timeout=10)
        content_type = response.headers.get("Content-Type", "")

        if response.status_code == 200 and 'application/pdf' in content_type:
            with open(save_path, "wb") as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            print(f"PDF downloaded: {save_path}")
            return save_path
        else:
            print(f"Skipped (Not a PDF or blocked): {pdf_url} [Content-Type: {content_type}]")
    except Exception as e:
        print(f"Error downloading PDF from {pdf_url}: {e}")
    return None

# Function to rename the downloaded file based on title of the paper
def sanitize_filename(title, version=""):
    return "".join(c if c.isalnum() or c in "._-" else "_" for c in title)[:100] + (f"_{version}" if version else "")


# Function to download the papers after checking if it is a direct link/doi/webpage
def download_all_pdfs(results):
    os.makedirs("downloads", exist_ok=True)
    #all_papers = results["EUROPEPMC"] + results["DOAJ"] + results["SEMANTIC"]
    all_papers = []
    for source in ["EUROPEPMC", "DOAJ", "SEMANTIC"]:
        all_papers.extend(results[source])

    for idx, paper in enumerate(all_papers):
        title = paper.get("title", f"paper_{idx}")
        pdf_url = paper.get("full_text_url")

        if pdf_url:
            filename = sanitize_filename(title) + ".pdf"
            save_path = os.path.join("downloads", filename)
            print(f"\nDownloading Paper {idx + 1}: {title}")
            resolve_pdf_url_and_download(pdf_url, save_path)


# Function to resolve the pdf url correctly based on api response and download the paper accordingly
def resolve_pdf_url_and_download(full_text_url, save_path):

    # Direct PDF check
    if is_direct_pdf_link(full_text_url):
        return download_pdf_from_url(full_text_url, save_path)

    # Check if link is DOI and use Unpaywall to download
    parsed = urlparse(full_text_url)
    if "doi.org" in parsed.netloc:
        doi = parsed.path.strip("/")
        print(f"Using Unpaywall for DOI: {doi}")
        pdf_url = get_pdf_from_unpaywall(doi)
        if pdf_url and is_direct_pdf_link(pdf_url):
            return download_pdf_from_url(pdf_url, save_path)

    # Scrape the webpage to find PDF url and download
    print(f"Scraping HTML for PDF: {full_text_url}")
    pdf_url = extract_pdf_link_from_html_page(full_text_url)
    if pdf_url and is_direct_pdf_link(pdf_url):
        return download_pdf_from_url(pdf_url, save_path)

    print(f"No downloadable PDF found")
    return

print("\n Welcome to the PRISMA-ScR Automated Research Tool!")
query = input("Enter keywords to search for research papers: ")

print("Searching PubMed, DOAJ, SEMANTIC SCHOLAR databases... Please wait...\n")
search_results = parallel_search(query, max_results=30)

download_all_pdfs(search_results)
print("Search complete!")


 Welcome to the PRISMA-ScR Automated Research Tool!
Enter keywords to search for research papers: climate change adaptation
Searching PubMed, DOAJ, SEMANTIC SCHOLAR databases... Please wait...

[DOAJ] Response status: 200
[Europe PMC] Response status: 200
[Semantic Scholar] Status: 200
Papers retrieved from each source:
Europe PMC: 30 papers
DOAJ: 30 papers
Semantic Scholar: 22 papers

Downloading Paper 1: Outpacing climate change: adaptation to heatwaves in Europe.
PDF downloaded: downloads/Outpacing_climate_change__adaptation_to_heatwaves_in_Europe..pdf

Downloading Paper 2: What is limiting how we imagine climate change adaptation?
Scraping HTML for PDF: https://europepmc.org/backend/ptpmcrender.fcgi?accid=PMC11625676&blobtype=pdf
No downloadable PDF found

Downloading Paper 3: OLDER ADULTS AND CLIMATE CHANGE ADAPTATION STRATEGIES: A SCOPING REVIEW
PDF downloaded: downloads/OLDER_ADULTS_AND_CLIMATE_CHANGE_ADAPTATION_STRATEGIES__A_SCOPING_REVIEW.pdf

Downloading Paper 4: Flavonoids 

Research question -mapping where and how mobile apps have been used as part of natural disaster mental health response strategies.



###Ensemble Model

## Ensemble model
Check the model apis, use the genfuser and pairrank paper to get ideas and come up with prompts

In [None]:
!pip install bert-score mistralai

Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting mistralai
  Downloading mistralai-1.7.0-py3-none-any.whl.metadata (30 kB)
Collecting eval-type-backport>=0.2.0 (from mistralai)
  Downloading eval_type_backport-0.2.2-py3-none-any.whl.metadata (2.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collectin

In [None]:
# ------------FINAL CODE ENSEMBLE-------------------#
import numpy as np
import json
import requests
import torch
from google import genai
from bert_score import score as bert_score
from mistralai import Mistral

def query_gemini(prompt, api_key):
    client = genai.Client(api_key=api_key)
    try:
        response = client.models.generate_content(
          model="gemini-2.0-flash", contents=prompt
        )
        return response.text
    except Exception as e:
        print(f"Error querying Gemini API: {e}")
        return None

def query_mistral(prompt, api_token):
    try:
        model= "mistral-small-latest"
        print("Sending prompt to Mistral API...")
        client = Mistral(api_key=api_token)
        chat_response = client.chat.complete(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ]
        )

        response_content = chat_response.choices[0].message.content

        # Try to parse as JSON
        try:
            json_response = json.loads(response_content)
            return json_response
        except json.JSONDecodeError:
            print("Warning: Response is not valid JSON. Returning raw text.")
            return response_content

    except Exception as e:
        print(f"Error querying Mistral API: {e}")
        return None
def parse_ranking_response(response_text):
    try:
        # Try to extract JSON from the response
        start_idx = response_text.find('{')
        end_idx = response_text.rfind('}')

        if start_idx >= 0 and end_idx > start_idx:
            json_str = response_text[start_idx:end_idx+1]
            ranking_data = json.loads(json_str)

            if isinstance(ranking_data, dict) and "ranking" in ranking_data:
                return ranking_data["ranking"], ranking_data.get("scores", [])

        # Fallback to parsing numbered list
        lines = response_text.strip().split('\n')
        ranking = []

        for line in lines:
            if line and ':' in line:
                parts = line.split(':', 1)
                try:
                    index = int(parts[0].strip().rstrip('.')) - 1
                    ranking.append(index)
                except ValueError:
                    pass

        return ranking, [] if ranking else None, []
    except Exception as e:
        print(f"Error parsing ranking response: {e}")
        return None, []
def get_model_rankings(abstracts, titles, query, gemini_api_key, hf_api_token):


    abstract_list = ""
    for i, abs in enumerate(abstracts):
        abstract_list += f"{i+1}. {abs}\n"  # If abs is a string. Adjust if dict.

    prompt = f"""
      You are a research assistant helping with a scoping review.

      RESEARCH QUESTION: {query}

      I have retrieved the following {len(abstracts)} papers. Please rank them based on their relevance to the research question:

      {abstract_list}

      Based on their abstracts, rank these papers in order of relevance to my research question.
      For each paper, assign a relevance score from 0 to 10, where 10 is most relevant.

      Return your answer as a JSON object with the following format:
      {{
        "ranking": [list of paper numbers in order of relevance from most to least relevant],
        "scores": [corresponding relevance scores for each paper]
      }}

      Provide ONLY the JSON response with no additional text.
      """
    gemini_response = query_gemini(prompt, gemini_api_key)
    mistral_response = query_mistral(prompt, hf_api_token)
    gemini_ranking, gemini_scores = parse_ranking_response(gemini_response) if gemini_response else (None, [])
    mistral_ranking, mistral_scores = parse_ranking_response(mistral_response) if mistral_response else (None, [])

    results = {
        "gemini": {
            "ranking": gemini_ranking if gemini_ranking else [],
            "scores": gemini_scores if gemini_scores else []
        },
        "mistral": {
            "ranking": mistral_ranking if mistral_ranking else [],
            "scores": mistral_scores if mistral_scores else []
        }
    }

    return results

def get_ensemble_ranking(abstracts, titles, query, results_from_models=None):

    if not abstracts:
        return np.arange(len(abstracts)), np.zeros(len(abstracts))

    if results_from_models is None:
        gemini_api_key = "AIzaSyDApKHVHxDERiaZDit0Dfpz9XMdQdhL36c"
        hf_api_token = "Bzj8ue6SoITkGj4hUcRxb9sp56k4aiUa"
        results_from_models = get_model_rankings(abstracts, titles, query, gemini_api_key, hf_api_token)

    gemini_ranking = np.array(results_from_models["gemini"]["ranking"]) if results_from_models["gemini"]["ranking"] else np.arange(len(abstracts))
    mistral_ranking = np.array(results_from_models["mistral"]["ranking"]) if results_from_models["mistral"]["ranking"] else np.arange(len(abstracts))

    if len(gemini_ranking) != len(abstracts):
        gemini_ranking = np.arange(len(abstracts))
    if len(mistral_ranking) != len(abstracts):
        mistral_ranking = np.arange(len(abstracts))

    ensemble_scores = np.zeros(len(abstracts))
    for i in range(len(abstracts)):
        gemini_position = np.where(gemini_ranking == i)[0][0] if i in gemini_ranking else len(abstracts)
        mistral_position = np.where(mistral_ranking == i)[0][0] if i in mistral_ranking else len(abstracts)

        ensemble_scores[i] = 1 / (1 + (gemini_position + mistral_position) / 2)

    ensemble_ranked = np.argsort(ensemble_scores)[::-1]

    return ensemble_ranked, ensemble_scores

def get_model_summaries(abstracts, titles, ranking, query, gemini_api_key, hf_api_token, top_n=5):

    top_indices = ranking[:min(top_n, len(ranking))]

    papers_info = []
    for i, idx in enumerate(top_indices):
        paper_info = f"{i+1}: \"{titles[idx]}\"\n"
        paper_info += f"Abstract: {abstracts[idx]}\n"
        papers_info.append(paper_info)

    papers_text = "\n\n".join(papers_info)

    prompt = f"""
    You are a research assistant helping with a scoping review, following PRISMA Guidelines.
    RESEARCH QUESTION: {query}
    Based on the top {len(top_indices)} papers below, create a comprehensive summary that:
    1. Identifies key themes and findings across the papers
    2. Highlights methodological approaches used
    3. Notes any gaps in the literature
    4. Suggests directions for future research
    Papers:
    {papers_text}
    Provide ONLY a well-structured summary that synthesizes the information from these papers with no additional text.
    """

    gemini_summary = query_gemini(prompt, gemini_api_key)
    mistral_summary = query_mistral(prompt, hf_api_token)

    return {
        "gemini": gemini_summary,
        "mistral": mistral_summary
    }

def get_ensemble_summaries(abstracts, titles, query, results_from_models=None):

    if not abstracts:
        return "No abstracts provided for summarization."

    ensemble_ranked, _ = get_ensemble_ranking(abstracts, titles, query, results_from_models)

    gemini_api_key = "AIzaSyDApKHVHxDERiaZDit0Dfpz9XMdQdhL36c"
    hf_api_token = "Bzj8ue6SoITkGj4hUcRxb9sp56k4aiUa"

    summaries = get_model_summaries(abstracts, titles, ensemble_ranked, query, gemini_api_key, hf_api_token)

    if summaries["gemini"] and summaries["mistral"]:
        ensemble_prompt = f"""
        You are a research assistant helping with a scoping review, following PRISMA guidelines.

        I have two summaries of the same set of papers related to this research question: "{query}"

        Summary 1:
        {summaries["gemini"]}

        Summary 2:
        {summaries["mistral"]}

        Please create a synthesis of these two summaries, incorporating the strongest insights and analysis from each.
        The final summary should be comprehensive yet concise, highlighting key themes, methods, gaps, and future directions.
        Do not reference these summaries and just output the final summary.
        """

        ensemble_summary = query_gemini(ensemble_prompt, gemini_api_key)
        return ensemble_summary
    elif summaries["gemini"]:
        return summaries["gemini"]
    elif summaries["mistral"]:
        return summaries["mistral"]
    else:
        return "Unable to generate summaries from the provided models."

def get_model_summaries_for_each_paper(abstracts, titles, query, gemini_api_key, hf_api_token):

    gemini_summary = ""
    mistral_summary = ""
    for i in range(len(abstracts)):
      prompt = f"""
      Summarize this research paper: Title - {titles[i]}, abstract - {abstracts[i]}
      Provide ONLY a well-structured summary that synthesizes the information from these papers with no additional text.
      """

      gemini_summary += (query_gemini(prompt, gemini_api_key))
      mistral_summary +=  (query_mistral(prompt, hf_api_token))
      time.sleep(1)
    return {
        "gemini_ep": gemini_summary,
        "mistral_ep": mistral_summary
    }

def get_ensemble_summaries_for_each_paper(abstracts, titles, query, results_from_models=None):

    if not abstracts:
        return "No abstracts provided for summarization."
    gemini_api_key = "AIzaSyDApKHVHxDERiaZDit0Dfpz9XMdQdhL36c"
    hf_api_token = "Bzj8ue6SoITkGj4hUcRxb9sp56k4aiUa"

    summaries = get_model_summaries_for_each_paper(abstracts, titles, query, gemini_api_key, hf_api_token)

    if summaries["gemini_ep"] and summaries["mistral_ep"]:
        ensemble_prompt = f"""
        I have two summaries of the same set of papers related to this research question: "{query}"

        Summary 1:
        {summaries["gemini_ep"]}

        Summary 2:
        {summaries["mistral_ep"]}

        Please create a synthesis of these two summaries maintaining academic standard. Do not reference these summaries and just output the final summary.
        """

        ensemble_summary = query_gemini(ensemble_prompt, gemini_api_key)
        return ensemble_summary
    elif summaries["gemini_ep"]:
        return summaries["gemini_ep"]
    elif summaries["mistral_ep"]:
        return summaries["mistral_ep"]
    else:
        return "Unable to generate summaries from the provided models."

def evaluate_summaries_with_bert(summaries, reference_summary):

    scores = {}

    for model_name, summary in summaries.items():
        if summary:
            try:
                P, R, F1 = bert_score([summary], [reference_summary], lang="en", rescale_with_baseline=True)
                scores[model_name] = {
                    "precision": P.item(),
                    "recall": R.item(),
                    "f1": F1.item()
                }
            except Exception as e:
                print(f"Error computing BERT Score for {model_name}: {e}")
                scores[model_name] = None

    # If we have an ensemble summary, evaluate it too
    if "ensemble" in summaries and summaries["ensemble"]:
        try:
            P, R, F1 = bert_score([summaries["ensemble"]], [reference_summary], lang="en", rescale_with_baseline=True)
            scores["ensemble"] = {
                "precision": P.item(),
                "recall": R.item(),
                "f1": F1.item()
            }
        except Exception as e:
            print(f"Error computing BERT Score for ensemble: {e}")
            scores["ensemble"] = None

    return scores

## Abstract Screening

The abstracts of the relevant papers go through screening process below. Screening strategy is used by ranking the papers with sentence transformers and ensemble model. All the ranks obtained by each paper will have a final consolidated ranking from the models to minimise the model bias using Reciprocal Rank Fusion

### Sentence Transformers used
The following pretrained sentence transformers are used:
* BM25
* SBERT
* SPLADE

and Ensemble model of Deepseek and Gemini LLMs.

The abstracts are ranked on the relevance similarity scores based on mean consolidated embeddings with other abstracts and research question. Say, we have 'N' total papers retrieved and we use the above models(m ∈ [1,4]),\
<center>$R_{abs_i}^{model_m} = SS_{model_m}(abs_i, \frac{1}{2N}(Σ_{j!=i}abs_j)+rq/2)$</center>, where,\

* R_{abs_i}^{model_m} is the rank of paper i with respect to model m,
* SS_{model_m}(a, b) is similarity score with respect to model m between a and b, a and b are two text embedding vectors,
* abs_i is abstract embedding vector of paper i,
* rq is embedding vector of research question.

Now, we have m ranks for each paper, which can possibly include model bias because they are trained over different kinds of data.

To reduce this bias, we adopt RRF:
<center> $Rank_{abs_i} = Σ_{j=1}^m \frac{1}{k+R_{abs_i}^{model_j} }$</center>
where,


* Rank_{abs_i} is the final rank of i^{th} paper,
* k is a constant, generally used 60
* R_{abs_i}^{model_j} is the rank of i^{th} paper with respect to model j.



In [None]:
!pip install pymupdf



# EXTENSIVE ABSTRACT RETRIEVAL

In [None]:
import re
from pathlib import Path
import fitz  # PyMuPDF
import nltk
from nltk.tokenize import sent_tokenize

# Download NLTK data if needed
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

def get_abstracts_from_papers(results):
    """
    Extract abstracts from the search results.
    If abstracts are missing, attempt to extract them from PDFs.

    Args:
        results: Dictionary with search results from different sources

    Returns:
        tuple: (abstracts, titles) lists containing all valid papers
    """
    all_papers = results["EUROPEPMC"] + results["DOAJ"] + results["SEMANTIC"]
    abstracts = []
    titles = []
    papers_without_abstracts = []

    # First pass: collect available abstracts and identify papers needing extraction
    for paper in all_papers:
        title = paper.get("title", "")
        abstract = paper.get("abstract", "")

        if title and title.strip():  # We require a title
            if abstract and abstract.strip():  # Paper already has an abstract
                abstracts.append(abstract)
                titles.append(title)
            else:
                # Save papers needing abstract extraction
                papers_without_abstracts.append(paper)

    # Second pass: extract abstracts from PDFs for papers that need them
    if papers_without_abstracts:
        pdf_abstracts, pdf_titles = extract_abstracts_from_pdfs('/content/downloads', papers_without_abstracts)
        abstracts.extend(pdf_abstracts)
        titles.extend(pdf_titles)

    return abstracts, titles

def extract_abstracts_from_pdfs(folder_path, papers_without_abstracts):
    """
    Extract abstracts from PDF files for papers with missing abstracts.

    Args:
        folder_path: Path to folder containing PDF files
        papers_without_abstracts: List of paper dictionaries with missing abstracts

    Returns:
        tuple: (extracted_abstracts, corresponding_titles)
    """
    folder = Path(folder_path)
    extracted_abstracts = []
    corresponding_titles = []

    # Create a mapping from normalized titles to papers
    title_to_paper = {normalize_title(paper.get("title", "")): paper
                      for paper in papers_without_abstracts if paper.get("title")}

    # Process each PDF file in the folder
    pdf_files = list(folder.glob('*.pdf'))
    for pdf_path in pdf_files:
        try:
            # Extract title from PDF for matching
            pdf_title = extract_title_from_pdf(pdf_path)

            if pdf_title:
                normalized_pdf_title = normalize_title(pdf_title)

                # Try to match with papers that need abstracts
                matched_paper = None

                # Try exact title match first
                if normalized_pdf_title in title_to_paper:
                    matched_paper = title_to_paper[normalized_pdf_title]
                else:
                    # Try fuzzy matching
                    best_match = find_best_title_match(normalized_pdf_title, title_to_paper.keys())
                    if best_match:
                        matched_paper = title_to_paper[best_match]

                # If we found a match, extract the abstract
                if matched_paper:
                    abstract = extract_abstract_from_pdf(pdf_path)
                    if abstract:
                        extracted_abstracts.append(abstract)
                        corresponding_titles.append(matched_paper.get("title", ""))

        except Exception as e:
            print(f"Error processing {pdf_path.name}: {e}")

    return extracted_abstracts, corresponding_titles

def normalize_title(title):
    """Normalize title for comparison by lowercasing and removing extra spaces."""
    return re.sub(r'\s+', ' ', title.lower().strip())

def find_best_title_match(pdf_title, candidate_titles, threshold=0.7):
    """Find the best matching title using token-based similarity."""
    pdf_title_words = set(re.findall(r'\b\w+\b', pdf_title.lower()))

    best_match = None
    best_score = 0

    for candidate in candidate_titles:
        candidate_words = set(re.findall(r'\b\w+\b', candidate.lower()))
        if pdf_title_words and candidate_words:
            # Calculate Jaccard similarity
            intersection = len(pdf_title_words.intersection(candidate_words))
            union = len(pdf_title_words.union(candidate_words))
            score = intersection / union if union > 0 else 0

            if score > threshold and score > best_score:
                best_score = score
                best_match = candidate

    return best_match

def extract_title_from_pdf(pdf_path):
    """Extract title from a PDF file."""
    try:
        doc = fitz.open(pdf_path)

        # Try to get title from metadata
        metadata = doc.metadata
        if metadata.get("title") and len(metadata.get("title").strip()) > 5:
            doc.close()
            return metadata.get("title").strip()

        # Extract from first page
        text = doc[0].get_text()
        doc.close()

        # Title is usually the first substantial line
        lines = [line.strip() for line in text.split('\n') if len(line.strip()) > 5]
        for line in lines:
            if 10 <= len(line) <= 200 and not line.lower().startswith(('doi', 'http', 'www')):
                return line

        return None

    except Exception as e:
        print(f"Error extracting title from PDF: {e}")
        return None

def extract_abstract_from_pdf(pdf_path):
    """
    Extract abstract from a PDF file.

    Args:
        pdf_path: Path to the PDF file

    Returns:
        str or None: Extracted abstract or None if not found
    """
    try:
        doc = fitz.open(pdf_path)
        text = ""

        # Get text from first few pages where abstract is likely to be
        for page_num in range(min(3, len(doc))):
            text += doc[page_num].get_text()
        doc.close()

        return find_abstract_in_text(text)

    except Exception as e:
        print(f"Error extracting abstract from PDF: {e}")
        return None

def find_abstract_in_text(text):
    """
    Find abstract in the PDF text using multiple strategies.

    Args:
        text: Extracted text from PDF

    Returns:
        str or None: Extracted abstract or None if not found
    """
    # Method 1: Look for specifically labeled abstract section
    abstract_patterns = [
        r"(?i)abstract[\s]*[:.\n]+(.*?)(?:[\n]{2,}|\b(?:introduction|keywords|key\s+words)\b)",
        r"(?i)ABSTRACT[\s]*[:.\n]+(.*?)(?:[\n]{2,}|\b(?:introduction|keywords|key\s+words)\b)",
        r"(?i)Abstract[\s]*[:.\n]+(.*?)(?:[\n]{2,}|\b(?:introduction|keywords|key\s+words)\b)",
        r"(?i)Summary[\s]*[:.\n]+(.*?)(?:[\n]{2,}|\b(?:introduction|keywords|key\s+words)\b)",
        r"(?i)SUMMARY[\s]*[:.\n]+(.*?)(?:[\n]{2,}|\b(?:introduction|keywords|key\s+words)\b)"
    ]

    for pattern in abstract_patterns:
        match = re.search(pattern, text, re.DOTALL)
        if match:
            abstract = match.group(1).strip()
            # Clean up the abstract (remove extra whitespaces, line breaks, etc.)
            abstract = re.sub(r'\s+', ' ', abstract)
            return abstract

    # Method 2: For academic papers, abstract is often the second paragraph after the title
    paragraphs = re.split(r'\n{2,}', text)
    if len(paragraphs) > 2:
        # If second paragraph is short (likely authors/affiliations), try third paragraph
        candidate = paragraphs[1].strip()
        if len(candidate.split()) < 30 and len(paragraphs) > 3:
            candidate = paragraphs[0].strip()

        # Clean up and verify it looks like an abstract
        candidate = re.sub(r'\s+', ' ', candidate)
        words = candidate.split()
        if 30 < len(words) < 500:  # Typical abstract length
            return ' '.join(words)

    # Method 3: Look for the first paragraph that looks like an abstract (sentence-based approach)
    paragraphs = [re.sub(r'\s+', ' ', p.strip()) for p in re.split(r'\n{2,}', text) if p.strip()]
    for paragraph in paragraphs[:5]:  # Check first 5 paragraphs
        sentences = sent_tokenize(paragraph)
        # Abstract usually has multiple sentences and meaningful length
        if len(sentences) >= 2 and 50 < len(paragraph) < 2000:
            return paragraph

    return None

In [None]:
!pip install rank_bm25



In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import torch
import os
import json

# Reciprocal Rank Fusion (RRF) Function
def reciprocal_rank_fusion(ranked_lists, k=60):
    scores = {}
    for rank_list in ranked_lists:
        for rank, doc_id in enumerate(rank_list):
            scores[doc_id] = scores.get(doc_id, 0) + 1 / (k + rank + 1)
    return sorted(scores.items(), key=lambda x: x[1], reverse=True)

# BM25 Ranking
def get_bm25_ranking(abstracts, query):
    tokenized_abstracts = [doc.split() for doc in abstracts]
    bm25 = BM25Okapi(tokenized_abstracts)
    bm25_scores = bm25.get_scores(query.split())
    return np.argsort(bm25_scores)[::-1], bm25_scores

# SBERT Ranking
def get_sbert_ranking(abstracts, query):
    sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
    abstract_embeddings = sbert_model.encode(abstracts, convert_to_tensor=True)
    query_embedding = sbert_model.encode([query], convert_to_tensor=True)

    # Calculate mean abstract embedding to use in similarity calculation
    mean_abstract_embedding = torch.mean(abstract_embeddings, dim=0, keepdim=True)
    # Combine mean abstract and research question as described in the formula
    combined_embedding = 0.5 * mean_abstract_embedding + 0.5 * query_embedding

    # Calculate similarity scores for each abstract with the combined embedding
    abstract_embeddings = abstract_embeddings.cpu().numpy()
    combined_embedding = combined_embedding.cpu().numpy()
    sbert_scores = cosine_similarity(abstract_embeddings, combined_embedding).flatten()

    return np.argsort(sbert_scores)[::-1], sbert_scores

# SPLADE Ranking
def get_splade_ranking(abstracts, query):
    try:
        splade_tokenizer = AutoTokenizer.from_pretrained("naver/splade-cocondenser-ensembledistil")
        splade_model = AutoModel.from_pretrained("naver/splade-cocondenser-ensembledistil")

        def get_splade_representation(text):
            inputs = splade_tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
            with torch.no_grad():
                outputs = splade_model(**inputs).last_hidden_state.mean(dim=1)
            return outputs.squeeze().cpu().numpy()

        splade_embeddings = np.array([get_splade_representation(text) for text in abstracts])
        query_splade_embedding = get_splade_representation(query)

        # Calculate mean abstract embedding
        mean_splade_embedding = np.mean(splade_embeddings, axis=0)
        # Combine mean abstract and research question
        combined_embedding = 0.5 * mean_splade_embedding + 0.5 * query_splade_embedding

        splade_scores = cosine_similarity(splade_embeddings, combined_embedding.reshape(1, -1)).flatten()
        return np.argsort(splade_scores)[::-1], splade_scores
    except Exception as e:
        print(f"Error in SPLADE ranking: {e}")
        # Return dummy ranking if SPLADE fails
        return np.arange(len(abstracts)), np.zeros(len(abstracts))

def printRankings(ranked_results):
  for i in ranked_results:
    print(f'Rank: {i["rank"]}       Title:{i["title"]}       Relevance Score:{i["relevance_score"]}')
    print("\n")

def rank_abstracts(search_results, research_question, rfm=None):
    # Extract abstracts from search results
    abstracts, titles = get_abstracts_from_papers(search_results)

    if not abstracts:
        print("No abstracts found in the search results")
        return []

    print(f"Ranking {len(abstracts)} abstracts based on relevance to research question...")

    # Get rankings from each model
    bm25_ranked, bm25_scores = get_bm25_ranking(abstracts, research_question)
    sbert_ranked, sbert_scores = get_sbert_ranking(abstracts, research_question)
    splade_ranked, splade_scores = get_splade_ranking(abstracts, research_question)

    # Collect results from the first three models to use in ensemble
    results_from_models = {
        "bm25": {"ranking": bm25_ranked.tolist(), "scores": bm25_scores.tolist()},
        "sbert": {"ranking": sbert_ranked.tolist(), "scores": sbert_scores.tolist()},
        "splade": {"ranking": splade_ranked.tolist(), "scores": splade_scores.tolist()}
    }

    # Get ensemble model ranking
    ensemble_ranked, ensemble_scores = get_ensemble_ranking(abstracts, titles, research_question, rfm)

    # Apply RRF to Combine Rankings
    ranked_lists = [bm25_ranked, sbert_ranked, splade_ranked, ensemble_ranked]
    final_ranking = reciprocal_rank_fusion(ranked_lists)

    # Create the final ranked results
    ranked_results = []
    for idx, (doc_id, score) in enumerate(final_ranking):
        if doc_id < len(titles):  # Ensure valid index
            ranked_results.append({
                "rank": idx + 1,
                "title": titles[doc_id],
                "abstract": abstracts[doc_id],
                "relevance_score": score
            })

    # Save rankings for later use
    os.makedirs("results", exist_ok=True)
    with open("results/abstract_rankings.json", "w") as f:
        json.dump(ranked_results, f, indent=2)

    return ranked_results

Ranking the abstracts

In [None]:
# ----------------CODE TO RUN THE PIPELINE----------------------#
def run_review_pipeline(query, abstracts, titles, reference_summary=None):

    gemini_api_key = "AIzaSyDApKHVHxDERiaZDit0Dfpz9XMdQdhL36c"
    hf_api_token = "Bzj8ue6SoITkGj4hUcRxb9sp56k4aiUa"

    print("Getting model rankings...")
    model_rankings = get_model_rankings(abstracts, titles, query, gemini_api_key, hf_api_token)

    print("Creating ensemble ranking...")
    ensemble_ranked, ensemble_scores = get_ensemble_ranking(abstracts, titles, query, model_rankings)

    print("Final ranking...")
    final_ranking = rank_abstracts(search_results, query, model_rankings)

    print("Generating model summaries...")
    model_summaries = get_model_summaries(abstracts, titles, ensemble_ranked, query, gemini_api_key, hf_api_token)

    print("Creating ensemble summary...")
    ensemble_summary = get_ensemble_summaries(abstracts, titles, query, model_rankings)

    print("Generating model summaries for each paper...")
    model_summaries_ep = get_model_summaries_for_each_paper(abstracts, titles, query, gemini_api_key, hf_api_token)

    print("Creating ensemble summary for each paper..")
    ensemble_summary_ep = get_ensemble_summaries_for_each_paper(abstracts, titles, query, model_rankings)

    all_summaries = {
        "gemini": model_summaries["gemini"],
        "mistral": model_summaries["mistral"],
        "ensemble": ensemble_summary,
        "gemini_ep": model_summaries_ep["gemini_ep"],
        "mistral_ep": model_summaries_ep["mistral_ep"],
        "ensemble_ep": ensemble_summary_ep
    }

    results = {
        "rankings": {
            "gemini": model_rankings["gemini"],
            "mistral": model_rankings["mistral"],
            "ensemble": {
                "ranking": ensemble_ranked.tolist(),
                "scores": ensemble_scores.tolist()
            },
            "final" : final_ranking
        },
        "summaries": all_summaries
    }

    # Step 5: Evaluate with BERT Score if reference is provided
    if reference_summary:
        print("Evaluating summaries with BERT Score...")
        bert_scores = evaluate_summaries_with_bert(all_summaries, reference_summary)
        results["evaluation"] = bert_scores

    return results

In [None]:
ref_summary = """
Background
Disasters are becoming more frequent due to the impact of extreme weather events attributed to climate change, causing loss of lives, property, and psychological trauma. Mental health response to disasters emphasizes prevention and mitigation, and mobile health (mHealth) apps have been used for mental health promotion and treatment. However, little is known about their use in the mental health components of disaster management.

Objective
This scoping review was conducted to explore the use of mobile phone apps for mental health responses to natural disasters and to identify gaps in the literature.

Methods
We identified relevant keywords and subject headings and conducted comprehensive searches in 6 electronic databases. Studies in which participants were exposed to a man-made disaster were included if the sample also included some participants exposed to a natural hazard. Only full-text studies published in English were included. The initial titles and abstracts of the unique papers were screened by 2 independent review authors. Full texts of the selected papers that met the inclusion criteria were reviewed by the 2 independent reviewers. Data were extracted from each selected full-text paper and synthesized using a narrative approach based on the outcome measures, duration, frequency of use of the mobile phone apps, and the outcomes. This scoping review was reported according to the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews).

Results
Of the 1398 papers retrieved, 5 were included in this review. A total of 3 studies were conducted on participants exposed to psychological stress following a disaster while 2 were for disaster relief workers. The mobile phone apps for the interventions included Training for Life Skills, Sonoma Rises, Headspace, Psychological First Aid, and Substance Abuse and Mental Health Services Administration (SAMHSA) Behavioural Health Disaster Response Apps. The different studies assessed the effectiveness or efficacy of the mobile app, feasibility, acceptability, and characteristics of app use or predictors of use. Different measures were used to assess the effectiveness of the apps’ use as either the primary or secondary outcome.

Conclusions
A limited number of studies are exploring the use of mobile phone apps for mental health responses to disasters. The 5 studies included in this review showed promising results. Mobile apps have the potential to provide effective mental health support before, during, and after disasters. However, further research is needed to explore the potential of mobile phone apps in mental health responses to all hazards.
"""

In [None]:
abstracts, titles = get_abstracts_from_papers(search_results)
results = run_review_pipeline(query, abstracts, titles, ref_summary)

# Print rankings
print("\nPaper Rankings:")
print("==============")
print("\nGemini Ranking:")
# print("results!!!----------->", results)
for i, idx in enumerate(results["rankings"]["gemini"]["ranking"]):
    print(f"{i+1}. {titles[idx-1]}")

print("\nMistral Ranking:")
for i, idx in enumerate(results["rankings"]["mistral"]["ranking"]):
    print(f"{i+1}. {titles[idx-1]}")

print("\nEnsemble Ranking:")
for i, idx in enumerate(results["rankings"]["ensemble"]["ranking"]):
    print(f"{i+1}. {titles[idx]} (Score: {results['rankings']['ensemble']['scores'][i]:.4f})")

print("\nFinal Ranking:")
for i in (results["rankings"]["final"]):
  print(f"{i['rank']}.{i['title']} (Score: {i['relevance_score']:.4f})")

# Print summaries (truncated for brevity)
print("\nConsolidated Summary:")
print("=================================")
for model, summary in results["summaries"].items():
    if summary:
        print(f"\n{model.capitalize()} Summary: {summary[:]}...")

print("\nSummaries for each paper:")
print("=================================")

# Print BERT Score evaluation
if "evaluation" in results:
    print("\nBERT Score Evaluation:")
    print("=====================")
    for model, scores in results["evaluation"].items():
        if scores:
            print(f"\n{model.capitalize()}:")
            print(f"  Precision: {scores['precision']:.4f}")
            print(f"  Recall: {scores['recall']:.4f}")
            print(f"  F1: {scores['f1']:.4f}")


Error extracting abstract from PDF: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

Error extracting abstract from PDF: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see

Some weights of BertModel were not initialized from the model checkpoint at naver/splade-cocondenser-ensembledistil and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Generating model summaries...
Sending prompt to Mistral API...
Creating ensemble summary...
Sending prompt to Mistral API...
Generating model summaries for each paper...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Creating ensemble summary for each paper..
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Sending prompt to Mistral API...
Evaluating summaries with BERT Score...


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You sho


Paper Rankings:

Gemini Ranking:
1. Lessons Learned from Natural Disasters around Digital Health Technologies and Delivering Quality Healthcare.
2. Managing risk, governmentality and geoinformation: Vectors of vulnerability in the mapping of COVID‐19
3. Mobile Phone Network Data in the COVID-19 era: A systematic review of applications, socioeconomic factors affecting compliance to non-pharmaceutical interventions, privacy implications, and post-pandemic economic recovery strategies.
4. The critical elements of the health system that could make for resilience in the World Health Organization African Region: a scoping review.
5. A scoping review of wildfire smoke risk communications: issues, gaps, and recommendations.
6. A Topical Review on Enabling Technologies for the Internet of Medical Things: Sensors, Devices, Platforms, and Applications.
7. New York State Climate Impacts Assessment Chapter 03: Agriculture.

Mistral Ranking:
1. Lessons Learned from Natural Disasters around Digital 

In [None]:
# reference_summary = """
# Background
# Disasters are becoming more frequent due to the impact of extreme weather events attributed to climate change, causing loss of lives, property, and psychological trauma. Mental health response to disasters emphasizes prevention and mitigation, and mobile health (mHealth) apps have been used for mental health promotion and treatment. However, little is known about their use in the mental health components of disaster management.

# Objective
# This scoping review was conducted to explore the use of mobile phone apps for mental health responses to natural disasters and to identify gaps in the literature.

# Methods
# We identified relevant keywords and subject headings and conducted comprehensive searches in 6 electronic databases. Studies in which participants were exposed to a man-made disaster were included if the sample also included some participants exposed to a natural hazard. Only full-text studies published in English were included. The initial titles and abstracts of the unique papers were screened by 2 independent review authors. Full texts of the selected papers that met the inclusion criteria were reviewed by the 2 independent reviewers. Data were extracted from each selected full-text paper and synthesized using a narrative approach based on the outcome measures, duration, frequency of use of the mobile phone apps, and the outcomes. This scoping review was reported according to the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews).

# Results
# Of the 1398 papers retrieved, 5 were included in this review. A total of 3 studies were conducted on participants exposed to psychological stress following a disaster while 2 were for disaster relief workers. The mobile phone apps for the interventions included Training for Life Skills, Sonoma Rises, Headspace, Psychological First Aid, and Substance Abuse and Mental Health Services Administration (SAMHSA) Behavioural Health Disaster Response Apps. The different studies assessed the effectiveness or efficacy of the mobile app, feasibility, acceptability, and characteristics of app use or predictors of use. Different measures were used to assess the effectiveness of the apps’ use as either the primary or secondary outcome.

# Conclusions
# A limited number of studies are exploring the use of mobile phone apps for mental health responses to disasters. The 5 studies included in this review showed promising results. Mobile apps have the potential to provide effective mental health support before, during, and after disasters. However, further research is needed to explore the potential of mobile phone apps in mental health responses to all hazards.

# Keywords: mental health, disasters, mobile health, mHealth, application, applications, app, apps, smartphone, stress, psychological, traumatic, disaster, disasters, hazard, hazards, emergency, psychological trauma, mobile apps, trauma, scoping, review methods, review methodology, mobile phone
# """
# abstracts, titles = get_abstracts_from_papers(search_results)
# results = run_review_pipeline(query, abstracts, titles, reference_summary)

# # Print rankings
# print("\nPaper Rankings:")
# print("==============")
# print("\nGemini Ranking:")
# # print("results!!!----------->", results)
# for i, idx in enumerate(results["rankings"]["gemini"]["ranking"]):
#     print(f"{i+1}. {titles[idx-1]}")

# print("\nMistral Ranking:")
# for i, idx in enumerate(results["rankings"]["mistral"]["ranking"]):
#     print(f"{i+1}. {titles[idx-1]}")

# print("\nEnsemble Ranking:")
# for i, idx in enumerate(results["rankings"]["ensemble"]["ranking"]):
#     print(f"{i+1}. {titles[idx]} (Score: {results['rankings']['ensemble']['scores'][i]:.4f})")

# print("\nFinal Ranking:")
# for i in (results["rankings"]["final"]):
#   print(f"{i['rank']}.{i['title']} (Score: {i['relevance_score']:.4f})")

# # Print summaries (truncated for brevity)
# print("\nConsolidated Summary:")
# print("=================================")
# for model, summary in results["summaries"].items():
#     if summary:
#         print(f"\n{model.capitalize()} Summary: {summary[:]}...")

# # Print BERT Score evaluation
# if "evaluation" in results:
#     print("\nBERT Score Evaluation:")
#     print("=====================")
#     for model, scores in results["evaluation"].items():
#         if scores:
#             print(f"\n{model.capitalize()}:")
#             print(f"  Precision: {scores['precision']:.4f}")
#             print(f"  Recall: {scores['recall']:.4f}")
#             print(f"  F1: {scores['f1']:.4f}")
