<a href="https://colab.research.google.com/github/Nanda654/HEADS/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Longforner

In [None]:


import torch
from transformers import LongformerModel, LongformerTokenizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import numpy as np

# --- Load Longformer Model and Tokenizer ---
print("Loading Longformer model and tokenizer...")
model_name = 'allenai/longformer-base-4096'
tokenizer = LongformerTokenizer.from_pretrained(model_name)
model = LongformerModel.from_pretrained(model_name)

# --- Set Device (GPU if available, else CPU) ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.eval() # Set model to evaluation mode
model.to(device)
print(f"Using device: {device}")
print("Longformer model loaded.")

# --- Helper Function for Sentence Embeddings ---
def get_sentence_embeddings(text, batch_size=4):
    """
    Splits text into sentences, tokenizes them, and gets Longformer embeddings.
    Handles long documents by processing sentences in batches.
    Returns:
        sentences (list): List of original sentence strings.
        sentence_embeddings (np.array): NumPy array of sentence embeddings.
    """
    doc = nlp(text) # nlp is globally defined at the start of the cell
    sentences = [sent.text.strip() for sent in doc.sents if sent.text.strip()]

    if not sentences:
        print("Warning: No valid sentences found in the input text.")
        return [], np.array([])

    all_sentence_embeddings = []
    print(f"Total sentences to process: {len(sentences)}")

    for i in range(0, len(sentences), batch_size):
        batch_sentences = sentences[i:i + batch_size] # CORRECTED: using batch_size
        try:
            inputs = tokenizer(
                batch_sentences,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=tokenizer.model_max_length
            ).to(device)

            with torch.no_grad():
                outputs = model(**inputs)

            cls_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
            all_sentence_embeddings.extend(cls_embeddings)
            # Removed detailed batch print to reduce output clutter unless needed for debugging speed
            # print(f"  Processed batch {i // batch_size + 1}/{(len(sentences) + batch_size - 1) // batch_size}")

        except Exception as e:
            print(f"Error processing batch of sentences (index {i}-{i+len(batch_sentences)-1}): {e}")
            all_sentence_embeddings.extend([np.zeros(model.config.hidden_size)] * len(batch_sentences))
            continue

    return sentences, np.array(all_sentence_embeddings)

# --- Centroid-Based Summarization Function (Optimized to accept pre-calculated embeddings) ---
def centroid_summarization_optimized(sentences, embeddings, num_sentences=3):
    """
    Generates an extractive summary using a centroid-based approach.
    Accepts pre-calculated sentences and embeddings.
    """
    print("\n--- Starting Centroid-Based Summarization ---")
    if not sentences or embeddings.shape[0] == 0:
        print("  No sentences or embeddings provided. Cannot summarize.")
        return [], []

    if num_sentences <= 0:
        print("  Number of sentences for summary must be positive.")
        return [], []

    num_sentences_to_extract = min(num_sentences, len(sentences))

    document_centroid = np.mean(embeddings, axis=0)
    similarities = cosine_similarity(embeddings, document_centroid.reshape(1, -1)).flatten()

    summary_sentences_mmr = []
    selected_indices = set()
    ranked_initial_indices = np.argsort(similarities)[::-1]

    for _ in range(num_sentences_to_extract):
        best_sentence_idx = -1
        max_mmr_score = -1

        for i in ranked_initial_indices:
            if i not in selected_indices:
                relevance = similarities[i]

                if not selected_indices:
                    mmr_score = relevance
                else:
                    diversity_scores = cosine_similarity(embeddings[i].reshape(1, -1),
                                                         embeddings[list(selected_indices)])
                    redundancy = np.max(diversity_scores)
                    lambda_param = 0.7
                    mmr_score = lambda_param * relevance - (1 - lambda_param) * redundancy

                if mmr_score > max_mmr_score:
                    max_mmr_score = mmr_score
                    best_sentence_idx = i

        if best_sentence_idx != -1:
            summary_sentences_mmr.append((sentences[best_sentence_idx], best_sentence_idx))
            selected_indices.add(best_sentence_idx)
            ranked_initial_indices = ranked_initial_indices[ranked_initial_indices != best_sentence_idx]
        else:
            break

    summary_sentences_mmr.sort(key=lambda x: x[1])
    final_summary_sents = [s[0] for s in summary_sentences_mmr]
    final_summary_indices = [s[1] for s in summary_sentences_mmr]

    print("--- Centroid-Based Summarization Complete ---")
    return final_summary_sents, final_summary_indices

# --- K-Means Based Summarization Function (Optimized to accept pre-calculated embeddings) ---
def kmeans_summarization_optimized(sentences, embeddings, num_clusters=5, num_sentences_per_cluster=1):
    """
    Generates an extractive summary using K-Means clustering.
    Accepts pre-calculated sentences and embeddings.
    """
    print("\n--- Starting K-Means Based Summarization ---")
    if not sentences or embeddings.shape[0] == 0:
        print("  No sentences or embeddings provided. Cannot summarize.")
        return [], []

    if num_clusters <= 0 or num_sentences_per_cluster <= 0:
        print("  Number of clusters and sentences per cluster must be positive.")
        return [], []

    effective_num_clusters = min(num_clusters, len(sentences))

    if effective_num_clusters == 0:
        print("  Not enough sentences to form clusters.")
        return [], []

    kmeans = KMeans(n_clusters=effective_num_clusters, random_state=42, n_init='auto')
    kmeans.fit(embeddings)
    clusters = kmeans.labels_
    centroids = kmeans.cluster_centers_

    summary_sentences_with_idx = []
    selected_indices = set()

    for i in range(effective_num_clusters):
        cluster_sentence_indices = np.where(clusters == i)[0]

        if len(cluster_sentence_indices) == 0:
            continue

        distances = cdist(embeddings[cluster_sentence_indices], centroids[i].reshape(1, -1), 'cosine').flatten()
        sorted_cluster_indices = cluster_sentence_indices[np.argsort(distances)]

        count_selected_from_cluster = 0
        for original_idx in sorted_cluster_indices:
            if original_idx not in selected_indices:
                summary_sentences_with_idx.append((sentences[original_idx], original_idx))
                selected_indices.add(original_idx)
                count_selected_from_cluster += 1
                if count_selected_from_cluster >= num_sentences_per_cluster:
                    break

    summary_sentences_with_idx.sort(key=lambda x: x[1])
    final_summary_sents = [s[0] for s in summary_sentences_with_idx]
    final_summary_indices = [s[1] for s in summary_sentences_with_idx]

    print("--- K-Means Based Summarization Complete ---")
    return final_summary_sents, final_summary_indices

# --- Combined Extractive Summarization Function (Optimized to accept pre-calculated embeddings) ---
def combined_extractive_summary_optimized(sentences, embeddings, total_summary_sentences=7,
                                centroid_sentences_to_propose=5,
                                kmeans_clusters_to_propose=4,
                                kmeans_sentences_per_cluster_to_propose=1,
                                lambda_param_mmr=0.7):
    """
    Generates a single extractive summary by combining candidates from
    both centroid-based and K-Means approaches, then using MMR for final selection.
    Accepts pre-calculated sentences and embeddings.
    """
    print("\n--- Starting Combined Extractive Summarization ---")
    if not sentences or embeddings.shape[0] == 0:
        print("  No sentences or embeddings provided. Cannot summarize combined.")
        return []

    centroid_candidates_sents, centroid_candidates_indices = centroid_summarization_optimized(
        sentences, embeddings, num_sentences=centroid_sentences_to_propose
    )
    print(f"  Centroid proposed {len(centroid_candidates_sents)} candidates.")

    kmeans_candidates_sents, kmeans_candidates_indices = kmeans_summarization_optimized(
        sentences, embeddings, num_clusters=kmeans_clusters_to_propose, num_sentences_per_cluster=kmeans_sentences_per_cluster_to_propose
    )
    print(f"  K-Means proposed {len(kmeans_candidates_sents)} candidates.")

    # Combine candidates and their original indices, removing duplicates
    combined_candidates_map = {}
    for idx, sent in zip(centroid_candidates_indices, centroid_candidates_sents):
        combined_candidates_map[idx] = sent
    for idx, sent in zip(kmeans_candidates_indices, kmeans_candidates_sents):
        combined_candidates_map[idx] = sent

    all_candidate_indices_sorted = sorted(combined_candidates_map.keys())
    all_candidate_sentences = [combined_candidates_map[idx] for idx in all_candidate_indices_sorted]
    all_candidate_embeddings = np.array([embeddings[idx] for idx in all_candidate_indices_sorted])

    if not all_candidate_sentences or all_candidate_embeddings.shape[0] == 0:
        print("  No unique candidates found after combining. Cannot generate combined summary.")
        return []

    num_sentences_to_extract = min(total_summary_sentences, len(all_candidate_sentences))
    print(f"  Total unique candidates: {len(all_candidate_sentences)}. Extracting {num_sentences_to_extract} for combined summary.")

    document_centroid = np.mean(embeddings, axis=0)
    candidate_similarities = cosine_similarity(all_candidate_embeddings, document_centroid.reshape(1, -1)).flatten()

    final_summary_sentences = []
    selected_candidate_indices = set()

    ranked_initial_candidate_indices = np.argsort(candidate_similarities)[::-1]

    for _ in range(num_sentences_to_extract):
        best_idx_in_candidates = -1
        max_mmr_score = -1

        for i_candidate in ranked_initial_candidate_indices:
            if i_candidate not in selected_candidate_indices:
                relevance = candidate_similarities[i_candidate]

                if not selected_candidate_indices:
                    mmr_score = relevance
                else:
                    diversity_scores = cosine_similarity(all_candidate_embeddings[i_candidate].reshape(1, -1),
                                                         all_candidate_embeddings[list(selected_candidate_indices)])
                    redundancy = np.max(diversity_scores)

                    mmr_score = lambda_param_mmr * relevance - (1 - lambda_param_mmr) * redundancy

                if mmr_score > max_mmr_score:
                    max_mmr_score = mmr_score
                    best_idx_in_candidates = i_candidate

        if best_idx_in_candidates != -1:
            final_summary_sentences.append((all_candidate_sentences[best_idx_in_candidates],
                                             all_candidate_indices_sorted[best_idx_in_candidates]))
            selected_candidate_indices.add(best_idx_in_candidates)
            ranked_initial_candidate_indices = ranked_initial_candidate_indices[ranked_initial_candidate_indices != best_idx_in_candidates]
        else:
            break

    final_summary_sentences.sort(key=lambda x: x[1])
    final_summary = [s[0] for s in final_summary_sentences]

    print("--- Combined Extractive Summarization Complete ---")
    return final_summary

# --- Example Usage and Testing ---
long_document = """
Artificial intelligence (AI) has rapidly transformed various sectors, revolutionizing industries from healthcare to finance. In healthcare, AI assists in diagnosing diseases earlier and more accurately, personalizing treatment plans, and accelerating drug discovery. Machine learning algorithms, a subset of AI, analyze vast amounts of patient data to identify patterns that human doctors might miss, leading to more effective interventions. For instance, AI-powered tools can detect subtle signs of retinopathy from eye scans, potentially preventing blindness. The integration of AI into electronic health records is also streamlining administrative tasks, freeing up medical professionals to focus more on patient care. This technological leap promises to enhance diagnostic capabilities and optimize treatment protocols significantly.

The financial industry also heavily leverages AI for fraud detection, algorithmic trading, and personalized financial advice. AI systems can monitor transactions in real-time, identifying unusual patterns indicative of fraudulent activity with high precision. Furthermore, robo-advisors powered by AI provide automated, data-driven investment advice tailored to individual risk tolerance and financial goals, making financial planning more accessible to a wider demographic. The use of AI in predicting market trends and managing portfolios is becoming increasingly sophisticated, offering new avenues for investors.

Beyond these, AI is deeply embedded in everyday life through virtual assistants like Siri and Alexa, recommendation engines on streaming platforms, and autonomous vehicles. AI's role in natural language processing (NLP) has led to advancements in language translation and sentiment analysis, impacting global communication and customer service. The ethical implications of AI, however, are a growing concern among researchers and policymakers. Issues such as algorithmic bias, job displacement due to automation, and privacy breaches require careful consideration and robust regulation. Ensuring transparency, fairness, and accountability in AI development is paramount to harnessing its benefits responsibly.

Research in AI continues to advance at an astonishing pace, focusing on areas like explainable AI (XAI) to make AI decisions more understandable, and robust AI to improve performance in real-world, unpredictable environments. Novel architectures like generative adversarial networks (GANs) and reinforcement learning are pushing the boundaries of what AI can achieve, from creating realistic imagery to mastering complex games. The future of AI promises even more integration into society, with potential breakthroughs in areas like general artificial intelligence (AGI) and enhanced human-computer interaction, leading to smarter cities and more efficient resource management. However, achieving these advancements responsibly will necessitate ongoing collaboration between technologists, policymakers, and ethicists to address the complex challenges that arise. The rapid pace of development means that continuous public discourse and legislative adaptation are critical to navigate the challenges and maximize the societal benefits of AI, ensuring it serves humanity's best interests.
"""

print("Original Document Length (sentences):", sum(1 for _ in nlp(long_document).sents))

# --- OPTIMIZATION: Calculate document embeddings only ONCE ---
print("\nCalculating document embeddings (this might take a while for long texts)...")
sentences_list, embeddings_array = get_sentence_embeddings(long_document, batch_size=8)
print("Embeddings calculation complete.")


# --- Individual Centroid-Based Summarization ---
'''print("\n" + "="*80)
print("Individual Centroid-Based Summary:")
centroid_summary, _ = centroid_summarization_optimized(sentences_list, embeddings_array, num_sentences=5)
for i, sent in enumerate(centroid_summary):
    print(f"{i+1}. {sent}")


# --- Individual K-Means Based Summarization ---
print("\n" + "="*80)
print("Individual K-Means Based Summary:")
kmeans_summary, _ = kmeans_summarization_optimized(sentences_list, embeddings_array, num_clusters=4, num_sentences_per_cluster=1)
for i, sent in enumerate(kmeans_summary):
    print(f"{i+1}. {sent}")'''


# --- Combined Extractive Summarization ---
print("\n" + "="*80)
print("Combined Extractive Summary:")
combined_summary = combined_extractive_summary_optimized(
    sentences_list,
    embeddings_array,
    total_summary_sentences=6,
    centroid_sentences_to_propose=7,
    kmeans_clusters_to_propose=5,
    kmeans_sentences_per_cluster_to_propose=1
)
for i, sent in enumerate(combined_summary):
    print(f"{i+1}. {sent}")

print("\n" + "="*80)
print("\nAll summarization processes complete.")

Loading Longformer model and tokenizer...
Using device: cpu
Longformer model loaded.
Original Document Length (sentences): 20

Calculating document embeddings (this might take a while for long texts)...
Total sentences to process: 20
Embeddings calculation complete.

Combined Extractive Summary:

--- Starting Combined Extractive Summarization ---

--- Starting Centroid-Based Summarization ---
--- Centroid-Based Summarization Complete ---
  Centroid proposed 7 candidates.

--- Starting K-Means Based Summarization ---
--- K-Means Based Summarization Complete ---
  K-Means proposed 5 candidates.
  Total unique candidates: 11. Extracting 6 for combined summary.
--- Combined Extractive Summarization Complete ---
1. Artificial intelligence (AI) has rapidly transformed various sectors, revolutionizing industries from healthcare to finance.
2. The integration of AI into electronic health records is also streamlining administrative tasks, freeing up medical professionals to focus more on patien

#BART

In [5]:
# Install necessary libraries (only runs if not already installed)
#%pip install transformers torch

import torch
from transformers import BartForConditionalGeneration, BartTokenizer

# --- Load BART Model and Tokenizer ---
print("Loading BART model and tokenizer for abstractive summarization...")
bart_model_name = 'facebook/bart-large-cnn' # This is a good choice for summarization
bart_tokenizer = BartTokenizer.from_pretrained(bart_model_name)
bart_model = BartForConditionalGeneration.from_pretrained(bart_model_name)

# --- Set Device (GPU if available, else CPU) ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

bart_model.eval() # Set to evaluation mode
bart_model.to(device)
print(f"BART using device: {device}")
print("BART model loaded.")

# --- Abstractive Summarization Function (using BART) ---
def bart_abstractive_summary(text_to_summarize, max_length=150, min_length=50, num_beams=4, early_stopping=True):
    """
    Generates an abstractive summary using the pre-loaded BART model.
    Assumes bart_tokenizer and bart_model are loaded globally.

    Args:
        text_to_summarize (str or list of str): The input text (or list of sentences) to summarize.
                                                  If a list, it will be joined into a single string.
        max_length (int): Maximum length of the generated summary.
        min_length (int): Minimum length of the generated summary.
        num_beams (int): Number of beams for beam search. Higher values lead to better quality but slower generation.
        early_stopping (bool): Whether to stop beam search when all beams have finished their generation.

    Returns:
        str: The generated abstractive summary.
    """
    print("\n--- Starting BART Abstractive Summarization ---")

    if isinstance(text_to_summarize, list):
        text_to_summarize = " ".join(text_to_summarize)

    if not text_to_summarize.strip():
        print("  Input text for abstractive summary is empty. Cannot summarize.")
        return ""

    inputs = bart_tokenizer( # bart_tokenizer is now accessible globally
        [text_to_summarize],
        max_length=1024, # BART's typical max input length
        return_tensors="pt",
        truncation=True,
        padding="max_length"
    ).to(device) # device is also globally accessible

    summary_ids = bart_model.generate( # bart_model is now accessible globally
        inputs["input_ids"],
        num_beams=num_beams,
        max_length=max_length,
        min_length=min_length,
        early_stopping=early_stopping
    )

    summary_text = bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    print("--- BART Abstractive Summarization Complete ---")
    return summary_text

# --- Example Usage for BART only ---
#input_text_for_bart = " ".join(combined_summary)
input_text_for_bart = """FDA and USDA have responsibility for overseeing the safety of the food supply. The Federal Food, Drug, and Cosmetic Act prohibits the misbranding of food, which includes food labeling that is false or misleading. General information about the process of making cell-cultured meat is available, but specific information about the technology being used and the eventual commercial production methods as well as the final products is not yet known. However, the technology and methods to commercially produce cell- cultured meat are still in development, and producers, regulators, and consumers do not yet have clarity on what these will entail. The composition of the final product is also not yet known. The general process for making cell-cultured meat contains five phases: biopsy, cell banking, growth, harvest, and food processing. Cell banking. Food processing. The technology to produce cell-cultured meat at a commercial scale is still in development, and information about the methods to be used for commercial production and the composition of the final product are not yet known. Consequently, they have not finalized aspects of the technology and eventual commercial production methods to be used or the composition of the final product. As a result, certain information is not yet available to stakeholders\u2014including cell-cultured meat firms themselves, regulators, and the public\u2014about specific aspects of the technology and commercial production methods that will be used, such as the composition of the growth medium and of the final products. This lack of information results in unanswered questions about cell- cultured meat as it relates to the eventual technology and commercial production methods to be used and the composition of the final products. Are potential safety hazards in commercial production methods for cell-cultured meat different from those for conventional meat, and how will eventual commercial production methods affect the overall safety of the product? However, because specific information about commercial production methods and final products is not yet known, it is unclear whether cell-cultured meat produced on a commercial scale will pose any hazards not present in conventional meat. What will be the composition of any eventual products? Some firms have developed prototypes of cell-cultured meat products as part of their research and development. FDA and USDA have established multiple mechanisms to collaborate on regulatory oversight of cell-cultured meat. Specifically, the agencies have collaborated through a joint public meeting, an interagency agreement, and three working groups. However, the interagency agreement and working groups, which are ongoing mechanisms, do not fully incorporate leading practices for interagency collaboration. In addition, FDA and USDA have not documented which agency will oversee cell-cultured seafood not covered by the interagency agreement. In June 2019, FDA and USDA created three working groups to carry out the terms of the interagency agreement. FDA and USDA could more fully incorporate leading practices for collaboration in their interagency agreement and working groups. We compared the agencies\u2019 interagency agreement and working groups with the seven leading practices to enhance and sustain interagency collaboration that we previously identified. Developing and updating written guidance and agreements. We found that the interagency agreement for oversight of cell-cultured meat partially incorporates all seven leading practices for collaboration. Specifically, the agreement identifies broad outcomes such as the development of labeling principles. However, the agreement does not describe how the agencies will track and monitor progress toward outcomes. Developing and updating written guidance and agreements. This is consistent with our leading collaboration practice to continually update or monitor written agreements. However, the interagency agreement does not document how the agencies will track and monitor progress toward short-term and long-term outcomes. Table 1 provides more detail about the agencies\u2019 incorporation of these leading collaboration practices in their interagency agreement. As the agencies continue to collaborate on their shared oversight of cell- cultured meat, by more fully incorporating all seven leading practices for collaboration into their interagency agreement, they will be better positioned to address potential fragmentation in their efforts to ensure the safety of the food supply as cell-cultured meat products near commercialization and entry into the marketplace. We found that the pre-market assessment, labeling, and transfer of jurisdiction working groups that FDA and USDA created to carry out the terms of the interagency agreement either partially incorporate or do not incorporate the seven leading practices for interagency collaboration. Developing and updating written guidance and agreements. Developing and updating written guidance and agreements. We have previously reported that fragmentation has caused inconsistent oversight and inefficient use of resources in the federal food safety oversight system. In this context, some industry representatives and other stakeholders have expressed concerns about potential fragmentation or overlap in oversight of cell-cultured meat, such as could occur during the harvest phase of cell-cultured meat production when FDA hands off its oversight to USDA. By more fully incorporating all seven leading practices for interagency collaboration early in the development of the three working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat, ensure consistency and efficient use of resources, and provide clarity to key stakeholders. While FDA and USDA officials told us they have decided who will oversee cell-cultured seafood, they have not formally announced or documented this decision, and some stakeholders have reported confusion or ambiguity about which agency will oversee cell-cultured seafood other than catfish. FDA and USDA officials told us that FDA will have sole oversight responsibility for cell-cultured seafood other than catfish. While FDA and USDA officials told us they had agreed that FDA would oversee cell-cultured seafood other than catfish, as of December 2019, the agencies had not formally announced or documented this agreement. Developing and updating written guidance and agreements is a leading practice for collaboration, as we have previously reported. By taking steps to document which agency will oversee cell-cultured seafood other than catfish, FDA and USDA will better ensure the public, including key stakeholders such as cell-cultured meat firms, have clarity about the agencies\u2019 oversight responsibilities in this area. Compounding this challenge is that specific information about key aspects of cell-cultured meat, such as the technology and production methods to be used as well as the composition of the products, is not yet known. However, the interagency agreement only partially incorporates the seven leading collaboration practices that can enhance and sustain agencies\u2019 collaborative efforts, and the working groups either partially incorporate or do not incorporate these leading practices, which has raised concerns about potential fragmentation or overlap in oversight. By more fully incorporating all seven leading practices for collaboration into their interagency agreement, FDA and USDA could build on their existing efforts and be better positioned to sustain and enhance their collaborative efforts. Moreover, by more fully incorporating all seven leading practices for interagency collaboration early in the development of the working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat and ensure they are utilizing resources efficiently or effectively. FDA and USDA officials told us they have decided FDA will oversee most cell-cultured seafood, but the agencies have not formally documented this decision. By taking steps to document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish, FDA and USDA could better ensure that members of the public and other key stakeholders such as cell-cultured meat firms have clarity about the agencies\u2019 oversight responsibilities in this area. We are making a total of six recommendations, three to FDA and three to USDA: The Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should more fully incorporate the seven leading practices for effective collaboration in the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. (Recommendation 1) The Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should more fully incorporate the seven leading practices for effective collaboration in the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. (Recommendation 2) As the three cell-cultured meat working groups move forward, the Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should more fully incorporate the seven leading practices for effective collaboration, such as identifying specific outcomes and a way to monitor and evaluate progress toward outcomes. (Recommendation 3) As the three cell-cultured meat working groups move forward, the Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should more fully incorporate the seven leading practices for effective collaboration, such as identifying specific outcomes and a way to monitor and evaluate progress toward outcomes. (Recommendation 4) The Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should clearly document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish. (Recommendation 5) The Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should clearly document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish. (Recommendation 6) We provided a draft of this report to the Department of Health and Human Services\u2019 (HHS) Food and Drug Administration (FDA) and the U.S. Department of Agriculture (USDA) for review and comment. We agree that the technology to produce cell-cultured meat is still in development and that information about the commercial production methods and composition of the final product are not yet known, as we state in our report. We recognize that cell-cultured meat is a new food product that raises many new questions and that specific information about key aspects of cell-cultured meat is not yet known. In light of this challenging context, it is all the more important that FDA and USDA more fully incorporate leading practices for collaboration into their joint efforts in order to ensure they are in the best possible position to oversee this new food product. FDA concurred with two recommendations and partially concurred with one. USDA also concurred with two recommendations and partially concurred with one. FDA and USDA partially concurred with our recommendation, directed to each agency, to more fully incorporate the seven leading practices for effective collaboration into the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. We appreciate the agencies\u2019 willingness to incorporate the leading practices for effective collaboration into their efforts. We continue to believe that FDA and USDA should more fully incorporate the seven leading practices for effective collaboration into their interagency agreement for the joint oversight of cell-cultured meat. Developing a more detailed joint framework or standard operating procedure in accordance with the existing interagency agreement that incorporates those leading practices would meet the intent of our recommendation to improve the effectiveness of the agencies\u2019 collaboration. GAO staff who made key contributions to this report are listed in appendix V. Our report (1) describes what is known about methods for commercially producing cell-cultured meat and (2) examines the extent to which the Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) are collaborating to provide regulatory oversight of cell-cultured meat. We also attended public meetings and conferences and conducted site visits to several locations. To describe what is known about the process for producing cell-cultured meat and potential commercial production methods, we also reviewed two sets of public comments submitted to FDA and USDA in association with the two 2018 public meetings pertaining to cell-cultured meat. To examine the extent to which FDA and USDA are coordinating to provide regulatory oversight of cell-cultured meat, we identified actions they took to coordinate from July 2018 through April 2020. We compared the agencies\u2019 interagency agreement and working groups with seven leading practices to enhance and sustain interagency collaboration. Developing and updating written guidance and agreements How will the collaborative mechanism be funded?"""

print("\n" + "="*80)
print("Abstractive Summary (using BART directly on the full text):")
bart_only_summary = bart_abstractive_summary(
    input_text_for_bart,
    max_length=150, # Max length of the final abstractive summary
    min_length=50,  # Min length of the final abstractive summary
    num_beams=4     # Beam search parameter for quality
)
print(bart_only_summary)

print("\n" + "="*80)
print("\nBART only summarization complete.")

Loading BART model and tokenizer for abstractive summarization...


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

BART using device: cpu
BART model loaded.

Abstractive Summary (using BART directly on the full text):

--- Starting BART Abstractive Summarization ---
--- BART Abstractive Summarization Complete ---
FDA and USDA have responsibility for overseeing the safety of the food supply. The technology to produce cell-cultured meat at a commercial scale is still in development. Information about the methods to be used for commercial production and the composition of the final product are not yet known.


BART only summarization complete.


#Full code for Longformer and BART and pipeline

In [None]:
# Block 1: Setup, Model Loading, and Function Definitions

# Install necessary libraries (only runs if not already installed)
%pip install transformers torch scikit-learn numpy scipy spacy

# Download spaCy model (only downloads if not already present)
try:
    import spacy
    # Try to load the model directly without 'download' first
    nlp = spacy.load("en_core_web_sm")
    print("spaCy 'en_core_web_sm' model already loaded.")
except OSError:
    print("spaCy model 'en_core_web_sm' not found. Downloading...")
    from spacy.cli import download
    download("en_core_web_sm")
    import spacy
    nlp = spacy.load("en_core_web_sm")
    print("spaCy 'en_core_web_sm' model downloaded and loaded.")

import torch
from transformers import LongformerModel, LongformerTokenizer, BartForConditionalGeneration, BartTokenizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import numpy as np

# --- Load Longformer Model and Tokenizer (for Extractive) ---
print("Loading Longformer model and tokenizer...")
longformer_model_name = 'allenai/longformer-base-4096'
longformer_tokenizer = LongformerTokenizer.from_pretrained(longformer_model_name)
longformer_model = LongformerModel.from_pretrained(longformer_model_name)

# --- Load BART Model and Tokenizer (for Abstractive) ---
print("Loading BART model and tokenizer for abstractive summarization...")
bart_model_name = 'facebook/bart-large-cnn' # This is a good choice for summarization
bart_tokenizer = BartTokenizer.from_pretrained(bart_model_name)
bart_model = BartForConditionalGeneration.from_pretrained(bart_model_name)


# --- Set Device (GPU if available, else CPU) ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

longformer_model.eval() # Set to evaluation mode
longformer_model.to(device)
print(f"Longformer using device: {device}")

bart_model.eval() # Set to evaluation mode
bart_model.to(device)
print(f"BART using device: {device}")
print("All models loaded and moved to device.")

# --- Helper Function for Sentence Embeddings (Longformer) ---
def get_sentence_embeddings(text, batch_size=4):
    """
    Splits text into sentences, tokenizes them, and gets Longformer embeddings.
    Handles long documents by processing sentences in batches.
    Returns:
        sentences (list): List of original sentence strings.
        sentence_embeddings (np.array): NumPy array of sentence embeddings.
    """
    # nlp, longformer_tokenizer, longformer_model, and device are global here
    doc = nlp(text)
    sentences = [sent.text.strip() for sent in doc.sents if sent.text.strip()]

    if not sentences:
        print("Warning: No valid sentences found in the input text.")
        return [], np.array([])

    all_sentence_embeddings = []
    print(f"Total sentences to process: {len(sentences)}")

    for i in range(0, len(sentences), batch_size):
        batch_sentences = sentences[i:i + batch_size]
        try:
            inputs = longformer_tokenizer(
                batch_sentences,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=longformer_tokenizer.model_max_length
            ).to(device)

            with torch.no_grad():
                outputs = longformer_model(**inputs)

            cls_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
            all_sentence_embeddings.extend(cls_embeddings)

        except Exception as e:
            print(f"Error processing batch of sentences (index {i}-{i+len(batch_sentences)-1}): {e}")
            all_sentence_embeddings.extend([np.zeros(longformer_model.config.hidden_size)] * len(batch_sentences))
            continue

    return sentences, np.array(all_sentence_embeddings)

# --- Centroid-Based Summarization Function (Optimized to accept pre-calculated embeddings) ---
def centroid_summarization_optimized(sentences, embeddings, num_sentences=3):
    """
    Generates an extractive summary using a centroid-based approach.
    Accepts pre-calculated sentences and embeddings.
    """
    if not sentences or embeddings.shape[0] == 0:
        print("  No sentences or embeddings provided. Cannot summarize.")
        return [], []

    if num_sentences <= 0:
        print("  Number of sentences for summary must be positive.")
        return [], []

    num_sentences_to_extract = min(num_sentences, len(sentences))

    document_centroid = np.mean(embeddings, axis=0)
    similarities = cosine_similarity(embeddings, document_centroid.reshape(1, -1)).flatten()

    summary_sentences_mmr = []
    selected_indices = set() # Correctly initialized here
    ranked_initial_indices = np.argsort(similarities)[::-1]

    for _ in range(num_sentences_to_extract):
        best_sentence_idx = -1
        max_mmr_score = -1

        for i in ranked_initial_indices:
            if i not in selected_indices:
                relevance = similarities[i]

                if not selected_indices: # This check is now safe
                    mmr_score = relevance
                else:
                    diversity_scores = cosine_similarity(embeddings[i].reshape(1, -1),
                                                         embeddings[list(selected_indices)])
                    redundancy = np.max(diversity_scores)
                    lambda_param = 0.7
                    mmr_score = lambda_param * relevance - (1 - lambda_param) * redundancy

                if mmr_score > max_mmr_score:
                    max_mmr_score = mmr_score
                    best_sentence_idx = i

        if best_sentence_idx != -1:
            summary_sentences_mmr.append((sentences[best_sentence_idx], best_sentence_idx))
            selected_indices.add(best_sentence_idx)
            ranked_initial_indices = ranked_initial_indices[ranked_initial_indices != best_sentence_idx]
        else:
            break

    summary_sentences_mmr.sort(key=lambda x: x[1])
    final_summary_sents = [s[0] for s in summary_sentences_mmr]
    final_summary_indices = [s[1] for s in summary_sentences_mmr]

    return final_summary_sents, final_summary_indices

# --- K-Means Based Summarization Function (Optimized to accept pre-calculated embeddings) ---
def kmeans_summarization_optimized(sentences, embeddings, num_clusters=5, num_sentences_per_cluster=1):
    """
    Generates an extractive summary using K-Means clustering.
    Accepts pre-calculated sentences and embeddings.
    """
    if not sentences or embeddings.shape[0] == 0:
        print("  No sentences or embeddings provided. Cannot summarize.")
        return [], []

    if num_clusters <= 0 or num_sentences_per_cluster <= 0:
        print("  Number of clusters and sentences per cluster must be positive.")
        return [], []

    effective_num_clusters = min(num_clusters, len(sentences))

    if effective_num_clusters == 0:
        print("  Not enough sentences to form clusters.")
        return [], []

    kmeans = KMeans(n_clusters=effective_num_clusters, random_state=42, n_init='auto')
    kmeans.fit(embeddings)
    clusters = kmeans.labels_
    centroids = kmeans.cluster_centers_

    summary_sentences_with_idx = []
    selected_indices = set()

    for i in range(effective_num_clusters):
        cluster_sentence_indices = np.where(clusters == i)[0]

        if len(cluster_sentence_indices) == 0:
            continue

        distances = cdist(embeddings[cluster_sentence_indices], centroids[i].reshape(1, -1), 'cosine').flatten()
        sorted_cluster_indices = cluster_sentence_indices[np.argsort(distances)]

        count_selected_from_cluster = 0
        for original_idx in sorted_cluster_indices:
            if original_idx not in selected_indices:
                summary_sentences_with_idx.append((sentences[original_idx], original_idx))
                selected_indices.add(original_idx)
                count_selected_from_cluster += 1
                if count_selected_from_cluster >= num_sentences_per_cluster:
                    break

    summary_sentences_with_idx.sort(key=lambda x: x[1])
    final_summary_sents = [s[0] for s in summary_sentences_with_idx]
    final_summary_indices = [s[1] for s in summary_sentences_with_idx]

    return final_summary_sents, final_summary_indices

# --- Combined Extractive Summarization Function (Optimized) ---
def combined_extractive_summary_optimized(sentences, embeddings, total_summary_sentences=7,
                                centroid_sentences_to_propose=5,
                                kmeans_clusters_to_propose=4,
                                kmeans_sentences_per_cluster_to_propose=1,
                                lambda_param_mmr=0.7):
    """
    Generates a single extractive summary by combining candidates from
    both centroid-based and K-Means approaches, then using MMR for final selection.
    Accepts pre-calculated sentences and embeddings.
    """
    print("\n--- Starting Combined Extractive Summarization Candidate Generation ---")
    if not sentences or embeddings.shape[0] == 0:
        print("  No sentences or embeddings provided. Cannot summarize combined.")
        return []

    centroid_candidates_sents, centroid_candidates_indices = centroid_summarization_optimized(
        sentences, embeddings, num_sentences=centroid_sentences_to_propose
    )
    print(f"  Centroid proposed {len(centroid_candidates_sents)} candidates.")

    kmeans_candidates_sents, kmeans_candidates_indices = kmeans_summarization_optimized(
        sentences, embeddings, num_clusters=kmeans_clusters_to_propose, num_sentences_per_cluster=kmeans_sentences_per_cluster_to_propose
    )
    print(f"  K-Means proposed {len(kmeans_candidates_sents)} candidates.")

    # Combine candidates and their original indices, removing duplicates
    combined_candidates_map = {}
    for idx, sent in zip(centroid_candidates_indices, centroid_candidates_sents):
        combined_candidates_map[idx] = sent
    for idx, sent in zip(kmeans_candidates_indices, kmeans_candidates_sents):
        combined_candidates_map[idx] = sent

    all_candidate_indices_sorted = sorted(combined_candidates_map.keys())
    all_candidate_sentences = [combined_candidates_map[idx] for idx in all_candidate_indices_sorted]
    all_candidate_embeddings = np.array([embeddings[idx] for idx in all_candidate_indices_sorted])

    if not all_candidate_sentences or all_candidate_embeddings.shape[0] == 0:
        print("  No unique candidates found after combining. Cannot generate combined summary.")
        return []

    num_sentences_to_extract = min(total_summary_sentences, len(all_candidate_sentences))
    print(f"  Total unique candidates: {len(all_candidate_sentences)}. Extracting {num_sentences_to_extract} for combined summary.")

    document_centroid = np.mean(embeddings, axis=0)
    candidate_similarities = cosine_similarity(all_candidate_embeddings, document_centroid.reshape(1, -1)).flatten()

    final_summary_sentences = []
    selected_candidate_indices = set() # <-- FIXED: Initialized here

    ranked_initial_candidate_indices = np.argsort(candidate_similarities)[::-1]

    for _ in range(num_sentences_to_extract):
        best_idx_in_candidates = -1
        max_mmr_score = -1

        for i_candidate in ranked_initial_candidate_indices:
            if i_candidate not in selected_candidate_indices:
                relevance = candidate_similarities[i_candidate]

                if not selected_candidate_indices:
                    mmr_score = relevance
                else:
                    diversity_scores = cosine_similarity(all_candidate_embeddings[i_candidate].reshape(1, -1),
                                                         all_candidate_embeddings[list(selected_candidate_indices)])
                    redundancy = np.max(diversity_scores)

                    mmr_score = lambda_param_mmr * relevance - (1 - lambda_param_mmr) * redundancy

                if mmr_score > max_mmr_score:
                    max_mmr_score = mmr_score
                    best_idx_in_candidates = i_candidate

        if best_idx_in_candidates != -1:
            final_summary_sentences.append((all_candidate_sentences[best_idx_in_candidates],
                                             all_candidate_indices_sorted[best_idx_in_candidates]))
            selected_candidate_indices.add(best_idx_in_candidates)
            ranked_initial_candidate_indices = ranked_initial_candidate_indices[ranked_initial_candidate_indices != best_idx_in_candidates]
        else:
            break

    final_summary_sentences.sort(key=lambda x: x[1])
    final_summary = [s[0] for s in final_summary_sentences]

    print("--- Combined Extractive Summarization Selection Complete ---")
    return final_summary

# --- Abstractive Summarization Function (using BART) ---
def bart_abstractive_summary(text_to_summarize, max_length=150, min_length=50, num_beams=4, early_stopping=True):
    """
    Generates an abstractive summary using the pre-loaded BART model.
    Assumes bart_tokenizer and bart_model are loaded globally.

    Args:
        text_to_summarize (str or list of str): The input text (or list of sentences) to summarize.
                                                  If a list, it will be joined into a single string.
        max_length (int): Maximum length of the generated summary.
        min_length (int): Minimum length of the generated summary.
        num_beams (int): Number of beams for beam search. Higher values lead to better quality but slower generation.
        early_stopping (bool): Whether to stop beam search when all beams have finished their generation.

    Returns:
        str: The generated abstractive summary.
    """
    print("\n--- Starting BART Abstractive Summarization ---")

    if isinstance(text_to_summarize, list):
        text_to_summarize = " ".join(text_to_summarize)

    if not text_to_summarize.strip():
        print("  Input text for abstractive summary is empty. Cannot summarize.")
        return ""

    inputs = bart_tokenizer(
        [text_to_summarize],
        max_length=1024, # BART's typical max input length
        return_tensors="pt",
        truncation=True,
        padding="max_length"
    ).to(device)

    summary_ids = bart_model.generate(
        inputs["input_ids"],
        num_beams=num_beams,
        max_length=max_length,
        min_length=min_length,
        early_stopping=early_stopping
    )

    summary_text = bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    print("--- BART Abstractive Summarization Complete ---")
    return summary_text

spaCy 'en_core_web_sm' model already loaded.
Loading Longformer model and tokenizer...
Loading BART model and tokenizer for abstractive summarization...
Longformer using device: cpu
BART using device: cpu
All models loaded and moved to device.


In [None]:
# Block 2: Your Document Text

long_document = """
Artificial intelligence (AI) has rapidly transformed various sectors, revolutionizing industries from healthcare to finance. In healthcare, AI assists in diagnosing diseases earlier and more accurately, personalizing treatment plans, and accelerating drug discovery. Machine learning algorithms, a subset of AI, analyze vast amounts of patient data to identify patterns that human doctors might miss, leading to more effective interventions. For instance, AI-powered tools can detect subtle signs of retinopathy from eye scans, potentially preventing blindness. The integration of AI into electronic health records is also streamlining administrative tasks, freeing up medical professionals to focus more on patient care. This technological leap promises to enhance diagnostic capabilities and optimize treatment protocols significantly.

The financial industry also heavily leverages AI for fraud detection, algorithmic trading, and personalized financial advice. AI systems can monitor transactions in real-time, identifying unusual patterns indicative of fraudulent activity with high precision. Furthermore, robo-advisors powered by AI provide automated, data-driven investment advice tailored to individual risk tolerance and financial goals, making financial planning more accessible to a wider demographic. The use of AI in predicting market trends and managing portfolios is becoming increasingly sophisticated, offering new avenues for investors.

Beyond these, AI is deeply embedded in everyday life through virtual assistants like Siri and Alexa, recommendation engines on streaming platforms, and autonomous vehicles. AI's role in natural language processing (NLP) has led to advancements in language translation and sentiment analysis, impacting global communication and customer service. The ethical implications of AI, however, are a growing concern among researchers and policymakers. Issues such as algorithmic bias, job displacement due to automation, and privacy breaches require careful consideration and robust regulation. Ensuring transparency, fairness, and accountability in AI development is paramount to harnessing its benefits responsibly.

Research in AI continues to advance at an astonishing pace, focusing on areas like explainable AI (XAI) to make AI decisions more understandable, and robust AI to improve performance in real-world, unpredictable environments. Novel architectures like generative adversarial networks (GANs) and reinforcement learning are pushing the boundaries of what AI can achieve, from creating realistic imagery to mastering complex games. The future of AI promises even more integration into society, with potential breakthroughs in areas like general artificial intelligence (AGI) and enhanced human-computer interaction, leading to smarter cities and more efficient resource management. However, achieving these advancements responsibly will necessitate ongoing collaboration between technologists, policymakers, and ethicists to address the complex challenges that arise. The rapid pace of development means that continuous public discourse and legislative adaptation are critical to navigate the challenges and maximize the societal benefits of AI, ensuring it serves humanity's best interests.
"""

In [None]:
# Block 3: The Summarization Pipeline

print("--- Starting Hybrid Summarization Pipeline ---")
print("Original Document Length (sentences):", sum(1 for _ in nlp(long_document).sents))


# Step 1: Generate Sentence Embeddings using Longformer
print("\n[Pipeline Step 1/3] Calculating document embeddings with Longformer...")
sentences_list, embeddings_array = get_sentence_embeddings(long_document, batch_size=8)
print("  Embeddings calculation complete.")


# Step 2: Generate Combined Extractive Summary
print("\n[Pipeline Step 2/3] Generating combined extractive summary...")
combined_extractive_summary_sentences = combined_extractive_summary_optimized(
    sentences_list,
    embeddings_array,
    total_summary_sentences=6, # Desired length for the extractive part
    centroid_sentences_to_propose=7,
    kmeans_clusters_to_propose=5,
    kmeans_sentences_per_cluster_to_propose=1
)
print(f"  Extracted {len(combined_extractive_summary_sentences)} sentences.")
print("\nExtractive Summary:")
for i, sent in enumerate(combined_extractive_summary_sentences):
    print(f"{i+1}. {sent}")


# Step 3: Generate Abstractive Summary from Extractive Output using BART
print("\n[Pipeline Step 3/3] Generating abstractive summary with BART...")
extractive_text_for_abstractive = " ".join(combined_extractive_summary_sentences)
final_abstractive_summary = bart_abstractive_summary(
    extractive_text_for_abstractive,
    max_length=150, # Max length of the final abstractive summary
    min_length=50,  # Min length of the final abstractive summary
    num_beams=4     # Beam search parameter for quality
)
print("\nAbstractive Summary:")
print(final_abstractive_summary)

print("\n--- Hybrid Summarization Pipeline Complete ---")

--- Starting Hybrid Summarization Pipeline ---
Original Document Length (sentences): 20

[Pipeline Step 1/3] Calculating document embeddings with Longformer...
Total sentences to process: 20
  Embeddings calculation complete.

[Pipeline Step 2/3] Generating combined extractive summary...

--- Starting Combined Extractive Summarization Candidate Generation ---
  Centroid proposed 7 candidates.
  K-Means proposed 5 candidates.
  Total unique candidates: 11. Extracting 6 for combined summary.
--- Combined Extractive Summarization Selection Complete ---
  Extracted 6 sentences.

Extractive Summary:
1. Artificial intelligence (AI) has rapidly transformed various sectors, revolutionizing industries from healthcare to finance.
2. The integration of AI into electronic health records is also streamlining administrative tasks, freeing up medical professionals to focus more on patient care.
3. Furthermore, robo-advisors powered by AI provide automated, data-driven investment advice tailored to in

# Rough


In [None]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
import spacy
import numpy as np

# --- Load BART Model and Tokenizer ---
print("Loading BART model and tokenizer for abstractive summarization...")
bart_model_name = 'facebook/bart-large-cnn'
bart_tokenizer = BartTokenizer.from_pretrained(bart_model_name)
bart_model = BartForConditionalGeneration.from_pretrained(bart_model_name)

# --- Set Device (GPU if available, else CPU) ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
bart_model.eval()
bart_model.to(device)
print(f"BART using device: {device}")
print("BART model loaded.")

# Load spaCy for fact extraction
try:
    nlp = spacy.load("en_core_web_sm")
    print("spaCy model loaded.")
except:
    print("Installing spaCy model...")
    import os
    os.system("python -m spacy download en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
    print("spaCy model loaded.")

# --- Helper Functions for Constrained Beam Search ---

def extract_key_facts(extractive_output, importance_threshold=0.7):
    """Extract key facts from the extractive summarization output"""
    if isinstance(extractive_output, str):
        # If input is a string, treat it as a single sentence with high importance
        key_sentences = [extractive_output]
    elif isinstance(extractive_output, list) and all(isinstance(item, str) for item in extractive_output):
        # If input is a list of strings, use all sentences
        key_sentences = extractive_output
    else:
        # If input is a list of (sentence, score) tuples
        key_sentences = [sent for sent, score in extractive_output if score > importance_threshold]

    # Process sentences to extract atomic facts
    facts = []
    for sentence in key_sentences:
        # Simple approach: use key noun phrases and entities
        doc = nlp(sentence)
        for chunk in doc.noun_chunks:
            if len(chunk.text.split()) > 1:  # Filter out very short phrases
                facts.append(chunk.text)

        # Add named entities
        for ent in doc.ents:
            facts.append(ent.text)

    # Deduplicate facts
    return list(set(facts))

def prepare_constraints(facts, tokenizer):
    """Convert textual facts to token IDs for constraint checking"""
    constraints = []
    for fact in facts:
        # Tokenize the fact
        fact_tokens = tokenizer.encode(fact, add_special_tokens=False)

        # Only use facts that aren't too long or too short
        if 2 <= len(fact_tokens) <= 10:
            constraints.append(fact_tokens)

    return constraints

def is_subsequence(smaller, larger):
    """Check if smaller list appears as a subsequence in larger list"""
    i = j = 0
    while i < len(smaller) and j < len(larger):
        if smaller[i] == larger[j]:
            i += 1
        j += 1
    return i == len(smaller)

def check_constraints(sequence, constraints):
    """Check which constraints are satisfied by the current sequence"""
    satisfied = []

    for i, constraint in enumerate(constraints):
        # Check if constraint tokens appear in sequence in the correct order
        if is_subsequence(constraint, sequence):
            satisfied.append(i)

    return satisfied

# --- Constrained Beam Search Implementation ---

def constrained_beam_search(model, input_ids, attention_mask, constraints,
                           num_beams=4, max_length=150, min_length=50,
                           constraint_weight=2.0):
    """
    Implements constrained beam search for BART summarization.

    Args:
        model: The BART model
        input_ids: Tokenized input text
        attention_mask: Attention mask for input
        constraints: List of token sequences that should appear in the output
        num_beams: Number of beams for beam search
        max_length: Maximum length of the generated summary
        min_length: Minimum length of the generated summary
        constraint_weight: Weight given to satisfying constraints

    Returns:
        The generated summary that satisfies the most constraints
    """
    # Get encoder output once
    encoder_outputs = model.get_encoder()(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)

    # Initialize beams: (tokens, log_prob, satisfied_constraints)
    batch_size = input_ids.shape[0]
    device = input_ids.device

    # Start with the decoder start token
    decoder_start_token_id = model.config.decoder_start_token_id
    beams = [([decoder_start_token_id], 0.0, set()) for _ in range(num_beams)]

    # Track completed sequences
    done_beams = []

    # Main beam search loop
    for step in range(max_length):
        all_candidates = []

        # Check if all beams are done
        if len(done_beams) == num_beams:
            break

        # Prepare current tokens for all beams
        active_beams = [b for b in beams if b[0][-1] != model.config.eos_token_id]
        if not active_beams:
            break

        current_tokens = [beam[0] for beam in active_beams]
        max_len = max(len(tokens) for tokens in current_tokens)

        # Pad and create tensor
        padded_tokens = [tokens + [model.config.pad_token_id] * (max_len - len(tokens)) for tokens in current_tokens]
        decoder_input = torch.tensor(padded_tokens, device=device)

        # Get next token predictions
        with torch.no_grad():
            outputs = model.decoder(
                input_ids=decoder_input,
                encoder_hidden_states=encoder_outputs.last_hidden_state,
                encoder_attention_mask=attention_mask
            )

            logits = outputs[0]  # Get logits

            # Process each beam
            for beam_idx, (tokens, score, satisfied) in enumerate(active_beams):
                # Get logits for the last token
                curr_logits = logits[beam_idx, len(tokens)-1, :]

                # Apply softmax to get probabilities
                probs = torch.nn.functional.softmax(curr_logits, dim=-1)
                log_probs = torch.log(probs + 1e-10)  # Add small epsilon to avoid log(0)

                # Get top tokens
                topk_log_probs, topk_indices = torch.topk(log_probs, num_beams * 2)

                # Create new candidates
                for log_prob, token_id in zip(topk_log_probs.tolist(), topk_indices.tolist()):
                    new_tokens = tokens + [token_id]
                    new_score = score + log_prob

                    # Check which constraints are newly satisfied
                    new_satisfied = set(satisfied)
                    for i, constraint in enumerate(constraints):
                        if i not in new_satisfied and is_subsequence(constraint, new_tokens):
                            new_satisfied.add(i)

                    # Apply constraint bonus
                    constraint_bonus = len(new_satisfied) * constraint_weight
                    adjusted_score = new_score + constraint_bonus

                    # Add to candidates
                    all_candidates.append((new_tokens, adjusted_score, new_satisfied))

                    # Check if this is a completed sequence
                    if token_id == model.config.eos_token_id and len(new_tokens) >= min_length:
                        done_beams.append((new_tokens, adjusted_score, new_satisfied))

        # Select top beams for next iteration
        beams = sorted(all_candidates, key=lambda x: x[1], reverse=True)[:num_beams]

    # If we have completed sequences, return the best one
    if done_beams:
        # Sort by number of constraints satisfied, then by score
        best_beam = max(done_beams, key=lambda x: (len(x[2]), x[1]))
        return best_beam[0]

    # If no sequence completed, return the best current beam
    best_beam = max(beams, key=lambda x: (len(x[2]), x[1]))
    return best_beam[0]

# --- Abstractive Summarization with Constrained Beam Search ---

def constrained_bart_summary(text_to_summarize, constraint_sentences=None,
                            max_length=150, min_length=50, num_beams=4):
    """
    Generates an abstractive summary using BART with constrained beam search.

    Args:
        text_to_summarize (str): The input text to summarize
        constraint_sentences (list): List of sentences containing facts that must be included
        max_length (int): Maximum length of the generated summary
        min_length (int): Minimum length of the generated summary
        num_beams (int): Number of beams for beam search

    Returns:
        str: The generated abstractive summary that includes the key facts
    """
    print("Starting BART Abstractive Summarization with Constrained Beam Search ---")

    if isinstance(text_to_summarize, list):
        text_to_summarize = " ".join(text_to_summarize)

    if not text_to_summarize.strip():
        print("  Input text for abstractive summary is empty. Cannot summarize.")
        return ""

    # Process input text
    inputs = bart_tokenizer(
        [text_to_summarize],
        max_length=1024,
        return_tensors="pt",
        truncation=True,
        padding="max_length"
    ).to(device)

    # Extract and prepare constraints if provided
    constraints = []
    if constraint_sentences:
        print(f"Extracting key facts from {len(constraint_sentences)} constraint sentences...")
        facts = extract_key_facts(constraint_sentences)
        print(f"Extracted {len(facts)} key facts: {facts[:5]}...")
        constraints = prepare_constraints(facts, bart_tokenizer)
        print(f"Prepared {len(constraints)} constraints for beam search")

    # If no constraints or constraint extraction failed, fall back to standard beam search
    if not constraints:
        print("No constraints provided or extracted. Using standard beam search.")
        summary_ids = bart_model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            num_beams=num_beams,
            max_length=max_length,
            min_length=min_length,
            early_stopping=True
        )
        summary_text = bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    else:
        # Use constrained beam search
        print("Using constrained beam search with extracted facts...")
        output_ids = constrained_beam_search(
            model=bart_model,
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            constraints=constraints,
            num_beams=num_beams,
            max_length=max_length,
            min_length=min_length
        )
        summary_text = bart_tokenizer.decode(output_ids, skip_special_tokens=True)

    print("--- BART Abstractive Summarization with Constrained Beam Search Complete ---")
    return summary_text

# --- Example Usage ---
# This is where you would input your text and constraint sentences

# Example:

input_text = """Artificial intelligence (AI) has rapidly become a transformative force across various industries. In healthcare, AI systems assist doctors by analyzing medical images, predicting patient risks, and streamlining administrative tasks through automated electronic health records. Hospitals are increasingly relying on AI tools to optimize patient scheduling and improve diagnostic accuracy. In finance, AI-driven algorithms power fraud detection systems, assess credit risk, and support robo-advisors that provide tailored investment advice based on individual financial goals and risk tolerance.

AI is also playing a crucial role in transportation. Self-driving cars and traffic optimization systems use vast amounts of data to reduce accidents and improve traffic flow in urban areas. Meanwhile, the education sector is leveraging AI-powered personalized learning platforms that adapt to students’ strengths and weaknesses, enhancing engagement and learning outcomes.

However, the rise of AI comes with challenges. Concerns about algorithmic bias, data privacy, and job displacement are prompting calls for stronger regulations and ethical guidelines. Privacy breaches can occur when sensitive personal data is mishandled by AI systems, while automation threatens certain repetitive or low-skilled jobs.

The future of AI looks promising, with ongoing research into general artificial intelligence (AGI) and advanced human-computer interaction. Smarter cities, more efficient energy management, and breakthroughs in medicine are all on the horizon. To ensure AI serves humanity’s best interests, governments, companies, and researchers must engage in continuous public discourse, adapt regulations, and focus on ethical deployment of this powerful technology."""

# These are the factual and relationship constraint sentences
constraint_sentences = [
    "Education uses personalized learning platforms powered by AI."
]

summary = constrained_bart_summary(
    input_text,
    constraint_sentences=constraint_sentences,
    max_length=150,
    min_length=50,
    num_beams=4
)

print("" + "="*80)
print("Constrained Abstractive Summary:")
print(summary)



Loading BART model and tokenizer for abstractive summarization...
BART using device: cpu
BART model loaded.
spaCy model loaded.
Starting BART Abstractive Summarization with Constrained Beam Search ---
Extracting key facts from 1 constraint sentences...
Extracted 3 key facts: ['Education', 'personalized learning platforms', 'AI']...
Prepared 1 constraints for beam search
Using constrained beam search with extracted facts...
--- BART Abstractive Summarization with Constrained Beam Search Complete ---
Constrained Abstractive Summary:



Your beam search still outputs only <\s> because the first predicted token is EOS.
Even with early EOS blocking, this can happen if:

Probability of EOS dominates initially, which happens with small batch beam search using custom decoding.

No token in the top-k log probs survives filtering because of the early EOS skip.

Constraint subsequences are hard to satisfy, and beam scoring leads to pruning all sequences.

Why this happens
You are starting with decoder_start_token_id = 0 (BART <s>).

On the first step, the model often outputs EOS with very high probability.

In your beam loop, if EOS is skipped and all candidates are empty, it leads to immediate termination.

Robust Fix
Instead of hand-rolling beam search, leverage Hugging Face’s generate with force_words_ids, which natively handles constraints and prevents EOS problems.

Here’s a simpler and working solution:

python
Copy
Edit


In [None]:
from transformers import BartForConditionalGeneration, BartTokenizer

bart_model_name = 'facebook/bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(bart_model_name)
model = BartForConditionalGeneration.from_pretrained(bart_model_name).eval()

text = """Artificial intelligence (AI) has rapidly become a transformative force across various industries.
In healthcare, AI systems assist doctors by analyzing medical images, predicting patient risks, and streamlining administrative tasks
through automated electronic health records. Hospitals are increasingly relying on AI tools to optimize patient scheduling and improve
diagnostic accuracy. In finance, AI-driven algorithms power fraud detection systems, assess credit risk, and support robo-advisors
that provide tailored investment advice based on individual financial goals and risk tolerance.

AI is also playing a crucial role in transportation. Self-driving cars and traffic optimization systems use vast amounts of data
to reduce accidents and improve traffic flow in urban areas. Meanwhile, the education sector is leveraging AI-powered personalized
learning platforms that adapt to students’ strengths and weaknesses, enhancing engagement and learning outcomes.

However, the rise of AI comes with challenges. Concerns about algorithmic bias, data privacy, and job displacement are prompting
calls for stronger regulations and ethical guidelines. Privacy breaches can occur when sensitive personal data is mishandled by
AI systems, while automation threatens certain repetitive or low-skilled jobs.

The future of AI looks promising, with ongoing research into general artificial intelligence (AGI) and advanced human-computer
interaction. Smarter cities, more efficient energy management, and breakthroughs in medicine are all on the horizon. To ensure
AI serves humanity’s best interests, governments, companies, and researchers must engage in continuous public discourse, adapt
regulations, and focus on ethical deployment of this powerful technology.
"""

# --- Constraint words ---
constraint_words = ["Education", "AI"]
force_words_ids = [tokenizer([w], add_special_tokens=False).input_ids[0] for w in constraint_words]

inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024)

summary_ids = model.generate(
    **inputs,
    max_length=150,
    min_length=50,
    num_beams=5,
    force_words_ids=force_words_ids,
    no_repeat_ngram_size=3,
    early_stopping=True
)

summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Constrained Abstractive Summary:")
print(summary)


Constrained Beam Search is scheduled to be moved to a `custom_generate` repository in v4.55.0. To prevent loss of backward compatibility, add `trust_remote_code=True` to your `generate` call.


Constrained Abstractive Summary:
Artificial intelligence (AI) has rapidly become a transformative force across various industries. Concerns about algorithmic bias, data privacy, and job displacement are prompting calls for stronger regulations and ethical guidelines. The future of AI looks promising, with ongoing research into general artificial intelligence (AGI) and advanced human-computer interaction. Smarter cities, more efficient energy management, and breakthroughs in medicine are all on the horizon. To ensure AI serves humanity’s best interests, governments, companies, and researchers must engage in continuous public discourse, adapt  protections, and focus on ethical deployment of this powerful technology. Back to Mail Online home. back to the page you came from.  "The Future of AI" is publishedEducation


#Input text with original text, extrative summary and abstractive summary

In [16]:
#original text
x1 = """In our prior work, we have found that technological innovation involves not only creating new ideas but also translating those ideas into a new product or service. Innovation, and the research driving it, is inherently risky because the likelihood that research can be translated into a product or service and the ultimate value of that product or service are unknown. The Department of Commerce\u2019s National Institute of Standards and Technology describes the path from innovation to commercialization as comprised of three overarching stages: inventing, transitioning to making, and selling. (See fig. 1 for a description of the path from innovation to commercialization.) FDA and USDA have responsibility for overseeing the safety of the food supply. In general, FDA is responsible for ensuring the safety of virtually all domestic and imported food products except those regulated by USDA. USDA is responsible for ensuring the safety of meat, poultry, processed egg products, and catfish. FDA and USDA cooperate with states, tribes, and local food safety and public health agencies to carry out their federal responsibilities. FDA and USDA carry out their responsibilities in part through inspections of facilities where food is produced. The frequency of inspections the agencies conduct varies, as follows: FDA. FDA\u2019s authority requires a risk-based approach, in which inspection rates vary depending on the level of risk associated with a food product. FDA conducts risk-based inspections of high-risk and non-high-risk food facilities. For example, the FDA Food Safety Modernization Act, signed into law in 2011, specified that FDA had to inspect all high-risk domestic facilities at least every 3 years. USDA. Depending on the type of facility, USDA conducts inspections at least once per operating shift or maintains a constant presence. Specifically, USDA conducts carcass-by-carcass inspection at all federally inspected meat and poultry slaughter facilities and verifies that these establishments follow all food safety and humane handling requirements. At facilities that process meat and poultry products, USDA conducts inspections at least once per production shift, following the agency\u2019s longstanding interpretation of its statutes requiring it to do so. Among other things, the Federal Food, Drug, and Cosmetic Act requires that food additives be approved by FDA before they can be lawfully used in foods. Substances added to food are considered unsafe unless the agency establishes that the use of the food additive, under specific conditions for use, will be safe, or unless the substance is generally recognized as safe (GRAS) under the conditions of its intended use among qualified experts. As we reported in 2010, the Federal Food, Drug, and Cosmetic Act exempts GRAS substances from the act\u2019s general requirement that companies obtain FDA approval before marketing food containing a new additive. GRAS substances include hundreds of spices and artificial flavors, emulsifiers and binders, vitamins and minerals, and preservatives that manufacturers add to enhance a food\u2019s taste, texture, nutritional content, or shelf life. The GRAS exemption allows companies, without notice to or approval from FDA, to determine whether there is enough support to claim a substance is GRAS. For a company to claim a substance is GRAS, it must conclude that there is common knowledge about the safety of the substance among experts qualified by scientific training and experience to evaluate its safety. In addition, as part of their oversight of the food supply, FDA and USDA oversee food labeling of the products under their respective jurisdictions. USDA, by statute, is charged with assuring that products under its jurisdiction, including meat, poultry, and catfish, in interstate or foreign commerce are properly marked, labeled, and packaged. USDA develops and applies the labeling requirements for these products, and food manufacturers are responsible for complying with the USDA labeling rules and adhering to the process maintained by USDA for the evaluation and approval of these product labels. Consistent with its statutes, USDA requires preapproval of all labels before manufacturers can market their products. The Federal Food, Drug, and Cosmetic Act prohibits the misbranding of food, which includes food labeling that is false or misleading. Consistent with its statutes, FDA ensures that foods within its jurisdiction are not misbranded by focusing on the labels of products already in the market. FDA establishes regulations for the enforcement of these provisions and issues guidance. Food manufacturers are responsible for compliance with misbranding provisions in the Federal Food, Drug, and Cosmetic Act and its implementing regulations. From time to time, new technologies, such as those used to make cell- cultured meat, generate challenges for FDA\u2019s and USDA\u2019s regulatory structure. Other examples of new food technologies to which federal agencies have needed to adapt include the genetic modification of plants and irradiation of foods. In the case of genetically modified plants, there are no specific regulations addressing products resulting from the manipulation of the genetic material of living seeds. However, under FDA policy, new genetically engineered crop varieties are treated like other foods (including their conventional counterparts) under the Federal Food Drug and Cosmetic Act and may not contain either unapproved food additives or contaminants that would adulterate the food. In 1995, FDA established a voluntary pre-market consultation process through which companies are encouraged to notify the agency before marketing a food produced from a genetically modified crop and voluntarily submit a summary of the developer-performed safety assessment. FDA evaluates the safety assessment for any issues that need to be addressed and works with the developer to resolve those issues. In the case of irradiated foods, companies seeking approval for a source of radiation used to treat a food may submit a food additive petition to FDA demonstrating the safety of the proposed use. FDA grants approval only after agency scientists have determined that the proposed use is safe, then the process can be employed commercially. General information about the process of making cell-cultured meat is available, but specific information about the technology being used and the eventual commercial production methods as well as the final products is not yet known. While firms may vary in how they make cell-cultured meat, the general process they use can be described in five phases. However, the technology and methods to commercially produce cell- cultured meat are still in development, and producers, regulators, and consumers do not yet have clarity on what these will entail. The composition of the final product is also not yet known. The general process for making cell-cultured meat contains five phases: biopsy, cell banking, growth, harvest, and food processing. (See fig. 2.) The five-phase process is generally as follows: 1. Biopsy. A biopsy is taken by collecting rice-sized tissue samples from an animal, such as livestock, chicken, or fish. During this and subsequent phases, specific laboratory sanitation procedures are followed, and antibiotics may be used in order to avoid or minimize contamination from bacteria. Growth Media According to researchers and representatives from cell-cultured meat firms, the growth media for cell-cultured meat often contains fetal bovine serum, which is obtained from blood drawn from a bovine fetus at slaughter. However, researchers and representatives from cell-culturing firms we spoke with said they are working to develop growth media that do not contain fetal bovine serum. Representatives from some of these firms also told us that the composition of the growth media, including the exact ingredients and their proportions, can vary based on the specific needs of the cells and the variety of serum used. For example, cell-cultured seafood may have different growth media and environmental requirements than cell-cultured livestock and poultry. 2. Cell banking. Biopsied cells with the most desirable traits are selected and either used immediately for cell growth or frozen to create a cell bank for later use. These desirable traits can be obtained by either selecting existing cells or using genetic engineering methods to insert, delete, or edit the DNA to target desired traits in cells. Examples of desirable traits may include cells that divide quickly, cells that divide a greater number of times, cells that result in a reduced cholesterol or fat content or other desirable nutritional traits, or cells that are more resilient to environmental factors, such as temperature, than other cells. According to agency officials and representatives from cell-cultured meat firms, this phase represents an important opportunity to ensure that the source cells used to initiate commercial production are free of pathogens or other contaminants. 3. Growth. During the cell growth phase, cells are placed in a bioreactor and begin to divide and differentiate. A bioreactor is a container that creates an environment that can sustain the growth of cells and includes the ability to control factors such as temperature, pH, and oxygen and carbon dioxide concentrations. Bioreactors can vary in size, including microwave-sized and refrigerator-sized units, but could be as large as 20 to 30 feet tall in commercial production. Bioreactors contain a growth medium, which may include ingredients such as glucose, amino acids, hormones and other growth factors, and other basic nutrients that cells need to consume in order to thrive. In addition to the medium needed for growth, the cells may need to be attached to a structure, referred to as a scaffold, to properly develop into cell-cultured meat. 4. Harvest. Once the cells have divided to form a sufficiently large amount of cell-cultured meat, producers remove\u2014or harvest\u2014it from the growth medium and bioreactor. """
x2="""If a scaffold was used to provide a structure for cells to grow on, then the cell-cultured meat would either be separated from the scaffold during harvesting or left attached to an edible scaffold. 5. Food processing. The harvested cell-cultured meat is then prepared into a product such as meatballs or chicken nuggets. In the future, products similar to intact cuts of meat such as steak or chicken breast may be produced. The technology to produce cell-cultured meat at a commercial scale is still in development, and information about the methods to be used for commercial production and the composition of the final product are not yet known. In the continuum of moving a technology from innovation to commercialization, cell-cultured meat firms are in the middle stage of building and testing their prototypes, based on our discussions with representatives from these firms. Consequently, they have not finalized aspects of the technology and eventual commercial production methods to be used or the composition of the final product. As a result, certain information is not yet available to stakeholders\u2014including cell-cultured meat firms themselves, regulators, and the public\u2014about specific aspects of the technology and commercial production methods that will be used, such as the composition of the growth medium and of the final products. In addition to technology development, the scarcity of publicly available research on cell-cultured meat production limits information available to agency officials and the public. Each cell-cultured meat firm is developing detailed information on its own eventual commercial production methods for making cell-cultured meat. However, the firms, similar to other technology start-ups, are reluctant to disclose intellectual property and business-sensitive information due to concerns about competition. For example, one firm told us that they can reverse engineer parts of another company\u2019s commercial production method by seeing pictures of the equipment the other company is using. In addition, cell-cultured meat firms compete with other firms for funding from sources such as venture capitalists, foreign governments, and conventional meat companies. This competition for funding contributes to firms being reluctant to share information they consider important intellectual property, such as parts of their production processes. As a result, agency officials and other stakeholders told us that they must largely rely on whatever information the cell-cultured meat firms are willing to provide to understand details of the companies\u2019 prototype processes and products. This limitation can affect agencies\u2019 ability to make regulatory and other decisions. Specifically, FDA and USDA officials said they have limited information on cell-cultured meat production methods and products and need more in order to regulate this new food. One USDA official explained that the agency cannot establish labeling requirements if the agency does not know the nutritional profile of the final product. For example, if the scaffold on which the cell-cultured meat is grown is not edible, the agencies may require firms to disclose certain aspects of their commercial production methods, such as how they removed the cell- cultured meat from the scaffold. However, if the scaffold is edible, it will affect the final composition of the product, which may require different labeling than a product that was developed without edible scaffolding. This lack of information results in unanswered questions about cell- cultured meat as it relates to the eventual technology and commercial production methods to be used and the composition of the final products. Among other things, this lack of information creates challenges for industry and federal regulatory agencies as cell-cultured meat nears commercialization. The sources we reviewed and stakeholders we talked to identified a number of open questions, including the following: Tissue collection. How often will producers need to collect biopsy samples from animals, and what animals will be used? Some stakeholders have stated concerns about whether, and how, regulators will ensure that biopsies are collected from healthy animals. For example, one cell-cultured meat firm stated that tissue samples would be taken from slaughtered donor animals that met federal standards for conventional processing at the time of slaughter. However, USDA and FDA have not indicated whether they would require cell-cultured meat firms to do so. Additionally, representatives from cell-cultured meat firms stated that they did not yet know how frequently they would need to collect biopsies from animals for commercial-level production. Additionally, according to researchers, there are too many unknowns to accurately estimate how much cell- cultured meat could be produced from a single biopsy of animal tissue. Genetic engineering. Will commercial production methods involve genetic engineering? Some stakeholders expressed concern that the use of genetic engineering in cell-cultured meat production could cause the product to experience a lengthy wait for regulatory approval, similar to that for genetically engineered salmon, which took approximately 20 years. One representative from a cell-cultured meat firm noted that uncertainty about pending government regulations could negatively affect firms\u2019 ability to attract and retain investors. Representatives from some firms said understanding what regulatory requirements will look like might influence which scientific pathways they pursue as they continue to develop their commercial production methods. According to FDA officials and representatives from one cell-cultured meat firm, it is likely that some firms will use genetic engineering in their commercial cell-cultured meat production methods. However, representatives from two other cell-cultured meat firms told us they were undecided as to whether they would use genetic engineering in their commercial production methods. Antibiotics. Will antibiotics be used to make cell-cultured meat, and will residues be present in the final product? According to agency officials, the presence of antibiotics in commercial production and the potential for residues in the resulting product would represent a significant potential concern for food safety and public health. Officials stated that they would not expect antibiotics to be used past the cell- banking phase. Representatives from cell-cultured meat firms we spoke to differed on whether they planned to use antibiotics in their commercial production process, but they had not finalized their decisions. According to one firm, if antibiotics are used, the use would be limited both in quantity and duration. Growth medium. What type of growth medium will producers use, and how might variations in the media affect the final product? According to agency officials and other stakeholders, the ingredients used in the growth medium could affect the end product\u2019s composition and raise potential safety concerns. For example, FDA officials stated that residual growth factors, such as hormones, in the final product would be something they would likely evaluate in premarket consultations. However, representatives from cell-cultured meat firms stated that their firms have not finalized the medium they plan to use. In addition, the formulation of the medium firms use could be an important piece of intellectual property or confidential business information. Scaffold. What type of scaffold will producers use, if any, and will it be edible or inedible? The use of edible or food-grade scaffolds, where they are used, will affect the composition of the product and may need to be evaluated by federal agencies for safety. According to USDA officials, the composition of edible scaffolding may also create labeling and jurisdictional concerns. For example, USDA officials stated that the addition of edible scaffolding may require significant additional aspects of production to be subject to USDA jurisdiction. Additionally, researchers have commented that a chemical separation technique needed to separate some inedible scaffolds may also need to be evaluated for potential safety concerns. Point of harvest. How will FDA and USDA define the point of harvest? The point of harvest is the point at which FDA will transfer oversight responsibilities, including inspections, to USDA. Stakeholders have raised concerns that not having a clear definition of the point of harvest could lead to challenges such as overlapping inspection requirements or a gap in inspection. Representatives from several cell-cultured meat firms we spoke to in the spring of 2019 said it was ambiguous how FDA and USDA intended to define the point of harvest. These representatives also said it is unclear how often each agency plans to conduct inspections during the phases for which it is responsible. Agency officials stated that they are working to develop a detailed process for the transfer of jurisdiction, including defining the point of harvest. Scaling up production. How will firms scale up production to commercial levels? One 2018 study conducted by researchers in the United Kingdom stated that to produce one pound of cell-cultured meat, firms would need bioreactors at least 2 1\/2 times larger than what is currently available. Similarly, a senior FDA official stated that the capacity of existing production equipment is a challenge for firms seeking to produce cell-cultured meat products at a commercial scale. As a result, the firms themselves may have to develop the equipment or custom order such equipment. Representatives from one cell- cultured meat firm told us that they are interacting with equipment providers to identify commercial-scale production equipment. Production cost. How will firms sell their product at a price point that is both profitable to the firms and affordable to the consumer? Some studies and stakeholders we interviewed, including representatives from cell-cultured meat firms, said that the high production cost of cell- cultured meat is a key industry challenge. For example, in the last two years, one firm reported that it cost $600 to make a cell-cultured meat hamburger patty and reported that it cost about $1,200 to produce a single cell-cultured meatball. One of the biggest cost drivers in the production of cell-cultured meat is the growth medium, according to some studies and some cell-cultured meat firms. To address issues of cost and scale, some firms may develop their own, less expensive growth media. Safety considerations. Are potential safety hazards in commercial production methods for cell-cultured meat different from those for conventional meat, and how will eventual commercial production methods affect the overall safety of the product? According to agency officials, cell-cultured meat may present different safety challenges compared to conventional meat. For example, according to agency officials, residues and constituents in harvested cell-cultured meat would be expected to be different from those in conventional meat, depending on the details of the production process. Representatives from one cell-cultured meat firm told us that they likely will use food processing techniques similar to those used for conventional meat, abide by similar health and safety standards, and possibly share food processing facilities. However, because specific information about commercial production methods and final products is not yet known, it is unclear whether cell-cultured meat produced on a commercial scale will pose any hazards not present in conventional meat. Product composition. What will be the composition of any eventual products? Agency officials told us that without knowing the composition of a cell-cultured meat product, it is impossible to predict how food safety and labeling requirements will apply. According to representatives from some cell-cultured meat firms, initial cell-cultured meat products most likely will not be composed entirely of cell- cultured meat but, rather, a mixture of cell-cultured meat and other ingredients such as binding, flavoring ingredients, and plant-based materials used in conventional food products. Some firms have developed prototypes of cell-cultured meat products as part of their research and development. In April 2019, representatives from one firm told us that their prototype included about 90 percent plant-based ingredients and 10 percent cell-cultured meat. However, representatives from cell-cultured meat firms stated that they aim to produce products that contain more cell-cultured meat than other ingredients. For example, some cell-cultured meat firms have stated that a long-term goal is to commercially produce cell-cultured meat products that are similar to intact cuts of meat, such as steaks. As of December 2019, these firms had not provided regulators with specific information detailing the composition of their cell-cultured meat prototypes, according to FDA and USDA officials. Environmental, animal welfare, and health impacts. How will cell- cultured meat impact the environment, animal welfare, or human health, if at all? Cell-cultured meat firms and researchers have made various claims about the potential environmental, animal welfare, and health advantages of cell-cultured meat over conventionally produced meat. For example, some cell-cultured meat firms have claimed that cell-cultured meat production would use less water and emit less greenhouse gases than conventional meat production. Some cell- cultured meat firms have also claimed that cell-cultured meat will improve animal welfare because slaughter will be unnecessary. Additionally, some stakeholders stated that because there is less opportunity for contamination from animal feces\u2014a potential source of contamination for conventional meat\u2014cell-cultured meat would be less likely than conventional meat to contain foodborne pathogens. However, there are disagreements regarding the accuracy of these claims. Stakeholders told us that until commercial production methods and final products are established, these claims about impacts on the environment, animal welfare, and human health will remain unsubstantiated. Timeline to market. When will cell-cultured meat products reach consumers? As of December 2019, no cell-cultured meat products were available for retail sale in the United States. Stakeholders give varying estimates for when cell-cultured meat may be commercially available. Some estimates suggest that firms may be able to commercially produce some form of cell-cultured meat product as soon as 2020, while others estimate that such products may not be available for 2 to 4 years. Labeling. How will cell-cultured meat be labeled? Labeling was an area of concern for representatives from both conventional and cell- cultured meat firms who explained that the specific terminology, such as \u201cclean meat\u201d or \u201clab-grown meat,\u201d can sometimes reflect bias for, or against, certain products, potentially affecting consumer acceptance of these products. Additionally, stakeholders, as well as agency officials, have emphasized the importance of labeling to ensure consumers have accurate information about what they are buying. For example, in February 2018 the United States Cattlemen\u2019s Association submitted a petition to USDA requesting that the agency limit the term \u201cbeef\u201d to products \u201cborn, raised, and harvested in a traditional manner\u201d and \u201cmeat\u201d to mean the \u201ctissue or flesh of animals that have been harvested in the traditional manner.\u201d USDA received over 6,000 comments on the petition, and the agency had not responded to the petition as of December 2019. However, according to agency officials, USDA has committed to a public process, likely rulemaking, for the development of labeling requirements for cell- cultured meat and poultry. In addition, in recent years, a number of states have passed laws that could affect the labeling of cell-cultured meat when it comes to market. For example, in 2018, Missouri enacted a law to prohibit plant-based products and cell-cultured meat from being labeled as \u201cmeat.\u201d Consumer Acceptance How will consumers respond to cell-cultured meat? It remains unclear whether consumers will embrace and purchase cell-cultured meat products. Stakeholders we interviewed and studies we reviewed cited consumer acceptance as a challenge for commercializing cell-cultured meat. One study noted that consumers have both positive and negative views toward cell-cultured meat, which could impact their willingness to purchase and consume such products. FDA and USDA have established multiple mechanisms to collaborate on regulatory oversight of cell-cultured meat. Specifically, the agencies have collaborated through a joint public meeting, an interagency agreement, and three working groups. However, the interagency agreement and working groups, which are ongoing mechanisms, do not fully incorporate leading practices for interagency collaboration. In addition, FDA and USDA have not documented which agency will oversee cell-cultured seafood not covered by the interagency agreement. In 2018, FDA and USDA began taking steps to collaborate on the regulatory oversight of cell-cultured meat through several mechanisms: a joint public meeting, an interagency agreement, and three working groups. The agencies held the joint meeting in October 2018 to discuss the use of cell-culture technology to develop products derived from livestock and poultry, and topics included potential hazards, oversight considerations, and labeling. As part of this meeting, FDA and USDA held an open public comment period from September through December 2018, gathered 315 written comments, and offered interested parties the opportunity to offer comments in person. The agencies received public comments from members of the public, as well as from representatives from cell-cultured meat and conventional meat industries, food and consumer safety groups, animal welfare groups, and environmental organizations, among others. The written comments the agencies received focused on such topics as environmental considerations, labeling, potential health and safety implications, and potential regulatory and inspection processes. Stakeholders also presented multiple perspectives on these issues at the meeting. For example, stakeholders expressed different views as to whether cell-cultured meat should be regulated as a food additive, considered a GRAS substance, or whether new regulations were needed. In March 2019, FDA and USDA issued a formal interagency agreement that describes the intended roles and responsibilities of each agency in overseeing cell-cultured meat. The agreement establishes the following: Oversight. FDA will oversee the early phases of growing cell-cultured meat through the point of harvest. During harvest, FDA will work with USDA to transfer regulatory oversight to USDA. USDA will then assume oversight of cell-cultured meat through the food processing phase, including labeling, as shown in figure 3. Types of meat covered. The agreement covers cell-cultured meat derived from species overseen by USDA, such as livestock, poultry, and catfish. Future actions. The agreement also details future actions the agencies plan to take, such as developing a more detailed regulatory framework or standard operating procedures and developing joint principles for product labeling. Reviewing and updating the agreement. The agreement states that the agencies have the ability to modify it as needed and will review the agreement every 3 years to determine whether they should modify or terminate it. In June 2019, FDA and USDA created three working groups to carry out the terms of the interagency agreement."""
x3="""The working groups are comprised of FDA and USDA officials and operate independently, though some individuals are members of multiple groups. The groups are as follows: Pre-market assessment working group. Led by FDA, this group was created to clarify the process FDA will use for pre-market reviews of cell-cultured meat. Labeling working group. Led by USDA, this group will focus on developing joint principles for product labeling and claims. Transfer of jurisdiction working group. Co-led by FDA and USDA, this group will develop procedures for the transfer of inspection at harvest, among other things. According to agency officials, the working groups are still in the initial phases of development, though some have progressed further than others. For example, as of December 2019, the pre-market assessment and labeling groups had met and begun to address various areas, while the transfer of jurisdiction working group was still in discussions to outline the roles, responsibilities, and outcomes for the group and had not held a formal meeting. FDA and USDA could more fully incorporate leading practices for collaboration in their interagency agreement and working groups. We have previously reported that interagency mechanisms or strategies to coordinate programs that address crosscutting issues may reduce potentially duplicative, overlapping, and fragmented efforts. In addition, while collaborative mechanisms may differ in complexity and scope, they all benefit from certain leading practices, which raise issues to consider when implementing these mechanisms. We compared the agencies\u2019 interagency agreement and working groups with the seven leading practices to enhance and sustain interagency collaboration that we previously identified. These leading practices, and examples of the associated issues to consider, are as follows: Defining outcomes and monitoring accountability. Is there a way to track and monitor progress toward short-term and long-term outcomes? Do participating agencies have collaboration-related competencies or performance standards against which individual performance can be evaluated? Bridging organizational cultures. What are the commonalities between the participating agencies\u2019 missions and cultures, and what are some potential challenges? Have participating agencies developed ways for operating across agency boundaries? Have participating agencies agreed on common terminology and definitions? Identifying and sustaining leadership. How will leadership be sustained over the long term? If leadership is shared, have roles and responsibilities been clearly identified and agreed upon? Clarifying roles and responsibilities. Have participating agencies clarified roles and responsibilities? Have participating agencies articulated and agreed to a process for making and enforcing decisions? Including relevant participants. Have all relevant participants been included? Do participants have appropriate knowledge, skills, and abilities to contribute? Identifying and leveraging resources. How will the collaborative mechanism be funded and staffed? Developing and updating written guidance and agreements. If appropriate, have the participating agencies documented their agreement regarding how they will collaborate? (A written document can incorporate agreements reached in any or all of the following areas: leadership, accountability, roles and responsibilities, and resources.) Have participating agencies developed ways to continually update or monitor written agreements? See appendix II for a full list of the associated issues to consider for each leading practice. We found that the interagency agreement for oversight of cell-cultured meat partially incorporates all seven leading practices for collaboration. For example: Defining outcomes and monitoring accountability. The interagency agreement partially incorporates the leading practice of defining outcomes and monitoring progress toward these outcomes. Specifically, the agreement identifies broad outcomes such as the development of labeling principles. However, the agreement does not describe how the agencies will track and monitor progress toward outcomes. Identifying and sustaining leadership. The agreement partially incorporates the leading practice of clarifying leadership structures. For example, it assigns each agency as the lead, or designates shared leadership, for different phases of the cell-cultured meat production process. However, the interagency agreement does not identify how the agencies will sustain leadership over the long term, including through succession planning. We have previously reported that given the importance of leadership to any collaborative effort, transitions and inconsistent leadership can weaken the effectiveness of any collaborative mechanism. Developing and updating written guidance and agreements. The agreement partially incorporates the leading practice of documenting how the agencies will collaborate. For example, the agreement includes a method for updating the document by including a provision that requires a review of the document every 3 years. This is consistent with our leading collaboration practice to continually update or monitor written agreements. However, the interagency agreement does not document how the agencies will track and monitor progress toward short-term and long-term outcomes. Table 1 provides more detail about the agencies\u2019 incorporation of these leading collaboration practices in their interagency agreement. FDA and USDA officials told us that the interagency agreement was intended to be an initial, general outline for their collaboration. They also said that as the technology to produce cell-cultured meat develops and they implement the agreement, including developing the content of a regulatory program, they will consider incorporating leading practices for interagency collaboration. For example: Clarifying roles and responsibilities. FDA and USDA officials said in December 2019 that through the working groups the agencies would continue to explore and define the specific details of how they will manage their shared oversight responsibility. Including relevant participants. FDA officials said in December 2019 that the agency would like to engage many more stakeholders as it continues to develop its oversight of cell-cultured meat. Identifying and leveraging resources. As of December 2019, the pre-market assessment working group and the labeling working group were working to identify any human resources, physical, or financial resources they might need, according to FDA and USDA officials. The federal food safety system is on our High Risk List due to concerns about fragmentation, which we have reported has caused inconsistent oversight, ineffective coordination, and inefficient use of resources. As the agencies continue to collaborate on their shared oversight of cell- cultured meat, by more fully incorporating all seven leading practices for collaboration into their interagency agreement, they will be better positioned to address potential fragmentation in their efforts to ensure the safety of the food supply as cell-cultured meat products near commercialization and entry into the marketplace. We found that the pre-market assessment, labeling, and transfer of jurisdiction working groups that FDA and USDA created to carry out the terms of the interagency agreement either partially incorporate or do not incorporate the seven leading practices for interagency collaboration. Specifically, all three working groups have partially incorporated three of the seven leading practices for collaboration, but none of the working groups have incorporated the four remaining leading practices. For example: Defining outcomes and monitoring accountability. The working groups have all defined and agreed upon their general purposes. However, FDA and USDA have not established methods, such as milestones and metrics, to evaluate the progress of any of the working groups. For example, FDA officials said in December 2019 that their next steps are to conduct a general and qualitative risk assessment of animal cell culture food technology to systematically identify particular areas of interest from a food safety perspective and prepare detailed procedural guidelines for cell-cultured meat firms to follow. However, the officials did not have time frames or a method to evaluate progress towards completing these actions. Including relevant participants. While the working groups have included relevant FDA and USDA officials, none of the groups have included state or tribal officials in initial discussions and planning. According to the state officials we spoke with, being excluded from these federal-level discussions may hinder their ability to align their safety and labeling requirements, among other things, with federal standards. Developing and updating written guidance and agreements. None of the working groups have documented how they will collaborate. For example, the working groups have not documented leadership, accountability, roles and responsibilities, or resources needed for working groups. Table 2 provides more detail about FDA and USDA\u2019s incorporation of leading collaboration practices in the three working groups. In December 2019, FDA and USDA officials said that as they continued to stand up these working groups, they were considering leading practices for collaboration. For example: Defining outcomes and monitoring accountability. FDA and USDA officials said they were considering means to monitor, evaluate, or report on the results of the pre-market assessment working group. Including relevant participants. FDA and USDA officials said that they were working to determine what knowledge participants in the pre-market assessment working group and the labeling working group needed to perform the work of the working group. Developing and updating written guidance and agreements. FDA and USDA officials said they were considering documenting how they will collaborate in the pre-market assessment working group, including potentially creating a charter for the working group. We have previously reported that fragmentation has caused inconsistent oversight and inefficient use of resources in the federal food safety oversight system. """
x4="""The agencies\u2019 2019 agreement to share oversight of cell-cultured meat creates a new relationship between FDA and USDA, since the agencies will oversee different stages of the production of the same food and hand off oversight at a certain point in that production. These factors contribute to an already complicated system in which the two agencies must coordinate on food safety oversight. In this context, some industry representatives and other stakeholders have expressed concerns about potential fragmentation or overlap in oversight of cell-cultured meat, such as could occur during the harvest phase of cell-cultured meat production when FDA hands off its oversight to USDA. Additionally, representatives from one cell-cultured meat firm stated that avoiding overlap in federal oversight whenever possible was important to them. For example, representatives from one firm pointed to inspection, record-keeping requirements, and regulations as potential areas at risk of overlap. They stated that potential overlap would add unnecessary, burdensome requirements and create an uneven playing field with the conventional meat industry. By more fully incorporating all seven leading practices for interagency collaboration early in the development of the three working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat, ensure consistency and efficient use of resources, and provide clarity to key stakeholders. While FDA and USDA officials told us they have decided who will oversee cell-cultured seafood, they have not formally announced or documented this decision, and some stakeholders have reported confusion or ambiguity about which agency will oversee cell-cultured seafood other than catfish. Specifically, FDA and USDA\u2019s interagency agreement regarding cell-cultured meat states that it covers all cell-cultured meat derived from USDA-amenable species required to bear a USDA mark of inspection, which in the agreement includes livestock, poultry, and catfish. However, the agreement does not mention cell-cultured meat made from the cells of other fish, such as tuna and shellfish. FDA and USDA officials told us that FDA will have sole oversight responsibility for cell-cultured seafood other than catfish. According to FDA officials, they have verbally communicated this decision in various meetings with stakeholders. However, FDA and USDA officials told us that formally documenting FDA\u2019s sole oversight of most cell- cultured seafood in their interagency agreement was unnecessary because FDA currently oversees most conventional seafood. According to cell-cultured meat firms, some firms are working on developing cell- cultured versions of seafood, such as bluefin tuna. However, stakeholders from two cell-cultured meat firms, including representatives of a cell- cultured seafood firm we spoke with in April 2019, stated that they did not know who in the federal government would oversee cell-cultured seafood. Representatives from one cell-cultured seafood firm said that not being able to rule out oversight by USDA prevented them from making key decisions regarding what direction to pursue in developing their commercial production method. While FDA and USDA officials told us they had agreed that FDA would oversee cell-cultured seafood other than catfish, as of December 2019, the agencies had not formally announced or documented this agreement. Developing and updating written guidance and agreements is a leading practice for collaboration, as we have previously reported. In addition, standards for internal control in the federal government state that agency management should externally communicate the necessary quality information to achieve its objectives and should select appropriate methods of communication, such as a written document or a face-to-face meeting. Management should also periodically evaluate the entity\u2019s methods of communication so that the organization has the appropriate tools to communicate quality information throughout and outside of the entity on a timely basis. While FDA and USDA officials have informally communicated to some stakeholders that FDA will have sole oversight of most cell-cultured seafood, FDA has not communicated this information formally or in a method readily available to all relevant stakeholders, such as in their interagency agreement or other publicly available written document. FDA and USDA officials told us that they wanted to communicate this information through outreach to individual firms, but FDA or USDA officials said they did not think that revising their interagency agreement was necessary. By taking steps to document which agency will oversee cell-cultured seafood other than catfish, FDA and USDA will better ensure the public, including key stakeholders such as cell-cultured meat firms, have clarity about the agencies\u2019 oversight responsibilities in this area. Cell-cultured meat is a new food product that raises many questions. FDA and USDA\u2019s shared oversight of cell-cultured meat poses various challenges for these agencies, as well as stakeholders such as industry. Compounding this challenge is that specific information about key aspects of cell-cultured meat, such as the technology and production methods to be used as well as the composition of the products, is not yet known. FDA and USDA have taken steps to collaborate on their shared regulatory oversight of cell-cultured meat, including establishing an interagency agreement and three working groups. However, the interagency agreement only partially incorporates the seven leading collaboration practices that can enhance and sustain agencies\u2019 collaborative efforts, and the working groups either partially incorporate or do not incorporate these leading practices, which has raised concerns about potential fragmentation or overlap in oversight. By more fully incorporating all seven leading practices for collaboration into their interagency agreement, FDA and USDA could build on their existing efforts and be better positioned to sustain and enhance their collaborative efforts. Moreover, by more fully incorporating all seven leading practices for interagency collaboration early in the development of the working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat and ensure they are utilizing resources efficiently or effectively. Furthermore, the interagency agreement states that it covers USDA- amenable species required to bear a USDA mark of inspection, which in the agreement includes livestock, poultry, and catfish but does not include cell-cultured seafood other than catfish. FDA and USDA officials told us they have decided FDA will oversee most cell-cultured seafood, but the agencies have not formally documented this decision. By taking steps to document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish, FDA and USDA could better ensure that members of the public and other key stakeholders such as cell-cultured meat firms have clarity about the agencies\u2019 oversight responsibilities in this area. We are making a total of six recommendations, three to FDA and three to USDA: The Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should more fully incorporate the seven leading practices for effective collaboration in the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. (Recommendation 1) The Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should more fully incorporate the seven leading practices for effective collaboration in the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. (Recommendation 2) As the three cell-cultured meat working groups move forward, the Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should more fully incorporate the seven leading practices for effective collaboration, such as identifying specific outcomes and a way to monitor and evaluate progress toward outcomes. (Recommendation 3) As the three cell-cultured meat working groups move forward, the Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should more fully incorporate the seven leading practices for effective collaboration, such as identifying specific outcomes and a way to monitor and evaluate progress toward outcomes. (Recommendation 4) The Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should clearly document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish. (Recommendation 5) The Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should clearly document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish. (Recommendation 6) We provided a draft of this report to the Department of Health and Human Services\u2019 (HHS) Food and Drug Administration (FDA) and the U.S. Department of Agriculture (USDA) for review and comment. In FDA\u2019s comments, reproduced in appendix III, the agency stated that it values GAO\u2019s recognition of the importance of collaborative mechanisms that facilitate coordination and affirmed its commitment to coordinate closely with USDA to ensure the regulatory framework for cell-cultured meat is clear and transparent to stakeholders. In USDA\u2019s comments, reproduced in appendix IV, the department stated that the report put too much focus on best practices for interagency collaboration and not enough emphasis on industry\u2019s role in providing the agencies with the information they need to move their processes forward to effectively regulate cell-cultured meat."""
x5=""" USDA stated that it is difficult to review a developing technology and its future regulatory oversight when so little detailed information about the technology is known. We agree that the technology to produce cell-cultured meat is still in development and that information about the commercial production methods and composition of the final product are not yet known, as we state in our report. We also acknowledge in our report that having limited information can affect the agencies\u2019 ability to make regulatory and other decisions. We recognize that cell-cultured meat is a new food product that raises many new questions and that specific information about key aspects of cell-cultured meat is not yet known. In light of this challenging context, it is all the more important that FDA and USDA more fully incorporate leading practices for collaboration into their joint efforts in order to ensure they are in the best possible position to oversee this new food product. FDA concurred with two recommendations and partially concurred with one. USDA also concurred with two recommendations and partially concurred with one. Specifically, both agencies agreed with our recommendations regarding (1) more fully incorporating the seven leading practices for effective collaboration in the three cell-cultured meat working groups as they move forward and (2) clearly documenting which agency will oversee cell-cultured seafood other than catfish. FDA and USDA partially concurred with our recommendation, directed to each agency, to more fully incorporate the seven leading practices for effective collaboration into the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. FDA stated that it concurred with the intent of incorporating the seven leading practices into the interagency agreement, and both agencies said that they are open to incorporating the practices into their development of the structure for joint oversight of cell-cultured meat. However, the agencies stated that they did not agree to revise the agreement at this time. FDA and USDA stated that the agreement is a general framework and that incorporating the leading practices would constitute an inappropriate level of detail. Instead, the agencies stated that they believe it would be most valuable to incorporate the leading practices into a more detailed joint framework or standard operating procedure they plan to issue. We appreciate the agencies\u2019 willingness to incorporate the leading practices for effective collaboration into their efforts. The March 2019 interagency agreement states that the agencies have the ability to modify it as needed and will review the agreement every 3 years to determine whether they should modify or terminate it. Therefore, the agencies are due to revisit the agreement in March 2022, if not sooner. Regarding the agencies\u2019 concern that incorporating the leading practices in the interagency agreement would add an inappropriate level of detail, we note that, as we state in our report, the existing agreement already partially incorporates each of the seven leading practices. We continue to believe that FDA and USDA should more fully incorporate the seven leading practices for effective collaboration into their interagency agreement for the joint oversight of cell-cultured meat. Developing a more detailed joint framework or standard operating procedure in accordance with the existing interagency agreement that incorporates those leading practices would meet the intent of our recommendation to improve the effectiveness of the agencies\u2019 collaboration. FDA and USDA also provided technical comments, which we incorporated as appropriate. As agreed with your office, unless you publicly announce its contents earlier, we plan no further distribution of this report until 30 days from its issue date. At that time, we will send copies of this report to the appropriate congressional committees, the Secretary of Health and Human Services, the Secretary of Agriculture, and other interested parties. In addition, the report is available at no charge on the GAO website at http:\/\/www.gao.gov. If you or your staff members have any questions regarding this report, please contact me at (202) 512-3841 or morriss@gao.gov. Contact points for our Offices of Congressional Relations and Public Affairs may be found on the last page of this report. GAO staff who made key contributions to this report are listed in appendix V. Our report (1) describes what is known about methods for commercially producing cell-cultured meat and (2) examines the extent to which the Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) are collaborating to provide regulatory oversight of cell-cultured meat. For both objectives, we conducted a literature review of journal and media articles from 2016 through 2019 to inform our understanding of cell- cultured meat, as well as regulatory activity related to cell-cultured meat in the United States and in other countries. Specifically, we conducted a review of scholarly and trade news from 2016 through July 2019 for specific terms related to cell-cultured meat and regulatory approaches. We conducted searches in more than 30 different academic and trade databases\u2014such as SCOPUS, Foodline, and ProQuest\u2019s Environmental Science Collection\u2014and identified studies relevant to our research objectives. In addition to these formal literature searches, we also asked agency officials and stakeholders to refer us to research articles and publications on cell-cultured meat. We also reviewed documentation from FDA and USDA, including the 2019 interagency agreement, existing memoranda of understanding between the two agencies, Federal Register notices about relevant public meetings, and press releases. We also reviewed documentation such as letters to regulators, presentation slides, and information on organizations\u2019 websites from the cell-cultured meat industry, conventional meat industry, and consumer safety groups, among others. We also interviewed officials from FDA and USDA and representatives of stakeholders from the cell-cultured meat industry and industry associations, conventional meat firms and industry associations, academia, food and consumer safety groups, and state and tribal public health associations, among others. We identified stakeholders to interview through consultation with agency officials and nonfederal stakeholders and through our review of literature. We conducted 17 interviews with representatives or researchers from: six cell-cultured meat firms or industry associations, four conventional meat firms or industry associations, two food and consumer safety groups, one state and tribal public health association, and one food law policy firm. Because this is a nongeneralizable sample, the results of these interviews do not represent the views of all stakeholders involved in or with an interest in the cell-cultured or conventional meat industries or federal regulation of cell-cultured meat. However, they illustrate the range of perspectives on these topics. We also attended public meetings and conferences and conducted site visits to several locations. Specifically, we attended FDA and USDA\u2019s public meeting in October 2018 and four conferences in 2019 that included content pertaining to food safety or cell-cultured meat. We conducted site visits to two conventional meat-processing facilities in Georgia, three cell-cultured meat firms in California, an academic cell- culturing laboratory in California, and a medical cell-culturing facility in Maryland. We identified facilities and laboratories to visit through our literature review, online research, and the assistance of agency officials and stakeholders, such as representatives from the cell-cultured meat and conventional meat industry. To describe what is known about the process for producing cell-cultured meat and potential commercial production methods, we also reviewed two sets of public comments submitted to FDA and USDA in association with the two 2018 public meetings pertaining to cell-cultured meat. These meetings were \u201cFoods Produced Using Animal Cell Culture Technology\u201d in July 2018 and \u201cUse of Cell Culture Technology to Develop Products Derived from Livestock and Poultry\u201d in October 2018. Public comments were submitted by members of the public; representatives from cell- cultured meat firms and industry associations, conventional meat companies and industry associations, food and consumer safety groups, and animal welfare groups; and environmental organizations, among others. We reviewed and analyzed all comments submitted to (1) FDA related to the July 2018 meeting and (2) FDA and USDA related to the October 2018 meeting. We also attended the October 2018 meeting and listened to agency officials\u2019 presentations and oral remarks made by stakeholders and members of the public. We shared our description of the process for making cell-cultured meat, and associated questions, with representatives from three cell-cultured meat firms and academic researchers at two universities for their technical review and incorporated revisions as appropriate. To examine the extent to which FDA and USDA are coordinating to provide regulatory oversight of cell-cultured meat, we identified actions they took to coordinate from July 2018 through April 2020. To identify these actions, we interviewed agency officials, emailed agency officials written questions, reviewed agency documentation and public announcements, and attended public events such as the October 2018 public meeting. We compared the agencies\u2019 interagency agreement and working groups with seven leading practices to enhance and sustain interagency collaboration. Specifically, two independent GAO reviewers assessed the degree to which agencies\u2019 actions incorporated these leading practices. A description of these leading practices and the associated issues to consider is in appendix II. We also assessed the agencies\u2019 actions against standards for internal control in the federal government, including standards related to communicating quality information. In this report, and in our past work, we define collaboration as any joint activity that is intended to produce more public value than could be produced when organizations act alone. We use the terms \u201ccoordination\u201d and \u201ccollaboration\u201d interchangeably in this report. For the purposes of our report, we define cell-cultured meat as food derived from animal cells that were grown in a controlled environment outside of the animal. We define cell-cultured seafood as a subcategory of cell-cultured meat. When referencing conventional meat, we are referring to food produced from the traditional method of slaughtering an animal, such as a cow, hog, chicken, or fish. When referencing seafood, we are referring to shellfish, sea fish, and freshwater fish served as food. We conducted this performance audit from October 2018 to April 2020 in accordance with generally accepted government auditing standards. Those standards require that we plan and perform the audit to obtain sufficient, appropriate evidence to provide a reasonable basis for our findings and conclusions based on our audit objectives. We believe that the evidence obtained provides a reasonable basis for our findings and conclusions based on our audit objectives. Appendix II: Key Issues to Consider for Implementing Interagency Collaborative Mechanisms Issues to consider Have short-term and long-term outcomes been clearly defined? Is there a way to track and monitor progress toward the short-term and long-term outcomes? Do participating agencies have collaboration-related competencies or performance standards against which individual performance can be evaluated? Do participating agencies have the means to recognize and reward accomplishments related to collaboration? What are the missions and organizational cultures of the participating agencies? What are the commonalities between the participating agencies\u2019 missions and cultures and what are some potential challenges? Have participating agencies developed ways for operating across agency boundaries? Have participating agencies agreed on common terminology and definitions? Has a lead agency or individual been identified? If leadership will be shared between one or more agencies, have roles and responsibilities been clearly identified and agreed upon? How will leadership be sustained over the long term? Have participating agencies clarified the roles and responsibilities of the participants? Have participating agencies articulated and agreed to a process for making and enforcing decisions? Have all relevant participants been included? Do the participants have: Full knowledge of the relevant resources in their agency? The ability to commit these resources? The ability to regularly attend activities of the collaborative mechanism? The appropriate knowledge, skills, and abilities to contribute? Developing and updating written guidance and agreements How will the collaborative mechanism be funded? If interagency funding is needed, is it permitted? If interagency funding is needed and permitted, is there a means to track funds in a standardized manner? How will the collaborative mechanism be staffed? Are there incentives available to encourage staff or agencies to participate? If relevant, do agencies have compatible technological systems? Have participating agencies developed online tools or other resources that facilitate joint interactions? If appropriate, have the participating agencies documented their agreement regarding how they will be collaborating? A written document can incorporate agreements reached in any or all of the following areas: Leadership Accountability Roles and responsibilities Resources Have participating agencies developed ways to continually update or monitor written agreements? Steve D. Morris, (202) 512-3841 or morriss@gao.gov In addition to the contact named above, Nico Sloss (Assistant Director), Angela Miles (Analyst-in-Charge), Sahar Angadjivand, Tim Bober, Kevin Bray, Colleen Candrl, Pin En Annie Chou, Tara Congdon, Heather Dowey, Kim Gianopoulos, Gina Hoover, Hayden Huang, Robert Lepzler, Serena Lo, David Lysy, Marc Meyer, Michael Polak, Danny Royer, Sara Sullivan, and Sarah Veale made key contributions to this report"""
para1 = x1+x2+x3+x4+x5
original_document = para1
#extractive summary
para2 = """FDA and USDA have responsibility for overseeing the safety of the food supply. General information about the process of making cell-cultured meat is available, but specific information about the technology being used and the eventual commercial production methods as well as the final products is not yet known. However, the technology and methods to commercially produce cell- cultured meat are still in development, and producers, regulators, and consumers do not yet have clarity on what these will entail. The composition of the final product is also not yet known. The general process for making cell-cultured meat contains five phases: biopsy, cell banking, growth, harvest, and food processing. The technology to produce cell-cultured meat at a commercial scale is still in development, and information about the methods to be used for commercial production and the composition of the final product are not yet known. Consequently, they have not finalized aspects of the technology and eventual commercial production methods to be used or the composition of the final product. As a result, certain information is not yet available to stakeholders\u2014including cell-cultured meat firms themselves, regulators, and the public\u2014about specific aspects of the technology and commercial production methods that will be used, such as the composition of the growth medium and of the final products. This lack of information results in unanswered questions about cell- cultured meat as it relates to the eventual technology and commercial production methods to be used and the composition of the final products. Some firms have developed prototypes of cell-cultured meat products as part of their research and development. In June 2019, FDA and USDA created three working groups to carry out the terms of the interagency agreement. FDA and USDA could more fully incorporate leading practices for collaboration in their interagency agreement and working groups. Developing and updating written guidance and agreements. However, the agreement does not describe how the agencies will track and monitor progress toward outcomes. Developing and updating written guidance and agreements. Developing and updating written guidance and agreements. Developing and updating written guidance and agreements. By more fully incorporating all seven leading practices for interagency collaboration early in the development of the three working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat, ensure consistency and efficient use of resources, and provide clarity to key stakeholders. While FDA and USDA officials told us they have decided who will oversee cell-cultured seafood, they have not formally announced or documented this decision, and some stakeholders have reported confusion or ambiguity about which agency will oversee cell-cultured seafood other than catfish. While FDA and USDA officials told us they had agreed that FDA would oversee cell-cultured seafood other than catfish, as of December 2019, the agencies had not formally announced or documented this agreement. Developing and updating written guidance and agreements is a leading practice for collaboration, as we have previously reported. Compounding this challenge is that specific information about key aspects of cell-cultured meat, such as the technology and production methods to be used as well as the composition of the products, is not yet known. FDA and USDA officials told us they have decided FDA will oversee most cell-cultured seafood, but the agencies have not formally documented this decision. We agree that the technology to produce cell-cultured meat is still in development and that information about the commercial production methods and composition of the final product are not yet known, as we state in our report. FDA concurred with two recommendations and partially concurred with one. USDA also concurred with two recommendations and partially concurred with one. FDA and USDA partially concurred with our recommendation, directed to each agency, to more fully incorporate the seven leading practices for effective collaboration into the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. We continue to believe that FDA and USDA should more fully incorporate the seven leading practices for effective collaboration into their interagency agreement for the joint oversight of cell-cultured meat. GAO staff who made key contributions to this report are listed in appendix V. Our report (1) describes what is known about methods for commercially producing cell-cultured meat and (2) examines the extent to which the Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) are collaborating to provide regulatory oversight of cell-cultured meat. Developing and updating written guidance and agreements How will the collaborative mechanism be funded?"""
extractive_summary = para2
#abstractive summary
abstractive_summary = """Multiple firms have produced cell-cultured meat as part of their research and development. These products appear likely to become available to consumers in coming years. FDA and USDA are the primary agencies responsible for overseeing the safety of the nation's food supply. However, some stakeholders have expressed concern about the agencies' oversight of cell-cultured meat amidst a fragmented federal food safety oversight system. GAO was asked to review federal oversight of cell-cultured meat. This report (1) describes what is known about methods for commercially producing cell-cultured meat, and (2) examines the extent to which FDA and USDA are collaborating to provide regulatory oversight of cell-cultured meat. GAO conducted a literature review; reviewed documentation from FDA, USDA, and stakeholder groups; analyzed public comments submitted to the agencies; compared agency efforts with leading practices for interagency collaboration; and conducted site visits to selected cell-cultured meat firms. General information about the process of making cell-cultured meat\u2014food products grown from the cells of livestock, poultry, and seafood\u2014is available. However, no company is commercially producing cell-cultured meat. Specific information about the technology being used, eventual commercial production methods, and composition of the final products is not yet known. The general process contains five phases: biopsy, cell banking, growth, harvest, and food processing (see figure). The technology and methods to be used for commercial production are still in development, and producers, regulators, and consumers do not have clarity about many specifics about the process and final product. For example, it is unclear whether production methods and products will use or contain genetically-engineered cells or medications such as antibiotics. The Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) have begun collaborating on regulatory oversight of cell-cultured meat. For example, in 2019, the agencies signed an interagency agreement and created three working groups to carry out the terms of the agreement. However, the agreement and working groups could more fully incorporate practices to enhance and sustain collaboration, such as defining outcomes. For example, the agreement identifies the development of labeling principles as an outcome, but does not describe how the agencies will track and monitor progress toward this outcome, and the working groups identify a lead agency but not members' roles. Also, agency officials said they decided FDA would oversee cell-cultured seafood other than catfish, but they have not formally announced or documented this decision. Developing and updating written guidance and agreements is also a leading practice for interagency collaboration. By fully incorporating leading practices into their efforts to collaborate, the agencies could minimize potential overlap and fragmentation, use resources in a more efficient manner, and better ensure the public and other key stakeholders have clarity about the agencies' oversight responsibilities. GAO recommends that FDA and USDA more fully incorporate leading practices for effective collaboration in the agencies' interagency agreement. FDA and USDA partially concurred and indicated a willingness to incorporate these practices in a more detailed agreement, which would also meet the intent of the recommendations. The agencies concurred with the four other recommendations."""

#Siminarity score of 2 paragraphs

In [8]:
import spacy
from sentence_transformers import SentenceTransformer, util

# Load models
nlp = spacy.load("en_core_web_sm")
model = SentenceTransformer("all-MiniLM-L6-v2")

# Sentence splitter
def split_sentences(text):
    return [sent.text.strip() for sent in nlp(text).sents]

# Similarity score based on sentence coverage
def paragraph_similarity(para1, para2, threshold=0.5):
    sents1 = split_sentences(para1)
    sents2 = split_sentences(para2)

    emb1 = model.encode(sents1, convert_to_tensor=True)
    emb2 = model.encode(sents2, convert_to_tensor=True)

    matched = 0
    for i in range(len(sents2)):
        sims = util.cos_sim(emb2[i], emb1)[0]
        if sims.max().item() >= threshold:
            matched += 1

    coverage = matched / len(sents2) if sents2 else 0.0
    return round(coverage, 3)

# Example
#para1 = """FDA and USDA have responsibility for overseeing the safety of the food supply. General information about the process of making cell-cultured meat is available, but specific information about the technology being used and the eventual commercial production methods as well as the final products is not yet known. However, the technology and methods to commercially produce cell- cultured meat are still in development, and producers, regulators, and consumers do not yet have clarity on what these will entail. The composition of the final product is also not yet known. The general process for making cell-cultured meat contains five phases: biopsy, cell banking, growth, harvest, and food processing. The technology to produce cell-cultured meat at a commercial scale is still in development, and information about the methods to be used for commercial production and the composition of the final product are not yet known. Consequently, they have not finalized aspects of the technology and eventual commercial production methods to be used or the composition of the final product. As a result, certain information is not yet available to stakeholders\u2014including cell-cultured meat firms themselves, regulators, and the public\u2014about specific aspects of the technology and commercial production methods that will be used, such as the composition of the growth medium and of the final products. This lack of information results in unanswered questions about cell- cultured meat as it relates to the eventual technology and commercial production methods to be used and the composition of the final products. Some firms have developed prototypes of cell-cultured meat products as part of their research and development. In June 2019, FDA and USDA created three working groups to carry out the terms of the interagency agreement. FDA and USDA could more fully incorporate leading practices for collaboration in their interagency agreement and working groups. Developing and updating written guidance and agreements. However, the agreement does not describe how the agencies will track and monitor progress toward outcomes. Developing and updating written guidance and agreements. Developing and updating written guidance and agreements. Developing and updating written guidance and agreements. By more fully incorporating all seven leading practices for interagency collaboration early in the development of the three working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat, ensure consistency and efficient use of resources, and provide clarity to key stakeholders. While FDA and USDA officials told us they have decided who will oversee cell-cultured seafood, they have not formally announced or documented this decision, and some stakeholders have reported confusion or ambiguity about which agency will oversee cell-cultured seafood other than catfish. While FDA and USDA officials told us they had agreed that FDA would oversee cell-cultured seafood other than catfish, as of December 2019, the agencies had not formally announced or documented this agreement. Developing and updating written guidance and agreements is a leading practice for collaboration, as we have previously reported. Compounding this challenge is that specific information about key aspects of cell-cultured meat, such as the technology and production methods to be used as well as the composition of the products, is not yet known. FDA and USDA officials told us they have decided FDA will oversee most cell-cultured seafood, but the agencies have not formally documented this decision. We agree that the technology to produce cell-cultured meat is still in development and that information about the commercial production methods and composition of the final product are not yet known, as we state in our report. FDA concurred with two recommendations and partially concurred with one. USDA also concurred with two recommendations and partially concurred with one. FDA and USDA partially concurred with our recommendation, directed to each agency, to more fully incorporate the seven leading practices for effective collaboration into the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. We continue to believe that FDA and USDA should more fully incorporate the seven leading practices for effective collaboration into their interagency agreement for the joint oversight of cell-cultured meat. GAO staff who made key contributions to this report are listed in appendix V. Our report (1) describes what is known about methods for commercially producing cell-cultured meat and (2) examines the extent to which the Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) are collaborating to provide regulatory oversight of cell-cultured meat. Developing and updating written guidance and agreements How will the collaborative mechanism be funded?"""
#para2 = """Multiple firms have produced cell-cultured meat as part of their research and development. These products appear likely to become available to consumers in coming years. FDA and USDA are the primary agencies responsible for overseeing the safety of the nation's food supply. However, some stakeholders have expressed concern about the agencies' oversight of cell-cultured meat amidst a fragmented federal food safety oversight system. GAO was asked to review federal oversight of cell-cultured meat. This report (1) describes what is known about methods for commercially producing cell-cultured meat, and (2) examines the extent to which FDA and USDA are collaborating to provide regulatory oversight of cell-cultured meat. GAO conducted a literature review; reviewed documentation from FDA, USDA, and stakeholder groups; analyzed public comments submitted to the agencies; compared agency efforts with leading practices for interagency collaboration; and conducted site visits to selected cell-cultured meat firms. General information about the process of making cell-cultured meat\u2014food products grown from the cells of livestock, poultry, and seafood\u2014is available. However, no company is commercially producing cell-cultured meat. Specific information about the technology being used, eventual commercial production methods, and composition of the final products is not yet known. The general process contains five phases: biopsy, cell banking, growth, harvest, and food processing (see figure). The technology and methods to be used for commercial production are still in development, and producers, regulators, and consumers do not have clarity about many specifics about the process and final product. For example, it is unclear whether production methods and products will use or contain genetically-engineered cells or medications such as antibiotics. The Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) have begun collaborating on regulatory oversight of cell-cultured meat. For example, in 2019, the agencies signed an interagency agreement and created three working groups to carry out the terms of the agreement. However, the agreement and working groups could more fully incorporate practices to enhance and sustain collaboration, such as defining outcomes. For example, the agreement identifies the development of labeling principles as an outcome, but does not describe how the agencies will track and monitor progress toward this outcome, and the working groups identify a lead agency but not members' roles. Also, agency officials said they decided FDA would oversee cell-cultured seafood other than catfish, but they have not formally announced or documented this decision. Developing and updating written guidance and agreements is also a leading practice for interagency collaboration. By fully incorporating leading practices into their efforts to collaborate, the agencies could minimize potential overlap and fragmentation, use resources in a more efficient manner, and better ensure the public and other key stakeholders have clarity about the agencies' oversight responsibilities. GAO recommends that FDA and USDA more fully incorporate leading practices for effective collaboration in the agencies' interagency agreement. FDA and USDA partially concurred and indicated a willingness to incorporate these practices in a more detailed agreement, which would also meet the intent of the recommendations. The agencies concurred with the four other recommendations."""

similarity_score = paragraph_similarity(para1, para2)
print(f"Coverage score: {similarity_score} (out of 1.0)")


Coverage score: 0.957 (out of 1.0)


#Sentence similarity score with ROUGE, BERTscore,FACTcc

In [14]:
import spacy
from sentence_transformers import SentenceTransformer, util
from rouge_score import rouge_scorer
import bert_score

# Load models
nlp = spacy.load("en_core_web_sm")
model = SentenceTransformer("all-MiniLM-L6-v2")

# Sentence splitter
def split_sentences(text):
    return [sent.text.strip() for sent in nlp(text).sents]

# Coverage score
def get_sentence_coverage(para1, para2, threshold=0.5):
    sents1 = split_sentences(para1)
    sents2 = split_sentences(para2)

    emb1 = model.encode(sents1, convert_to_tensor=True)
    emb2 = model.encode(sents2, convert_to_tensor=True)

    matched = 0
    for i in range(len(sents2)):
        sims = util.cos_sim(emb2[i], emb1)[0]
        if sims.max().item() >= threshold:
            matched += 1

    coverage = matched / len(sents2) if sents2 else 0.0
    return round(coverage, 4)

# ROUGE score
def get_rouge_score(reference, candidate):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference, candidate)
    return {
        "rouge1": round(scores["rouge1"].fmeasure, 4),
        "rouge2": round(scores["rouge2"].fmeasure, 4),
        "rougeL": round(scores["rougeL"].fmeasure, 4)
    }

# BERTScore
def get_bert_score(reference, candidate, lang="en"):
    P, R, F1 = bert_score.score([candidate], [reference], lang=lang, verbose=False)
    return {
        "bertscore_precision": round(P[0].item(), 4),
        "bertscore_recall": round(R[0].item(), 4),
        "bertscore_f1": round(F1[0].item(), 4)
    }

# Combined comparison
def compare_paragraphs(para1, para2):
    scores = {
        "coverage_score": get_sentence_coverage(para1, para2),
        **get_rouge_score(para1, para2),
        **get_bert_score(para1, para2)
    }
    return scores

#original text
x1 = """In our prior work, we have found that technological innovation involves not only creating new ideas but also translating those ideas into a new product or service. Innovation, and the research driving it, is inherently risky because the likelihood that research can be translated into a product or service and the ultimate value of that product or service are unknown. The Department of Commerce\u2019s National Institute of Standards and Technology describes the path from innovation to commercialization as comprised of three overarching stages: inventing, transitioning to making, and selling. (See fig. 1 for a description of the path from innovation to commercialization.) FDA and USDA have responsibility for overseeing the safety of the food supply. In general, FDA is responsible for ensuring the safety of virtually all domestic and imported food products except those regulated by USDA. USDA is responsible for ensuring the safety of meat, poultry, processed egg products, and catfish. FDA and USDA cooperate with states, tribes, and local food safety and public health agencies to carry out their federal responsibilities. FDA and USDA carry out their responsibilities in part through inspections of facilities where food is produced. The frequency of inspections the agencies conduct varies, as follows: FDA. FDA\u2019s authority requires a risk-based approach, in which inspection rates vary depending on the level of risk associated with a food product. FDA conducts risk-based inspections of high-risk and non-high-risk food facilities. For example, the FDA Food Safety Modernization Act, signed into law in 2011, specified that FDA had to inspect all high-risk domestic facilities at least every 3 years. USDA. Depending on the type of facility, USDA conducts inspections at least once per operating shift or maintains a constant presence. Specifically, USDA conducts carcass-by-carcass inspection at all federally inspected meat and poultry slaughter facilities and verifies that these establishments follow all food safety and humane handling requirements. At facilities that process meat and poultry products, USDA conducts inspections at least once per production shift, following the agency\u2019s longstanding interpretation of its statutes requiring it to do so. Among other things, the Federal Food, Drug, and Cosmetic Act requires that food additives be approved by FDA before they can be lawfully used in foods. Substances added to food are considered unsafe unless the agency establishes that the use of the food additive, under specific conditions for use, will be safe, or unless the substance is generally recognized as safe (GRAS) under the conditions of its intended use among qualified experts. As we reported in 2010, the Federal Food, Drug, and Cosmetic Act exempts GRAS substances from the act\u2019s general requirement that companies obtain FDA approval before marketing food containing a new additive. GRAS substances include hundreds of spices and artificial flavors, emulsifiers and binders, vitamins and minerals, and preservatives that manufacturers add to enhance a food\u2019s taste, texture, nutritional content, or shelf life. The GRAS exemption allows companies, without notice to or approval from FDA, to determine whether there is enough support to claim a substance is GRAS. For a company to claim a substance is GRAS, it must conclude that there is common knowledge about the safety of the substance among experts qualified by scientific training and experience to evaluate its safety. In addition, as part of their oversight of the food supply, FDA and USDA oversee food labeling of the products under their respective jurisdictions. USDA, by statute, is charged with assuring that products under its jurisdiction, including meat, poultry, and catfish, in interstate or foreign commerce are properly marked, labeled, and packaged. USDA develops and applies the labeling requirements for these products, and food manufacturers are responsible for complying with the USDA labeling rules and adhering to the process maintained by USDA for the evaluation and approval of these product labels. Consistent with its statutes, USDA requires preapproval of all labels before manufacturers can market their products. The Federal Food, Drug, and Cosmetic Act prohibits the misbranding of food, which includes food labeling that is false or misleading. Consistent with its statutes, FDA ensures that foods within its jurisdiction are not misbranded by focusing on the labels of products already in the market. FDA establishes regulations for the enforcement of these provisions and issues guidance. Food manufacturers are responsible for compliance with misbranding provisions in the Federal Food, Drug, and Cosmetic Act and its implementing regulations. From time to time, new technologies, such as those used to make cell- cultured meat, generate challenges for FDA\u2019s and USDA\u2019s regulatory structure. Other examples of new food technologies to which federal agencies have needed to adapt include the genetic modification of plants and irradiation of foods. In the case of genetically modified plants, there are no specific regulations addressing products resulting from the manipulation of the genetic material of living seeds. However, under FDA policy, new genetically engineered crop varieties are treated like other foods (including their conventional counterparts) under the Federal Food Drug and Cosmetic Act and may not contain either unapproved food additives or contaminants that would adulterate the food. In 1995, FDA established a voluntary pre-market consultation process through which companies are encouraged to notify the agency before marketing a food produced from a genetically modified crop and voluntarily submit a summary of the developer-performed safety assessment. FDA evaluates the safety assessment for any issues that need to be addressed and works with the developer to resolve those issues. In the case of irradiated foods, companies seeking approval for a source of radiation used to treat a food may submit a food additive petition to FDA demonstrating the safety of the proposed use. FDA grants approval only after agency scientists have determined that the proposed use is safe, then the process can be employed commercially. General information about the process of making cell-cultured meat is available, but specific information about the technology being used and the eventual commercial production methods as well as the final products is not yet known. While firms may vary in how they make cell-cultured meat, the general process they use can be described in five phases. However, the technology and methods to commercially produce cell- cultured meat are still in development, and producers, regulators, and consumers do not yet have clarity on what these will entail. The composition of the final product is also not yet known. The general process for making cell-cultured meat contains five phases: biopsy, cell banking, growth, harvest, and food processing. (See fig. 2.) The five-phase process is generally as follows: 1. Biopsy. A biopsy is taken by collecting rice-sized tissue samples from an animal, such as livestock, chicken, or fish. During this and subsequent phases, specific laboratory sanitation procedures are followed, and antibiotics may be used in order to avoid or minimize contamination from bacteria. Growth Media According to researchers and representatives from cell-cultured meat firms, the growth media for cell-cultured meat often contains fetal bovine serum, which is obtained from blood drawn from a bovine fetus at slaughter. However, researchers and representatives from cell-culturing firms we spoke with said they are working to develop growth media that do not contain fetal bovine serum. Representatives from some of these firms also told us that the composition of the growth media, including the exact ingredients and their proportions, can vary based on the specific needs of the cells and the variety of serum used. For example, cell-cultured seafood may have different growth media and environmental requirements than cell-cultured livestock and poultry. 2. Cell banking. Biopsied cells with the most desirable traits are selected and either used immediately for cell growth or frozen to create a cell bank for later use. These desirable traits can be obtained by either selecting existing cells or using genetic engineering methods to insert, delete, or edit the DNA to target desired traits in cells. Examples of desirable traits may include cells that divide quickly, cells that divide a greater number of times, cells that result in a reduced cholesterol or fat content or other desirable nutritional traits, or cells that are more resilient to environmental factors, such as temperature, than other cells. According to agency officials and representatives from cell-cultured meat firms, this phase represents an important opportunity to ensure that the source cells used to initiate commercial production are free of pathogens or other contaminants. 3. Growth. During the cell growth phase, cells are placed in a bioreactor and begin to divide and differentiate. A bioreactor is a container that creates an environment that can sustain the growth of cells and includes the ability to control factors such as temperature, pH, and oxygen and carbon dioxide concentrations. Bioreactors can vary in size, including microwave-sized and refrigerator-sized units, but could be as large as 20 to 30 feet tall in commercial production. Bioreactors contain a growth medium, which may include ingredients such as glucose, amino acids, hormones and other growth factors, and other basic nutrients that cells need to consume in order to thrive. In addition to the medium needed for growth, the cells may need to be attached to a structure, referred to as a scaffold, to properly develop into cell-cultured meat. 4. Harvest. Once the cells have divided to form a sufficiently large amount of cell-cultured meat, producers remove\u2014or harvest\u2014it from the growth medium and bioreactor. """
x2="""If a scaffold was used to provide a structure for cells to grow on, then the cell-cultured meat would either be separated from the scaffold during harvesting or left attached to an edible scaffold. 5. Food processing. The harvested cell-cultured meat is then prepared into a product such as meatballs or chicken nuggets. In the future, products similar to intact cuts of meat such as steak or chicken breast may be produced. The technology to produce cell-cultured meat at a commercial scale is still in development, and information about the methods to be used for commercial production and the composition of the final product are not yet known. In the continuum of moving a technology from innovation to commercialization, cell-cultured meat firms are in the middle stage of building and testing their prototypes, based on our discussions with representatives from these firms. Consequently, they have not finalized aspects of the technology and eventual commercial production methods to be used or the composition of the final product. As a result, certain information is not yet available to stakeholders\u2014including cell-cultured meat firms themselves, regulators, and the public\u2014about specific aspects of the technology and commercial production methods that will be used, such as the composition of the growth medium and of the final products. In addition to technology development, the scarcity of publicly available research on cell-cultured meat production limits information available to agency officials and the public. Each cell-cultured meat firm is developing detailed information on its own eventual commercial production methods for making cell-cultured meat. However, the firms, similar to other technology start-ups, are reluctant to disclose intellectual property and business-sensitive information due to concerns about competition. For example, one firm told us that they can reverse engineer parts of another company\u2019s commercial production method by seeing pictures of the equipment the other company is using. In addition, cell-cultured meat firms compete with other firms for funding from sources such as venture capitalists, foreign governments, and conventional meat companies. This competition for funding contributes to firms being reluctant to share information they consider important intellectual property, such as parts of their production processes. As a result, agency officials and other stakeholders told us that they must largely rely on whatever information the cell-cultured meat firms are willing to provide to understand details of the companies\u2019 prototype processes and products. This limitation can affect agencies\u2019 ability to make regulatory and other decisions. Specifically, FDA and USDA officials said they have limited information on cell-cultured meat production methods and products and need more in order to regulate this new food. One USDA official explained that the agency cannot establish labeling requirements if the agency does not know the nutritional profile of the final product. For example, if the scaffold on which the cell-cultured meat is grown is not edible, the agencies may require firms to disclose certain aspects of their commercial production methods, such as how they removed the cell- cultured meat from the scaffold. However, if the scaffold is edible, it will affect the final composition of the product, which may require different labeling than a product that was developed without edible scaffolding. This lack of information results in unanswered questions about cell- cultured meat as it relates to the eventual technology and commercial production methods to be used and the composition of the final products. Among other things, this lack of information creates challenges for industry and federal regulatory agencies as cell-cultured meat nears commercialization. The sources we reviewed and stakeholders we talked to identified a number of open questions, including the following: Tissue collection. How often will producers need to collect biopsy samples from animals, and what animals will be used? Some stakeholders have stated concerns about whether, and how, regulators will ensure that biopsies are collected from healthy animals. For example, one cell-cultured meat firm stated that tissue samples would be taken from slaughtered donor animals that met federal standards for conventional processing at the time of slaughter. However, USDA and FDA have not indicated whether they would require cell-cultured meat firms to do so. Additionally, representatives from cell-cultured meat firms stated that they did not yet know how frequently they would need to collect biopsies from animals for commercial-level production. Additionally, according to researchers, there are too many unknowns to accurately estimate how much cell- cultured meat could be produced from a single biopsy of animal tissue. Genetic engineering. Will commercial production methods involve genetic engineering? Some stakeholders expressed concern that the use of genetic engineering in cell-cultured meat production could cause the product to experience a lengthy wait for regulatory approval, similar to that for genetically engineered salmon, which took approximately 20 years. One representative from a cell-cultured meat firm noted that uncertainty about pending government regulations could negatively affect firms\u2019 ability to attract and retain investors. Representatives from some firms said understanding what regulatory requirements will look like might influence which scientific pathways they pursue as they continue to develop their commercial production methods. According to FDA officials and representatives from one cell-cultured meat firm, it is likely that some firms will use genetic engineering in their commercial cell-cultured meat production methods. However, representatives from two other cell-cultured meat firms told us they were undecided as to whether they would use genetic engineering in their commercial production methods. Antibiotics. Will antibiotics be used to make cell-cultured meat, and will residues be present in the final product? According to agency officials, the presence of antibiotics in commercial production and the potential for residues in the resulting product would represent a significant potential concern for food safety and public health. Officials stated that they would not expect antibiotics to be used past the cell- banking phase. Representatives from cell-cultured meat firms we spoke to differed on whether they planned to use antibiotics in their commercial production process, but they had not finalized their decisions. According to one firm, if antibiotics are used, the use would be limited both in quantity and duration. Growth medium. What type of growth medium will producers use, and how might variations in the media affect the final product? According to agency officials and other stakeholders, the ingredients used in the growth medium could affect the end product\u2019s composition and raise potential safety concerns. For example, FDA officials stated that residual growth factors, such as hormones, in the final product would be something they would likely evaluate in premarket consultations. However, representatives from cell-cultured meat firms stated that their firms have not finalized the medium they plan to use. In addition, the formulation of the medium firms use could be an important piece of intellectual property or confidential business information. Scaffold. What type of scaffold will producers use, if any, and will it be edible or inedible? The use of edible or food-grade scaffolds, where they are used, will affect the composition of the product and may need to be evaluated by federal agencies for safety. According to USDA officials, the composition of edible scaffolding may also create labeling and jurisdictional concerns. For example, USDA officials stated that the addition of edible scaffolding may require significant additional aspects of production to be subject to USDA jurisdiction. Additionally, researchers have commented that a chemical separation technique needed to separate some inedible scaffolds may also need to be evaluated for potential safety concerns. Point of harvest. How will FDA and USDA define the point of harvest? The point of harvest is the point at which FDA will transfer oversight responsibilities, including inspections, to USDA. Stakeholders have raised concerns that not having a clear definition of the point of harvest could lead to challenges such as overlapping inspection requirements or a gap in inspection. Representatives from several cell-cultured meat firms we spoke to in the spring of 2019 said it was ambiguous how FDA and USDA intended to define the point of harvest. These representatives also said it is unclear how often each agency plans to conduct inspections during the phases for which it is responsible. Agency officials stated that they are working to develop a detailed process for the transfer of jurisdiction, including defining the point of harvest. Scaling up production. How will firms scale up production to commercial levels? One 2018 study conducted by researchers in the United Kingdom stated that to produce one pound of cell-cultured meat, firms would need bioreactors at least 2 1\/2 times larger than what is currently available. Similarly, a senior FDA official stated that the capacity of existing production equipment is a challenge for firms seeking to produce cell-cultured meat products at a commercial scale. As a result, the firms themselves may have to develop the equipment or custom order such equipment. Representatives from one cell- cultured meat firm told us that they are interacting with equipment providers to identify commercial-scale production equipment. Production cost. How will firms sell their product at a price point that is both profitable to the firms and affordable to the consumer? Some studies and stakeholders we interviewed, including representatives from cell-cultured meat firms, said that the high production cost of cell- cultured meat is a key industry challenge. For example, in the last two years, one firm reported that it cost $600 to make a cell-cultured meat hamburger patty and reported that it cost about $1,200 to produce a single cell-cultured meatball. One of the biggest cost drivers in the production of cell-cultured meat is the growth medium, according to some studies and some cell-cultured meat firms. To address issues of cost and scale, some firms may develop their own, less expensive growth media. Safety considerations. Are potential safety hazards in commercial production methods for cell-cultured meat different from those for conventional meat, and how will eventual commercial production methods affect the overall safety of the product? According to agency officials, cell-cultured meat may present different safety challenges compared to conventional meat. For example, according to agency officials, residues and constituents in harvested cell-cultured meat would be expected to be different from those in conventional meat, depending on the details of the production process. Representatives from one cell-cultured meat firm told us that they likely will use food processing techniques similar to those used for conventional meat, abide by similar health and safety standards, and possibly share food processing facilities. However, because specific information about commercial production methods and final products is not yet known, it is unclear whether cell-cultured meat produced on a commercial scale will pose any hazards not present in conventional meat. Product composition. What will be the composition of any eventual products? Agency officials told us that without knowing the composition of a cell-cultured meat product, it is impossible to predict how food safety and labeling requirements will apply. According to representatives from some cell-cultured meat firms, initial cell-cultured meat products most likely will not be composed entirely of cell- cultured meat but, rather, a mixture of cell-cultured meat and other ingredients such as binding, flavoring ingredients, and plant-based materials used in conventional food products. Some firms have developed prototypes of cell-cultured meat products as part of their research and development. In April 2019, representatives from one firm told us that their prototype included about 90 percent plant-based ingredients and 10 percent cell-cultured meat. However, representatives from cell-cultured meat firms stated that they aim to produce products that contain more cell-cultured meat than other ingredients. For example, some cell-cultured meat firms have stated that a long-term goal is to commercially produce cell-cultured meat products that are similar to intact cuts of meat, such as steaks. As of December 2019, these firms had not provided regulators with specific information detailing the composition of their cell-cultured meat prototypes, according to FDA and USDA officials. Environmental, animal welfare, and health impacts. How will cell- cultured meat impact the environment, animal welfare, or human health, if at all? Cell-cultured meat firms and researchers have made various claims about the potential environmental, animal welfare, and health advantages of cell-cultured meat over conventionally produced meat. For example, some cell-cultured meat firms have claimed that cell-cultured meat production would use less water and emit less greenhouse gases than conventional meat production. Some cell- cultured meat firms have also claimed that cell-cultured meat will improve animal welfare because slaughter will be unnecessary. Additionally, some stakeholders stated that because there is less opportunity for contamination from animal feces\u2014a potential source of contamination for conventional meat\u2014cell-cultured meat would be less likely than conventional meat to contain foodborne pathogens. However, there are disagreements regarding the accuracy of these claims. Stakeholders told us that until commercial production methods and final products are established, these claims about impacts on the environment, animal welfare, and human health will remain unsubstantiated. Timeline to market. When will cell-cultured meat products reach consumers? As of December 2019, no cell-cultured meat products were available for retail sale in the United States. Stakeholders give varying estimates for when cell-cultured meat may be commercially available. Some estimates suggest that firms may be able to commercially produce some form of cell-cultured meat product as soon as 2020, while others estimate that such products may not be available for 2 to 4 years. Labeling. How will cell-cultured meat be labeled? Labeling was an area of concern for representatives from both conventional and cell- cultured meat firms who explained that the specific terminology, such as \u201cclean meat\u201d or \u201clab-grown meat,\u201d can sometimes reflect bias for, or against, certain products, potentially affecting consumer acceptance of these products. Additionally, stakeholders, as well as agency officials, have emphasized the importance of labeling to ensure consumers have accurate information about what they are buying. For example, in February 2018 the United States Cattlemen\u2019s Association submitted a petition to USDA requesting that the agency limit the term \u201cbeef\u201d to products \u201cborn, raised, and harvested in a traditional manner\u201d and \u201cmeat\u201d to mean the \u201ctissue or flesh of animals that have been harvested in the traditional manner.\u201d USDA received over 6,000 comments on the petition, and the agency had not responded to the petition as of December 2019. However, according to agency officials, USDA has committed to a public process, likely rulemaking, for the development of labeling requirements for cell- cultured meat and poultry. In addition, in recent years, a number of states have passed laws that could affect the labeling of cell-cultured meat when it comes to market. For example, in 2018, Missouri enacted a law to prohibit plant-based products and cell-cultured meat from being labeled as \u201cmeat.\u201d Consumer Acceptance How will consumers respond to cell-cultured meat? It remains unclear whether consumers will embrace and purchase cell-cultured meat products. Stakeholders we interviewed and studies we reviewed cited consumer acceptance as a challenge for commercializing cell-cultured meat. One study noted that consumers have both positive and negative views toward cell-cultured meat, which could impact their willingness to purchase and consume such products. FDA and USDA have established multiple mechanisms to collaborate on regulatory oversight of cell-cultured meat. Specifically, the agencies have collaborated through a joint public meeting, an interagency agreement, and three working groups. However, the interagency agreement and working groups, which are ongoing mechanisms, do not fully incorporate leading practices for interagency collaboration. In addition, FDA and USDA have not documented which agency will oversee cell-cultured seafood not covered by the interagency agreement. In 2018, FDA and USDA began taking steps to collaborate on the regulatory oversight of cell-cultured meat through several mechanisms: a joint public meeting, an interagency agreement, and three working groups. The agencies held the joint meeting in October 2018 to discuss the use of cell-culture technology to develop products derived from livestock and poultry, and topics included potential hazards, oversight considerations, and labeling. As part of this meeting, FDA and USDA held an open public comment period from September through December 2018, gathered 315 written comments, and offered interested parties the opportunity to offer comments in person. The agencies received public comments from members of the public, as well as from representatives from cell-cultured meat and conventional meat industries, food and consumer safety groups, animal welfare groups, and environmental organizations, among others. The written comments the agencies received focused on such topics as environmental considerations, labeling, potential health and safety implications, and potential regulatory and inspection processes. Stakeholders also presented multiple perspectives on these issues at the meeting. For example, stakeholders expressed different views as to whether cell-cultured meat should be regulated as a food additive, considered a GRAS substance, or whether new regulations were needed. In March 2019, FDA and USDA issued a formal interagency agreement that describes the intended roles and responsibilities of each agency in overseeing cell-cultured meat. The agreement establishes the following: Oversight. FDA will oversee the early phases of growing cell-cultured meat through the point of harvest. During harvest, FDA will work with USDA to transfer regulatory oversight to USDA. USDA will then assume oversight of cell-cultured meat through the food processing phase, including labeling, as shown in figure 3. Types of meat covered. The agreement covers cell-cultured meat derived from species overseen by USDA, such as livestock, poultry, and catfish. Future actions. The agreement also details future actions the agencies plan to take, such as developing a more detailed regulatory framework or standard operating procedures and developing joint principles for product labeling. Reviewing and updating the agreement. The agreement states that the agencies have the ability to modify it as needed and will review the agreement every 3 years to determine whether they should modify or terminate it. In June 2019, FDA and USDA created three working groups to carry out the terms of the interagency agreement."""
x3="""The working groups are comprised of FDA and USDA officials and operate independently, though some individuals are members of multiple groups. The groups are as follows: Pre-market assessment working group. Led by FDA, this group was created to clarify the process FDA will use for pre-market reviews of cell-cultured meat. Labeling working group. Led by USDA, this group will focus on developing joint principles for product labeling and claims. Transfer of jurisdiction working group. Co-led by FDA and USDA, this group will develop procedures for the transfer of inspection at harvest, among other things. According to agency officials, the working groups are still in the initial phases of development, though some have progressed further than others. For example, as of December 2019, the pre-market assessment and labeling groups had met and begun to address various areas, while the transfer of jurisdiction working group was still in discussions to outline the roles, responsibilities, and outcomes for the group and had not held a formal meeting. FDA and USDA could more fully incorporate leading practices for collaboration in their interagency agreement and working groups. We have previously reported that interagency mechanisms or strategies to coordinate programs that address crosscutting issues may reduce potentially duplicative, overlapping, and fragmented efforts. In addition, while collaborative mechanisms may differ in complexity and scope, they all benefit from certain leading practices, which raise issues to consider when implementing these mechanisms. We compared the agencies\u2019 interagency agreement and working groups with the seven leading practices to enhance and sustain interagency collaboration that we previously identified. These leading practices, and examples of the associated issues to consider, are as follows: Defining outcomes and monitoring accountability. Is there a way to track and monitor progress toward short-term and long-term outcomes? Do participating agencies have collaboration-related competencies or performance standards against which individual performance can be evaluated? Bridging organizational cultures. What are the commonalities between the participating agencies\u2019 missions and cultures, and what are some potential challenges? Have participating agencies developed ways for operating across agency boundaries? Have participating agencies agreed on common terminology and definitions? Identifying and sustaining leadership. How will leadership be sustained over the long term? If leadership is shared, have roles and responsibilities been clearly identified and agreed upon? Clarifying roles and responsibilities. Have participating agencies clarified roles and responsibilities? Have participating agencies articulated and agreed to a process for making and enforcing decisions? Including relevant participants. Have all relevant participants been included? Do participants have appropriate knowledge, skills, and abilities to contribute? Identifying and leveraging resources. How will the collaborative mechanism be funded and staffed? Developing and updating written guidance and agreements. If appropriate, have the participating agencies documented their agreement regarding how they will collaborate? (A written document can incorporate agreements reached in any or all of the following areas: leadership, accountability, roles and responsibilities, and resources.) Have participating agencies developed ways to continually update or monitor written agreements? See appendix II for a full list of the associated issues to consider for each leading practice. We found that the interagency agreement for oversight of cell-cultured meat partially incorporates all seven leading practices for collaboration. For example: Defining outcomes and monitoring accountability. The interagency agreement partially incorporates the leading practice of defining outcomes and monitoring progress toward these outcomes. Specifically, the agreement identifies broad outcomes such as the development of labeling principles. However, the agreement does not describe how the agencies will track and monitor progress toward outcomes. Identifying and sustaining leadership. The agreement partially incorporates the leading practice of clarifying leadership structures. For example, it assigns each agency as the lead, or designates shared leadership, for different phases of the cell-cultured meat production process. However, the interagency agreement does not identify how the agencies will sustain leadership over the long term, including through succession planning. We have previously reported that given the importance of leadership to any collaborative effort, transitions and inconsistent leadership can weaken the effectiveness of any collaborative mechanism. Developing and updating written guidance and agreements. The agreement partially incorporates the leading practice of documenting how the agencies will collaborate. For example, the agreement includes a method for updating the document by including a provision that requires a review of the document every 3 years. This is consistent with our leading collaboration practice to continually update or monitor written agreements. However, the interagency agreement does not document how the agencies will track and monitor progress toward short-term and long-term outcomes. Table 1 provides more detail about the agencies\u2019 incorporation of these leading collaboration practices in their interagency agreement. FDA and USDA officials told us that the interagency agreement was intended to be an initial, general outline for their collaboration. They also said that as the technology to produce cell-cultured meat develops and they implement the agreement, including developing the content of a regulatory program, they will consider incorporating leading practices for interagency collaboration. For example: Clarifying roles and responsibilities. FDA and USDA officials said in December 2019 that through the working groups the agencies would continue to explore and define the specific details of how they will manage their shared oversight responsibility. Including relevant participants. FDA officials said in December 2019 that the agency would like to engage many more stakeholders as it continues to develop its oversight of cell-cultured meat. Identifying and leveraging resources. As of December 2019, the pre-market assessment working group and the labeling working group were working to identify any human resources, physical, or financial resources they might need, according to FDA and USDA officials. The federal food safety system is on our High Risk List due to concerns about fragmentation, which we have reported has caused inconsistent oversight, ineffective coordination, and inefficient use of resources. As the agencies continue to collaborate on their shared oversight of cell- cultured meat, by more fully incorporating all seven leading practices for collaboration into their interagency agreement, they will be better positioned to address potential fragmentation in their efforts to ensure the safety of the food supply as cell-cultured meat products near commercialization and entry into the marketplace. We found that the pre-market assessment, labeling, and transfer of jurisdiction working groups that FDA and USDA created to carry out the terms of the interagency agreement either partially incorporate or do not incorporate the seven leading practices for interagency collaboration. Specifically, all three working groups have partially incorporated three of the seven leading practices for collaboration, but none of the working groups have incorporated the four remaining leading practices. For example: Defining outcomes and monitoring accountability. The working groups have all defined and agreed upon their general purposes. However, FDA and USDA have not established methods, such as milestones and metrics, to evaluate the progress of any of the working groups. For example, FDA officials said in December 2019 that their next steps are to conduct a general and qualitative risk assessment of animal cell culture food technology to systematically identify particular areas of interest from a food safety perspective and prepare detailed procedural guidelines for cell-cultured meat firms to follow. However, the officials did not have time frames or a method to evaluate progress towards completing these actions. Including relevant participants. While the working groups have included relevant FDA and USDA officials, none of the groups have included state or tribal officials in initial discussions and planning. According to the state officials we spoke with, being excluded from these federal-level discussions may hinder their ability to align their safety and labeling requirements, among other things, with federal standards. Developing and updating written guidance and agreements. None of the working groups have documented how they will collaborate. For example, the working groups have not documented leadership, accountability, roles and responsibilities, or resources needed for working groups. Table 2 provides more detail about FDA and USDA\u2019s incorporation of leading collaboration practices in the three working groups. In December 2019, FDA and USDA officials said that as they continued to stand up these working groups, they were considering leading practices for collaboration. For example: Defining outcomes and monitoring accountability. FDA and USDA officials said they were considering means to monitor, evaluate, or report on the results of the pre-market assessment working group. Including relevant participants. FDA and USDA officials said that they were working to determine what knowledge participants in the pre-market assessment working group and the labeling working group needed to perform the work of the working group. Developing and updating written guidance and agreements. FDA and USDA officials said they were considering documenting how they will collaborate in the pre-market assessment working group, including potentially creating a charter for the working group. We have previously reported that fragmentation has caused inconsistent oversight and inefficient use of resources in the federal food safety oversight system. """
x4="""The agencies\u2019 2019 agreement to share oversight of cell-cultured meat creates a new relationship between FDA and USDA, since the agencies will oversee different stages of the production of the same food and hand off oversight at a certain point in that production. These factors contribute to an already complicated system in which the two agencies must coordinate on food safety oversight. In this context, some industry representatives and other stakeholders have expressed concerns about potential fragmentation or overlap in oversight of cell-cultured meat, such as could occur during the harvest phase of cell-cultured meat production when FDA hands off its oversight to USDA. Additionally, representatives from one cell-cultured meat firm stated that avoiding overlap in federal oversight whenever possible was important to them. For example, representatives from one firm pointed to inspection, record-keeping requirements, and regulations as potential areas at risk of overlap. They stated that potential overlap would add unnecessary, burdensome requirements and create an uneven playing field with the conventional meat industry. By more fully incorporating all seven leading practices for interagency collaboration early in the development of the three working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat, ensure consistency and efficient use of resources, and provide clarity to key stakeholders. While FDA and USDA officials told us they have decided who will oversee cell-cultured seafood, they have not formally announced or documented this decision, and some stakeholders have reported confusion or ambiguity about which agency will oversee cell-cultured seafood other than catfish. Specifically, FDA and USDA\u2019s interagency agreement regarding cell-cultured meat states that it covers all cell-cultured meat derived from USDA-amenable species required to bear a USDA mark of inspection, which in the agreement includes livestock, poultry, and catfish. However, the agreement does not mention cell-cultured meat made from the cells of other fish, such as tuna and shellfish. FDA and USDA officials told us that FDA will have sole oversight responsibility for cell-cultured seafood other than catfish. According to FDA officials, they have verbally communicated this decision in various meetings with stakeholders. However, FDA and USDA officials told us that formally documenting FDA\u2019s sole oversight of most cell- cultured seafood in their interagency agreement was unnecessary because FDA currently oversees most conventional seafood. According to cell-cultured meat firms, some firms are working on developing cell- cultured versions of seafood, such as bluefin tuna. However, stakeholders from two cell-cultured meat firms, including representatives of a cell- cultured seafood firm we spoke with in April 2019, stated that they did not know who in the federal government would oversee cell-cultured seafood. Representatives from one cell-cultured seafood firm said that not being able to rule out oversight by USDA prevented them from making key decisions regarding what direction to pursue in developing their commercial production method. While FDA and USDA officials told us they had agreed that FDA would oversee cell-cultured seafood other than catfish, as of December 2019, the agencies had not formally announced or documented this agreement. Developing and updating written guidance and agreements is a leading practice for collaboration, as we have previously reported. In addition, standards for internal control in the federal government state that agency management should externally communicate the necessary quality information to achieve its objectives and should select appropriate methods of communication, such as a written document or a face-to-face meeting. Management should also periodically evaluate the entity\u2019s methods of communication so that the organization has the appropriate tools to communicate quality information throughout and outside of the entity on a timely basis. While FDA and USDA officials have informally communicated to some stakeholders that FDA will have sole oversight of most cell-cultured seafood, FDA has not communicated this information formally or in a method readily available to all relevant stakeholders, such as in their interagency agreement or other publicly available written document. FDA and USDA officials told us that they wanted to communicate this information through outreach to individual firms, but FDA or USDA officials said they did not think that revising their interagency agreement was necessary. By taking steps to document which agency will oversee cell-cultured seafood other than catfish, FDA and USDA will better ensure the public, including key stakeholders such as cell-cultured meat firms, have clarity about the agencies\u2019 oversight responsibilities in this area. Cell-cultured meat is a new food product that raises many questions. FDA and USDA\u2019s shared oversight of cell-cultured meat poses various challenges for these agencies, as well as stakeholders such as industry. Compounding this challenge is that specific information about key aspects of cell-cultured meat, such as the technology and production methods to be used as well as the composition of the products, is not yet known. FDA and USDA have taken steps to collaborate on their shared regulatory oversight of cell-cultured meat, including establishing an interagency agreement and three working groups. However, the interagency agreement only partially incorporates the seven leading collaboration practices that can enhance and sustain agencies\u2019 collaborative efforts, and the working groups either partially incorporate or do not incorporate these leading practices, which has raised concerns about potential fragmentation or overlap in oversight. By more fully incorporating all seven leading practices for collaboration into their interagency agreement, FDA and USDA could build on their existing efforts and be better positioned to sustain and enhance their collaborative efforts. Moreover, by more fully incorporating all seven leading practices for interagency collaboration early in the development of the working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat and ensure they are utilizing resources efficiently or effectively. Furthermore, the interagency agreement states that it covers USDA- amenable species required to bear a USDA mark of inspection, which in the agreement includes livestock, poultry, and catfish but does not include cell-cultured seafood other than catfish. FDA and USDA officials told us they have decided FDA will oversee most cell-cultured seafood, but the agencies have not formally documented this decision. By taking steps to document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish, FDA and USDA could better ensure that members of the public and other key stakeholders such as cell-cultured meat firms have clarity about the agencies\u2019 oversight responsibilities in this area. We are making a total of six recommendations, three to FDA and three to USDA: The Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should more fully incorporate the seven leading practices for effective collaboration in the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. (Recommendation 1) The Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should more fully incorporate the seven leading practices for effective collaboration in the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. (Recommendation 2) As the three cell-cultured meat working groups move forward, the Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should more fully incorporate the seven leading practices for effective collaboration, such as identifying specific outcomes and a way to monitor and evaluate progress toward outcomes. (Recommendation 3) As the three cell-cultured meat working groups move forward, the Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should more fully incorporate the seven leading practices for effective collaboration, such as identifying specific outcomes and a way to monitor and evaluate progress toward outcomes. (Recommendation 4) The Commissioner of the Food and Drug Administration, in coordination with the Secretary of Agriculture, should clearly document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish. (Recommendation 5) The Secretary of Agriculture, in coordination with the Commissioner of the Food and Drug Administration, should clearly document in their interagency agreement, or other publicly available document, which agency will oversee cell-cultured seafood other than catfish. (Recommendation 6) We provided a draft of this report to the Department of Health and Human Services\u2019 (HHS) Food and Drug Administration (FDA) and the U.S. Department of Agriculture (USDA) for review and comment. In FDA\u2019s comments, reproduced in appendix III, the agency stated that it values GAO\u2019s recognition of the importance of collaborative mechanisms that facilitate coordination and affirmed its commitment to coordinate closely with USDA to ensure the regulatory framework for cell-cultured meat is clear and transparent to stakeholders. In USDA\u2019s comments, reproduced in appendix IV, the department stated that the report put too much focus on best practices for interagency collaboration and not enough emphasis on industry\u2019s role in providing the agencies with the information they need to move their processes forward to effectively regulate cell-cultured meat."""
x5=""" USDA stated that it is difficult to review a developing technology and its future regulatory oversight when so little detailed information about the technology is known. We agree that the technology to produce cell-cultured meat is still in development and that information about the commercial production methods and composition of the final product are not yet known, as we state in our report. We also acknowledge in our report that having limited information can affect the agencies\u2019 ability to make regulatory and other decisions. We recognize that cell-cultured meat is a new food product that raises many new questions and that specific information about key aspects of cell-cultured meat is not yet known. In light of this challenging context, it is all the more important that FDA and USDA more fully incorporate leading practices for collaboration into their joint efforts in order to ensure they are in the best possible position to oversee this new food product. FDA concurred with two recommendations and partially concurred with one. USDA also concurred with two recommendations and partially concurred with one. Specifically, both agencies agreed with our recommendations regarding (1) more fully incorporating the seven leading practices for effective collaboration in the three cell-cultured meat working groups as they move forward and (2) clearly documenting which agency will oversee cell-cultured seafood other than catfish. FDA and USDA partially concurred with our recommendation, directed to each agency, to more fully incorporate the seven leading practices for effective collaboration into the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. FDA stated that it concurred with the intent of incorporating the seven leading practices into the interagency agreement, and both agencies said that they are open to incorporating the practices into their development of the structure for joint oversight of cell-cultured meat. However, the agencies stated that they did not agree to revise the agreement at this time. FDA and USDA stated that the agreement is a general framework and that incorporating the leading practices would constitute an inappropriate level of detail. Instead, the agencies stated that they believe it would be most valuable to incorporate the leading practices into a more detailed joint framework or standard operating procedure they plan to issue. We appreciate the agencies\u2019 willingness to incorporate the leading practices for effective collaboration into their efforts. The March 2019 interagency agreement states that the agencies have the ability to modify it as needed and will review the agreement every 3 years to determine whether they should modify or terminate it. Therefore, the agencies are due to revisit the agreement in March 2022, if not sooner. Regarding the agencies\u2019 concern that incorporating the leading practices in the interagency agreement would add an inappropriate level of detail, we note that, as we state in our report, the existing agreement already partially incorporates each of the seven leading practices. We continue to believe that FDA and USDA should more fully incorporate the seven leading practices for effective collaboration into their interagency agreement for the joint oversight of cell-cultured meat. Developing a more detailed joint framework or standard operating procedure in accordance with the existing interagency agreement that incorporates those leading practices would meet the intent of our recommendation to improve the effectiveness of the agencies\u2019 collaboration. FDA and USDA also provided technical comments, which we incorporated as appropriate. As agreed with your office, unless you publicly announce its contents earlier, we plan no further distribution of this report until 30 days from its issue date. At that time, we will send copies of this report to the appropriate congressional committees, the Secretary of Health and Human Services, the Secretary of Agriculture, and other interested parties. In addition, the report is available at no charge on the GAO website at http:\/\/www.gao.gov. If you or your staff members have any questions regarding this report, please contact me at (202) 512-3841 or morriss@gao.gov. Contact points for our Offices of Congressional Relations and Public Affairs may be found on the last page of this report. GAO staff who made key contributions to this report are listed in appendix V. Our report (1) describes what is known about methods for commercially producing cell-cultured meat and (2) examines the extent to which the Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) are collaborating to provide regulatory oversight of cell-cultured meat. For both objectives, we conducted a literature review of journal and media articles from 2016 through 2019 to inform our understanding of cell- cultured meat, as well as regulatory activity related to cell-cultured meat in the United States and in other countries. Specifically, we conducted a review of scholarly and trade news from 2016 through July 2019 for specific terms related to cell-cultured meat and regulatory approaches. We conducted searches in more than 30 different academic and trade databases\u2014such as SCOPUS, Foodline, and ProQuest\u2019s Environmental Science Collection\u2014and identified studies relevant to our research objectives. In addition to these formal literature searches, we also asked agency officials and stakeholders to refer us to research articles and publications on cell-cultured meat. We also reviewed documentation from FDA and USDA, including the 2019 interagency agreement, existing memoranda of understanding between the two agencies, Federal Register notices about relevant public meetings, and press releases. We also reviewed documentation such as letters to regulators, presentation slides, and information on organizations\u2019 websites from the cell-cultured meat industry, conventional meat industry, and consumer safety groups, among others. We also interviewed officials from FDA and USDA and representatives of stakeholders from the cell-cultured meat industry and industry associations, conventional meat firms and industry associations, academia, food and consumer safety groups, and state and tribal public health associations, among others. We identified stakeholders to interview through consultation with agency officials and nonfederal stakeholders and through our review of literature. We conducted 17 interviews with representatives or researchers from: six cell-cultured meat firms or industry associations, four conventional meat firms or industry associations, two food and consumer safety groups, one state and tribal public health association, and one food law policy firm. Because this is a nongeneralizable sample, the results of these interviews do not represent the views of all stakeholders involved in or with an interest in the cell-cultured or conventional meat industries or federal regulation of cell-cultured meat. However, they illustrate the range of perspectives on these topics. We also attended public meetings and conferences and conducted site visits to several locations. Specifically, we attended FDA and USDA\u2019s public meeting in October 2018 and four conferences in 2019 that included content pertaining to food safety or cell-cultured meat. We conducted site visits to two conventional meat-processing facilities in Georgia, three cell-cultured meat firms in California, an academic cell- culturing laboratory in California, and a medical cell-culturing facility in Maryland. We identified facilities and laboratories to visit through our literature review, online research, and the assistance of agency officials and stakeholders, such as representatives from the cell-cultured meat and conventional meat industry. To describe what is known about the process for producing cell-cultured meat and potential commercial production methods, we also reviewed two sets of public comments submitted to FDA and USDA in association with the two 2018 public meetings pertaining to cell-cultured meat. These meetings were \u201cFoods Produced Using Animal Cell Culture Technology\u201d in July 2018 and \u201cUse of Cell Culture Technology to Develop Products Derived from Livestock and Poultry\u201d in October 2018. Public comments were submitted by members of the public; representatives from cell- cultured meat firms and industry associations, conventional meat companies and industry associations, food and consumer safety groups, and animal welfare groups; and environmental organizations, among others. We reviewed and analyzed all comments submitted to (1) FDA related to the July 2018 meeting and (2) FDA and USDA related to the October 2018 meeting. We also attended the October 2018 meeting and listened to agency officials\u2019 presentations and oral remarks made by stakeholders and members of the public. We shared our description of the process for making cell-cultured meat, and associated questions, with representatives from three cell-cultured meat firms and academic researchers at two universities for their technical review and incorporated revisions as appropriate. To examine the extent to which FDA and USDA are coordinating to provide regulatory oversight of cell-cultured meat, we identified actions they took to coordinate from July 2018 through April 2020. To identify these actions, we interviewed agency officials, emailed agency officials written questions, reviewed agency documentation and public announcements, and attended public events such as the October 2018 public meeting. We compared the agencies\u2019 interagency agreement and working groups with seven leading practices to enhance and sustain interagency collaboration. Specifically, two independent GAO reviewers assessed the degree to which agencies\u2019 actions incorporated these leading practices. A description of these leading practices and the associated issues to consider is in appendix II. We also assessed the agencies\u2019 actions against standards for internal control in the federal government, including standards related to communicating quality information. In this report, and in our past work, we define collaboration as any joint activity that is intended to produce more public value than could be produced when organizations act alone. We use the terms \u201ccoordination\u201d and \u201ccollaboration\u201d interchangeably in this report. For the purposes of our report, we define cell-cultured meat as food derived from animal cells that were grown in a controlled environment outside of the animal. We define cell-cultured seafood as a subcategory of cell-cultured meat. When referencing conventional meat, we are referring to food produced from the traditional method of slaughtering an animal, such as a cow, hog, chicken, or fish. When referencing seafood, we are referring to shellfish, sea fish, and freshwater fish served as food. We conducted this performance audit from October 2018 to April 2020 in accordance with generally accepted government auditing standards. Those standards require that we plan and perform the audit to obtain sufficient, appropriate evidence to provide a reasonable basis for our findings and conclusions based on our audit objectives. We believe that the evidence obtained provides a reasonable basis for our findings and conclusions based on our audit objectives. Appendix II: Key Issues to Consider for Implementing Interagency Collaborative Mechanisms Issues to consider Have short-term and long-term outcomes been clearly defined? Is there a way to track and monitor progress toward the short-term and long-term outcomes? Do participating agencies have collaboration-related competencies or performance standards against which individual performance can be evaluated? Do participating agencies have the means to recognize and reward accomplishments related to collaboration? What are the missions and organizational cultures of the participating agencies? What are the commonalities between the participating agencies\u2019 missions and cultures and what are some potential challenges? Have participating agencies developed ways for operating across agency boundaries? Have participating agencies agreed on common terminology and definitions? Has a lead agency or individual been identified? If leadership will be shared between one or more agencies, have roles and responsibilities been clearly identified and agreed upon? How will leadership be sustained over the long term? Have participating agencies clarified the roles and responsibilities of the participants? Have participating agencies articulated and agreed to a process for making and enforcing decisions? Have all relevant participants been included? Do the participants have: Full knowledge of the relevant resources in their agency? The ability to commit these resources? The ability to regularly attend activities of the collaborative mechanism? The appropriate knowledge, skills, and abilities to contribute? Developing and updating written guidance and agreements How will the collaborative mechanism be funded? If interagency funding is needed, is it permitted? If interagency funding is needed and permitted, is there a means to track funds in a standardized manner? How will the collaborative mechanism be staffed? Are there incentives available to encourage staff or agencies to participate? If relevant, do agencies have compatible technological systems? Have participating agencies developed online tools or other resources that facilitate joint interactions? If appropriate, have the participating agencies documented their agreement regarding how they will be collaborating? A written document can incorporate agreements reached in any or all of the following areas: Leadership Accountability Roles and responsibilities Resources Have participating agencies developed ways to continually update or monitor written agreements? Steve D. Morris, (202) 512-3841 or morriss@gao.gov In addition to the contact named above, Nico Sloss (Assistant Director), Angela Miles (Analyst-in-Charge), Sahar Angadjivand, Tim Bober, Kevin Bray, Colleen Candrl, Pin En Annie Chou, Tara Congdon, Heather Dowey, Kim Gianopoulos, Gina Hoover, Hayden Huang, Robert Lepzler, Serena Lo, David Lysy, Marc Meyer, Michael Polak, Danny Royer, Sara Sullivan, and Sarah Veale made key contributions to this report"""
para1 = x1+x2+x3+x4+x5
original_document = para1
#extractive summar
para2 = """FDA and USDA have responsibility for overseeing the safety of the food supply. General information about the process of making cell-cultured meat is available, but specific information about the technology being used and the eventual commercial production methods as well as the final products is not yet known. However, the technology and methods to commercially produce cell- cultured meat are still in development, and producers, regulators, and consumers do not yet have clarity on what these will entail. The composition of the final product is also not yet known. The general process for making cell-cultured meat contains five phases: biopsy, cell banking, growth, harvest, and food processing. The technology to produce cell-cultured meat at a commercial scale is still in development, and information about the methods to be used for commercial production and the composition of the final product are not yet known. Consequently, they have not finalized aspects of the technology and eventual commercial production methods to be used or the composition of the final product. As a result, certain information is not yet available to stakeholders\u2014including cell-cultured meat firms themselves, regulators, and the public\u2014about specific aspects of the technology and commercial production methods that will be used, such as the composition of the growth medium and of the final products. This lack of information results in unanswered questions about cell- cultured meat as it relates to the eventual technology and commercial production methods to be used and the composition of the final products. Some firms have developed prototypes of cell-cultured meat products as part of their research and development. In June 2019, FDA and USDA created three working groups to carry out the terms of the interagency agreement. FDA and USDA could more fully incorporate leading practices for collaboration in their interagency agreement and working groups. Developing and updating written guidance and agreements. However, the agreement does not describe how the agencies will track and monitor progress toward outcomes. Developing and updating written guidance and agreements. Developing and updating written guidance and agreements. Developing and updating written guidance and agreements. By more fully incorporating all seven leading practices for interagency collaboration early in the development of the three working groups, FDA and USDA could proactively minimize potential fragmentation and overlap in their oversight of cell-cultured meat, ensure consistency and efficient use of resources, and provide clarity to key stakeholders. While FDA and USDA officials told us they have decided who will oversee cell-cultured seafood, they have not formally announced or documented this decision, and some stakeholders have reported confusion or ambiguity about which agency will oversee cell-cultured seafood other than catfish. While FDA and USDA officials told us they had agreed that FDA would oversee cell-cultured seafood other than catfish, as of December 2019, the agencies had not formally announced or documented this agreement. Developing and updating written guidance and agreements is a leading practice for collaboration, as we have previously reported. Compounding this challenge is that specific information about key aspects of cell-cultured meat, such as the technology and production methods to be used as well as the composition of the products, is not yet known. FDA and USDA officials told us they have decided FDA will oversee most cell-cultured seafood, but the agencies have not formally documented this decision. We agree that the technology to produce cell-cultured meat is still in development and that information about the commercial production methods and composition of the final product are not yet known, as we state in our report. FDA concurred with two recommendations and partially concurred with one. USDA also concurred with two recommendations and partially concurred with one. FDA and USDA partially concurred with our recommendation, directed to each agency, to more fully incorporate the seven leading practices for effective collaboration into the agencies\u2019 interagency agreement for the joint oversight of cell-cultured meat. We continue to believe that FDA and USDA should more fully incorporate the seven leading practices for effective collaboration into their interagency agreement for the joint oversight of cell-cultured meat. GAO staff who made key contributions to this report are listed in appendix V. Our report (1) describes what is known about methods for commercially producing cell-cultured meat and (2) examines the extent to which the Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) are collaborating to provide regulatory oversight of cell-cultured meat. Developing and updating written guidance and agreements How will the collaborative mechanism be funded?"""
extractive_summary = para2
#abstractive summary
#para = """Multiple firms have produced cell-cultured meat as part of their research and development. These products appear likely to become available to consumers in coming years. FDA and USDA are the primary agencies responsible for overseeing the safety of the nation's food supply. However, some stakeholders have expressed concern about the agencies' oversight of cell-cultured meat amidst a fragmented federal food safety oversight system. GAO was asked to review federal oversight of cell-cultured meat. This report (1) describes what is known about methods for commercially producing cell-cultured meat, and (2) examines the extent to which FDA and USDA are collaborating to provide regulatory oversight of cell-cultured meat. GAO conducted a literature review; reviewed documentation from FDA, USDA, and stakeholder groups; analyzed public comments submitted to the agencies; compared agency efforts with leading practices for interagency collaboration; and conducted site visits to selected cell-cultured meat firms. General information about the process of making cell-cultured meat\u2014food products grown from the cells of livestock, poultry, and seafood\u2014is available. However, no company is commercially producing cell-cultured meat. Specific information about the technology being used, eventual commercial production methods, and composition of the final products is not yet known. The general process contains five phases: biopsy, cell banking, growth, harvest, and food processing (see figure). The technology and methods to be used for commercial production are still in development, and producers, regulators, and consumers do not have clarity about many specifics about the process and final product. For example, it is unclear whether production methods and products will use or contain genetically-engineered cells or medications such as antibiotics. The Food and Drug Administration (FDA) and U.S. Department of Agriculture (USDA) have begun collaborating on regulatory oversight of cell-cultured meat. For example, in 2019, the agencies signed an interagency agreement and created three working groups to carry out the terms of the agreement. However, the agreement and working groups could more fully incorporate practices to enhance and sustain collaboration, such as defining outcomes. For example, the agreement identifies the development of labeling principles as an outcome, but does not describe how the agencies will track and monitor progress toward this outcome, and the working groups identify a lead agency but not members' roles. Also, agency officials said they decided FDA would oversee cell-cultured seafood other than catfish, but they have not formally announced or documented this decision. Developing and updating written guidance and agreements is also a leading practice for interagency collaboration. By fully incorporating leading practices into their efforts to collaborate, the agencies could minimize potential overlap and fragmentation, use resources in a more efficient manner, and better ensure the public and other key stakeholders have clarity about the agencies' oversight responsibilities. GAO recommends that FDA and USDA more fully incorporate leading practices for effective collaboration in the agencies' interagency agreement. FDA and USDA partially concurred and indicated a willingness to incorporate these practices in a more detailed agreement, which would also meet the intent of the recommendations. The agencies concurred with the four other recommendations."""

results = compare_paragraphs(para1, para2)
for k, v in results.items():
    print(f"{k}: {v}")

from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
scores = scorer.score(original_document, extractive_summary)
print("ROUGE Recall:", scores['rouge1'].recall)


  return forward_call(*args, **kwargs)
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return forward_call(*args, **kwargs)


coverage_score: 1.0
rouge1: 0.1444
rouge2: 0.142
rougeL: 0.1444
bertscore_precision: 0.8171
bertscore_recall: 0.8176
bertscore_f1: 0.8174
ROUGE Recall: 0.07783505154639175


#senetence level Rouge recall (not suitable for project

In [17]:
import spacy
from sentence_transformers import SentenceTransformer, util

# Load models
nlp = spacy.load("en_core_web_sm")
model = SentenceTransformer("all-MiniLM-L6-v2")

# Function to split text into sentences
def split_sentences(text):
    return [sent.text.strip() for sent in nlp(text).sents if sent.text.strip()]

# Function to compute sentence-level recall
def sentence_level_recall(original_text, extractive_summary, threshold=0.75):
    original_sents = split_sentences(original_text)
    extractive_sents = split_sentences(extractive_summary)

    if not original_sents or not extractive_sents:
        return 0.0

    emb_orig = model.encode(original_sents, convert_to_tensor=True)
    emb_ext = model.encode(extractive_sents, convert_to_tensor=True)

    matched = 0
    for i in range(len(original_sents)):
        sims = util.cos_sim(emb_orig[i], emb_ext)[0]
        if sims.max().item() >= threshold:
            matched += 1

    recall = matched / len(original_sents)
    return round(recall, 4)

# Example usage
#original_document = """FDA and USDA are responsible for the safety of the food supply. They are overseeing the development of lab-grown meat. However, commercial methods are not finalized. Stakeholders lack clarity on technology, methods, and final products."""
#extractive_summary = """FDA and USDA oversee food safety. They are monitoring lab-grown meat development. Final product composition is unclear."""

recall_score = sentence_level_recall(original_document, extractive_summary)
print(f"Sentence-level recall: {recall_score}")


  return forward_call(*args, **kwargs)


Sentence-level recall: 0.2581


#QA based evaluation metric (Not suitable for our project)

In [20]:
import spacy
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer, util
import torch
from tqdm import tqdm

# Load models
nlp = spacy.load("en_core_web_sm")
qg_tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-small-qg-hl")
qg_model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-small-qg-hl")
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
qg_model = qg_model.to(device)

# Sentence splitter
def split_sentences(text):
    return [sent.text.strip() for sent in nlp(text).sents if sent.text.strip()]

# Chunk sentences to avoid token overflow
def chunk_sentences(sentences, max_tokens=512):
    chunks, current_chunk, token_count = [], [], 0
    for sent in sentences:
        sent_tokens = len(qg_tokenizer.tokenize(sent))
        if token_count + sent_tokens <= max_tokens:
            current_chunk.append(sent)
            token_count += sent_tokens
        else:
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = [sent]
            token_count = sent_tokens
    if current_chunk:
        chunks.append(current_chunk)
    return chunks

# Generate QA from a single chunk
def generate_qas_from_chunk(chunk_sentences):
    qas = []
    for sent in tqdm(chunk_sentences, desc="Generating QA pairs"):
        input_text = f"generate question: {sent} </s>"
        inputs = qg_tokenizer(input_text, return_tensors="pt", truncation=True).to(device)
        outputs = qg_model.generate(**inputs, max_length=64)
        question = qg_tokenizer.decode(outputs[0], skip_special_tokens=True)
        qas.append({"question": question, "answer": sent})
    return qas

# Deduplicate questions
def deduplicate_qas(qas):
    seen = set()
    unique_qas = []
    for qa in qas:
        q = qa["question"].strip().lower()
        if q not in seen:
            seen.add(q)
            unique_qas.append(qa)
    return unique_qas

# Rank questions by importance of answer sentences
def rank_questions_by_importance(qas, full_text, top_k=10):
    doc_embedding = embedding_model.encode(full_text, convert_to_tensor=True)
    scores = []
    for qa in qas:
        ans_embedding = embedding_model.encode(qa["answer"], convert_to_tensor=True)
        score = float(util.cos_sim(ans_embedding, doc_embedding))
        scores.append((score, qa))
    top_qas = sorted(scores, key=lambda x: x[0], reverse=True)[:top_k]
    return [qa for _, qa in top_qas]

# Full pipeline
def generate_top_questions(text, top_k=10):
    sentences = split_sentences(text)
    chunks = chunk_sentences(sentences)
    all_qas = []
    for chunk in chunks:
        qas = generate_qas_from_chunk(chunk)
        all_qas.extend(qas)
    deduped = deduplicate_qas(all_qas)
    ranked = rank_questions_by_importance(deduped, text, top_k=top_k)
    return ranked

# Example usage
# original_document = "your long document string here"
qa_pairs = generate_top_questions(extractive_summary, top_k=10)

# Print top 10 important QA pairs
for i, qa in enumerate(qa_pairs, 1):
    print(f"{i}. Q: {qa['question']}\n   A: {qa['answer']}\n")


Generating QA pairs: 100%|██████████| 18/18 [00:13<00:00,  1.34it/s]
Generating QA pairs: 100%|██████████| 12/12 [00:06<00:00,  1.98it/s]
  return forward_call(*args, **kwargs)


1. Q: What is the name of the meat that is produced at a commercial scale?
   A: The technology to produce cell-cultured meat at a commercial scale is still in development, and information about the methods to be used for commercial production and the composition of the final product are not yet known.

2. Q: What is the name of the meat that is still in development?
   A: We agree that the technology to produce cell-cultured meat is still in development and that information about the commercial production methods and composition of the final product are not yet known, as we state in our report.

3. Q: What is the process for making cell-cultured meat?
   A: The general process for making cell-cultured meat contains five phases: biopsy, cell banking, growth, harvest, and food processing.

4. Q: What is not known about the process of making cell-cultured meat?
   A: General information about the process of making cell-cultured meat is available, but specific information about the techno

In [22]:
# Install required packages
#!pip install transformers rouge-score sentence-transformers --quiet

from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
from rouge_score import rouge_scorer

# Load models
qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
rouge = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

# --- Sample Inputs ---
#extractive_summary = """FDA and USDA are the two major regulatory bodies overseeing the safety of food products, including cell-cultured meat. These meats are produced from animal cells without slaughtering livestock. In June 2019, FDA and USDA signed an agreement to collaborate on regulating this industry."""
extractive_summary=extractive_summary
qa_pairs = qa_pairs


# --- Evaluation ---
def evaluate_qa(extractive_summary, qa_pairs):
    total = len(qa_pairs)
    rouge_scores = []
    semantic_scores = []

    for pair in qa_pairs:
        try:
            pred = qa_model(question=pair['question'], context=extractive_summary)
            predicted_answer = pred['answer'].strip()

            # ROUGE
            r_score = rouge.score(pair['answer'], predicted_answer)['rougeL'].fmeasure
            rouge_scores.append(r_score)

            # Semantic similarity
            emb_gt = semantic_model.encode(pair['answer'], convert_to_tensor=True)
            emb_pred = semantic_model.encode(predicted_answer, convert_to_tensor=True)
            sem_sim = util.cos_sim(emb_gt, emb_pred).item()
            semantic_scores.append(sem_sim)

            print(f"Q: {pair['question']}")
            print(f"GT: {pair['answer']} | Pred: {predicted_answer}")
            print(f"→ ROUGE-L: {r_score:.4f} | BERTSim: {sem_sim:.4f}\n")

        except Exception as e:
            print(f"Failed on: {pair['question']}\n{e}\n")

    print("=== Overall Evaluation ===")
    print(f"Avg ROUGE-L: {sum(rouge_scores)/total:.4f}")
    print(f"Avg Semantic Similarity: {sum(semantic_scores)/total:.4f}")

# --- Run Evaluation ---
evaluate_qa(abstractive_summary, qa_pairs)


Device set to use cpu
  return forward_call(*args, **kwargs)


Q: What is the name of the meat that is produced at a commercial scale?
GT: The technology to produce cell-cultured meat at a commercial scale is still in development, and information about the methods to be used for commercial production and the composition of the final product are not yet known. | Pred: cell-cultured meat
→ ROUGE-L: 0.1500 | BERTSim: 0.7800

Q: What is the name of the meat that is still in development?
GT: We agree that the technology to produce cell-cultured meat is still in development and that information about the commercial production methods and composition of the final product are not yet known, as we state in our report. | Pred: catfish
→ ROUGE-L: 0.0000 | BERTSim: 0.0852

Q: What is the process for making cell-cultured meat?
GT: The general process for making cell-cultured meat contains five phases: biopsy, cell banking, growth, harvest, and food processing. | Pred: interagency collaboration
→ ROUGE-L: 0.0000 | BERTSim: 0.0370

Q: What is not known about the

#Factual consistency metric - Entailment based evaluation using roberta-large-mnli

In [40]:
# --- 1. Setup and Imports ---
# Make sure you have these installed:
# pip install transformers torch spacy
# python -m spacy download en_core_web_sm

import spacy
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from scipy.special import softmax # Used to convert raw model outputs into probabilities
import numpy as np

# Load spaCy model for sentence segmentation globally once
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # This block handles the case where the spaCy model isn't downloaded yet.
    # It attempts to download it automatically.
    print("SpaCy model 'en_core_web_sm' not found. Downloading it...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# --- 2. Load Pre-trained NLI Model Globally Once ---
# This is the most critical part: loading the NLI model from Hugging Face.
# 'roberta-large-mnli' is a powerful NLI model.
nli_model_name = "roberta-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(nli_model_name)
model = AutoModelForSequenceClassification.from_pretrained(nli_model_name)

# Ensure the model uses the GPU if available for faster processing.
# If no GPU, it defaults to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval() # Set the model to evaluation mode (important for inference)

# This map defines how the model's numerical output labels (0, 1, 2)
# correspond to the NLI relationships (contradiction, entailment, neutral).
# This order is standard for MNLI models.
label_map = {0: "contradiction", 1: "entailment", 2: "neutral"}


# --- 3. Helper Function for NLI Prediction ---

def get_nli_prediction(premise: str, hypothesis: str) -> dict:
    """
    Performs Natural Language Inference (NLI) using the loaded model.
    It takes a premise (source text) and a hypothesis (summary sentence)
    and predicts their relationship (entailment, contradiction, neutral).
    Includes a warning if the input is truncated.
    """
    # Tokenize the premise and hypothesis separately to get their individual lengths
    premise_tokens = tokenizer.tokenize(premise)
    hypothesis_tokens = tokenizer.tokenize(hypothesis)

    # Calculate total length needed for both, plus special tokens (CLS, SEP, SEP)
    # The actual number of special tokens depends on the tokenizer and task,
    # but 3 is typical for [CLS] premise [SEP] hypothesis [SEP].
    required_length = len(premise_tokens) + len(hypothesis_tokens) + tokenizer.num_special_tokens_to_add(pair=True) # More robust way

    truncated_warning = False
    if required_length > 512: # Check against the model's max_length
        truncated_warning = True

    try:
        # Tokenize and prepare inputs for the NLI model.
        # max_length=512 is the typical limit for BERT/RoBERTa models.
        # truncation=True ensures inputs are cut if too long.
        inputs = tokenizer.encode_plus(
            premise,
            hypothesis,
            add_special_tokens=True,
            max_length=512,
            truncation=True, # This will trigger the truncation if needed
            return_tensors='pt' # Return PyTorch tensors
        )
        # Move inputs to the same device as the model (CPU/GPU)
        inputs = {key: val.to(device) for key, val in inputs.items()}

        with torch.no_grad(): # Disable gradient calculation for faster inference
            outputs = model(**inputs)
            logits = outputs.logits # Raw output scores from the model
            # Convert logits to probabilities using softmax
            scores = softmax(logits.cpu().numpy(), axis=1)[0]

        # Get the label with the highest probability
        predicted_label_id = np.argmax(scores)
        predicted_label = label_map[predicted_label_id]
        confidence_score = scores[predicted_label_id]

        return {
            "label": predicted_label,
            "score": float(confidence_score),
            "truncated": truncated_warning # Add the warning flag to the result
        }
    except Exception as e:
        # Error handling for potential issues during prediction
        print(f"Warning: Error during NLI prediction for hypothesis '{hypothesis[:50]}...': {e}")
        return {"label": "error", "score": 0.0, "truncated": False} # Default to False on error

# --- 4. Main Evaluation Function ---

def evaluate_summary_nli(source_document: str, abstractive_summary: str) -> dict:
    """
    Evaluates factual consistency of an abstractive summary against a source document
    using an NLI model.
    Each sentence in the summary is treated as a hypothesis, and the
    entire source document serves as the premise.

    Args:
        source_document: The full original text (premise for NLI).
        abstractive_summary: The generated summary (sentences are hypotheses).

    Returns:
        A dictionary containing metrics (consistency, contradiction, neutrality rates)
        and detailed NLI results for each summary sentence.
        Returns empty metrics if the summary is empty.
    """
    # 1. Segment summary into sentences using spaCy.
    # .strip() removes leading/trailing whitespace, and the filter removes empty sentences.
    summary_sentences = [sent.text.strip() for sent in nlp(abstractive_summary).sents if sent.text.strip()]

    if not summary_sentences:
        print("Warning: Summary is empty or contains no valid sentences. Returning empty metrics.")
        return {
            "metrics": {
                "consistency_rate": 0.0,
                "contradiction_rate": 0.0,
                "neutrality_rate": 0.0,
                "total_sentences": 0
            },
            "details": []
        }

    nli_results = []
    consistency_count = 0
    contradiction_count = 0
    neutral_count = 0
    error_count = 0
    truncated_count = 0 # New counter for truncated inputs

    # Print progress for long processes
    print(f"Evaluating {len(summary_sentences)} summary sentences...")

    for i, hypothesis in enumerate(summary_sentences):
        # Print progress for long summaries
        print(f"Processing sentence {i+1}/{len(summary_sentences)}: '{hypothesis[:70]}...'")
        result = get_nli_prediction(source_document, hypothesis)
        nli_results.append({
            "summary_sentence": hypothesis,
            "nli_label": result["label"],
            "nli_score": result["score"],
            "truncated": result["truncated"] # Add truncated info to details
        })

        # Count occurrences of each NLI label.
        if result["label"] == "entailment":
            consistency_count += 1
        elif result["label"] == "contradiction":
            contradiction_count += 1
        elif result["label"] == "neutral":
            neutral_count += 1
        else: # Account for any errors during prediction
            error_count += 1

        if result["truncated"]: # Increment truncated count
            truncated_count += 1

    total_sentences = len(summary_sentences)
    # Calculate rates for each NLI category.
    consistency_rate = consistency_count / total_sentences if total_sentences > 0 else 0
    contradiction_rate = contradiction_count / total_sentences if total_sentences > 0 else 0
    neutrality_rate = neutral_count / total_sentences if total_sentences > 0 else 0

    # Print summary of results, including truncation warning
    print("\n--- NLI Evaluation Summary ---")
    print(f"Total Summary Sentences: {total_sentences}")
    print(f"Entailment (Consistent): {consistency_count} ({consistency_rate:.2%})")
    print(f"Contradiction (Hallucinated): {contradiction_count} ({contradiction_rate:.2%})")
    print(f"Neutral (Unsupported/Potential Hallucination): {neutral_count} ({neutrality_rate:.2%})")
    if error_count > 0:
        print(f"Errors during NLI processing: {error_count}")
    if truncated_count > 0:
        print(f"WARNING: {truncated_count} out of {total_sentences} sentences had their source document truncated during NLI input preparation (premise + hypothesis exceeded 512 tokens). This might affect accuracy, especially for 'Neutral' labels.")


    # Return the aggregated metrics and detailed results.
    return {
        "metrics": {
            "consistency_rate": consistency_rate,
            "contradiction_rate": contradiction_rate,
            "neutrality_rate": neutrality_rate,
            "total_sentences": total_sentences,
            "truncated_inputs_count": truncated_count # Add this to metrics for easy access
        },
        "details": nli_results
    }

# How to use these functions in your project:
# from your_module_name import evaluate_summary_nli
#
#source_text = "Your long source document content goes here."
# generated_summary = "The abstractive summary generated by your model."
#
nli_evaluation_results = evaluate_summary_nli(extractive_summary, abstractive_summary)
#
# # Access the metrics:
# print(f"Consistency Rate: {nli_evaluation_results['metrics']['consistency_rate']:.4f}")
# print(f"Contradiction Rate: {nli_evaluation_results['metrics']['contradiction_rate']:.4f}")
# print(f"Neutrality Rate: {nli_evaluation_results['metrics']['neutrality_rate']:.4f}")
# if nli_evaluation_results['metrics']['truncated_inputs_count'] > 0:
#     print(f"Warning: {nli_evaluation_results['metrics']['truncated_inputs_count']} inputs were truncated.")
#
# # Access detailed results for each sentence, including truncation info:
# for item in nli_evaluation_results['details']:
#     print(f"Summary Sentence: {item['summary_sentence']}")
#     print(f"  NLI Label: {item['nli_label']} (Score: {item['nli_score']:.2f})")
#     if item['truncated']:
#         print("  NOTE: This input was truncated.")
#     print("-" * 20)

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Token indices sequence length is longer than the specified maximum sequence length for this model (891 > 512). Running this sequence through the model will result in indexing errors


Evaluating 23 summary sentences...
Processing sentence 1/23: 'Multiple firms have produced cell-cultured meat as part of their resea...'
Processing sentence 2/23: 'These products appear likely to become available to consumers in comin...'
Processing sentence 3/23: 'FDA and USDA are the primary agencies responsible for overseeing the s...'
Processing sentence 4/23: 'However, some stakeholders have expressed concern about the agencies' ...'
Processing sentence 5/23: 'GAO was asked to review federal oversight of cell-cultured meat....'
Processing sentence 6/23: 'This report (1) describes what is known about methods for commercially...'
Processing sentence 7/23: 'GAO conducted a literature review; reviewed documentation from FDA, US...'
Processing sentence 8/23: 'General information about the process of making cell-cultured meat—foo...'
Processing sentence 9/23: 'However, no company is commercially producing cell-cultured meat....'
Processing sentence 10/23: 'Specific information about the

#Factual consistency metric - Entailment based evaluation using roberta-large-mnli ( eliminating truncation problem of 512 tokens

In [38]:
# --- 1. Setup and Imports ---
# Make sure you have these installed:
# pip install transformers torch spacy sentence-transformers
# python -m spacy download en_core_web_sm

import spacy
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from scipy.special import softmax # Used to convert raw model outputs into probabilities
import numpy as np
from sentence_transformers import SentenceTransformer, util # NEW: for semantic retrieval

# Load spaCy model for sentence segmentation globally once
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # This block handles the case where the spaCy model isn't downloaded yet.
    # It attempts to download it automatically.
    print("SpaCy model 'en_core_web_sm' not found. Downloading it...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# --- 2. Load Pre-trained NLI Model Globally Once ---
# This is the most critical part: loading the NLI model from Hugging Face.
# 'roberta-large-mnli' is a powerful NLI model.
nli_model_name = "roberta-large-mnli"
nli_tokenizer = AutoTokenizer.from_pretrained(nli_model_name) # Renamed to nli_tokenizer
nli_model = AutoModelForSequenceClassification.from_pretrained(nli_model_name) # Renamed to nli_model

# NEW: Load Sentence Transformer Model for semantic similarity
embedding_model_name = 'all-MiniLM-L6-v2' # Good balance of speed and performance.
embedding_model = SentenceTransformer(embedding_model_name)

# Ensure the models use the GPU if available for faster processing.
# If no GPU, it defaults to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nli_model.to(device)
embedding_model.to(device) # Move embedding model to device
nli_model.eval() # Set the model to evaluation mode (important for inference)
embedding_model.eval() # Set embedding model to evaluation mode

# This map defines how the model's numerical output labels (0, 1, 2)
# correspond to the NLI relationships (contradiction, entailment, neutral).
# This order is standard for MNLI models.
label_map = {0: "contradiction", 1: "entailment", 2: "neutral"}

# Configuration for retrieval and re-evaluation:
NUM_RETRIEVED_CHUNKS = 5 # Number of top similar sentences to retrieve in the first pass
HARD_CHUNK_SIZE_TOKENS = 400 # Token size for brute-force chunking in the second pass

# --- 3. Helper Functions ---
# (Modified get_nli_prediction, and added chunk_document_by_sentences, chunk_text_by_tokens, retrieve_relevant_chunks)

def chunk_document_by_sentences(document: str) -> list[str]:
    """Splits a document into a list of sentences."""
    return [sent.text.strip() for sent in nlp(document).sents if sent.text.strip()]

def chunk_text_by_tokens(text: str, max_chunk_tokens: int, overlap: int = 50) -> list[str]:
    """
    Splits a long text into chunks of a maximum token size with optional overlap.
    Useful for brute-force checking.
    """
    tokenized_text = nli_tokenizer.encode(text, add_special_tokens=False)
    chunks = []
    start_index = 0
    while start_index < len(tokenized_text):
        end_index = min(start_index + max_chunk_tokens, len(tokenized_text))
        chunk_tokens = tokenized_text[start_index:end_index]
        chunk_text = nli_tokenizer.decode(chunk_tokens, skip_special_tokens=True)
        chunks.append(chunk_text)
        if end_index == len(tokenized_text):
            break
        start_index += max_chunk_tokens - overlap # Move to the next chunk with overlap
    return chunks

def retrieve_relevant_chunks(
    summary_sentence: str,
    source_sentences: list[str],
    num_chunks: int = NUM_RETRIEVED_CHUNKS
) -> str:
    """
    Retrieves the most semantically relevant chunks (sentences) from the source
    document for a given summary sentence.
    """
    if not source_sentences:
        return ""

    try:
        summary_embedding = embedding_model.encode(summary_sentence, convert_to_tensor=True, device=device)
        source_embeddings = embedding_model.encode(source_sentences, convert_to_tensor=True, device=device)
    except RuntimeError as e:
        print(f"Warning: Sentence embedding failed, likely due to device memory. Falling back to CPU. Error: {e}")
        summary_embedding = embedding_model.encode(summary_sentence, convert_to_tensor=True, device='cpu')
        source_embeddings = embedding_model.encode(source_sentences, convert_to_tensor=True, device='cpu')


    # Calculate cosine similarities
    cosine_scores = util.cos_sim(summary_embedding, source_embeddings)[0]

    # Get the indices of the top N most similar source sentences
    top_results_indices = torch.topk(cosine_scores, k=min(num_chunks, len(source_sentences)))[1].cpu().numpy()

    # Concatenate the retrieved sentences to form the premise
    # Sort indices to maintain original document order for retrieved chunks, which can be important for NLI
    top_results_indices.sort()
    retrieved_premise = " ".join([source_sentences[i] for i in top_results_indices])

    return retrieved_premise

def get_nli_prediction(premise: str, hypothesis: str) -> dict:
    """
    Performs Natural Language Inference (NLI) using the loaded model.
    It takes a premise (source text) and a hypothesis (summary sentence)
    and predicts their relationship (entailment, contradiction, neutral).
    Includes a warning if the input is truncated.
    """
    truncated_warning = False
    # Check if combined length might exceed model's max_length
    # Rough estimate to trigger warning early based on tokenization
    # More robust check is inside tokenizer.encode_plus (truncation=True)
    if len(nli_tokenizer.tokenize(premise)) + len(nli_tokenizer.tokenize(hypothesis)) + nli_tokenizer.num_special_tokens_to_add(pair=True) > nli_tokenizer.model_max_length:
         truncated_warning = True

    try:
        # Tokenize and prepare inputs for the NLI model.
        inputs = nli_tokenizer.encode_plus( # Using nli_tokenizer
            premise,
            hypothesis,
            add_special_tokens=True,
            max_length=nli_tokenizer.model_max_length, # Using nli_tokenizer's max_length property (typically 512)
            truncation=True, # This will trigger the truncation if needed
            return_tensors='pt' # Return PyTorch tensors
        )
        # Move inputs to the same device as the model (CPU/GPU)
        inputs = {key: val.to(device) for key, val in inputs.items()}

        with torch.no_grad(): # Disable gradient calculation for faster inference
            outputs = nli_model(**inputs) # Using nli_model
            logits = outputs.logits # Raw output scores from the model
            # Convert logits to probabilities using softmax
            scores = softmax(logits.cpu().numpy(), axis=1)[0]

        # Get the label with the highest probability
        predicted_label_id = np.argmax(scores)
        predicted_label = label_map[predicted_label_id]
        confidence_score = scores[predicted_label_id]

        return {
            "label": predicted_label,
            "score": float(confidence_score),
            "truncated": truncated_warning # Add the warning flag to the result
        }
    except Exception as e:
        # Error handling for potential issues during prediction
        print(f"Warning: Error during NLI prediction for hypothesis '{hypothesis[:50]}...': {e}")
        return {"label": "error", "score": 0.0, "truncated": False} # Default to False on error

# --- 4. Main Evaluation Function ---

def evaluate_summary_nli(source_document: str, abstractive_summary: str) -> dict:
    """
    Evaluates factual consistency of an abstractive summary against a source document
    using an NLI model, incorporating semantic retrieval (first pass) and
    brute-force chunking (second pass for Neutral/Contradiction).
    """
    summary_sentences = chunk_document_by_sentences(abstractive_summary)
    source_sentences_for_retrieval = chunk_document_by_sentences(source_document) # For first pass retrieval
    source_chunks_for_brute_force = chunk_text_by_tokens(source_document, HARD_CHUNK_SIZE_TOKENS) # For second pass

    if not summary_sentences:
        print("Warning: Summary is empty or contains no valid sentences. Returning empty metrics.")
        return {
            "metrics": {
                "consistency_rate": 0.0, "contradiction_rate": 0.0,
                "neutrality_rate": 0.0, "total_sentences": 0, "truncated_inputs_count": 0
            },
            "details": [], "categorized_sentences": {}
        }

    # Store results from the first pass
    first_pass_results = []

    # Store final aggregated results
    final_nli_results = []
    consistency_count = 0
    contradiction_count = 0
    neutral_count = 0
    error_count = 0
    truncated_count = 0 # Truncation count (mainly for first pass, less common in second)

    entailment_sentences = []
    contradiction_sentences = []
    neutral_sentences = []
    error_sentences = []

    print(f"\n--- Starting NLI Evaluation (Two-Pass with Semantic Retrieval) ---")
    print(f"Total Summary Sentences to Process: {len(summary_sentences)}")
    print(f"Source Document broken into {len(source_sentences_for_retrieval)} sentences for retrieval.")
    print(f"Source Document also broken into {len(source_chunks_for_brute_force)} token chunks for brute-force re-evaluation.")
    print(f"First Pass: Retrieving top {NUM_RETRIEVED_CHUNKS} relevant source sentences for each summary sentence.")
    print("-" * 70)

    # --- FIRST PASS: Semantic Retrieval ---
    print("\n--- FIRST PASS: Evaluating all sentences with semantic retrieval ---")
    for i, hypothesis in enumerate(summary_sentences):
        print(f"\nProcessing Sentence {i+1}/{len(summary_sentences)} (First Pass):")
        print(f"  Summary Sentence: \"{hypothesis}\"")

        retrieved_premise = retrieve_relevant_chunks(
            summary_sentence=hypothesis,
            source_sentences=source_sentences_for_retrieval,
            num_chunks=NUM_RETRIEVED_CHUNKS
        )

        if not retrieved_premise:
            print("  Warning: No relevant source chunks found for this summary sentence. Defaulting to Neutral.")
            result = {"label": "neutral", "score": 0.0, "truncated": False}
        else:
            result = get_nli_prediction(retrieved_premise, hypothesis)


        first_pass_results.append({
            "summary_sentence": hypothesis,
            "nli_label": result["label"],
            "nli_score": result["score"],
            "truncated": result["truncated"]
        })

        if result["truncated"]:
            truncated_count += 1
            print("  WARNING: The RETRIEVED PREMISE + hypothesis still exceeded 512 tokens and was truncated in First Pass.")

        print(f"  First Pass NLI Result: {result['label'].upper()} (Confidence: {result['score']:.2f})")

    print("\n--- FIRST PASS COMPLETE ---")
    print("-" * 70)

    # --- SECOND PASS: Brute-force for Neutral/Contradiction ---
    print("\n--- SECOND PASS: Re-evaluating Neutral/Contradiction sentences with full source chunking ---")
    re_evaluated_count = 0
    for i, first_pass_result in enumerate(first_pass_results):
        hypothesis = first_pass_result["summary_sentence"]
        initial_label = first_pass_result["nli_label"]
        final_label = initial_label # Assume initial label is final until re-evaluated

        if initial_label in ["neutral", "contradiction"]:
            re_evaluated_count += 1
            print(f"\nRe-evaluating Sentence {i+1}/{len(summary_sentences)} (Initial Label: {initial_label.upper()}):")
            print(f"  Summary Sentence: \"{hypothesis}\"")

            found_entailment_in_second_pass = False
            best_contradiction_score = 0.0
            second_pass_contradiction_count = 0
            second_pass_neutral_count = 0

            # Iterate through all source chunks
            for chunk_idx, source_chunk in enumerate(source_chunks_for_brute_force):
                print(f"    Checking against source chunk {chunk_idx+1}/{len(source_chunks_for_brute_force)}...")
                chunk_result = get_nli_prediction(source_chunk, hypothesis)

                # Check for truncation in the second pass (should be rare if HARD_CHUNK_SIZE_TOKENS is chosen well)
                if chunk_result["truncated"]:
                     print(f"    WARNING: Chunk {chunk_idx+1} + hypothesis truncated in second pass.")


                if chunk_result["label"] == "entailment":
                    found_entailment_in_second_pass = True
                    final_label = "entailment"
                    print(f"    -> Found ENTAILMENT with chunk {chunk_idx+1}. Bypassing further checks for this sentence.")
                    break # Stop processing chunks for this hypothesis if entailment is found
                elif chunk_result["label"] == "contradiction":
                    second_pass_contradiction_count += 1
                    if chunk_result["score"] > best_contradiction_score:
                        best_contradiction_score = chunk_result["score"]
                elif chunk_result["label"] == "neutral":
                    second_pass_neutral_count += 1

            if found_entailment_in_second_pass:
                # Label is already "entailment"
                pass # The final_label was already set to "entailment"
            elif second_pass_contradiction_count > 0 and second_pass_neutral_count == 0:
                # If only contradictions or primarily strong contradictions found and no entailment
                final_label = "contradiction" # Keep as contradiction if it was contradiction, or upgrade from neutral
                print(f"    -> Aggregated: Final label is CONTRADICTION (found {second_pass_contradiction_count} contradictions).")
            else:
                # If no entailment was found across all chunks, and not overwhelmingly contradictory, it remains neutral.
                # Or, if it was initially contradiction but found some neutral in 2nd pass, keep as contradiction if stronger.
                print(f"    -> Aggregated: No ENTAILMENT found. Retaining initial label as {initial_label.upper()}.")
                final_label = initial_label # Revert to initial_label if no stronger evidence

        # Add the final label to the final results list
        final_nli_results.append({
            "summary_sentence": hypothesis,
            "nli_label": final_label,
            "nli_score": first_pass_result["nli_score"], # Keeping first pass score for now, could average or take best of 2nd pass
            "truncated_first_pass": first_pass_result["truncated"] # Keep track of original truncation warning
        })

        # Update global counts based on FINAL labels
        if final_label == "entailment":
            consistency_count += 1
            entailment_sentences.append(hypothesis)
        elif final_label == "contradiction":
            contradiction_count += 1
            contradiction_sentences.append(hypothesis)
        elif final_label == "neutral":
            neutral_count += 1
            neutral_sentences.append(hypothesis)
        elif final_label == "error": # Errors should ideally be handled within get_nli_prediction
            error_count += 1
            error_sentences.append(hypothesis)

    print("\n--- SECOND PASS COMPLETE ---")
    print(f"Total sentences re-evaluated in second pass: {re_evaluated_count}")
    print("-" * 70)


    total_sentences = len(summary_sentences)
    consistency_rate = consistency_count / total_sentences if total_sentences > 0 else 0
    contradiction_rate = contradiction_count / total_sentences if total_sentences > 0 else 0
    neutrality_rate = neutral_count / total_sentences if total_sentences > 0 else 0

    print("\n" + "="*70)
    print("--- FINAL NLI Evaluation Summary (Aggregated Two-Pass Results) ---")
    print(f"Total Summary Sentences Evaluated: {total_sentences}")
    print(f"Entailment (Consistent): {consistency_count} ({consistency_rate:.2%})")
    print(f"Contradiction (Hallucinated): {contradiction_count} ({contradiction_rate:.2%})")
    print(f"Neutral (Unsupported/Potential Hallucination): {neutral_count} ({neutrality_rate:.2%})")
    if error_count > 0:
        print(f"Errors during NLI processing: {error_count}")
    if truncated_count > 0:
        print(f"GLOBAL WARNING: {truncated_count} inputs were truncated in the FIRST PASS (semantic retrieval). This is less likely in the second pass if HARD_CHUNK_SIZE_TOKENS is small enough.")
    print("="*70)

    print("\n--- Detailed Sentence Categorization (Final Labels) ---")

    if entailment_sentences:
        print("\n--- Entailed (Consistent) Sentences ---")
        for i, sentence in enumerate(entailment_sentences):
            print(f"{i+1}. \"{sentence}\"")
    else:
        print("\nNo Entailed (Consistent) Sentences found.")

    if contradiction_sentences:
        print("\n--- Contradictory (Hallucinated) Sentences ---")
        for i, sentence in enumerate(contradiction_sentences):
            print(f"{i+1}. \"{sentence}\"")
    else:
        print("\nNo Contradictory (Hallucinated) Sentences found.")

    if neutral_sentences:
        print("\n--- Neutral (Unsupported/Potential Hallucination) Sentences ---")
        for i, sentence in enumerate(neutral_sentences):
            print(f"{i+1}. \"{sentence}\"")
    else:
        print("\nNo Neutral (Unsupported/Potential Hallucination) Sentences found.")

    if error_sentences:
        print("\n--- Error Processing Sentences ---")
        for i, sentence in enumerate(error_sentences):
            print(f"{i+1}. \"{sentence}\"")
    else:
        print("\nNo Error Processing Sentences.")

    print("\n" + "="*70 + "\n")

    return {
        "metrics": {
            "consistency_rate": consistency_rate,
            "contradiction_rate": contradiction_rate,
            "neutrality_rate": neutrality_rate,
            "total_sentences": total_sentences,
            "truncated_inputs_count": truncated_count # This reflects first pass truncation
        },
        "details": final_nli_results, # This now contains final labels
        "categorized_sentences": {
            "entailment": entailment_sentences,
            "contradiction": contradiction_sentences,
            "neutral": neutral_sentences,
            "error": error_sentences
        }
    }

print("\n--- Evaluating ABSTRACTIVE Summary ---")
evaluation_results_abstractive = evaluate_summary_nli(extractive_summary, abstractive_summary)


Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).



--- Evaluating ABSTRACTIVE Summary ---


Token indices sequence length is longer than the specified maximum sequence length for this model (891 > 512). Running this sequence through the model will result in indexing errors



--- Starting NLI Evaluation (Two-Pass with Semantic Retrieval) ---
Total Summary Sentences to Process: 23
Source Document broken into 30 sentences for retrieval.
Source Document also broken into 3 token chunks for brute-force re-evaluation.
First Pass: Retrieving top 5 relevant source sentences for each summary sentence.
----------------------------------------------------------------------

--- FIRST PASS: Evaluating all sentences with semantic retrieval ---

Processing Sentence 1/23 (First Pass):
  Summary Sentence: "Multiple firms have produced cell-cultured meat as part of their research and development."
  First Pass NLI Result: NEUTRAL (Confidence: 0.97)

Processing Sentence 2/23 (First Pass):
  Summary Sentence: "These products appear likely to become available to consumers in coming years."
  First Pass NLI Result: ENTAILMENT (Confidence: 1.00)

Processing Sentence 3/23 (First Pass):
  Summary Sentence: "FDA and USDA are the primary agencies responsible for overseeing the safe

#installation of libraries

In [39]:
#!pip install rouge_score
#!pip install bert_score