In [2]:
# Install necessary libraries
import os
os.system("pip install langchain==0.3.12 rouge_score langchain-community transformers torch sentence-transformers datasets evaluate")
import asyncio
from transformers import pipeline
from langchain.agents import initialize_agent, Tool
from langchain_community.llms import HuggingFacePipeline
from langchain.tools import tool
from langchain.agents import AgentType
from datasets import load_dataset
import torch  # Import torch for detecting GPU

# 1. Load the Dataset
print("Loading the dataset...")
ds = load_dataset("Ka4on/radiology")

# Shuffle the dataset for randomness
shuffled_train = ds['train'].shuffle(seed=42)  # You can change the seed for different random splits
shuffled_test = ds['test'].shuffle(seed=42)

# Split the shuffled dataset into training, evaluation, and test sets
train_dataset = shuffled_train.select(range(70))  # First 700 samples for training
eval_dataset = shuffled_test.select(range(20))  # First 200 samples for evaluation
test_dataset = shuffled_test.select(range(20, 30))  # Next 100 samples for testing

# Display a sample row for reference
print("\nSample from the Training Set:")
print(f"Instruction: {train_dataset[0]['instruction']}")
print(f"Input: {train_dataset[0]['input'][:500]}...")  # First 500 characters of the input
print(f"Output: {train_dataset[0]['output']}\n")

# 2. Check for GPU Availability
device = 0 if torch.cuda.is_available() else -1  # If GPU is available, use device 0, else use CPU
device_name = "GPU" if device >= 0 else "CPU"
print(f"Using {device_name} for inference.\n")

# 3. Load the Summarization Pipeline with explicit truncation
print("Loading the summarization model...")
summarization_pipeline = pipeline("summarization",
                                  model="facebook/bart-large-cnn",
                                  device=device,  # Pass the device argument to use GPU or CPU
                                  truncation=True,  # Ensure truncation is explicitly enabled
                                  max_length=1024)  # You can adjust the max length as needed

# 4. Define the Summarization Tool
@tool
async def summarize_tool(text: str) -> str:
    """Summarize the input text into a concise format asynchronously."""
    # Truncate the input if it's too long to avoid errors
    max_input_length = 1024  # You can change this depending on the model's input limits
    if len(text.split()) > max_input_length:
        text = ' '.join(text.split()[:max_input_length])  # Truncate to the first 1024 tokens (approx.)

    # Summarize text using the HuggingFace pipeline asynchronously
    summary = await asyncio.to_thread(summarization_pipeline, text, max_length=1000, min_length=30, do_sample=False, truncation=True)
    return summary[0]['summary_text']

# 5. Load the Reasoning LLM for LangChain
print("Loading the reasoning model for the agent...")
generation_pipeline = pipeline("text-generation",
                               model="EleutherAI/gpt-neo-1.3B",
                               device=device,  # Pass the device argument to use GPU or CPU
                               max_length=1200,
                               truncation=True)  # Explicitly enable truncation
llm = HuggingFacePipeline(pipeline=generation_pipeline)

# 6. Initialize the Summarization Tool for LangChain Agent
tools = [
    Tool(
        name="TextSummarizer",
        func=summarize_tool,
        description="Summarizes a given text into a shorter version."
    )
]

# 7. Initialize the LangChain Agent
print("Initializing the LLM agent...")
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# 8. Summarization Worker Function (Asynchronous)
async def process_entry(entry):
    try:
        input_text = entry['input']
        # Summarize the text using the agent asynchronously
        summarized_text = await agent.arun(f"Summarize the following text: {input_text}")

        # Ensure the summarized text is valid before returning
        if summarized_text is None or len(summarized_text.strip()) == 0:
            summarized_text = "Summary not available"  # Fallback summary

        return summarized_text
    except Exception as e:
        print(f"Error processing entry: {e}")
        return "Summary failed"  # Ensure some value is returned

# 9. Parallel Processing (Asynchronous)
async def process_dataset(dataset, name="Dataset"):
    print(f"\nProcessing {name} dataset...")

    # Use asyncio.gather to process the dataset in parallel
    tasks = [process_entry(entry) for entry in dataset]
    results = await asyncio.gather(*tasks)

    # Display the results (summaries)
    for i, result in enumerate(results):
        print(f"Entry {i+1}: {result}\n")

# 10. Main Execution for Jupyter/IPython
async def main():
    # Process the datasets (training, evaluation, test)
    await process_dataset(train_dataset, name="Training")
    await process_dataset(eval_dataset, name="Evaluation")
    await process_dataset(test_dataset, name="Test")

# Run the main asynchronous function in Jupyter/IPython environment
if __name__ == "__main__":
    await main()


Loading the dataset...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


train_set.jsonl:   0%|          | 0.00/110M [00:00<?, ?B/s]

test_set.jsonl:   0%|          | 0.00/27.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/135466 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/33869 [00:00<?, ? examples/s]


Sample from the Training Set:
Instruction: Generate impression based on medical findings.
Input: Male, 68 years old.Reason: 68yo M w/ hx heart/kidney transplant, PTLD, assess for causes of hypoxia History: as above New port catheter terminates near the cavoatrial junction. New diffuse pulmonary opacities with more focal left basilar opacity. Question of small pleural effusions. Unchanged cardiomegaly. No pneumothorax....
Output: New Port-A-Cath terminates near the cavoatrial junction. New diffuse pulmonary opacities most consistent with pulmonary edema.

Using GPU for inference.

Loading the summarization model...


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


Loading the reasoning model for the agent...


config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=generation_pipeline)
  agent = initialize_agent(


Initializing the LLM agent...

Processing Training dataset...


  summarized_text = await agent.arun(f"Summarize the following text: {input_text}")
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.




[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m

[1m> Entering new AgentExecutor chain...[0m

[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m

[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...[0m


[1m> Entering new AgentExecutor chain...

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `

Error processing entry: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Parsing LLM output produced both a final answer and a parse-able action:: Answer the following questions as best you can. You have access to the following tools:

TextSummarizer(tool_input: 'str', callbacks: 'Callbacks' = None) -> 'str' - Summarizes a given text into a shorter version.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [TextSummarizer]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: Summarize the following text: Male, 68 years old.Reason: 68yo 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Error processing entry: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Parsing LLM output produced both a final answer and a parse-able action:: Answer the following questions as best you can. You have access to the following tools:

TextSummarizer(tool_input: 'str', callbacks: 'Callbacks' = None) -> 'str' - Summarizes a given text into a shorter version.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [TextSummarizer]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: Summarize the following text: Male, 51 years old.Reason: Pulm 

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

Error processing entry: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Parsing LLM output produced both a final answer and a parse-able action:: Answer the following questions as best you can. You have access to the following tools:

TextSummarizer(tool_input: 'str', callbacks: 'Callbacks' = None) -> 'str' - Summarizes a given text into a shorter version.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [TextSummarizer]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: Summarize the following text: 40 years old Male. Reason: fever

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Error processing entry: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Parsing LLM output produced both a final answer and a parse-able action:: Answer the following questions as best you can. You have access to the following tools:

TextSummarizer(tool_input: 'str', callbacks: 'Callbacks' = None) -> 'str' - Summarizes a given text into a shorter version.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [TextSummarizer]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: Summarize the following text: 63 years old Female. Reason: sob

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Error processing entry: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Parsing LLM output produced both a final answer and a parse-able action:: Answer the following questions as best you can. You have access to the following tools:

TextSummarizer(tool_input: 'str', callbacks: 'Callbacks' = None) -> 'str' - Summarizes a given text into a shorter version.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [TextSummarizer]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: Summarize the following text: Other malaise and fatigue [780.7

In [1]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [11]:
from transformers import pipeline
import torch
device = 0 if torch.cuda.is_available() else -1  # If GPU is available, use device 0, else use CPU
summarization_pipeline = pipeline("summarization",
                                  model="facebook/bart-large-cnn",
                                  device=device)

# Test the pipeline directly (to check if it's working outside of Gradio)
test_text = "Summarize the following text: Age: 46 yearsGender: MaleReason for Study: Reason: cause of fevers History: Left ventricular aneurysm. Left ventricular dilating with ventricular fibrillation. Left ventricular hypertrophy with dilatation.Left basal and apical atelectasis. Right ventricle systolic heart failure. Left ventricular aneurysm. "  # Make it long enough for summarization
summary = summarization_pipeline(test_text)
print(summary)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
Your max_length is set to 142, but your input_length is only 87. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=43)


[{'summary_text': 'Left ventricular dilating with ventricular fibrillation. Left ventricular hypertrophy with dilatation. Left basal and apical atelectasis. Right ventricle systolic heart failure. Left Ventricular aneurysm. Right Ventricle Systolic Heart failure.'}]


In [12]:
!pip install gradio
import gradio as gr
import asyncio
from transformers import pipeline
import torch

# ... (Your existing code to define summarization_pipeline) ...
#  from previous response, including the 'import torch' line

# Define the Gradio Interface
def gradio_interface(text_input):
    """Interface function for Gradio."""
    # Use the summarization_pipeline directly
    summary = summarization_pipeline(text_input, max_length=1000, min_length=30, do_sample=False, truncation=True)
    # Extract the summary text
    summary_text = summary[0]['summary_text'] if isinstance(summary, list) and len(summary) > 0 else "Summary not available."
    return summary_text

# Create the Gradio interface
iface = gr.Interface(
    fn=gradio_interface,  # Function to call for summarization
    inputs=gr.Textbox(label="Input Text", lines=5, placeholder="Enter text here..."),  # Text input
    outputs=gr.Textbox(label="Summary"),  # Output textbox for the summary
    title="Text Summarizer",  # Interface title
    description="Enter a passage of text, and this tool will summarize it for you.",  # Description
    theme="default",  # You can choose a theme, "default" is good for a simple UI
)

# Launch the Gradio interface
if __name__ == "__main__":
    iface.launch(share=True)  # share=True gives you a public URL to access the interface

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://838db98afe9f8c7597.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [13]:
!pip install gradio
import gradio as gr
import asyncio
from transformers import pipeline
import torch

# ... (Your existing code to define summarization_pipeline) ...
#  from previous response, including the 'import torch' line

# Define the Gradio Interface
def gradio_interface(text_input):
    """Interface function for Gradio."""
    # Use the summarization_pipeline directly
    summary = summarization_pipeline(text_input, max_length=1000, min_length=30, do_sample=False, truncation=True)
    # Extract the summary text
    summary_text = summary[0]['summary_text'] if isinstance(summary, list) and len(summary) > 0 else "Summary not available."
    return summary_text

# Create the Gradio interface
iface = gr.Interface(
    fn=gradio_interface,  # Function to call for summarization
    inputs=gr.Textbox(label="Input Text", lines=5, placeholder="Enter text here..."),  # Text input
    outputs=gr.Textbox(label="Summary"),  # Output textbox for the summary
    title="Radiology Report Summarizer",  # Interface title
    description="Enter a Radiology report text, and this tool will summarize it for you.",  # Description
    theme="default",  # You can choose a theme, "default" is good for a simple UI
)

# Launch the Gradio interface
if __name__ == "__main__":
    iface.launch(share=True)  # share=True gives you a public URL to access the interface

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://54f1e7b389fe16e20f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
