In [4]:
#Abstractive Summarization with Hugging Face Transformers
#Uses a pre-trained model like BART
#Use this for natural language summaries (more human-like)

import fitz  # PyMuPDF
from transformers import pipeline

# Extract text from PDF
def extract_pdf_text(path):
    doc = fitz.open(path)
    return "".join(page.get_text() for page in doc)

# Load the summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Set your PDF file path here
pdf_path =r"C:\Users\salsubhi1\PycharmProjects\Enigmatic Research\Dr.X Files\new-approaches-and-procedures-for-cancer-treatment.pdf"  # Change this to your actual file path

# Extract text from the PDF
pdf_text = extract_pdf_text(pdf_path)

# Make sure the text isn't too short or too long for the model
if len(pdf_text) > 1024:
    pdf_text = pdf_text[:1024]  # Limit to 1024 characters for demo (can be chunked for full doc)

# Generate summary
summary = summarizer(pdf_text, max_length=150, min_length=50, do_sample=False)
print("Summary:\n", summary[0]['summary_text'])


  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


Summary:
 Cancer is a global health problem responsible for one in six                 deaths worldwide. In 2020, there were an estimated 19.3                 million new cancer cases and about 10 million cancer deaths. Cancer is a very complicated sequence of disease progressing gradually with a generalized loss of                 growth control.


In [10]:
#T5 Model with Transformers
#t5-small handles up to 512 tokens — not characters — so we split the text by word count.

import fitz  # PyMuPDF
from transformers import T5Tokenizer, T5ForConditionalGeneration

# -------- Step 1: Extract text from PDF --------
def extract_pdf_text(path):
    doc = fitz.open(path)
    return "".join(page.get_text() for page in doc)

# -------- Step 2: Chunk text into 512-token pieces --------
def chunk_text(text, chunk_size=500):
    words = text.split()
    return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

# -------- Step 3: Load T5 model and tokenizer --------
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

# -------- Step 4: Summarize each chunk --------
def summarize_with_t5(text_chunks):
    summaries = []
    for chunk in text_chunks:
        input_text = "summarize: " + chunk
        input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)

        output_ids = model.generate(
            input_ids,
            max_length=150,
            min_length=40,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
        summary = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        summaries.append(summary)
    return " ".join(summaries)

# -------- Step 5: Put it all together --------
pdf_path = r"C:\Users\salsubhi1\PycharmProjects\Enigmatic Research\Dr.X Files\new-approaches-and-procedures-for-cancer-treatment.pdf"
full_text = extract_pdf_text(pdf_path)
text_chunks = chunk_text(full_text)
final_summary = summarize_with_t5(text_chunks)

# -------- Print the result --------
print("FINAL SUMMARY:\n")
print(final_summary)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


FINAL SUMMARY:

this article is distributed under the terms of the Creative Commons Attribution-NonCommercial 4.0 License (https://creativecommons.org/licenses/by-nc/4.0/) which permits non-commercial use, reproduction and distribution of the work without further permission provided the original work is attributed as specified on the SAGE and Open Access pages. in 2020, there were an estimated 19.3 million new cancer cases and about 10 million cancer deaths globally. treatment, stem cell, targeted drugs, ablation, natural antioxidants, gene therapy Date received: 4 March 2021; accepted: 5 July 2021 1Center for Innovative Drug Development and Therapeutic Trials for Africa (CDT-Africa), College of Health Sciences, Addis Ababa University, Addis Ababa, Ethiopia 2Enteric Diseases and Vaccines Research Unit, Centre for Infectious Disease Research in Zambia. stem cells therapy Stem cells are undifferentiated cells present in the bone mar- row (BM) with an ability to differentiate into any typ

In [12]:
#TextRank Summarization with Sumy
import fitz  # PyMuPDF
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.text_rank import TextRankSummarizer

def extract_pdf_text(path):
    doc = fitz.open(path)
    return "".join(page.get_text() for page in doc)

# Set your PDF file path
pdf_path = r"C:\Users\salsubhi1\PycharmProjects\Enigmatic Research\Dr.X Files\new-approaches-and-procedures-for-cancer-treatment.pdf"

# Extract and summarize
text = extract_pdf_text(pdf_path)
parser = PlaintextParser.from_string(text, Tokenizer("english"))
summarizer = TextRankSummarizer()
summary = summarizer(parser.document, sentences_count=5)

print("TextRank Summary:\n")
for sentence in summary:
    print(sentence)



TextRank Summary:

Cancer is a very complicated sequence of disease conditions progressing gradually with a generalized loss of growth control.1–3 There were only a few options of cancer treatment for patients for many decades which include sur- gery, radiation therapy, and chemotherapy as single treat- ments or in combination.4,5 But recently, many pathways involved in cancer therapy progression and how they can be targeted has improved dramatically, with combinatorial strategies, involving multiple targeted therapies or “tradi- tional” chemotherapeutics, such as the taxanes and platinum compounds, being found to have a synergistic eﬀect.6 New approaches, such as drugs, biological molecules, and immune-mediated therapies, are being used for treatment even if the excepted therapy level has not reached that resists the mortality rate and decreases the prolonged sur- vival time for metastatic cancer.
This mechanism is dependent on the active interaction between stem cell CXCR4 receptors 

In [13]:
#Luhn Summarizer (Sumy)
import fitz  # PyMuPDF
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer

def extract_pdf_text(path):
    doc = fitz.open(path)
    return "".join(page.get_text() for page in doc)

# Set your PDF file path
pdf_path = r"C:\Users\salsubhi1\PycharmProjects\Enigmatic Research\Dr.X Files\new-approaches-and-procedures-for-cancer-treatment.pdf"

# Extract and summarize
text = extract_pdf_text(pdf_path)
parser = PlaintextParser.from_string(text, Tokenizer("english"))
summarizer = LuhnSummarizer()
summary = summarizer(parser.document, sentences_count=5)

print("Luhn Summary:\n")
for sentence in summary:
    print(sentence)


Luhn Summary:

Cancer is a very complicated sequence of disease conditions progressing gradually with a generalized loss of growth control.1–3 There were only a few options of cancer treatment for patients for many decades which include sur- gery, radiation therapy, and chemotherapy as single treat- ments or in combination.4,5 But recently, many pathways involved in cancer therapy progression and how they can be targeted has improved dramatically, with combinatorial strategies, involving multiple targeted therapies or “tradi- tional” chemotherapeutics, such as the taxanes and platinum compounds, being found to have a synergistic eﬀect.6 New approaches, such as drugs, biological molecules, and immune-mediated therapies, are being used for treatment even if the excepted therapy level has not reached that resists the mortality rate and decreases the prolonged sur- vival time for metastatic cancer.
This mechanism is dependent on the active interaction between stem cell CXCR4 receptors and 

In [14]:
#SpaCy + Sentence Scoring (Custom Extractive)
import fitz  # PyMuPDF
import spacy

def extract_pdf_text(path):
    doc = fitz.open(path)
    return "".join(page.get_text() for page in doc)

# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

# Set your PDF file path
pdf_path = r"C:\Users\salsubhi1\PycharmProjects\Enigmatic Research\Dr.X Files\new-approaches-and-procedures-for-cancer-treatment.pdf"

# Extract text and process with SpaCy
text = extract_pdf_text(pdf_path)
doc = nlp(text)
sentence_scores = {}

# Simple heuristic: score = named entities + noun chunks
for sent in doc.sents:
    score = len(list(sent.ents)) + len(list(sent.noun_chunks))
    sentence_scores[sent] = score

# Get top 5 sentences
top_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)[:5]

print("SpaCy Custom Extractive Summary:\n")
for sentence in top_sentences:
    print(sentence.text.strip())


SpaCy Custom Extractive Summary:

The second mechanism is the 
tumor-tropic effect in which the migration of MSCs toward 
tumor microenvironment (TM) after attraction by CXCL16, 
SDF-1, CCL-25, and IL-6 secreted by tumor cells and dif-
ferentiation of MSCs within the tumor cells which contrib-
utes to tumor stromal development.24 Stem cells also act by 
paracrine factor secretion, including extracellular vesicles 
(EVs) and soluble materials,25 and their differentiation 
capacity, such as transplanted HSCs, can give rise to all 
blood cell types.26
Generally, cancer treatment using stem cell therapy by 
various strategies, including transplantation of HSC,27 MSC 
infusion,28 therapeutic carriers,29 generation of immune 
effector cells,30 and vaccine production.31 The stem cell can-
cer therapy approach confronted the following side effects: 
(1) tumorigenesis, (2) adverse events in allogeneic HSC 
transplantation, (3) drug toxicity and drug resistance, (4) 
increased immune responses a