In [None]:
!pip install transformers sentence-transformers faiss-cpu datasets rank_bm25 accelerate bitsandbytes scispacy
!pip install -U pip setuptools wheel
!pip install -U 'spacy[cuda11x,transformers,lookups]'
!python -m spacy download en_core_web_sm
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_core_sci_sm-0.5.4.tar.gz

Collecting faiss-cpu
  Using cached faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting datasets
  Using cached datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting rank_bm25
  Using cached rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Collecting bitsandbytes
  Using cached bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting scispacy
  Using cached scispacy-0.5.5-py3-none-any.whl.metadata (18 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Using cached xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Using cached multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Using cached fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Coll

**Load Medical books**

In [None]:
!pip install --upgrade datasets
from datasets import load_dataset

dataset = load_dataset("scientific_papers", "pubmed", split="train")

texts = [item['article'] for item in dataset]

print("Sample Text:", texts[0])
print("Total Documents:", len(texts))



README.md:   0%|          | 0.00/8.27k [00:00<?, ?B/s]

scientific_papers.py:   0%|          | 0.00/5.35k [00:00<?, ?B/s]

The repository for scientific_papers contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/scientific_papers.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/3.62G [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/880M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/119924 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/6633 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6658 [00:00<?, ? examples/s]

Sample Text: a recent systematic analysis showed that in 2011 , 314 ( 296 - 331 ) million children younger than 5 years were mildly , moderately or severely stunted and 258 ( 240 - 274 ) million were mildly , moderately or severely underweight in the developing countries .
in iran a study among 752 high school girls in sistan and baluchestan showed prevalence of 16.2% , 8.6% and 1.5% , for underweight , overweight and obesity , respectively .
the prevalence of malnutrition among elementary school aged children in tehran varied from 6% to 16% .
anthropometric study of elementary school students in shiraz revealed that 16% of them suffer from malnutrition and low body weight .
snack should have 300 - 400 kcal energy and could provide 5 - 10 g of protein / day . nowadays , school nutrition programs are running as the national programs , world - wide . national school lunch program in the united states
there are also some reports regarding school feeding programs in developing countries . 

# Implement Adaptive Chunking
Now, we will split text intelligently using:

Token Density (more words → smaller chunks) <br>
Semantic Entropy (important words → finer splits) <br>
Medical Entity Frequency (UMLS terms → finer splits) <br>
Graph Centrality (higher importance → finer splits)


✅Handles Empty Texts <→ Skips them before processing.<br>
✅ Avoids TF-IDF Errors → Uses stop_words="english" and checks if matrix is empty.<br>
✅ Improves Medical NLP → Uses en_core_sci_sm for better entity recognition.


In [None]:
import nltk
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

nltk.download('punkt')
nltk.download('punkt_tab')

try:
    nlp = spacy.load("en_core_sci_sm")
except:
    nlp = spacy.load("en_core_web_sm")

def chunk_importance(text):
    if not text.strip():
        return 0

    # Token Density
    tokens = nltk.word_tokenize(text)
    density = len(tokens) / (len(text) + 1)

    # Semantic Entropy (TF-IDF)
    try:
        vectorizer = TfidfVectorizer(stop_words="english")
        tfidf_matrix = vectorizer.fit_transform([text])
        entropy = np.mean(tfidf_matrix.toarray()) if tfidf_matrix.shape[1] > 0 else 0
    except ValueError:
        entropy = 0

    # Medical Entity Frequency
    doc = nlp(text)
    medical_entities = len([ent for ent in doc.ents if ent.label_ in ["DISEASE", "TREATMENT", "MEDICATION"]])

    # Combined Score
    return 0.4 * density + 0.3 * entropy + 0.3 * medical_entities

def adaptive_chunk(text, max_length=256):
    sentences = nltk.sent_tokenize(text)
    chunks = []
    current_chunk = []

    for sent in sentences:
        current_chunk.append(sent)
        chunk_text = " ".join(current_chunk)
        if len(chunk_text.split()) > max_length or chunk_importance(chunk_text) > 0.5:
            chunks.append(chunk_text)
            current_chunk = []

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

filtered_texts = [text for text in texts if text.strip()]

# Apply adaptive chunking
chunked_texts = [adaptive_chunk(text) for text in filtered_texts[:100]]

print("Sample Chunk:", chunked_texts[0])
print("Total Chunks Created:", sum(len(chunks) for chunks in chunked_texts))


--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy-cuda11x, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy-cuda11x, cupy-cuda12x

  Follow

0it [00:00, ?it/s]

  deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(  # type: ignore[union-attr]


Sample Chunk: ['a recent systematic analysis showed that in 2011 , 314 ( 296 - 331 ) million children younger than 5 years were mildly , moderately or severely stunted and 258 ( 240 - 274 ) million were mildly , moderately or severely underweight in the developing countries . in iran a study among 752 high school girls in sistan and baluchestan showed prevalence of 16.2% , 8.6% and 1.5% , for underweight , overweight and obesity , respectively . the prevalence of malnutrition among elementary school aged children in tehran varied from 6% to 16% . anthropometric study of elementary school students in shiraz revealed that 16% of them suffer from malnutrition and low body weight . snack should have 300 - 400 kcal energy and could provide 5 - 10 g of protein / day . nowadays , school nutrition programs are running as the national programs , world - wide . national school lunch program in the united states\nthere are also some reports regarding school feeding programs in developing countrie

In [None]:
!pip install sentence-transformers faiss-cpu scikit-learn



In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np

biomed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

test_embedding = biomed_model.encode("This is a test sentence.")
print("Embedding Shape:", test_embedding.shape)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Embedding Shape: (384,)


Generate Dense and Sparse Embeddings

In [None]:
from sentence_transformers import SentenceTransformer

dense_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


In [None]:
import faiss
from sklearn.preprocessing import normalize
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.decomposition import TruncatedSVD


vectorizer = TfidfVectorizer(max_features=5000)


from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import normalize
import numpy as np
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import normalize

def generate_hybrid_embeddings(texts, target_dim=484):
    global vectorizer

    dense_embeddings = dense_model.encode(texts, normalize_embeddings=True)  # (N, 384)

    if not hasattr(vectorizer, "vocabulary_"):
        raise ValueError(" Vectorizer not trained! Run `vectorizer.fit(corpus)` before using it.")

    sparse_embeddings = vectorizer.transform(texts).toarray()  # (N, 5000)

    #  Handle empty sparse embeddings
    if np.all(sparse_embeddings == 0):
        print(" Warning: All sparse embeddings are zero! Adding small noise to avoid SVD failure.")
        sparse_embeddings += np.random.normal(0, 1e-6, sparse_embeddings.shape)

    sparse_target_dim = target_dim - dense_embeddings.shape[1]  # 484 - 384 = 100

    try:
        svd = TruncatedSVD(n_components=sparse_target_dim)
        reduced_sparse = svd.fit_transform(sparse_embeddings)  # (N, 100)

        if np.isnan(reduced_sparse).any():
            print(" NaN values in SVD output! Replacing with zeros.")
            reduced_sparse = np.nan_to_num(reduced_sparse)

    except ValueError as e:
        print("TruncatedSVD failed:", e)
        reduced_sparse = np.zeros((sparse_embeddings.shape[0], sparse_target_dim))

    # Normalize and combine embeddings
    dense_norm = normalize(dense_embeddings, axis=1)
    sparse_norm = normalize(reduced_sparse, axis=1)

    hybrid_embeddings = np.hstack([dense_norm, sparse_norm])  # (N, 484)

    print(f"Hybrid Embeddings Shape: {hybrid_embeddings.shape}")
    return hybrid_embeddings

# Apply to our chunked data
flat_chunks = [" ".join(chunks) for chunks in chunked_texts]
vectorizer.fit(flat_chunks)
hybrid_embeddings = generate_hybrid_embeddings(flat_chunks)

print("Hybrid Embeddings Shape:", hybrid_embeddings.shape)

Hybrid Embeddings Shape: (100, 484)
Hybrid Embeddings Shape: (100, 484)


In [None]:
import faiss
import numpy as np
index = faiss.IndexFlatL2(484)
index.add(hybrid_embeddings.astype(np.float32))

print(f" FAISS index created with {index.ntotal} entries.")


 FAISS index created with 100 entries.


Search function

In [None]:
def search(query, top_k=3):
    query_embedding = generate_hybrid_embeddings([query])
    query_dim = query_embedding.shape[1]
    faiss_dim = index.d

    print(f"Query embedding shape: {query_embedding.shape}")
    print(f"FAISS index expected shape: ({faiss_dim},)")

    if query_dim != faiss_dim:
        print(f" Shape Mismatch: Query ({query_dim}) vs FAISS ({faiss_dim})")

        if query_dim < faiss_dim:
            padding = np.zeros((1, faiss_dim - query_dim))
            query_embedding = np.hstack([query_embedding, padding])
        else:
            query_embedding = query_embedding[:, :faiss_dim]

        print(f" Fixed Query Shape: {query_embedding.shape}")

    # Perform FAISS search
    distances, indices = index.search(query_embedding.astype(np.float32), top_k)

    # Retrieve matched text chunks
    results = [flat_chunks[idx] for idx in indices[0] if idx != -1]

    return results


In [None]:
query = "What are the symptoms of leukemia?"
results = search(query, top_k=3)

print("\n Search Results:")
for i, res in enumerate(results):
    print(f"{i+1}. {res[:200]}...")


Hybrid Embeddings Shape: (1, 385)
Query embedding shape: (1, 385)
FAISS index expected shape: (484,)
 Shape Mismatch: Query (385) vs FAISS (484)
 Fixed Query Shape: (1, 484)

 Search Results:
1. health is not only related to the absence of the disease , therefore we need to conceptualize and operationalize what health is . increasingly , we have come to understand that information about funct...
2. cystic echinococcosis ( ce ) is a severe zoonosis caused by the cyclophyllidean cestode echinococcus granulosus . the disease has a worldwide distribution , with endemic regions in many countries of t...
3. polycystic ovary syndrome ( pcos ) which was first reported in 1935 is known as one of the most common endocrine hormones disorders in the women of the reproductive age afflicting as many as % 10 of t...


  self.explained_variance_ratio_ = exp_var / full_var


In [None]:
!pip install transformers accelerate sentencepiece



In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

model_name = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

print(" LLM Loaded Successfully!")


tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]



 LLM Loaded Successfully!


Model Generator

In [None]:
import torch
print("CUDA Available:", torch.cuda.is_available())
print("GPU:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU Found")


CUDA Available: True
GPU: Tesla T4


In [None]:
def generate_answer(query, top_k=5, max_new_tokens=150):
    retrieved_chunks = search(query, top_k)
    context = "\n".join(retrieved_chunks)

    max_context_tokens = 400
    tokenized_context = tokenizer(context, truncation=True, max_length=max_context_tokens)
    trimmed_context = tokenizer.decode(tokenized_context["input_ids"], skip_special_tokens=True)

    prompt = f"""
    You are a helpful assistant. Answer the question using the provided context.

    Context:
    {trimmed_context}

    Question: {query}
    Answer:
    """


    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    return response

# Test the updated function
query = "What are the symptoms of diabetes?"
response = generate_answer(query)

print("\n LLM Response:\n", response)


  self.explained_variance_ratio_ = exp_var / full_var


Hybrid Embeddings Shape: (1, 385)
Query embedding shape: (1, 385)
FAISS index expected shape: (484,)
 Shape Mismatch: Query (385) vs FAISS (484)
 Fixed Query Shape: (1, 484)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 LLM Response:
 
    You are a helpful assistant. Answer the question using the provided context.

    Context:
    to review the current knowledge about nonpharmacologic approaches in the prevention and early treatment of type 2 diabetes . this study reviewed the research reports dealing with nonpharmacologic interventions aimed at preventing type 2 diabetes with early lifestyle interventions . the results from the randomized controlled trials all show that people with impaired glucose tolerance who received enhanced lifestyle advice had significantly lower ( on average 50% reduced ) incidence of type 2 diabetes compared with those allocated to receive  usual care . individuals who were able to correct their lifestyle habits as recommended for usual healthy life patterns were mostly protected against type 2 diabetes . thus , compelling evidence exists that most of the cases of type 2 diabetes can be prevented or at least the onset of the disease can be significantly delayed . randomi

Optimize Performance (Speed & Memory Efficiency)<br>
Now that the RAG pipeline is working, let's:<br>
✅ Reduce model memory usage (quantization)<br>
✅ Speed up response time (batch processing)

Quantization

In [None]:
!pip install bitsandbytes transformers accelerate



In [None]:
!nvidia-smi


Tue Feb 11 12:56:34 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   68C    P0             30W /   70W |   13532MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Load Mistral with 4-bit Quantization
model_name = "mistralai/Mistral-7B-v0.1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

print(" Model Loaded Successfully")

def generate_answer(query, top_k=5, max_new_tokens=150):
    # Step 1: Retrieve relevant chunks
    retrieved_chunks = search(query, top_k)
    context = "\n".join(retrieved_chunks)

    # Step 2: Limit the context length
    max_context_tokens = 256  # Reduced from 400 to avoid potential issues
    tokenized_context = tokenizer(context, truncation=True, max_length=max_context_tokens)
    trimmed_context = tokenizer.decode(tokenized_context["input_ids"], skip_special_tokens=True)

    # Step 3: Format the prompt
    prompt = f"""
    You are a helpful assistant. Answer the question using the provided context.

    Context:
    {trimmed_context}

    Question: {query}
    Answer:
    """

    # Step 4: Tokenize and move inputs to GPU
    inputs = tokenizer(prompt, return_tensors="pt")

    # Move tensors to CUDA if available, otherwise use CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate response
    with torch.no_grad():  # Ensure inference mode for efficiency
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)

    response = tokenizer.decode(output[0], skip_special_tokens=True)

    return response

def batch_generate_answers(queries, top_k=5, max_new_tokens=150):
    results = []
    for query in queries:
        response = generate_answer(query, top_k, max_new_tokens)
        results.append({"query": query, "response": response})
    return results

# Test batch queries
queries = ["What are the symptoms of diabetes?", "How to treat hypertension?", "What is insulin resistance?"]
responses = batch_generate_answers(queries)

for r in responses:
    print(f"\n Query: {r['query']}\n🤖 Response: {r['response']}")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 28.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 12.12 MiB is free. Process 23302 has 14.73 GiB memory in use. Of the allocated memory 14.58 GiB is allocated by PyTorch, and 18.51 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
!pip install evaluate rouge_score sacrebleu nltk
!pip install evaluate



evaluate

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from evaluate import load

# Load model and tokenizer (Mistral as an example)
model_name = "mistralai/Mistral-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")

# Load NLP Metrics
rouge = load("rouge")
bleu = load("sacrebleu")
meteor = load("meteor")

def generate_answer(query, retrieved_chunks, top_k=5, max_new_tokens=150):
    # Step 1: Retrieve & Select Most Relevant Context
    context = "\n".join(retrieved_chunks[:top_k])  # Take top-k most relevant parts

    # Step 2: Limit Context Length for Efficiency
    max_context_tokens = 400  # Adjust based on model size
    tokenized_context = tokenizer(context, truncation=True, max_length=max_context_tokens)
    trimmed_context = tokenizer.decode(tokenized_context["input_ids"], skip_special_tokens=True)

    # Step 3: Use a More Structured Prompt for High-Scoring Output
    prompt = f"""You are an expert assistant. Answer concisely and factually using the provided context.

    Context:
    {trimmed_context}

    Question: {query}
    Answer (structured list format for clarity):
    """

    # Step 4: Generate Response with Optimized Decoding
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,  # Lower temperature for more deterministic answers
        top_p=0.9,        # Top-p sampling for coherence
        num_beams=5,      # Beam search for structured output
        repetition_penalty=1.2  # Reduce redundant words
    )

    response = tokenizer.decode(output[0], skip_special_tokens=True)

    return response

# Step 5: Evaluation Function
def evaluate_model(predictions, references):
    rouge_scores = rouge.compute(predictions=predictions, references=references)
    bleu_score = bleu.compute(predictions=predictions, references=[references])
    meteor_score = meteor.compute(predictions=predictions, references=references)

    return {
        "ROUGE Scores": rouge_scores,
        "BLEU Score": bleu_score["score"],
        "METEOR Score": meteor_score
    }

# Example Test Run
query = "What are the symptoms of diabetes?"
retrieved_chunks = [
    "Diabetes symptoms include frequent urination, increased thirst, and fatigue.",
    "Some other symptoms are blurred vision, weight loss, and slow healing of wounds."
]

generated_response = generate_answer(query, retrieved_chunks)
print("\n LLM Response:\n", generated_response)

# Ground Truth Reference for Evaluation
references = ["Diabetes symptoms include frequent urination, increased thirst, and fatigue."]

# Evaluate Model's Output
scores = evaluate_model([generated_response], references)
print("\n🔹 Evaluation Scores:\n", scores)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!



 Evaluation Scores:
ROUGE Scores: {'rouge1': 0.6, 'rouge2': 0.5714285714285715, 'rougeL': 0.6, 'rougeLsum': 0.6}
BLEU Score: 40.98094978791076
METEOR Score: {'meteor': 0.701388888888889}
