In [1]:
!pip install scispacy
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_ner_bionlp13cg_md-0.5.1.tar.gz

Collecting scispacy
  Downloading scispacy-0.5.5-py3-none-any.whl.metadata (18 kB)
Collecting spacy<3.8.0,>=3.7.0 (from scispacy)
  Downloading spacy-3.7.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (27 kB)
Collecting scipy (from scispacy)
  Downloading scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
Collecting conllu (from scispacy)
  Downloading conllu-6.0.0-py3-none-any.whl.metadata (21 kB)
Collecting joblib (from scispacy)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting scikit-learn>=0.20.3 (from scispacy)
  Downloading scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting pysbd (from scispacy)
  Downloading pysbd-0.3.4-py3-none-any.whl.metadata (6.1 kB)
Collecting nmslib>=1.7.3.6 (from scispacy)
  Downloading nmslib-2.1.1

In [4]:
import pandas as pd
import numpy as np
import spacy
from transformers import pipeline
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
df = pd.read_csv("Synthetic_HCUP-Structured_Data.csv")

In [6]:
admission_type_mapping = {
    1: "Emergency", 2: "Urgent", 3: "Elective", 5: "Trauma", 9: "Unknown"
}
discharge_status_mapping = {
    1: "Home/self-care", 2: "Transferred to hospital", 3: "Transferred to SNF",
    6: "Home health care", 7: "Left against medical advice", 30: "Still patient",
    50: "Hospice (home)", 51: "Hospice (facility)", 20: "Expired",
    43: "Transferred to federal hospital", 62: "Transferred to rehab", 63: "Transferred to long-term care"
}

df['Admission_Type_Text'] = df['ADMSN_TYPE'].map(admission_type_mapping).fillna('Unknown')
df['Discharge_Status_Text'] = df['DSCHRG_STUS'].map(discharge_status_mapping).fillna('Other')

In [7]:
generator = pipeline("text2text-generation", model="google/flan-t5-base", max_length=256)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use cuda:0


In [8]:
def create_prompt(row):
    return f"""
Patient admission type: {row['Admission_Type_Text']}.
Length of stay: {row['LOS']} days.
Discharge status: {row['Discharge_Status_Text']}.
Attending specialty: {row['AT_SPCLTY']}.
Plan paid amount: ${row['PLAN_PMT_AMT']}.
Generate a detailed clinical admission and discharge note.
"""

tqdm.pandas()
df["Prompt"] = df.progress_apply(create_prompt, axis=1)
df["Generated_Note"] = df["Prompt"].progress_apply(lambda x: generator(x)[0]["generated_text"])

100%|██████████| 10000/10000 [00:00<00:00, 88168.05it/s]
  0%|          | 11/10000 [00:08<2:08:21,  1.30it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 10000/10000 [1:28:43<00:00,  1.88it/s] 


In [9]:
nlp = spacy.load("en_ner_bionlp13cg_md")

def extract_entities(text):
    doc = nlp(text)
    return [(ent.text, ent.label_) for ent in doc.ents]

df["Extracted_Entities"] = df["Generated_Note"].progress_apply(extract_entities)

100%|██████████| 10000/10000 [00:52<00:00, 192.12it/s]


In [10]:
output_cols = [
    "PERSON_ID", "Admission_Type_Text", "Discharge_Status_Text",
    "LOS", "AT_SPCLTY", "PLAN_PMT_AMT", "Generated_Note", "Extracted_Entities"
]
df[output_cols].to_csv("hcup_notes_with_entities.csv", index=False)
print(" File saved: hcup_notes_with_entities.csv")

 File saved: hcup_notes_with_entities.csv


In [11]:
!pip install datasets transformers scikit-learn seqeval

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting scipy>=1.6.0 (from scikit-learn)
  Using cached scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.6 MB)
Using 

In [83]:
pip install transformers datasets accelerate peft bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m41.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.5
[0mNote: you may need to restart the kernel to use updated packages.


In [84]:
from datasets import load_dataset
# Load MedQA dataset
dataset = load_dataset("GBaker/MedQA-USMLE-4-options")["train"]

In [123]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, TaskType
from datasets import load_dataset
import torch

# Step 1: Load model and tokenizer (no fp16, no quantization)
model_id = "stanford-crfm/biomedlm"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Fix padding error

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float32  
)

# Step 2: Apply LoRA
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["c_attn"], 
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()



trainable params: 5,242,880 || all params: 2,599,490,560 || trainable%: 0.2017


In [127]:
def format_medqa(example):
    question = example["question"]
    options = example["options"]
    answer = ord(example["answer_idx"].strip().upper()) - 65 
    
    prompt = f"Q: {question}\nOptions:\n"
    for i, opt in enumerate(options):
        prompt += f"{chr(65 + i)}. {opt}\n"
    
    prompt += "A:"
    response = f" {chr(65 + answer)}"
    
    return {"text": prompt + response}

# Re-map the dataset with corrected function
formatted_data = dataset.map(format_medqa)

Map: 100%|██████████| 10178/10178 [00:00<00:00, 12870.84 examples/s]


In [128]:
from transformers import DataCollatorForLanguageModeling

def tokenize(example):
    return tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )

tokenized_dataset = formatted_data.map(tokenize, batched=True)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

Map: 100%|██████████| 10178/10178 [00:03<00:00, 2890.33 examples/s]


In [131]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./biomedlm-medqa-lora",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    max_steps=300,                    
    logging_steps=10,
    save_strategy="no",            
    learning_rate=5e-5,
    report_to="none",
    fp16=False                        
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset, 
    tokenizer=tokenizer,
    data_collator=data_collator
)

trainer.train()

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,3.4866
20,3.4605
30,3.4039
40,3.4161
50,3.3346
60,3.2526
70,3.2165
80,3.2005
90,3.0786
100,3.042


TrainOutput(global_step=300, training_loss=2.982263552347819, metrics={'train_runtime': 8196.6341, 'train_samples_per_second': 0.293, 'train_steps_per_second': 0.037, 'total_flos': 1.8600802910208e+16, 'train_loss': 2.982263552347819, 'epoch': 0.2356637863315004})

In [132]:
# Save final model
model.save_pretrained("./biomedlm-medqa-lora")
tokenizer.save_pretrained("./biomedlm-medqa-lora")

('./biomedlm-medqa-lora/tokenizer_config.json',
 './biomedlm-medqa-lora/special_tokens_map.json',
 './biomedlm-medqa-lora/vocab.json',
 './biomedlm-medqa-lora/merges.txt',
 './biomedlm-medqa-lora/added_tokens.json',
 './biomedlm-medqa-lora/tokenizer.json')

In [22]:
!pip install sentence-transformers faiss-cpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting sentence-transformers
  Downloading sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading sentence_transformers-4.1.0-py3-none-any.whl (345 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.7/345.7 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading faiss_cpu-1.11.0-cp310-cp310-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m51.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu, sentence-transformers
Successfully installed faiss-cpu-1.11.0 sentence-transformers-4.1.0
[0m

In [14]:
import pandas as pd
import numpy as np
import faiss
import pickle
from sentence_transformers import SentenceTransformer

# Load HCUP notes
df_hcup = pd.read_csv("hcup_notes_with_entities.csv")

# Step 1: Embed notes
embedder = SentenceTransformer("all-MiniLM-L6-v2")
corpus = df_hcup["Generated_Note"].tolist()
embeddings = embedder.encode(corpus, show_progress_bar=True, convert_to_numpy=True)

# Step 2: Create FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Step 3: Save index and metadata
faiss.write_index(index, "hcup_faiss_index.idx")

with open("hcup_id_mapping.pkl", "wb") as f:
    pickle.dump(df_hcup[["PERSON_ID", "Generated_Note"]].to_dict(orient="records"), f)

print(" FAISS index and ID mapping saved.")

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Batches: 100%|██████████| 313/313 [00:02<00:00, 128.27it/s]


 FAISS index and ID mapping saved.


In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset
import torch

import pandas as pd

# Load and inspect
file_path = "Synthetic_Diagnosis_Prompts.csv"
df = pd.read_csv(file_path)
df.head()
print(df.columns.tolist())  # Just for verification

# Load model and tokenizer
model_id = "stanford-crfm/biomedlm"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # Ensure compatibility
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float32)

# PEFT LoRA config
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["c_attn", "q_attn"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Prepare dataset
train_dataset = Dataset.from_pandas(df[["prompt", "diagnosis"]])

def format_prompt(example):
    return {
        "text": f"{example['prompt']}\n\nDiagnosis: {example['diagnosis']}"
    }

dataset = train_dataset.map(format_prompt)
tokenized = dataset.map(lambda x: tokenizer(x["text"], truncation=True, padding="max_length", max_length=512), batched=True)

# Define collator
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training setup
training_args = TrainingArguments(
    output_dir="./biomedlm-diagnosis-finetuned",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    max_steps=300,  # You can raise this as needed
    logging_steps=10,
    save_strategy="no",
    report_to="none",
    fp16=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    tokenizer=tokenizer,
    data_collator=collator
)

trainer.train()

# Save
model.save_pretrained("./biomedlm-diagnosis-finetuned")
tokenizer.save_pretrained("./biomedlm-diagnosis-finetuned")

  from .autonotebook import tqdm as notebook_tqdm
Map: 100%|██████████| 5000/5000 [00:01<00:00, 3878.52 examples/s]


OSError: stanford-biomed/biomedlm is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [20]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import re
import pandas as pd

In [21]:
# Load tokenizer and base model
base_model_id = "stanford-crfm/biomedlm"
adapter_path = "./biomedlm-diagnosis-finetuned"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(base_model_id)
model = PeftModel.from_pretrained(model, adapter_path)

In [22]:
# Move model to appropriate device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [23]:
# Load FAISS index and ID mapping
index = faiss.read_index("hcup_faiss_index.idx")
with open("hcup_id_mapping.pkl", "rb") as f:
    id_mapping = pickle.load(f)

In [24]:
# Load sentence transformer for embeddings
embedder = SentenceTransformer("all-MiniLM-L6-v2")

In [25]:
# Load additional CSV-based context
csv_df = pd.read_csv("Synthetic_Diagnosis_QA_Dataset.csv")
csv_contexts = csv_df["prompt"].tolist()

In [26]:
# Utility: Retrieve top-k FAISS notes
def retrieve_similar_notes(query, k=3):
    query_vec = embedder.encode([query], convert_to_numpy=True)
    distances, indices = index.search(query_vec, k)
    return [id_mapping[i]["Generated_Note"] for i in indices[0]]

In [27]:
# Utility: Retrieve top-k similar prompts from CSV
def retrieve_context_from_csv(query, top_k=1):
    csv_vectors = embedder.encode(csv_contexts, convert_to_tensor=True)
    query_vec = embedder.encode(query, convert_to_tensor=True)
    scores = torch.nn.functional.cosine_similarity(query_vec, csv_vectors)
    top_idx = torch.topk(scores, k=top_k).indices
    return [csv_contexts[i] for i in top_idx]

In [28]:
# Build prompt

def build_diagnosis_prompt(patient_info, historical_notes, csv_context=None):
    prompt = (
        "You are a clinical diagnosis assistant.\n\n"
        "Your task is to analyze the patient's symptoms and history and provide the most likely diagnosis with a concise medical explanation.\n\n"
        f"### Patient Summary:\n{patient_info.strip()}\n\n"
        f"### Historical Notes:\n" + "\n".join(f"- {note.strip()}" for note in historical_notes)
    )

    if csv_context:
        prompt += (
            f"\n\n### Additional Clinical Context:\n"
            + "\n".join(f"- {ctx.strip()}" for ctx in csv_context)
        )

    prompt += (
        "\n\n### Task:\n"
        "What is the most likely diagnosis and why?\n\n"
        "Diagnosis and Explanation:"
    )
    return prompt

In [42]:
# Clean hallucinated artifacts from response
import re
import nltk
nltk.download("punkt")
from nltk.tokenize import sent_tokenize

def clean_response(text):
    import re

    # Remove markdown images and formatting artifacts
    text = re.sub(r"!\[.*?\]\(.*?\)", "", text)
    text = re.sub(r">+", "", text)
    text = re.sub(r"\d+\.", "", text)
    text = re.sub(r"[•\-:]+", " ", text)

    # Normalize specific known noisy phrases
    text = re.sub(r"\b(Hypertension)\s*[:]?[\s]*\b", "Hypertension. ", text, flags=re.IGNORECASE)
    text = re.sub(r"\b(Acute coronary syndrome)\s*[:]?[\s]*\b", "Acute coronary syndrome. ", text, flags=re.IGNORECASE)
    text = re.sub(r"\b(Plan|Answer|Question|Diagnosis)\s*[:]?[\s]*", "", text, flags=re.IGNORECASE)

    # Remove questions and incomplete phrases
    lines = re.split(r"[.\n]", text)
    filtered = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        if line.endswith("?"):
            continue
        if re.match(r"(?i)^(what|why|how|when|is|are|can|do|does|did|your|who|where|which|¿)\b", line):
            continue
        filtered.append(line)

    # Combine back to paragraph
    clean_text = ". ".join(filtered).strip()

    # Final fix: punctuation and whitespace
    clean_text = re.sub(r"\s{2,}", " ", clean_text)
    if clean_text and not clean_text.endswith("."):
        clean_text += "."

    return clean_text

# Main inference function
def suggest_diagnosis(patient_info, k=3, csv_top_k=1, max_tokens=200):
    faiss_notes = retrieve_similar_notes(patient_info, k=k)
    csv_context = retrieve_context_from_csv(patient_info, top_k=csv_top_k)

    prompt = build_diagnosis_prompt(patient_info, faiss_notes, csv_context=csv_context)

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            num_beams=4,
            do_sample=False,
            early_stopping=True,
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=3,
            repetition_penalty=1.2
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    # Cut off any prefix text
    if "Diagnosis and Explanation:" in decoded:
        decoded = decoded.split("Diagnosis and Explanation:")[-1]

    return clean_response(decoded)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [57]:
# Sample test
query = "Patient presents with stoamch pain, and was admitted for 3 days under gastroenterologist."
diagnosis = suggest_diagnosis(query)
print("Suggested Diagnosis and Explanation:\n", diagnosis)

Suggested Diagnosis and Explanation:
 The patient presented to the emergency department (ED). Admission Admits to the ED with a chief complaint of chest pain. Clinical Presentation Acute myocardial infarction. Discharged self care under neurology. You are an orthopaedic surgeon. Introduction Based on the Gastrointestinal bleed. Place the patient under cardiologist or admits. The most likely reason for admission Cardiology Gastroenterology. Please note Gastroparesis. Symptoms Acute coronary syndrome. ``` Your You will find. Shortness of Breath Self care. Please explain.


In [53]:
import evaluate
import pandas as pd

# Load and sample test CSV
test_df = pd.read_csv("Synthetic_Diagnosis_Prompts.csv")
test_df = test_df.sample(n=25, random_state=42)

# Load metrics
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")
meteor = evaluate.load("meteor")

# Evaluation function
def compute_nlg_metrics(predictions, references):
    bleu_score = bleu.compute(predictions=predictions, references=[[ref] for ref in references])
    rouge_score = rouge.compute(predictions=predictions, references=references)
    meteor_score = meteor.compute(predictions=predictions, references=references)

    return {
        "BLEU": bleu_score['bleu'],
        "ROUGE-L": rouge_score['rougeL'],
        "METEOR": meteor_score['meteor']
    }

# Optional: keyword coverage metric
clinical_terms = ["hypertension", "acute coronary syndrome", "myocardial infarction", "chest pain", "troponin", "ecg", "long-term care"]

def keyword_coverage(prediction):
    found = [term for term in clinical_terms if term in prediction.lower()]
    return len(found) / len(clinical_terms)

# Run predictions
preds = []
refs = []

for _, row in test_df.iterrows():
    pred = suggest_diagnosis(row['prompt'])
    preds.append(pred)
    refs.append(row['diagnosis'])

# Compute metrics
results = compute_nlg_metrics(preds, refs)
print(results)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


{'BLEU': 0.0, 'ROUGE-L': 0.07842936873243765, 'METEOR': 0.1229333670009592}
