# Google T5-Small Model Transfer Learning Training on A-100 GPU High RAM (40 GB instance)

## Finetuning

In [None]:
!pip install transformers datasets rouge-score nltk evaluate

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.4.1-py3-none-any.whl (487 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m487.4/487.4 kB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading

In [None]:
from datasets import load_dataset, DatasetDict
from transformers import T5Tokenizer, T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu
import numpy as np

# Load the dataset
dataset = load_dataset("ccdv/pubmed-summarization")

# Load the pre-trained model and tokenizer
model_name = "Falconsai/text_summarization"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/3.80k [00:00<?, ?B/s]

train-00000-of-00005.parquet:   0%|          | 0.00/210M [00:00<?, ?B/s]

train-00001-of-00005.parquet:   0%|          | 0.00/208M [00:00<?, ?B/s]

train-00002-of-00005.parquet:   0%|          | 0.00/207M [00:00<?, ?B/s]

train-00003-of-00005.parquet:   0%|          | 0.00/211M [00:00<?, ?B/s]

train-00004-of-00005.parquet:   0%|          | 0.00/210M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/59.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/58.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/119924 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/6633 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6658 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.49k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['article', 'abstract'],
        num_rows: 119924
    })
    validation: Dataset({
        features: ['article', 'abstract'],
        num_rows: 6633
    })
    test: Dataset({
        features: ['article', 'abstract'],
        num_rows: 6658
    })
})


In [None]:
train_samples = 3000  # Number of training samples
val_samples = 300    # Number of validation samples
test_samples = 300   # Number of test samples

In [None]:
# Function to randomly select a specific number of samples
def get_subset(dataset_split, num_samples):
    # Shuffle the dataset and select the first `num_samples`
    shuffled_dataset = dataset_split.shuffle(seed=42)  # Seed for reproducibility
    subset = shuffled_dataset.select(range(num_samples))
    return subset

# Get subsets for train, validation, and test
train_subset = get_subset(dataset["train"], train_samples)
val_subset = get_subset(dataset["validation"], val_samples)
test_subset = get_subset(dataset["test"], test_samples)

# Create a new DatasetDict with the subsets
dataset = DatasetDict({
    "train": train_subset,
    "validation": val_subset,
    "test": test_subset
})

In [None]:
def preprocess_data(examples):
    # Tokenize inputs with padding and truncation
    inputs = ["summarize: " + doc for doc in examples["article"]]
    model_inputs = tokenizer(
        inputs,
        max_length=5096,  # Maximum length for input sequences
        truncation=True,  # Truncate sequences longer than max_length
        padding="max_length",  # Pad sequences to max_length
    )

    # Tokenize labels with padding and truncation
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            examples["abstract"],
            max_length=512,  # Maximum length for label sequences
            truncation=True,  # Truncate sequences longer than max_length
            padding="max_length",  # Pad sequences to max_length
        )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply the preprocessing to the dataset
tokenized_datasets = dataset.map(preprocess_data, batched=True)

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]



Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [None]:
training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    weight_decay=0.01,
    save_total_limit=2,
    num_train_epochs=10,
    predict_with_generate=True,
    fp16=True,
)



In [None]:
import numpy as np
from nltk.translate.bleu_score import sentence_bleu

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    rouge_scores = {
        'rouge1': [],
        'rouge2': [],
        'rougeL': []
    }
    for pred, label in zip(decoded_preds, decoded_labels):
        scores = scorer.score(label, pred)
        rouge_scores['rouge1'].append(scores['rouge1'].fmeasure)
        rouge_scores['rouge2'].append(scores['rouge2'].fmeasure)
        rouge_scores['rougeL'].append(scores['rougeL'].fmeasure)

    bleu_scores = []
    for pred, label in zip(decoded_preds, decoded_labels):
        bleu_scores.append(sentence_bleu([label.split()], pred.split()))

    return {
        'pred':decoded_preds
    }

In [None]:
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Train the model
trainer.train()

  trainer = Seq2SeqTrainer(


Epoch,Training Loss,Validation Loss,Pred
1,1.5089,1.291652,"['', 'the prevalence of obesity in early childhood has shown some signs of decreasing ; however, for the', '', '', '', 'the sensitivity of fluorescence - based bioassays is greatly improved with', 'the morphology of the nanofibrous nonwoven materials makes them suitable for wound dressings', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'the pathogenesis of copd is usually progressive and associated with an abnormal inflammatory response in the', 'the ephrin system may play a central role in the cell communication -', '', '', 'the number of s. granulatus fruiting bodies was 56 in 2013 and 60', 'odontoblasts produce the dentin matrix and are responsible for the synthesis of', '', 'the number of cases positive for a single viral marker ( hbsag )', '', 'a new microarray - genotyping platform called whole genome sampling amplification (', '', '', 'ac - a(2535)-nh2 is an amphipath', 'elisa kits for the determination of insulin were purchased from perkin - e', '', '', 'the ion currents of a type i cell were simulated with original hod', '', '', 'a thrombosed avf was accessed by a brachial', '', '', '', 'the japanese health care system allows patients free access to any clinic irrespective of specialty', '', 'the attitude scale of using tobacco which is used in this survey was first established by awaisu', '', 'the cocrystals are a crystalline material comprised of at least two different components', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'cnas are variations in the genome that result in either gain or loss of one or', '', '', 'emergence of the a / h1n109 influenza virus resulted in the', '', '', 'the national council of ageing and older people study undertaken in 2005 in the republic of irel', '', 'a systme de gestion de linformation en anesthsie', '', '', '', '', 'the challenge of treating diabetes in this section of population is to avoid hypoglycemia.', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'theta amplitude of theta amplitudes is consistent with the primary membrane', '', 'adenocarcinoma is the most common form of nsclc', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'a sample size of minimum 32 patients / group was expected to have an 80% power to', 'linagliptin is a potent inhibitor of the serine protease', '', '', '', 'the end goal of periodontal therapy is the regeneration of periodontal tissues that have been destroyed', '', '', '', '', 'i proposes a new approach to the history of forensic medicine, emphasising travelling', '', 'the xrf release rate is a standardized method for assessing the release rate of', '', '', '', '', '', '', '', '', 'the corresponding two z - values were compared between the measurements ( patient acute v', '', '', '', '', 'mv is a highly contagious acute viral disease characterized by high fever,', '', 'escrt - iii and escrt - ii', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'the raas is a circulating hormonal system, which promotes diabetes', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the e. coli clones were analyzed by a api 20e system', 'adenoviral vectors ( adenoviral vectors ) were the first', '', '', '', 'dilated cardiomyopathy ( dcm ) is the second most prevalent form', 'the escherichia coli groe chaperonin system facilitates protein', '', 'transcripts of mage 1 - 6 genes were detected in 24 ( 82.8%', '', '', '', 'sfks are a unique group of enzymes that have diverse functions in cell proliferation', 'the study was conducted using qualitative content analysis. the results of the study clarify the caring self', '', 'longitudinal follow - up began recently, so the present results are based on baseline data only', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, in place of locations', '', '', '', '', 'ovcf is a systemic disease characterized by low bone mass, bone', 'cross - linking gd1b derived gangliosides are sia', 'the effect of collapsing the high - end categories of a comor', 'ssa / ps ( ssa / ps', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', ""the screening india's twin epidemic study was conducted in eight states maharashtra,"", 'the cytotoxicity and antibacterial efficacy of root - end filling materials were assessed', '', '', '', '', '', '', '', '', '', '', '', 'forest walking is carried out at a slower and self - regulated pace, meaning that', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'apoptosis induction in osteosarcoma cells was examined by fluor', '', '', '', '', 'the pulmonary hyperinflation ( dh ) which induces the so -', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse was daily exercised in an outdoor rotary exerciser ( kondi - trainer', '', 'a prospective study of 262 women with advanced eoc cancer showed that patients treated with', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'a meta - analysis of all mice studies excluding the ilsxiss strains', 'the 5-year disease - free survival ( dfs ) has improved in recent years', '', '', 'schizophrenia participants completed an online, structured daily - diary questionnaire of mood and symptoms every evening for', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the biosynthesis of marineosins ( 14 ) and marineosins (', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'dcs mediated therapies targeted to human cancer cells are promising candidates for antigens against', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'a retrospective analysis of prostate cancer patient data collected between 2006 and 2010 from a tert', '', '', 'the fluorescence lifetimes are monoexponential, and the fluorescence lifetime distribution', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'the mis - alif technique was developed for minimizing abdominal soft tissue and internal organ injury', '', '', '', '', 'cultivated conjunctival epithelial cells were transplanted to the defecte', 'acculturation was associated with meal frequency, snacking, and eating -', '', '', 'peptide synthesis was obtained from novabiochem or irisbiotech.', '', 'the bm is a basement membrane ( bm ) disease characterized by', '', '', 'migraine is a disabling neurological condition that is commonly observed to occur in an episodic manner', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is a neurodegenerative disorder"", '', '', '', '', '', '', '', '', '', '', '', 'the results of the study were based on the standardized lower extremity functional scale ( lef', '', '', '', '', ""the aim of this study was to investigate the relatives'satisfaction and involvement on a general surgery"", '', '', '', '', '', '', 'the results of a retrospective study of 70 women who underwent laparoscopic supracervical', '', 'a splicing variant is a highly regulated process, the outcome of which', 'participants were recruited via advertisements approved by the institutional review board ( irb ),', '', '', '', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'the study protocol was approved by the observational research protocol evaluation committee of the french language society of', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant /', '', 'dna topoisomerases ( topii ) are enzy', '', '', '', '', '', 'the development of personalised medicines is of particular interest for children. the aim of the network is', 'the ll diet used in this study is administered in the form of soups, shakes', '', ""sperm's membrane polyunsaturated fatty acids have a critical role"", '']"
2,1.4084,1.258028,"['', 'the prevalence of obesity in early childhood has shown some signs of decreasing ; however, for the', '', '', '', 'the sensitivity of fluorescence - based bioassays is greatly improved with', 'the morphology of nanofibrous nonwoven materials makes them suitable for wound dressings.', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'the effects of exogenous administration of neurotrophic molecules on neurorestoration of lesion', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts produce the dentin matrix and are responsible for the synthesis of', '', 'the number of cases positive for a single viral marker ( hbsag )', '', 'substantial progress has been made using genetic markers to elucidate the evolutionary histories of populations,', '', '', 'the ac - a(2535)-nh2 peptide is', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are known as the auditory brains', '', '', 'a thrombosed avf was performed in 17 thrombosed fis', '', '', '', 'the objective of this study was to describe treatment patterns and estimate health care resource utilization and costs among ja', '', 'the attitude scale of using tobacco which is used in this survey was first established by awaisu', '', 'the cocrystals are a crystalline material comprised of at least two different components', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'the genome mappability was the first issue we aimed to address in this paper. we', '', '', 'emergence of the a / h1n109 influenza virus resulted in the', '', '', 'the national council of ageing and older people study undertaken in 2005 in the republic of irel', '', 'a t complte de gestion de linformation en anesthsie', '', '', '', '', 'canagliflozin is the first oral inhibitor of sodium / glucose cotransporter', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'the presence and importance of theta oscillations in grid cells is therefore currently controversial and requires', '', 'adenocarcinoma is the most common cause of cancer mortality world - wide', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'a resurgence in the research of 2 agonists for sedation developed after', 'linagliptin is a potent inhibitor of the serine protease', '', '', '', 'periodontal regeneration is the reconstruction of the lost tissues as evidenced histologically in the', '', '', '', '', 'i proposes a new approach to the history of forensic medicine, emphasising travelling', '', 'the xrf release rate is a standardized method for assessing the release rate of', '', '', '', '', '', '', '', '', 'the corresponding two z - values were compared between the measurements ( patient acute v', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and escrt - ii', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'the raas is a circulating hormonal system, which promotes diabetes', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the e. coli clones were considered resistant or susceptible according to a standard introduced', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'dilated cardiomyopathy ( dcm ) is the fourth most common form', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the ssx 1 - 9 gene family consists of 9 subtypes of cancer', '', '', '', 'sfks are a unique group of enzymes that have diverse functions in cell proliferation', 'the study was conducted using qualitative content analysis. the results of this study clarify the threats to pediatric', '', 'longitudinal follow - up began recently, so the present results are based on baseline data only', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, in place of locations', '', '', '', '', 'osteoporosis ( op ) is a systemic disease characterized by', 'cross - linking gd1b derived gangliosides are sia', 'collapsing the high - end categories of a comorbidity scale', 'colorectal polyps are pathologically classified as neoplastic or', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', ""the screening india's twin epidemic study was conducted in eight states maharashtra,"", 'the streptococcal strains used in this study were provided from the culture collection of', '', '', '', '', '', '', '', '', '', '', '', 'forest walking has been reported as a novel form of therapy. however, it remains unclear', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the treatment of osteosarcoma currently involves surgical resection in combination with ne', '', '', '', '', 'the weakness of the respiratory musculature ( with reduced strength and muscular resistance ) has', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse was daily exercised in an outdoor rotary exerciser ( kondi - trainer', '', 'a prospective histoculture drug response assay ( hdra ) study in advanced', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective. this is', 'the 5-year disease - free survival ( dfs ) has significantly improved in recent', '', '', 'schizophrenia participants completed an online, structured daily - diary questionnaire of mood and symptoms every evening for', 'we propose that vascular endothelial cells ( ecs ) form the', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the biosynthesis of prodiginines ( 14 ) and marineosins ( 5', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'dcs - mediated therapies targeted to human cancer cells are promising candidates for antigen', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'in 2012, prostate cancer was the fifth most commonly diagnosed malignancy in korean', '', '', 'the fluorescence lifetimes are monoexponential, and the fluorescence lifetime is', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'the purpose of the present study was to evaluate the radiographic results of mis - alif and', '', '', 'the duration of untreated psychosis ( dup ) may be associated with short -', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is a growing problem in the united states. accultura', '', '', 'peptide synthesis was a process of peptide synthesis. peptide', '', 'the aims of this case report are to show the changes in the gingival tissues in', '', '', 'migraine is a disabling neurological condition that is commonly observed in an episodic manner.', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is a neurodegenerative disorder"", '', '', '', '', '', '', '', '', '', '', '', 'the goal of this study was to determine if dancers demonstrated better static single - leg', '', '', '', '', ""the aim of this study was to investigate the relatives'satisfaction and involvement on a general surgery"", '', '', '', '', '', '', 'the results of a retrospective study of 70 women who underwent laparoscopic supracervical', '', 'a splicing variant is a highly regulated process, the outcome of which', 'participants were recruited via advertisements approved by the institutional review board ( irb ),', '', '', 'adolescent reproductive health outcomes in africa are the worst in the world.ad', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pr with uepe was proposed to 71 patients with copd', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant /', '', 'dna topoisomerases ( topii ) are enzy', '', '', '', '', '', 'the aim of this research is to ensure that clinical trials are well designed and conducted in an ethical manner', 'weight loss is a significant improvement from baseline to three months for adipose tissue', '', ""sperm's membrane polyunsaturated fatty acids have a critical role"", '']"
3,1.4013,1.229144,"['', 'the prevalence of obesity in early childhood has shown signs of decreasing ; however, for the his', '', '', '', 'the sensitivity of fluorescence - based bioassays is greatly improved with', 'the morphology of nanofibrous nonwoven materials makes them suitable for wound dressings.', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'pigment epithelium derived factor ( pedf ) has emerged as a potential', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts produce the dentin matrix and are responsible for the synthesis of', '', 'the number of cases positive for hcv or hcv infections was significantly increased in', '', 'substantial progress has been made using genetic markers to elucidate the evolutionary histories of populations,', '', '', 'ac - a(2535)-nh2 is an amphipath', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are known as the auditory brains', '', '', 'thrombosed native avfs underwent percutaneous restoration for 20 episodes of', '', '', '', 'the objective of this study was to describe treatment patterns and estimate health care resource utilization and costs among ja', '', 'the attitude scale of using tobacco which is used in this survey was first established by awaisu', '', 'the cocrystals are a crystalline material comprised of at least two different components', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'cnas are variations in the genome that result in either gain or loss of one or', '', '', 'emergence of the a / h1n109 influenza virus resulted in the', '', '', 'the national council of ageing and older people study undertaken in 2005 in the republic of irel', '', 'a t complte de gestion de linformation en anesthsie', '', '', '', '', 'canagliflozin is the first oral inhibitor of sodium / glucose cotransporter', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'the presence and importance of theta oscillations in grid cells is a key factor in', '', 'lung carcinoma is the most common cause of cancer mortality world - wide. adeno', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'the main inclusion criteria were age > 18 years, mechanical ventilation with endotracheal in', 'linagliptin ( tradjenta, boehringer ingel', '', '', '', 'periodontal regeneration is the reconstruction of the lost tissues as evidenced histologically in the', '', '', '', '', 'the aim of this article is to explore the influence of forensic medicine and psychiatry', '', 'the aim of the present study was to develop a cheap, fast and reliable release rate method', '', '', '', '', '', '', '', '', 'vestibular neuritis is defined as a sudden unilateral partial failure of the vestibular', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and yeast escrt - i', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'the raas is a major circulating hormonal system, which promotes at', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the e. coli clones were a bacterial resistant e. coli', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'dilated cardiomyopathy ( dcm ) is the fourth most common form', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the ssx 1 - 9 gene family consists of 9 subtypes of cancer', '', '', '', 'src family kinases ( sfks ) are a', 'the study was conducted using qualitative content analysis. the results of this study clarify the threats to pediatric', '', 'this study consists of elderly ( 65 years of age ) type 2 diabetic subjects who are', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, in place of locations', '', '', '', '', 'ovcf is a structural change in ovcf. the', 'cross - linking gd1b derived gangliosides are sia', 'the effect of collapsing comorbid diseases into a single scale provides an', 'colorectal polyps are pathologically classified as neoplastic or', '', '', '', 'cardiomyocyte apoptosis is a distinct form of cell death characterized by', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', ""the screening india's twin epidemic study was conducted in eight states maharashtra,"", 'intermediate restorative material ( irm ) has addressed some drawbacks including moisture', '', '', '', '', '', '', '', '', '', '', '', 'forest walking has been reported as a novel form of therapy. however, it remains unclear', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the treatment of osteosarcoma currently involves surgical resection in combination with ne', '', '', '', '', 'systematic inflammation is now known to be an important aspect of chronic obstructive pulmonary disease', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse was daily exercised in an outdoor rotary exerciser ( kondi - trainer', '', 'ovarian cancer is the most lethal and second most common gynecologic', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective. this is', 'the 5-year survival rate of patients with larc has improved in recent years, mainly', '', '', 'schizophrenia participants with low lpfc activity were able to assess the interaction between l', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the marineosin ( mar ) gene cluster is a family of linear and cycl', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'dcs expressing human tumor transmembrane antigens are promising candidates for', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'in 2012, prostate cancer was the fifth most commonly diagnosed malignancy in korean', '', '', 'the fluorescence lifetimes are monoexponential, and the fluorescence lifetime is', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'the purpose of the present study was to evaluate the radiographic results of mis - alif and', '', '', 'the duration of untreated psychosis ( dup ) may be associated with short -', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is a growing problem in the united states. accultura', '', '', 'peptide synthesis was performed by peptide synthesis. peptide synthesis', '', 'alport syndrome ( as ) is a basement membrane ( bm ) disease', '', '', 'migraine is a disabling neurological condition that is commonly observed in an episodic manner.', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is a neurodegenerative disorder"", '', '', '', '', '', '', '', '', '', '', '', 'objective : the purpose of this study was to determine if dancers demonstrated better static single', '', '', '', '', ""the aim of this study was to investigate the relatives'satisfaction and involvement on a general surgery"", '', '', '', '', '', '', 'the results of a retrospective study of 9 randomized controlled trials, including a meta', '', 'a splicing variant is a highly regulated process, the outcome of which', 'the initial scale development included 234 men and women who lived independently in either the community or continuing care', '', '', 'adolescent reproductive health outcomes in africa are the worst in the world.ad', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pr with uepe was proposed to 71 patients with copd', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant /', '', 'cryptosporidium parvum is one of the most important causal agents of human diarrheas during', '', '', '', '', '', 'the aim of this research is to ensure that clinical trials are well designed and conducted in an ethical manner', 'weight loss is a significant improvement from baseline to three months for a low carbohydrate', '', ""sperm's membrane polyunsaturated fatty acids play an important defensive role"", '']"
4,1.3591,1.222756,"['', 'primary outcome of a randomized clinical trial was a pilot, randomized clinical trial designed', '', '', '', 'the ability to concentrate or extract micro / nanoparticles from the background matrix is essential to', 'the morphology of nanofibrous nonwoven materials makes them suitable for wound dressings.', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'pigment epithelium derived factor ( pedf ) has emerged as a potential', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts produce the dentin matrix and are responsible for the synthesis of', '', 'systematic review combining 27,881 hcc cases from 90 studies published between janu', '', 'substantial progress has been made using genetic markers to elucidate the evolutionary histories of populations,', '', '', 'ac - a(2535 ) and ac - a', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are characterized by the following four phases', '', '', 'thrombosed native avfs underwent percutaneous restoration for 20 episodes of', '', '', '', 'objective : to assess health care resource utilization and costs among japanese women with newly', '', 'objective : the attitudes, attitude and practice ( kap ) of youths about smoking', '', 'the cocrystals are a crystalline material comprised of at least two different components', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'we describe a simple yet effective method that addresses these issues. we describe a simple yet', '', '', 'emergence of the a / h1n109 influenza virus resulted in the', '', '', 'the national council of ageing and older people study undertaken in 2005 in the republic of irel', '', 'anesthesiologists and anesthesia registrars voluntarily reported complications and critical incidents', '', '', '', '', 'canagliflozin is the first oral inhibitor of sodium / glucose cotransporter', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'the presence and importance of theta for grid formation is therefore currently controversial and requires further study', '', 'lung carcinoma is the most common cause of cancer mortality world - wide. adeno', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'objective : a resurgence in the research of 2 agonists for sed', 'linagliptin ( tradjenta, boehringer ingel', '', '', '', 'periodontal regeneration is the reconstruction of the lost tissues as evidenced histologically in the', '', '', '', '', 'the theme of infanticide has received increasing attention from historians in recent years. the aim of', '', 'the aim of the present study was to develop a cheap, fast and reliable release rate method', '', '', '', '', '', '', '', '', 'the present study investigated the effects of unilateral vestibular neuritis on the functional connectivity between the', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and vacuolar protein sorting ( v', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'the raas is a major circulating hormonal system but also a tissue system', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the emergence of quinolone resistance in fecal e. coli of', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'dilated cardiomyopathy ( dcm ) is the fourth most common form', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the gene of cancer / testis antigens ( cta ) such as', '', '', '', 'src family kinases ( sfks ) are a', 'this paper is part of a nursing doctorate thesis conducted in 2014 in iran to clarify', '', 'this study consists of elderly ( 65 years of age ) type 2 diabetic subjects who are', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, in place of locations', '', '', '', '', 'ovcf is a systemic disease characterized by low bone mass, bone', 'cross - linking gd1b derived gangliosides are sia', 'the effect of collapsing the high - end categories of a comor', 'colorectal polyps are pathologically classified as neoplastic or', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', 'the fourth diabetes atlas, published by the international diabetes federation ( id', 'intermediate restorative material ( irm ) has addressed some drawbacks including moisture', '', '', '', '', '', '', '', '', '', '', '', 'forest walking has been reported as a novel form of therapy. however, it remains unclear', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the 5-year survival rate of the treated patients was only about 20%. the treatment of osteos', '', '', '', '', 'objective : the effectiveness of imt in patients with chronic obstructive pulmonary disease', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'this experiment was conducted at the norwegian university of life sciences. according to the norwe', '', 'ovarian cancer is the most lethal and second most common gynecologic', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective. this is', 'the 5-year survival rate of patients with larc has improved in recent years, mainly', '', '', 'schizophrenia symptomatology is a biological vulnerability that contributes to illness severity. we have', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the marineosin ( mar ) gene cluster is a family of linear and cycl', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'dcs expressing human tumor transmembrane antigens are promising candidates for', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'in 2012, prostate cancer was the fifth most commonly diagnosed malignancy in korean', '', '', 'the fluorescence lifetimes are monoexponential, and the fluorescence lifetime is', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'the purpose of the present study was to evaluate the radiographic results of mis - alif and', '', '', 'the duration of untreated psychosis ( dup ) may be associated with short -', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is a growing problem in the united states. accultura', '', '', 'peptide synthesis was performed by peptide synthesis : benzotriazole', '', 'alport syndrome ( as ) is a basement membrane ( bm ) disease', '', '', 'migraine is a disabling neurological condition that is commonly observed to occur in an episodic manner', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is neurodegenerative disorder characterized"", '', '', '', '', '', '', '', '', '', '', '', 'objective : the purpose of this study was to determine if dancers demonstrated better static single', '', '', '', '', ""the aim of this study was to investigate the relatives'satisfaction and involvement on a general surgery"", '', '', '', '', '', '', 'the results of a retrospective study of 9 randomized controlled trials, including a pathologic', '', 'pre - mrna splicing is a highly regulated process', 'the method for this investigation was approved by our institutional review board ( irb ) for', '', '', 'adolescent reproductive health outcomes in africa are the worst in the world. we', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pr with uepe was proposed to 71 patients with copd', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant /', '', 'cryptosporidium parvum is one of the most important causal agents of human diarrheas during', '', '', '', '', '', 'the aim of this research is to ensure that clinical trials are well designed and conducted in an ethical manner', 'weight loss is a significant adipokine, adiponectin', '', 'objective : the aim of the present study was to evaluate the effects of cigarette smoking on the', '']"
5,1.3869,1.20902,"['', 'primary outcome of a randomized clinical trial was a pilot, randomized clinical trial designed', '', '', '', 'the ability to concentrate or extract micro / nanoparticles from the background matrix is essential to', 'the aim of this study was to employ needleless electrospinning for the preparation of multi', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'pigment epithelium derived factor ( pedf ) has emerged as a potential', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts produce the dentin matrix and are responsible for the synthesis of', '', 'systematic review combining 27,881 hcc cases from 90 studies published between janu', '', 'substantial progress has been made using genetic markers to elucidate the evolutionary histories of populations,', '', '', 'a peptide is produced in the extracellular space from the proteolytic clea', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are characterized by the following four phases', '', '', 'thrombosed native avfs underwent percutaneous restoration for 20 episodes of', '', '', '', 'objective : to assess health care resource utilization and costs among japanese women with newly', '', 'objective : the attitudes, attitude and practice ( kap ) of youths about smoking', '', 'the cocrystals are a crystalline material comprised of at least two different components', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'the genome mappability was the first issue we aimed to address in this paper. we', '', '', 'emergence of the a / h1n109 influenza virus in 2009 resulted', '', '', 'the aim of this study was to explore attitudes held by health care workers towards older people in this rural', '', 'anesthesiologists and anesthesia registrars voluntarily reported complications and critical incidents', '', '', '', '', 'canagliflozin is the first oral inhibitor of sodium / glucose cotransporter', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'the presence and importance of theta for grid formation is therefore currently controversial and requires further study', '', 'lung carcinoma is the most common cause of cancer mortality world - wide. adeno', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'objective : a resurgence in the research of 2 agonists for sed', 'linagliptin ( tradjenta, boehringer ingel', '', '', '', 'the ultimate goal of periodontal therapy is the regeneration of periodontal tissues that have been destroyed', '', '', '', '', 'the theme of infanticide has received increasing attention from historians in recent years. the aim of', '', 'the aim of the present study was to develop a cheap, fast and reliable release rate method', '', '', '', '', '', '', '', '', 'the present study investigated the effects of unilateral vestibular neuritis on the functional connectivity between the', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and vacuolar protein sorting ( v', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for primary', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'the raas is a major circulating hormonal system, which promotes at', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the emergence of quinolone resistance in fecal e. coli of', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'dilated cardiomyopathy ( dcm ) is the fourth most prevalent form', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the ssx 1 - 9 gene family consists of 9 subtypes of cancer', '', '', '', 'src family kinases ( sfks ) are a', 'the study was conducted using qualitative content analysis. the study was conducted to clarify the caring self', '', 'this study consists of elderly ( 65 years of age ) type 2 diabetic subjects who are', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, in place of locations', '', '', '', '', 'ovcf is a systemic disease characterized by low bone mass, bone', 'cross - linking gd1b derived gangliosides are sia', 'the effect of collapsing the high - end categories of a comor', 'colorectal polyps are pathologically classified as neoplastic or', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', ""the screening india's twin epidemic study was conducted in eight states maharashtra,"", 'intermediate restorative material ( irm ) has addressed some drawbacks including moisture', '', '', '', '', '', '', '', '', '', '', '', 'forest walking has been reported as a novel form of therapy. however, it remains unclear', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the 5-year survival rate of the treated patients was only about 20%. the treatment of osteos', '', '', '', '', 'objective : the effectiveness of imt in patients with chronic obstructive pulmonary disease', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse cannulation of the horses ( four geldings of norwegian cold', '', 'ovarian cancer is the most lethal and second most common gynecologic', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective. this is', 'the 5-year disease - free survival ( dfs ) has significantly improved in recent', '', '', 'schizophrenia symptomatology is a complex and complex problem. we present a study of', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the marineosin ( mar ) gene cluster is a family of linear and cycl', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'dcs are the most potent antigen presenting cells and robustly induce adaptive immunity', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'in 2012, prostate cancer was the fifth most commonly diagnosed malignancy in korean', '', '', 'the fluorescence lifetimes are monoexponential, and the fluorescence lifetime is', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'the purpose of the present study was to evaluate the radiographic results of mis - alif and', '', '', 'background : schizophrenia is often preceded by nonspecific changes in behavior, emotional state,', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is associated with a process of overall adaptation on both individual and group levels', '', '', 'peptide synthesis was performed by peptide synthesis : benzotriazole', '', 'alport syndrome ( as ) is a hereditary disorder characterized by a combination', '', '', 'migraine is a disabling neurological condition that is commonly observed to occur in an episodic manner', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is a neurodegenerative disorder"", '', '', '', '', '', '', '', '', '', '', '', 'objective : the purpose of this study was to determine if dancers demonstrated better static single', '', '', '', '', ""the aim of this study was to investigate the relatives'satisfaction and involvement on a general surgery"", '', '', '', '', '', '', 'the results of a retrospective study of 9 randomized controlled trials, including a meta', '', 'splicing is a highly regulated process, the outcome of which is critical for', 'the method for this investigation was approved by our institutional review board ( irb ) for', '', '', 'background : adolescent reproductive health outcomes in africa are the worst in the world', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pr with uepe was proposed to 71 patients with copd', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant /', '', 'cryptosporidium parvum is one of the most important causal agents of human diarrheas during', '', '', '', '', '', 'the aim of this research is to ensure that clinical trials are well designed and conducted in an ethical manner', 'weight loss is a significant adipokine, adiponectin', '', 'objective : the aim of the present study was to evaluate the effects of cigarette smoking on the', '']"
6,1.3577,1.206285,"['', 'primary outcome was bmi z - score change at the end of intervention ( six', '', '', '', 'ac dielectrophoresis ( dep ) is the motion of a', 'the aim of this study was to employ needleless electrospinning for the preparation of multi', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'pigment epithelium derived factor ( pedf ) has emerged as a potential', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts produce the dentin matrix and are responsible for the synthesis of', '', 'the prevalence of hcv in a population of 50 countries worldwide is a significant increase', '', 'substantial progress has been made using genetic markers to elucidate the evolutionary histories of populations,', '', '', 'ac - a(2535)-nh2 is an amphipath', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are characterized by the following four phases', '', '', 'thrombosed native avfs underwent percutaneous restoration for 20 episodes of', '', '', '', 'objective : to describe treatment patterns and estimate health care resource utilization and costs among japanes', '', 'objective : the aim of this study was to determine the kap of a sample of', '', 'the cocrystals are a crystalline material comprised of at least two different components', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'the genome mappability was the first issue we aimed to address in this paper. we', '', '', 'emergence of the a / h1n109 influenza virus in 2009 resulted', '', '', 'objective : to measure attitudes of health care workers towards older people in this rural county, the', '', 'anesthesia - related incidents were reported in a tertiary referral university hospital (', '', '', '', '', 'the challenge of treating diabetes in this section of population is to avoid hypoglycemia.', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'the presence and importance of theta for grid formation is therefore currently controversial and requires further study', '', 'lung carcinoma is the most common cause of cancer mortality world - wide. adeno', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'objective : a resurgence in the research of 2 agonists for sed', 'linagliptin ( tradjenta, boehringer ingel', '', '', '', 'the ultimate goal of periodontal therapy is the regeneration of periodontal tissues that have been destroyed', '', '', '', '', 'the theme of infanticide has received increasing attention from historians in recent years. the theme of', '', 'the aim of the present study was to determine the biocidal release rate of biocides from', '', '', '', '', '', '', '', '', 'the present study investigated the effects of unilateral vestibular neuritis on the functional connectivity between the', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and vacuolar protein sorting ( v', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'cardiovascular diseases ( cvd ) are the main cause of diabetes - related morbid', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the emergence of quinolone resistance in fecal e. coli of', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'background : cardiovascular disease is the fourth most common cause of death in dogs and one of the most', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the gene of cancer / testis antigens ( cta ) such as', '', '', '', 'src family kinases ( sfks ) are a', 'background : a nursing doctorate thesis was conducted in 2014 in iran to clarify the', '', 'this study consists of elderly ( 65 years of age ) type 2 diabetic subjects who are', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, in place of locations', '', '', '', '', 'ovcf is a systemic disease characterized by low bone mass, bone', 'cross - linking gd1b derived gangliosides are a', 'the effect of collapsing the high - end categories of a comor', 'colorectal polyps are pathologically classified as neoplastic or', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', 'the fourth diabetes atlas, published by the international diabetes federation ( id', 'intermediate restorative material ( irm ) has addressed some drawbacks including moisture', '', '', '', '', '', '', '', '', '', '', '', 'forest walking has been reported as a novel form of therapy. however, it remains unclear', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the 5-year survival rate of the treated patients was only about 20%. the treatment of osteos', '', '', '', '', 'objective : the effectiveness of imt in patients with chronic obstructive pulmonary disease', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse was daily exercised in an outdoor rotary exerciser ( kondi - trainer', '', 'ovarian cancer is the most lethal and second most common gynecologic', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective. this is', 'the 5-year survival rate of patients with larc has improved in recent years, mainly', '', '', 'background : interpersonal conflicts are emotionally difficult and require regulation of negative affect and behavior for successful resolution', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the marineosin ( mar ) gene cluster is a family of linear and cycl', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'dcs are the most potent antigen presenting cells and robustly induce adaptive immunity', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'background : prostate cancer is the fifth most commonly diagnosed malignancy in korean men', '', '', 'the fluorescence lifetimes are monoexponential, and the fluorescence lifetime is', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'objective : the purpose of the present study was to evaluate the radiographic results of mis -', '', '', 'background : schizophrenia is often preceded by nonspecific changes in behavior, emotional state,', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is associated with lower meal frequency, more frequent snacking, and', '', '', 'peptide synthesis was performed by peptide synthesis : benzotriazole', '', 'alport syndrome ( as ) is a hereditary disorder characterized by a combination', '', '', 'migraine is a disabling neurological condition that is commonly observed to occur in an episodic manner', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is neurodegenerative disorder characterized"", '', '', '', '', '', '', '', '', '', '', '', 'objective : the purpose of this study was to determine if dancers demonstrated better static single', '', '', '', '', ""objective : this study measured the impact of different levels of social support on patients'self"", '', '', '', '', '', '', 'the results of a retrospective study of 9 randomized controlled trials, including a meta', '', 'pre - mrna splicing is a highly regulated process', 'the method for this investigation was approved by our institutional review board ( irb ) for', '', '', 'background : adolescent reproductive health outcomes in africa are the worst in the world', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pulmonary rehabilitation ( pr ) for these patients has been amply demonstrated', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant /', '', 'dna topoisomerases ( topii ) are enzy', '', '', '', '', '', 'the aim of this research is to ensure that clinical trials are well designed and conducted in an ethical manner', 'weight loss is a significant adipokine, adiponectin', '', 'objective : the aim of the present study was to evaluate the effects of cigarette smoking on the', '']"
7,1.3563,1.198926,"['', 'primary outcome was bmi z - score change at the end of intervention ( six', '', '', '', 'ac dielectrophoresis ( dep ) is the motion of a', 'the aim of this study was to employ needleless electrospinning for the preparation of multi', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'pigment epithelium derived factor ( pedf ) has emerged as a potential', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts produce the dentin matrix and are responsible for the synthesis of', '', 'systematic review combining 27,881 hcc cases from 90 studies published between janu', '', 'background : we analysed 11,555 single nucleotide polymorphism (', '', '', 'a full length tau is a 3943-residue peptide with', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are characterized by the following four phases', '', '', 'thrombosed native avfs underwent percutaneous restoration for 20 episodes of', '', '', '', 'objective : to describe treatment patterns and estimate health care resource utilization among japanese women', '', 'objective : the knowledge, attitude and practice ( kap ) of youths about smoking', '', 'the cocrystals are a commonplace and often used to modify the physical properties of', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'background : cnas are variations in the genome that result in either gain or loss', '', '', 'emergence of the a / h1n109 influenza virus in 2009 resulted', '', '', 'background : attitudes towards older people are a global phenomenon, and the republic of ire', '', 'anesthesia - related incidents were reported in a tertiary referral university hospital (', '', '', '', '', 'the challenge of treating diabetes in this section of population is to avoid hypoglycemia.', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'the presence and importance of theta for grid formation is therefore currently controversial and requires further study', '', 'lung carcinoma is the most common cause of cancer mortality world - wide. adeno', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'objective : a resurgence in the research of 2 agonists for sed', 'linagliptin ( tradjenta, boehringer ingel', '', '', '', 'the ultimate goal of periodontal therapy is the regeneration of periodontal tissues that have been destroyed', '', '', '', '', 'the theme of infanticide has received increasing attention from historians in recent years. the theme of', '', 'the aim of the present study was to determine the release rate of biocide - free anti', '', '', '', '', '', '', '', '', 'objective : the present study investigated the effects of unilateral vestibular neuritis on the functional', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and vacuolar protein sorting ( v', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for primary', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'cardiovascular diseases ( cvd ) are the main cause of diabetes - related morbid', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the emergence of quinolone resistance in fecal e. coli of', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'background : cardiovascular disease is the fourth most common cause of death in dogs and one of the most', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the gene of cancer / testis antigens ( cta ) such as', '', '', '', 'src family kinases ( sfks ) are a', 'background : the study was conducted to clarify the caring self - efficacy concept among pediatric', '', 'this study consists of elderly ( 65 years of age ) type 2 diabetic subjects who are', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, in place of locations', '', '', '', '', 'ovcf is a structural change in ovcf. the presence', 'cross - linking gd1b derived gangliosides are a', 'the effect of collapsing the high - end categories of a comor', 'colorectal polyps are pathologically classified as neoplastic or', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', 'the fourth diabetes atlas, published by the international diabetes federation ( id', 'intermediate restorative material ( irm ) has addressed some drawbacks including moisture', '', '', '', '', '', '', '', '', '', '', '', 'forest walking has been reported as a novel form of therapy. we investigated the effects of forest', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the 5-year survival rate of the treated patients was only about 20%. the treatment of osteos', '', '', '', '', 'objective : the effectiveness of imt in patients with chronic obstructive pulmonary disease', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse cannulation of the horses ( four geldings of norwegian cold', '', 'ovarian cancer is the most lethal and second most common gynecologic', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'background : the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective', 'the 5-year survival rate of patients with larc has improved in recent years, mainly', '', '', 'background : interpersonal conflicts are emotionally difficult and require regulation of negative affect and behavior for successful resolution', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the marineosin ( mar ) gene cluster is a family of linear and cycl', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'immunization of human dendritic cells ( dcs ) is', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'background : prostate cancer is the fifth most commonly diagnosed malignancy in korean men', '', '', 'the fluorescence lifetimes are monoexponential, and the fluorescence lifetime is', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'objective : the purpose of the present study was to evaluate the radiographic results of mis -', '', '', 'background : schizophrenia is often preceded by nonspecific changes in behavior, emotional state,', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is associated with acculturation, snacking, and', '', '', 'peptide synthesis was performed by peptide synthesis. peptide synthesis', '', 'alport syndrome ( as ) is a hereditary disorder characterized by a combination', '', '', 'migraine is a disabling neurological condition that is commonly observed to occur in an episodic manner', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is a neurodegenerative disorder"", '', '', '', '', '', '', '', '', '', '', '', 'objective : the purpose of this study was to determine if dancers demonstrated better static single', '', '', '', '', ""objective : this study measured the impact of different levels of social support on patients'self"", '', '', '', '', '', '', 'the results of a retrospective study of adenomyosis and bleeding after la', '', 'pre - mrna splicing is a highly regulated process', 'the method for this investigation was approved by our institutional review board ( irb ) for', '', '', 'background : adolescent reproductive health outcomes in africa are the worst in the world', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pulmonary rehabilitation ( pr ) for these patients has been amply demonstrated', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant -', '', 'cryptosporidium parvum is one of the most important causal agents of human diarrheas during', '', '', '', '', '', 'background : pharmacodiagnostic ( theranostic ) applications can simplify the', 'weight loss is a significant adipokine, adiponectin', '', 'objective : the aim of the present study was to evaluate the effects of cigarette smoking on the', '']"
8,1.341,1.199486,"['', 'primary outcome was bmi z - score change at the end of intervention ( six', '', '', '', 'ac dielectrophoresis ( dep ) is the motion of a', 'the aim of this study was to employ needleless electrospinning for the preparation of multi', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'pigment epithelium derived factor ( pedf ) has emerged as a potential', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts produce the dentin matrix and are responsible for the synthesis of', '', 'systematic review combining 27,881 hcc cases from 90 studies published between janu', '', 'background : we analysed 11,555 single nucleotide polymorphism (', '', '', 'a full length tau is a 3943-residue peptide with', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are characterized by the following four phases', '', '', 'thrombosed native avfs underwent percutaneous restoration for 20 episodes of', '', '', '', 'objective : to describe treatment patterns and estimate health care resource utilization and costs among japanes', '', 'objective : the knowledge, attitude and practice ( kap ) of youths about smoking', '', 'the cocrystalline examples presented herein will possess the following criteria : ( 1 ) an', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'background : cnas are variations in the genome that result in either gain or loss', '', '', 'emergence of the a / h1n109 influenza virus in 2009 resulted', '', '', 'background : attitudes towards older people are a global phenomenon, and the republic of ire', '', 'anesthesia - related incidents were reported in a tertiary referral university hospital (', '', '', '', '', 'the challenge of treating diabetes in this section of population is to avoid hypoglycemia.', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'background : grid cells in the medial entorhinal cortex ( mec )', '', 'lung carcinoma is the most common cause of cancer mortality world - wide. adeno', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'objective : a resurgence in the research of 2 agonists for sed', 'linagliptin ( tradjenta, boehringer ingel', '', '', '', 'the ultimate goal of periodontal therapy is the regeneration of periodontal tissues that have been destroyed', '', '', '', '', 'background : the theme of infanticide has received increasing attention from historians in recent years.', '', 'the aim of the present study was to determine the release rate of biocide - free anti', '', '', '', '', '', '', '', '', 'objective : the present study investigated the effects of unilateral vestibular neuritis on the functional', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and vacuolar protein sorting ( v', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'cardiovascular diseases ( cvd ) are the main cause of diabetes - related morbid', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the emergence of quinolone resistance in fecal e. coli of', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'background : cardiovascular disease is the fourth most common cause of death in dogs and one of the most', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the gene of cancer / testis antigens ( cta ) such as', '', '', '', 'src family kinases ( sfks ) are a', 'background : the study was conducted to clarify the caring self - efficacy concept among pediatric', '', 'this study consists of elderly ( 65 years of age ) type 2 diabetic subjects who are', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, consider only autosom', '', '', '', '', 'ovcf is a structural change in ovcf. the presence', 'cross - linking gd1b derived gangliosides are a', 'objective : collapsing comorbid diseases into a single scale provides an', 'colorectal polyps are pathologically classified as neoplastic or', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', 'background : the fourth diabetes atlas, published by the international diabetes federation (', 'intermediate restorative material ( irm ) has addressed some drawbacks including moisture', '', '', '', '', '', '', '', '', '', '', '', 'forest walking has been reported as a novel form of therapy. we investigated the effects of forest', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the 5-year survival rate of the treated patients was only about 20%. the treatment of osteos', '', '', '', '', 'objective : the effectiveness of imt in patients with chronic obstructive pulmonary disease', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse was fed at 06:00, 16:00, and 22:00 hours,', '', 'ovarian cancer is the most lethal and second most common gynecologic', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'background : the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective', 'the 5-year survival rate of patients with larc has improved in recent years, mainly', '', '', 'background : interpersonal conflicts are emotionally difficult and require regulation of negative affect and behavior for successful resolution', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the marineosin ( mar ) gene cluster is a family of linear and cycl', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'immunization of human dendritic cells ( dcs ) is', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'background : prostate cancer is the fifth most commonly diagnosed malignancy in korean men', '', '', 'the fluorescence lifetimes are a function of viscosity, and the flu', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'objective : the purpose of the present study was to evaluate the radiographic results of mis -', '', '', 'background : schizophrenia is often preceded by nonspecific changes in behavior, emotional state,', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is associated with lower meal frequency, more frequent snacking, and', '', '', 'peptide synthesis was performed by peptide synthesis. peptide synthesis', '', 'alport syndrome ( as ) is a hereditary disorder characterized by a combination', '', '', 'migraine is a disabling neurological condition that is commonly observed to occur in an episodic manner', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is a neurodegenerative disorder"", '', '', '', '', '', '', '', '', '', '', '', 'objective : the purpose of this study was to determine if dancers demonstrated better static single', '', '', '', '', ""objective : this study measured the impact of different levels of social support on patients'self"", '', '', '', '', '', '', 'the results of a retrospective study of adenomyosis and bleeding after la', '', 'pre - mrna splicing is a highly regulated process', 'the method for this investigation was approved by our institutional review board ( irb ) for', '', '', 'background : adolescent reproductive health outcomes in africa are the worst in the world', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pulmonary rehabilitation ( pr ) for these patients has been amply demonstrated', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant /', '', 'cryptosporidium parvum is one of the most important causal agents of human diarrheas during', '', '', '', '', '', 'background : pharmacodiagnostic ( theranostic ) applications can simplify the', 'weight loss is a significant adipokine, adiponectin', '', 'objective : the aim of the present study was to evaluate the effects of cigarette smoking on the', '']"
9,1.3141,1.196486,"['', 'primary outcome was bmi z - score change at the end of intervention ( six', '', '', '', 'ac dielectrophoresis ( dep ) is the motion of a', 'the aim of this study was to employ needleless electrospinning for the preparation of multi', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'pigment epithelium derived factor ( pedf ) has emerged as a potential', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts are the principal mineralized tissue of teeth. the odon', '', 'objective : in 2007, our group published a systematic review combining 27,881', '', 'background : we analysed 11,555 single nucleotide polymorphism (', '', '', 'a full length tau is a 3943-residue peptide with', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are characterized by the following four phases', '', '', 'thrombosed native avfs underwent percutaneous restoration for 20 episodes of', '', '', '', 'objective : to describe treatment patterns and estimate health care resource utilization and costs among japanes', '', 'objective : the knowledge, attitude and practice ( kap ) of youths about smoking', '', 'the cocrystalline examples presented herein will possess the following criteria : ( 1 ) an', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'background : cnas are variations in the genome that result in either gain or loss', '', '', 'emergence of the a / h1n109 influenza virus in 2009 resulted', '', '', 'background : attitudes towards older people are a global phenomenon, and the republic of ire', '', 'anesthesia - related incidents were reported in a tertiary referral university hospital (', '', '', '', '', 'the challenge of treating diabetes in this section of population is to avoid hypoglycemia.', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'background : grid cells in the medial entorhinal cortex ( mec )', '', 'lung carcinoma is the most common cause of cancer mortality world - wide. adeno', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'objective : a resurgence in the research of 2 agonists for sed', 'linagliptin ( tradjenta, boehringer ingel', '', '', '', 'the ultimate goal of periodontal therapy is the regeneration of periodontal tissues that have been destroyed', '', '', '', '', 'background : the theme of infanticide has received increasing attention from historians in recent years.', '', 'the aim of the present study was to determine the release rate of biocide - free anti', '', '', '', '', '', '', '', '', 'objective : the present study investigated the effects of unilateral vestibular neuritis on the functional', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and vacuolar protein sorting ( v', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'cardiovascular diseases ( cvd ) are the main cause of diabetes - related morbid', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the emergence of quinolone resistance in fecal e. coli of', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'background : cardiovascular disease is the fourth most common cause of death in dogs and one of the most', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the gene of cancer / testis antigens ( cta ) such as', '', '', '', 'src family kinases ( sfks ) are a', 'background : the study was conducted to clarify the caring self - efficacy concept among pediatric', '', 'this study consists of elderly ( 65 years of age ) type 2 diabetic subjects who are', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, consider only autosom', '', '', '', '', 'ovcf is a structural change in ovcf. the presence', 'cross - linking gangliosides are sialic acid containing glycosphin', 'objective : collapsing comorbid diseases into a single scale provides an', 'colorectal polyps are histologically classified as neoplastic or', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', ""background : the screening india's twin epidemic study was conducted in eight states maharash"", 'intermediate restorative material ( irm ) has addressed some drawbacks including moisture', '', '', '', '', '', '', '', '', '', '', '', 'background : forest walking has been reported as a novel form of therapy. however,', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the 5-year survival rate of the treated patients was only about 20%. the treatment of osteos', '', '', '', '', 'objective : the effectiveness of imt in patients with chronic obstructive pulmonary disease', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse was fed at 06:00, 16:00, and 22:00 hours,', '', 'ovarian cancer is the most lethal and second most common gynecologic', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'background : the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective', 'the 5-year survival rate of patients with larc has improved in recent years, mainly', '', '', 'background : interpersonal conflicts are emotionally difficult and require regulation of negative affect and behavior for successful resolution', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the marineosin ( mar ) gene cluster is a family of linear and cycl', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'immunization of human dendritic cells ( dcs ) is', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'background : prostate cancer is the fifth most commonly diagnosed malignancy in korean men', '', '', 'the fluorescence lifetimes are monoexponential, and the fluorescence lifetime is', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'objective : the purpose of the present study was to evaluate the radiographic results of mis -', '', '', 'background : schizophrenia is often preceded by nonspecific changes in behavior, emotional state,', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is associated with lower meal frequency, more frequent snacking, and', '', '', 'peptide synthesis was performed by peptide synthesis : benzotriazole', '', 'alport syndrome ( as ) is a hereditary disorder characterized by a combination', '', '', 'migraine is a disabling neurological condition that is commonly observed to occur in an episodic manner', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is a neurodegenerative disorder"", '', '', '', '', '', '', '', '', '', '', '', 'objective : the purpose of this study was to determine if dancers demonstrated better static single', '', '', '', '', ""objective : this study measured the impact of different levels of social support on patients'self"", '', '', '', '', '', '', 'the results of a retrospective study of adenomyosis and bleeding after la', '', 'pre - mrna splicing is a highly regulated process', 'the method for this investigation was approved by our institutional review board ( irb ) for', '', '', 'background : adolescent reproductive health ( arh ) outcomes in africa are', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pulmonary rehabilitation ( pr ) for patients with copd is characterized', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant -', '', 'cryptosporidium parvum is one of the most important causal agents of human diarrheas during', '', '', '', '', '', 'background : pharmacodiagnostic ( theranostic ) applications can simplify the', 'weight loss is a significant adipokine, adiponectin', '', 'objective : the aim of the present study was to evaluate the effects of cigarette smoking on the', '']"
10,1.319,1.194219,"['', 'primary outcome was bmi z - score change at the end of intervention ( six', '', '', '', 'ac dielectrophoresis ( dep ) is the motion of a', 'the aim of this study was to employ needleless electrospinning for the preparation of multi', 'owing to demographic aging, dementia has been identified as a major economic and societal', 'chronic obstructive pulmonary disease ( copd ) is one of the most prevalent', 'pigment epithelium derived factor ( pedf ) has emerged as a potential', '', '', 's. granulatus, which belongs to the family of boletacea', 'odontoblasts are the principal mineralized tissue of teeth. the odon', '', 'objective : in 2007, our group published a systematic review combining 27,881', '', 'background : we analysed 11,555 single nucleotide polymorphism (', '', '', 'a full length tau is a 3943-residue peptide with', 'elisa kits for the determination of insulin were purchased from alpco diagnostics', '', '', 'the auditory brainstem responses ( abr ) are characterized by the following four phases', '', '', 'thrombosed native avfs underwent percutaneous restoration for 20 episodes of', '', '', '', 'objective : to describe treatment patterns and estimate health care resource utilization and costs among japanes', '', 'objective : the knowledge, attitude and practice ( kap ) of youths about smoking', '', 'the cocrystalline examples presented herein will possess the following criteria : ( 1 ) an', '', '', 'gender has been determined from pelvis, skull, and long bones, with assessment of', 'background : cnas are variations in the genome that result in either gain or loss', '', '', 'emergence of the a / h1n109 influenza virus in 2009 resulted', '', '', 'background : healthcare workers are considered to be at particular risk of developing ageist attitudes because they are', '', 'anesthesiologists and anesthesia registrars voluntarily reported complications and critical incidents', '', '', '', '', 'the challenge of treating diabetes in this section of population is to avoid hypoglycemia.', '', '', 'the anterior cervical microforaminotomy ( acmf ) technique involves not', '', '', 'background : grid cells in the medial entorhinal cortex ( mec )', '', 'lung carcinoma is the most common cause of cancer mortality world - wide. adeno', '', 'multiple sclerosis ( ms ) is one of the most frequent diseases of the central', 'dry eye is a multifactorial disorder of the ocular surface characterized by symptoms of', 'objective : a resurgence in the research of 2 agonists for sed', 'linagliptin ( tradjenta, boehringer ingel', '', '', '', 'the ultimate goal of periodontal therapy is the regeneration of periodontal tissues that have been destroyed', '', '', '', '', 'background : the theme of infanticide has received increasing attention from historians in recent years.', '', 'the aim of the present study was to determine the release rate of biocide - free anti', '', '', '', '', '', '', '', '', 'objective : the present study investigated the effects of unilateral vestibular neuritis on the functional', '', '', '', '', 'measles virus ( mv ) is a highly contagious acute viral', '', 'escrt - iii and vacuolar protein sorting ( v', 'meta - analyses of epidemiological surveys have reported that myopia is a risk factor for', '', '', 'the emergence of secondary - structure elements constitutes an early event in the chronology of folding', '', '', 'cardiovascular diseases ( cvd ) are the main cause of diabetes - related morbid', '', '', 'ruthenium compounds belong to the most promising candidates of non - platinum containing metal complex', '', '', '', 'the emergence of quinolone resistance in fecal e. coli of', 'conventional lentiviral vectors based on hiv-1 establish permanent genetic modification of target cells', '', '', '', 'background : cardiovascular disease is the fourth most common cause of death in dogs and one of the most', 'the escherichia coli groe chaperonin system facilitates protein', '', 'the gene of cancer / testis antigens ( cta ) such as', '', '', '', 'src family kinases ( sfks ) are a', 'background : the study was conducted to clarify the caring self - efficacy concept among pediatric', '', 'this study consists of elderly ( 65 years of age ) type 2 diabetic subjects who are', '', '', '', '', '', 'enhancing the binding potency of carbohydrate inhibitors of protein carbohydrate interactions is an', '', '', '', '', '', 'we consider the case of intercrosses among pairs of taxa, consider only autosom', '', '', '', '', 'ovcf is a structural change in ovcf. the presence', 'cross - linking gangliosides are sialic acid containing glycosphin', 'objective : collapsing comorbid diseases into a single scale provides an', 'colorectal polyps are histologically classified as neoplastic or', '', '', '', 'apoptosis is a distinct form of cell death characterized by a series', '', '', '', '', 'chemoprevention utilizes appropriate pharmacological agents, consumed in diverse forms like', 'fabry disease ( fd ) is an x - linked recess', '', '', ""background : the screening india's twin epidemic study was conducted in eight states maharash"", 'intermediate restorative material ( irm ) has addressed some drawbacks including moisture', '', '', '', '', '', '', '', '', '', '', '', 'background : forest walking has been reported as a novel form of therapy. however,', 'hdacis are effective drugs in the treatment of a variety of cancers,', '', '', '', '', 'the generation of global enh, global cypher, cy', 'the 5-year survival rate of the treated patients was only about 20%. the treatment of osteos', '', '', '', '', 'objective : the effectiveness of imt in patients with chronic obstructive pulmonary disease', '', '', 'microwave imaging has recently emerged as one of the most promising non - invasive imaging modalities', '', 'the horse was fed at 06:00, 16:00, and 22:00 hours,', '', 'ovarian cancer is the most lethal and second most common gynecologic', '', '', 'acinetobacter baumannii is a glucose non - fermentative gram', 'a medullary population of basophils with few granules,', 'background : the existence of conserved longevity pathways may seem counterintuitive from an evolutionary perspective', 'the 5-year survival rate of patients with larc has improved in recent years, mainly', '', '', 'background : interpersonal conflicts are emotionally difficult and require regulation of negative affect and behavior for successful resolution', 'vascular endothelial cells ( ecs ) form the inner lining', '', '', '', 'the mortality curve in hemophiliacs over the years has a biphasic', 'the marineosin ( mar ) gene cluster is a family of linear and cycl', '', 'the pulmonary toxicity of airborne nanomaterials involves exposing animals to nanomaterials by', 'immunization of human dendritic cells ( dcs ) is', '', 'nocturia is a prevalent condition of waking to void during the night that is', '', '', 'problem - based learning ( pbl ) has its roots in medical education programs', 'background : prostate cancer is the fifth most commonly diagnosed malignancy in korean men', '', '', 'the fluorescence lifetimes are a function of viscosity, and the flu', '', '', '', '', '', '', '', 'adnexa refer to the anatomical area adjacent to the uterus', 'chronic constrictive injury ( cci ) has been the common neuropathic pain model', '', '', '', 'objective : the purpose of the present study was to evaluate the radiographic results of mis -', '', '', 'background : schizophrenia is often preceded by nonspecific changes in behavior, emotional state,', '', 'new zealand white rabbits of both sexes aged between 4 and 6 months', 'acculturation is associated with lower meal frequency, more frequent snacking, and', '', '', 'peptide synthesis was performed by peptide synthesis : benzotriazole', '', 'alport syndrome ( as ) is a hereditary disorder characterized by a combination', '', '', 'migraine is a disabling neurological condition that is commonly observed to occur in an episodic manner', '', '', 'autism spectrum disorders ( asd ) are a complex group of severe neurodevelopmental disorders that', ""parkinson's disease ( pd ) is a neurodegenerative disorder"", '', '', '', '', '', '', '', '', '', '', '', 'objective : the purpose of this study was to determine if dancers demonstrated better static single', '', '', '', '', ""objective : this study measured the impact of different levels of social support on patients'self"", '', '', '', '', '', '', 'the results of a retrospective study of adenomyosis and bleeding after la', '', 'pre - mrna splicing is a highly regulated process', 'the method for this investigation was approved by our institutional review board ( irb ) for', '', '', 'background : adolescent reproductive health ( arh ) outcomes in africa are', '', 'recruitment, which began in january 1999 and ended in may 2001, took place', '', '', 'home - based pulmonary rehabilitation ( pr ) for patients with copd is characterized', '', '', '', '', '', '', '', 'a prospective clinical trial has identified a similar relationship between the location of the implant -', '', 'dna topoisomerases ( topii ) are enzy', '', '', '', '', '', 'background : pharmacodiagnostic ( theranostic ) applications can simplify the', 'weight loss is a significant adipokine, adiponectin', '', 'objective : the aim of the present study was to evaluate the effects of cigarette smoking on the', '']"


TrainOutput(global_step=15000, training_loss=1.4051358072916666, metrics={'train_runtime': 8905.0847, 'train_samples_per_second': 3.369, 'train_steps_per_second': 1.684, 'total_flos': 4.041221603328e+16, 'train_loss': 1.4051358072916666, 'epoch': 10.0})

In [None]:
test_results = trainer.evaluate(tokenized_datasets["test"])
print(test_results)

# Save the fine-tuned model
model.save_pretrained("./fine-tuned-text-summarization")
tokenizer.save_pretrained("./fine-tuned-text-summarization")

Trainer is attempting to log a value of "['the birth of a premature infant and the following neonatal intensive care unit ( nicu', 'we propose that a definition of health ( nor treatment ) is regulated in a similar', '', '', 'objective : the purpose of this review is to present updated data available on the efficacy', 'objective : the aim of this study was to investigate the effect of varied protein - energy combinations', 'tcr mice ( 21 ) were mated with rag2 mice ( 23', 'background : sprayed refuges are a compromise between sprayed and unsprayed refuge', 'the hypnotic assessment method was based on the prolongation of sleep induced by pen', 'succinic acid is classified as the most promising chemical among 12 biobased chemicals by the u', '', 'this review was based on a systematic comprehensive search of six databases : ovid', '', '', '', 'axonal excitability is a major determinant of the treatment regimen required to', 'neurological soft signs ( nss ) have long been considered one of

{'eval_loss': 1.2174769639968872, 'eval_pred': ['the birth of a premature infant and the following neonatal intensive care unit ( nicu', 'we propose that a definition of health ( nor treatment ) is regulated in a similar', '', '', 'objective : the purpose of this review is to present updated data available on the efficacy', 'objective : the aim of this study was to investigate the effect of varied protein - energy combinations', 'tcr mice ( 21 ) were mated with rag2 mice ( 23', 'background : sprayed refuges are a compromise between sprayed and unsprayed refuge', 'the hypnotic assessment method was based on the prolongation of sleep induced by pen', 'succinic acid is classified as the most promising chemical among 12 biobased chemicals by the u', '', 'this review was based on a systematic comprehensive search of six databases : ovid', '', '', '', 'axonal excitability is a major determinant of the treatment regimen required to', 'neurological soft signs ( nss ) have long been considered 

('./fine-tuned-text-summarization-specific-samples/tokenizer_config.json',
 './fine-tuned-text-summarization-specific-samples/special_tokens_map.json',
 './fine-tuned-text-summarization-specific-samples/spiece.model',
 './fine-tuned-text-summarization-specific-samples/added_tokens.json')

In [None]:
!zip -r ./fine-tuned-text-summarization.zip ./fine-tuned-text-summarization

  adding: content/fine-tuned-text-summarization-specific-samples/ (stored 0%)
  adding: content/fine-tuned-text-summarization-specific-samples/spiece.model (deflated 48%)
  adding: content/fine-tuned-text-summarization-specific-samples/config.json (deflated 62%)
  adding: content/fine-tuned-text-summarization-specific-samples/special_tokens_map.json (deflated 85%)
  adding: content/fine-tuned-text-summarization-specific-samples/tokenizer_config.json (deflated 94%)
  adding: content/fine-tuned-text-summarization-specific-samples/model.safetensors (deflated 9%)
  adding: content/fine-tuned-text-summarization-specific-samples/added_tokens.json (deflated 83%)
  adding: content/fine-tuned-text-summarization-specific-samples/generation_config.json (deflated 29%)


## Benchmarking against CompScholar


In [None]:
!pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=e755c5e6c1951487b4ec4488f7206b29736e820fb86136aea6d11506673cb0ce
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
!unzip ./fine-tuned-text-summarization.zip

Archive:  /content/fine-tuned-text-summarization-specific-samples-version7_og.zip
   creating: content/fine-tuned-text-summarization-specific-samples/
  inflating: content/fine-tuned-text-summarization-specific-samples/special_tokens_map.json  
  inflating: content/fine-tuned-text-summarization-specific-samples/config.json  
  inflating: content/fine-tuned-text-summarization-specific-samples/tokenizer_config.json  
  inflating: content/fine-tuned-text-summarization-specific-samples/model.safetensors  
  inflating: content/fine-tuned-text-summarization-specific-samples/generation_config.json  
  inflating: content/fine-tuned-text-summarization-specific-samples/spiece.model  
  inflating: content/fine-tuned-text-summarization-specific-samples/added_tokens.json  


In [None]:
import pandas as pd
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import nltk
import warnings
warnings.filterwarnings("ignore")

nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
tokenizer = AutoTokenizer.from_pretrained("/content/content/fine-tuned-text-summarization")
model = AutoModelForSeq2SeqLM.from_pretrained("/content/content/fine-tuned-text-summarization")
summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)

Device set to use cpu


In [None]:
df = pd.read_csv('/content/Brain Dead CompScholar Dataset.csv')

In [None]:
def calculate_rouge(reference, candidate):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = scorer.score(reference, candidate)
    return {
        'rouge1': scores['rouge1'].fmeasure,
        'rouge2': scores['rouge2'].fmeasure,
        'rougeL': scores['rougeL'].fmeasure
    }

def calculate_bleu(reference, candidate):
    smoothie = SmoothingFunction().method1
    reference_tokens = nltk.word_tokenize(reference.lower())
    candidate_tokens = nltk.word_tokenize(candidate.lower())

    return sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothie)


In [None]:
results = []

print("Processing documents and evaluating...")
for _, row in df.iterrows():
    paper_id = row['Paper Id']
    document = row['Document']
    reference_summary = row['Summary']

    try:
        print(f"Generating summary for Paper ID: {paper_id}")
        generated_summary = summarizer(document, max_length=300, min_length=90, do_sample=False)
        generated_summary_text = generated_summary[0]['summary_text']

        rouge_scores = calculate_rouge(reference_summary, generated_summary_text)
        bleu_score = calculate_bleu(reference_summary, generated_summary_text)

        results.append({
            'Paper_Id': paper_id,
            'Reference_Summary': reference_summary,
            'Generated_Summary': generated_summary_text,
            'ROUGE-1': rouge_scores['rouge1'],
            'ROUGE-2': rouge_scores['rouge2'],
            'ROUGE-L': rouge_scores['rougeL'],
            'BLEU': bleu_score
        })

        print(f"Processed Paper ID: {paper_id}")
    except Exception as e:
        print(f"Error processing Paper ID {paper_id}: {str(e)}")



Token indices sequence length is longer than the specified maximum sequence length for this model (607 > 512). Running this sequence through the model will result in indexing errors


Processing documents and evaluating...
Generating summary for Paper ID: 1
Processed Paper ID: 1
Generating summary for Paper ID: 2
Processed Paper ID: 2
Generating summary for Paper ID: 3
Processed Paper ID: 3
Generating summary for Paper ID: 4
Processed Paper ID: 4
Generating summary for Paper ID: 5
Processed Paper ID: 5
Generating summary for Paper ID: 6
Processed Paper ID: 6
Generating summary for Paper ID: 7
Processed Paper ID: 7
Generating summary for Paper ID: 8
Processed Paper ID: 8
Generating summary for Paper ID: 9
Processed Paper ID: 9
Generating summary for Paper ID: 10
Processed Paper ID: 10
Generating summary for Paper ID: 11
Processed Paper ID: 11
Generating summary for Paper ID: 12
Processed Paper ID: 12
Generating summary for Paper ID: 13
Processed Paper ID: 13
Generating summary for Paper ID: 14
Processed Paper ID: 14
Generating summary for Paper ID: 15
Processed Paper ID: 15
Generating summary for Paper ID: 16
Processed Paper ID: 16
Generating summary for Paper ID: 17

Your max_length is set to 300, but your input_length is only 285. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=142)


Processed Paper ID: 186
Generating summary for Paper ID: 187
Processed Paper ID: 187
Generating summary for Paper ID: 188
Processed Paper ID: 188
Generating summary for Paper ID: 189
Processed Paper ID: 189
Generating summary for Paper ID: 190
Processed Paper ID: 190
Generating summary for Paper ID: 191
Processed Paper ID: 191
Generating summary for Paper ID: 192
Processed Paper ID: 192
Generating summary for Paper ID: 193
Processed Paper ID: 193
Generating summary for Paper ID: 194
Processed Paper ID: 194
Generating summary for Paper ID: 195
Processed Paper ID: 195
Generating summary for Paper ID: 196
Processed Paper ID: 196
Generating summary for Paper ID: 197
Processed Paper ID: 197
Generating summary for Paper ID: 198
Processed Paper ID: 198
Generating summary for Paper ID: 199
Processed Paper ID: 199
Generating summary for Paper ID: 200


Your max_length is set to 300, but your input_length is only 295. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=147)


Processed Paper ID: 200
Generating summary for Paper ID: 201
Processed Paper ID: 201
Generating summary for Paper ID: 202
Processed Paper ID: 202
Generating summary for Paper ID: 203
Processed Paper ID: 203
Generating summary for Paper ID: 204
Processed Paper ID: 204
Generating summary for Paper ID: 205
Processed Paper ID: 205
Generating summary for Paper ID: 206
Processed Paper ID: 206
Generating summary for Paper ID: 207
Processed Paper ID: 207
Generating summary for Paper ID: 208
Processed Paper ID: 208
Generating summary for Paper ID: 209
Processed Paper ID: 209
Generating summary for Paper ID: 210
Processed Paper ID: 210
Generating summary for Paper ID: 211
Processed Paper ID: 211
Generating summary for Paper ID: 212
Processed Paper ID: 212
Generating summary for Paper ID: 213
Processed Paper ID: 213
Generating summary for Paper ID: 214
Processed Paper ID: 214
Generating summary for Paper ID: 215
Processed Paper ID: 215
Generating summary for Paper ID: 216
Processed Paper ID: 216


Your max_length is set to 300, but your input_length is only 240. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=120)


Processed Paper ID: 220
Generating summary for Paper ID: 221
Processed Paper ID: 221
Generating summary for Paper ID: 222
Processed Paper ID: 222
Generating summary for Paper ID: 223
Processed Paper ID: 223
Generating summary for Paper ID: 224
Processed Paper ID: 224
Generating summary for Paper ID: 225
Processed Paper ID: 225
Generating summary for Paper ID: 226
Processed Paper ID: 226
Generating summary for Paper ID: 227
Processed Paper ID: 227
Generating summary for Paper ID: 228
Processed Paper ID: 228
Generating summary for Paper ID: 229
Processed Paper ID: 229
Generating summary for Paper ID: 230
Processed Paper ID: 230
Generating summary for Paper ID: 231
Processed Paper ID: 231
Generating summary for Paper ID: 232
Processed Paper ID: 232
Generating summary for Paper ID: 233
Processed Paper ID: 233
Generating summary for Paper ID: 234
Processed Paper ID: 234
Generating summary for Paper ID: 235
Processed Paper ID: 235
Generating summary for Paper ID: 236
Processed Paper ID: 236


Your max_length is set to 300, but your input_length is only 274. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=137)


Processed Paper ID: 243
Generating summary for Paper ID: 244
Processed Paper ID: 244
Generating summary for Paper ID: 245
Processed Paper ID: 245
Generating summary for Paper ID: 246
Processed Paper ID: 246
Generating summary for Paper ID: 247
Processed Paper ID: 247
Generating summary for Paper ID: 248
Processed Paper ID: 248
Generating summary for Paper ID: 249
Processed Paper ID: 249
Generating summary for Paper ID: 250
Processed Paper ID: 250
Generating summary for Paper ID: 251
Processed Paper ID: 251
Generating summary for Paper ID: 252
Processed Paper ID: 252
Generating summary for Paper ID: 253
Processed Paper ID: 253
Generating summary for Paper ID: 254
Processed Paper ID: 254
Generating summary for Paper ID: 255
Processed Paper ID: 255
Generating summary for Paper ID: 256
Processed Paper ID: 256
Generating summary for Paper ID: 257
Processed Paper ID: 257
Generating summary for Paper ID: 258
Processed Paper ID: 258
Generating summary for Paper ID: 259
Processed Paper ID: 259


Your max_length is set to 300, but your input_length is only 251. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=125)


Processed Paper ID: 291
Generating summary for Paper ID: 292
Processed Paper ID: 292
Generating summary for Paper ID: 293
Processed Paper ID: 293
Generating summary for Paper ID: 294
Processed Paper ID: 294
Generating summary for Paper ID: 295


Your max_length is set to 300, but your input_length is only 291. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=145)


Processed Paper ID: 295
Generating summary for Paper ID: 296
Processed Paper ID: 296
Generating summary for Paper ID: 297
Processed Paper ID: 297
Generating summary for Paper ID: 298
Processed Paper ID: 298
Generating summary for Paper ID: 299
Processed Paper ID: 299
Generating summary for Paper ID: 300
Processed Paper ID: 300
Generating summary for Paper ID: 301
Processed Paper ID: 301
Generating summary for Paper ID: 302


Your max_length is set to 300, but your input_length is only 296. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=148)


Processed Paper ID: 302
Generating summary for Paper ID: 303
Processed Paper ID: 303
Generating summary for Paper ID: 304
Processed Paper ID: 304
Generating summary for Paper ID: 305
Processed Paper ID: 305
Generating summary for Paper ID: 306
Processed Paper ID: 306
Generating summary for Paper ID: 307
Processed Paper ID: 307
Generating summary for Paper ID: 308
Processed Paper ID: 308
Generating summary for Paper ID: 309
Processed Paper ID: 309
Generating summary for Paper ID: 310
Processed Paper ID: 310
Generating summary for Paper ID: 311
Processed Paper ID: 311
Generating summary for Paper ID: 312
Processed Paper ID: 312
Generating summary for Paper ID: 313
Processed Paper ID: 313
Generating summary for Paper ID: 314
Processed Paper ID: 314
Generating summary for Paper ID: 315
Processed Paper ID: 315
Generating summary for Paper ID: 316
Processed Paper ID: 316
Generating summary for Paper ID: 317
Processed Paper ID: 317
Generating summary for Paper ID: 318
Processed Paper ID: 318


Your max_length is set to 300, but your input_length is only 232. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=116)


Processed Paper ID: 344
Generating summary for Paper ID: 345
Processed Paper ID: 345
Generating summary for Paper ID: 346
Processed Paper ID: 346
Generating summary for Paper ID: 347


Your max_length is set to 300, but your input_length is only 237. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=118)


Processed Paper ID: 347
Generating summary for Paper ID: 348


Your max_length is set to 300, but your input_length is only 215. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=107)


Processed Paper ID: 348
Generating summary for Paper ID: 349
Processed Paper ID: 349
Generating summary for Paper ID: 350
Processed Paper ID: 350
Generating summary for Paper ID: 351
Processed Paper ID: 351
Generating summary for Paper ID: 352
Processed Paper ID: 352
Generating summary for Paper ID: 353
Processed Paper ID: 353
Generating summary for Paper ID: 354
Processed Paper ID: 354
Generating summary for Paper ID: 355
Processed Paper ID: 355
Generating summary for Paper ID: 356
Processed Paper ID: 356
Generating summary for Paper ID: 357
Processed Paper ID: 357
Generating summary for Paper ID: 358
Processed Paper ID: 358
Generating summary for Paper ID: 359
Processed Paper ID: 359
Generating summary for Paper ID: 360
Processed Paper ID: 360
Generating summary for Paper ID: 361
Processed Paper ID: 361
Generating summary for Paper ID: 362
Processed Paper ID: 362
Generating summary for Paper ID: 363
Processed Paper ID: 363
Generating summary for Paper ID: 364
Processed Paper ID: 364


In [None]:
results_df = pd.DataFrame(results)

# Multiply the scores by 100 to scale them to 0-100 from 0-1
results_df['ROUGE-1'] = results_df['ROUGE-1'] * 100
results_df['ROUGE-2'] = results_df['ROUGE-2'] * 100
results_df['ROUGE-L'] = results_df['ROUGE-L'] * 100
results_df['BLEU'] = results_df['BLEU'] * 100

avg_scores = {
    'ROUGE-1': results_df['ROUGE-1'].mean(),
    'ROUGE-2': results_df['ROUGE-2'].mean(),
    'ROUGE-L': results_df['ROUGE-L'].mean(),
    'BLEU': results_df['BLEU'].mean()
}

print("\nSummary Statistics:")
for metric, score in avg_scores.items():
    print(f"Average {metric}: {score:.4f}")


print("\nDetailed Results:")
print(results_df[['Paper_Id', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L', 'BLEU']])

results_df.to_csv('CompScholar_summarization_evaluation_results.csv', index=False)
print("\nResults saved to 'CompScholar_summarization_evaluation_results.csv'")


Summary Statistics:
Average ROUGE-1: 46.2173
Average ROUGE-2: 21.4349
Average ROUGE-L: 29.5130
Average BLEU: 12.3004

Detailed Results:
     Paper_Id    ROUGE-1    ROUGE-2    ROUGE-L       BLEU
0           1  61.635220  24.203822  32.704403  15.710743
1           2  37.777778   5.617978  23.333333   2.222723
2           3  48.000000  22.972973  36.000000  14.858638
3           4  50.370370  24.060150  34.074074   9.384487
4           5  50.322581  22.222222  29.677419  10.218584
..        ...        ...        ...        ...        ...
366       367  44.615385  14.062500  29.230769   9.388698
367       368  45.962733  12.578616  26.086957   3.218976
368       369  46.575342  12.500000  30.136986   7.834416
369       370  44.970414  11.976048  20.118343   2.474897
370       371  50.000000  20.253165  35.000000  18.891116

[371 rows x 5 columns]

Results saved to 'summarization_evaluation_results.csv'
