In [3]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [4]:
!pip install accelerate openai tiktoken transformers torch  retry bitsandbytes

Collecting accelerate
  Downloading accelerate-0.32.1-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.1/314.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai
  Downloading openai-1.35.14-py3-none-any.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m328.5/328.5 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
Collecting retry
  Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading

## Hàm tạo Prompt

In [12]:

def create_prompt(text):
  prompt = f"""
Analyze the following scientific text as an expert in natural language processing and information extraction. Without relying on predefined categories, identify the key concepts, entities, and relationships between them that are crucial for understanding the scientific content. Your analysis should capture the essence of the research described, including any methods, tasks, materials, or metrics mentioned.

Present your findings in a structured format that clearly distinguishes between:
1. ner: Important entities or concepts as objects. Each object has a text and a type.
2. relations: Significant relationships or interactions between these entities as objects. Each object has a head, a tail and a type
Directionality is taken into account except for symmetric relation types.
3. types: provide a dictionary of types and their meanings.

Ensure your response is based solely on the information explicitly stated in the text, avoiding any inferences or predictions beyond what is directly presented.
Use your expertise to determine the most appropriate labels and categories based on the content of the text.


Text to analyze:
English is shown to be trans-context-free on the basis of coordinations of the respectively type that involve strictly syntactic cross-serial agreement. The agreement in question involves number in nouns and reflexive pronouns and is syntactic rather than semantic in nature because grammatical number in English, like grammatical gender in languages such as French, is partly arbitrary. The formal proof, which makes crucial use of the Interchange Lemma of Ogden et al., is so constructed as to be valid even if English is presumed to contain grammatical sentences in which respectively operates across a pair of coordinate phrases one of whose members has fewer conjuncts than the other; it thus goes through whatever the facts may be regarding constructions with unequal numbers of conjuncts in the scope of respectively, whereas other arguments have foundered on this problem.
Extracted results:
{{
  "ner": [
    {{"text": "English", "type": "Material"}},
    {{"text": "coordinations", "type": "OtherScientificTerm"}},
    {{"text": "cross-serial agreement", "type": "OtherScientificTerm"}},
    {{"text": "agreement", "type": "Generic"}},
    {{"text": "number", "type": "OtherScientificTerm"}},
    {{"text": "nouns and reflexive pronouns", "type": "OtherScientificTerm"}},
    {{"text": "grammatical number", "type": "OtherScientificTerm"}},
    {{"text": "English", "type": "Material"}},
    {{"text": "grammatical gender", "type": "OtherScientificTerm"}},
    {{"text": "French", "type": "Material"}},
    {{"text": "Interchange Lemma", "type": "Method"}},
    {{"text": "English", "type": "Material"}}
  ],
  "relations": [
    {{
      "text": "number",
      "tail": "nouns and reflexive pronouns",
      "type": "CONJUNCTION"
    }},
    {{
      "head": "grammatical gender",
      "tail": "French",
      "type": "FEATURE-OF"
    }},
    {{
      "head": "English",
      "tail": "French",
      "type": "HYPONYM-OF"
    }}
  ],
  "types": {{
    "Method": "Methods , models, systems to use, or tools, components of a system, frameworks.",
    "Material": "Data, datasets, resources, Corpus, Knowledge base.",
    "Other Scientific Terms": "Phrases that are a scientific terms but do not fall into any of the above classes.",
    "Generic": "General terms or pronouns that may refer to a entity but are not themselves informative, often used as connection words.",
    "FEATURE-OF": "attribute or characteristic",
    "HYPONYM-OF": "specific type or instance",
    "CONJUNCTION":"related or used together."
  }}
}}
IMPORTANT:
- Format your response as a JSON-like structure.
- The "text", "head", "tail" and "type" field for each entity or relation must always be a single string, never an array.
- If an entity consists of multiple words or phrases, combine them into a single string.
- Do not include any information other than the required JSON structure.
- No clarification is needed except for the types dictionary


Now, please extract the relationships from the following text:
  Text: {text}

  Extracted Results:

  """
  return prompt


# Chọn ngẫu nhiên mẫu

In [6]:
import json
def read_json_file(file_path):
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            data.append(json.loads(line.strip()))
    return data
import random


def get_random_samples(input_list, num_samples=30):
    """
    Get random samples from the input list.

    Args:
    input_list (list): The list to sample from.
    num_samples (int): The number of samples to return. Default is 20.

    Returns:
    list: A list of random samples.
    """
    if len(input_list) < num_samples:
        raise ValueError(f"Input list must have at least {num_samples} elements.")

    return random.sample(input_list, num_samples)
file_path = '/content/gdrive/My Drive/llama3_data/SciErc/scierc.json'
json_data = read_json_file(file_path)

last_samples = get_random_samples(json_data)
def create_text_from_sample(data):
    # Ghép các từ trong mỗi câu
    sentences = [' '.join(sentence) for sentence in data['sentences']]

    # Ghép các câu thành một đoạn văn bản
    full_text = ' '.join(sentences)

    return full_text
def extract_entities_and_relations(data):
    all_tokens = [token for sentence in data['sentences'] for token in sentence]

    entities = []
    for ner_sentence in data['ner']:
        for start, end, entity_type in ner_sentence:
            entity_text = ' '.join(all_tokens[start:end+1])
            entities.append({"text": entity_text, "type": entity_type})

    relations = []
    for relation_sentence in data['relations']:
        for head_start, head_end, tail_start, tail_end, relation_type in relation_sentence:
            head_text = ' '.join(all_tokens[head_start:head_end+1])
            tail_text = ' '.join(all_tokens[tail_start:tail_end+1])
            relations.append({
                "head": head_text,
                "tail": tail_text,
                "type": relation_type
            })

    return {
        "ner": entities,
        "relations": relations
    }



## Hàm đánh giá kết quả

In [7]:
from difflib import SequenceMatcher
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
import numpy as np
from scipy.spatial.distance import cosine



semantic_check_model_name = "bert-base-uncased"
semantic_check_tokenizer = AutoTokenizer.from_pretrained(semantic_check_model_name)
semantic_check_model = AutoModel.from_pretrained(semantic_check_model_name)

relation_meanings = {
    "USED-FOR": "utilization or application",
    "FEATURE-OF": "attribute or characteristic",
    "HYPONYM-OF": "specific type or instance",
    "PART-OF": "component or element",
    "COMPARE": "contrast or evaluation",
    "CONJUNCTION": "related or used together"
}

ner_meanings = {
    "Method": "Methods , models, systems to use, or tools, components of a system, frameworks.",
    "Material": "Data, datasets, resources, Corpus, Knowledge base.",
    "Other Scientific Terms": "Phrases that are a scientific terms but do not fall into any of the above classes.",
    "Generic": "General terms or pronouns that may refer to a entity but are not themselves informative, often used as connection words.",
    "Evaluation Metric": "Metrics, measures, or entities that can express quality of a system/method.",
    "Task": "Applications, problems to solve, systems to construct."
  }


def get_word_embedding(word):

    inputs = semantic_check_tokenizer(word, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = semantic_check_model(**inputs)

    return outputs.last_hidden_state[0][0].numpy()

def semantic_similarity(word1, word2):
    emb1 = get_word_embedding(word1)
    emb2 = get_word_embedding(word2)
    return 1 - cosine(emb1, emb2)

def text_similarity(text1, text2):
    #print(text1)
    #print(text2)
    return SequenceMatcher(None, text1.lower(), text2.lower()).ratio()


def calculate_ner_similarity(pred, gt, sem_dict):
    text_sim = 1 if text_similarity(pred['text'], gt['text']) > 0.4 else 0
    type_sim = semantic_similarity(sem_dict.get(pred['type'], pred['type']), ner_meanings.get(gt['type'], gt['type']))
    return (text_sim + type_sim) / 2

def calculate_relation_similarity(pred, gt, sem_dict):
    head_sim = 1 if text_similarity(pred['head'], gt['head']) > 0.4 else 0
    tail_sim = 1 if text_similarity(pred['tail'], gt['tail']) > 0.4 else 0
    type_sim = semantic_similarity(sem_dict.get(pred['type'], pred['type']), relation_meanings.get(gt['type'].lower(), gt['type']))
    return (head_sim + tail_sim + type_sim) / 3

def calculate_f1(predicted, ground_truth, sem_dict, task='ner', threshold=0.7):
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    pred_items = predicted[task]
    gt_items = ground_truth[task]

    similarity_func = calculate_ner_similarity if task == 'ner' else calculate_relation_similarity

    for pred in pred_items:
        best_score = 0
        for gt in gt_items:
            similarity = similarity_func(pred, gt, sem_dict)
            best_score = max(best_score, similarity)

        if best_score >= threshold:
            true_positives += 1
        else:
            false_positives += 1

    for gt in gt_items:
        best_score = 0
        for pred in pred_items:
            similarity = similarity_func(pred, gt, sem_dict)
            best_score = max(best_score, similarity)

        if best_score < threshold:
            false_negatives += 1

    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0

    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

## Xử lý output

In [8]:
def parse_llm_output(output):
    # Tìm phần JSON trong đầu ra của LLM
    json_start = output.find('{')
    json_end = output.rfind('}') + 1
    json_str = output[json_start:json_end]
    parsed_json = json.loads(json_str)
    return parsed_json



In [9]:
model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)


tokenizer_config.json:   0%|          | 0.00/51.1k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/131 [00:00<?, ?B/s]

# Chạy từng sample

In [10]:
def run_each_samples(sample_data):
  text = create_text_from_sample(sample_data)
  ground_truth = extract_entities_and_relations(sample_data)

  prompt = create_prompt(text)
  #print(prompt)
  messages = [
    {"role": "system", "content": prompt },
  ]

  input_ids = tokenizer.apply_chat_template(
      messages,
      add_generation_prompt=True,
      return_tensors="pt"
  ).to(model.device)

  terminators = [
      tokenizer.eos_token_id,
      tokenizer.convert_tokens_to_ids("<|eot_id|>")
  ]
  outputs = model.generate(
    input_ids,
    max_new_tokens=3000,
    eos_token_id=terminators,
    do_sample=False,
    temperature=0.0,
  )
  response = outputs[0][input_ids.shape[-1]:]
  llm_output = tokenizer.decode(response, skip_special_tokens=True)
  print(llm_output)
  llm_results = parse_llm_output(llm_output)
  sem_dict = llm_results["types"]
  print(llm_results)
  return {
      "ner": calculate_f1(llm_results, ground_truth,sem_dict, task='ner'),
      "relations": calculate_f1(llm_results, ground_truth,sem_dict, task='relations')
  }



In [13]:

total_precision = 0
total_recall = 0
total_f1 = 0
total_count
for i, sample in enumerate(last_samples, 1):
    try:
      res = run_each_samples(sample)
    except Exception as e:
      print(f"Error in sample {i}")
      print(e)
      continue
    print(res)
    ner_precision, ner_recall, ner_f1 = res['ner']
    relations_precision, relations_recall, relations_f1 = res['relations']
    total_precision += ner_precision + relations_precision
    total_recall += ner_recall + relations_recall
    total_f1 += ner_f1 + relations_f1
    total_count += 1
    print(total_precision)
    print(total_recall)
    print(total_f1)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Here are the extracted results in the required JSON-like structure:

{
  "ner": [
    {"text": "Chinese-to-English SMT model", "type": "Method"},
    {"text": "word sense disambiguation performance", "type": "OtherScientificTerm"},
    {"text": "standard WSD evaluation methodology", "type": "Method"},
    {"text": "datasets", "type": "Material"},
    {"text": "Senseval-3 Chinese lexical sample task", "type": "Material"},
    {"text": "WSD", "type": "OtherScientificTerm"},
    {"text": "BLEU scores", "type": "OtherScientificTerm"},
    {"text": "statistical machine translation", "type": "OtherScientificTerm"},
    {"text": "SMT models", "type": "Method"},
    {"text": "dedicated WSD models", "type": "Method"},
    {"text": "WSD accuracy", "type": "OtherScientificTerm"},
    {"text": "current typical SMT models", "type": "Method"},
    {"text": "dedicated WSD models", "type": "Method"},
    {"text": "speculative claims", "type": "OtherScientificTerm"}
  ],
  "relations": [
    {
      "h

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (1.0, 0.875, 0.9333333333333333), 'relations': (0.3333333333333333, 0.2727272727272727, 0.3)}
1.3333333333333333
1.1477272727272727
1.2333333333333334
Here are the extracted results:

{
  "ner": [
    {"text": "bit-vector-based CKY-style parser", "type": "Method"},
    {"text": "CKY-style parser", "type": "Method"},
    {"text": "context-free parsing", "type": "OtherScientificTerm"},
    {"text": "parse forest representation", "type": "OtherScientificTerm"},
    {"text": "complete set of possible analyses", "type": "OtherScientificTerm"},
    {"text": "large treebank grammars", "type": "Material"},
    {"text": "long input sentences", "type": "Material"},
    {"text": "bit-vector operations", "type": "Method"},
    {"text": "basic parsing operations", "type": "OtherScientificTerm"},
    {"text": "parser", "type": "Method"},
    {"text": "analyses", "type": "Generic"}
  ],
  "relations": [
    {
      "head": "parser",
      "tail": "bit-vector-based CKY-style parser",
      "ty

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.8181818181818182, 1.0, 0.9), 'relations': (0.16666666666666666, 0.25, 0.2)}
2.3181818181818183
2.3977272727272725
2.3333333333333335
Here are the extracted results:

{
  "ner": [
    {"text": "projective reconstruction", "type": "OtherScientificTerm"},
    {"text": "multiple images", "type": "OtherScientificTerm"},
    {"text": "camera", "type": "Material"},
    {"text": "straight line", "type": "OtherScientificTerm"},
    {"text": "determination", "type": "Generic"},
    {"text": "3D geometrical configuration", "type": "OtherScientificTerm"},
    {"text": "3D points", "type": "OtherScientificTerm"},
    {"text": "cameras", "type": "OtherScientificTerm"},
    {"text": "correspondences", "type": "OtherScientificTerm"},
    {"text": "image coordinates", "type": "OtherScientificTerm"},
    {"text": "configuration", "type": "OtherScientificTerm"},
    {"text": "projective transform", "type": "OtherScientificTerm"},
    {"text": "critical configuration", "type": "OtherScientificT

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Error in sample 3
'head'
Here are the extracted results:

{
  "ner": [
    {"text": "word sense disambigation models", "type": "Method"},
    {"text": "statistical machine translation quality", "type": "Generic"},
    {"text": "subject", "type": "Generic"},
    {"text": "Chinese word sense disambiguation model", "type": "Method"},
    {"text": "IBM statistical MT system", "type": "Material"},
    {"text": "statistical machine translation system", "type": "Material"},
    {"text": "error analysis", "type": "Method"},
    {"text": "statistical MT architectures", "type": "Material"}
  ],
  "relations": [
    {
      "head": "word sense disambigation models",
      "tail": "statistical machine translation quality",
      "type": "INVESTIGATES"
    },
    {
      "head": "Chinese word sense disambiguation model",
      "tail": "IBM statistical MT system",
      "type": "USES"
    },
    {
      "head": "word sense disambiguation models",
      "tail": "statistical machine translation system

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.875, 1.0, 0.9333333333333333), 'relations': (0.75, 0.6, 0.6666666666666665)}
3.9431818181818183
3.9977272727272726
3.9333333333333336
Here are the extracted results:

{
  "ner": [
    {"text": "problem", "type": "OtherScientificTerm"},
    {"text": "blind source separation", "type": "OtherScientificTerm"},
    {"text": "instantaneous linear mixtures", "type": "OtherScientificTerm"},
    {"text": "mixing matrix", "type": "OtherScientificTerm"},
    {"text": "sparsity", "type": "OtherScientificTerm"},
    {"text": "signal dictionary", "type": "OtherScientificTerm"},
    {"text": "multi scale transforms", "type": "OtherScientificTerm"},
    {"text": "wavelet", "type": "OtherScientificTerm"},
    {"text": "wavelet packets", "type": "OtherScientificTerm"},
    {"text": "signals", "type": "OtherScientificTerm"},
    {"text": "local features", "type": "OtherScientificTerm"},
    {"text": "algorithm", "type": "Method"},
    {"text": "noise-free", "type": "Material"},
    {"text": "n

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Error in sample 5
'head'
Here are the extracted results:

{
  "ner": [
    {"text": "English text", "type": "Material"},
    {"text": "American Sign Language", "type": "Material"},
    {"text": "animation", "type": "OtherScientificTerm"},
    {"text": "MT", "type": "Method"},
    {"text": "semantic representation", "type": "OtherScientificTerm"},
    {"text": "virtual reality", "type": "OtherScientificTerm"},
    {"text": "3D scene modeling software", "type": "Method"},
    {"text": "classifier predicates", "type": "OtherScientificTerm"},
    {"text": "interlingua", "type": "OtherScientificTerm"},
    {"text": "multi-pathway MT architecture design", "type": "Method"},
    {"text": "transfer", "type": "OtherScientificTerm"},
    {"text": "direct approaches", "type": "OtherScientificTerm"}
  ],
  "relations": [
    {
      "head": "English text",
      "tail": "American Sign Language",
      "type": "TRANSLATION-OF"
    },
    {
      "head": "semantic representation",
      "tail": "vir

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.8333333333333334, 0.7692307692307693, 0.8), 'relations': (0.0, 0.0, 0)}
4.776515151515151
4.766958041958042
4.733333333333333
Here are the extracted results in the required JSON-like structure:

{
  "ner": [
    {"text": "explanation system", "type": "Method"},
    {"text": "domain knowledge", "type": "Material"},
    {"text": "multisentential discourse plans", "type": "OtherScientificTerm"},
    {"text": "text", "type": "Material"},
    {"text": "complex phenomena", "type": "Generic"},
    {"text": "computational mechanisms", "type": "Method"},
    {"text": "explanation generation", "type": "OtherScientificTerm"},
    {"text": "semantically rich", "type": "OtherScientificTerm"},
    {"text": "large-scale knowledge bases", "type": "Material"},
    {"text": "botanical anatomy", "type": "Material"},
    {"text": "physiology", "type": "Material"},
    {"text": "development", "type": "Material"},
    {"text": "evaluation methodology", "type": "Method"},
    {"text": "domain expe

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.7857142857142857, 1.0, 0.88), 'relations': (0.16666666666666666, 0.1111111111111111, 0.13333333333333333)}
5.728896103896104
5.878069153069154
5.746666666666667
Here are the extracted results:

{
  "ner": [
    {"text": "program", "type": "Method"},
    {"text": "tagged text corpus", "type": "Material"},
    {"text": "subcategorization frames", "type": "OtherScientificTerm"},
    {"text": "verb", "type": "OtherScientificTerm"},
    {"text": "training corpus", "type": "Material"},
    {"text": "False positive rates", "type": "OtherScientificTerm"},
    {"text": "NLP community", "type": "Material"},
    {"text": "subcategorization dictionary", "type": "Material"}
  ],
  "relations": [
    {
      "head": "program",
      "tail": "tagged text corpus",
      "type": "USES"
    },
    {
      "head": "program",
      "tail": "subcategorization frames",
      "type": "GENERATES"
    },
    {
      "head": "completeness",
      "tail": "output list",
      "type": "ATTRIBUTE-OF"
  

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.75, 1.0, 0.8571428571428571), 'relations': (0.0, 0.0, 0)}
6.478896103896104
6.878069153069154
6.603809523809524
Here are the extracted results:

{
  "ner": [
    {"text": "sentence analysis", "type": "OtherScientificTerm"},
    {"text": "defeasible reasoning", "type": "OtherScientificTerm"},
    {"text": "Japanese sentence analyses", "type": "OtherScientificTerm"},
    {"text": "argumentation system", "type": "Method"},
    {"text": "Konolige", "type": "Author"},
    {"text": "formalization", "type": "OtherScientificTerm"},
    {"text": "arguments", "type": "OtherScientificTerm"},
    {"text": "defeat rules", "type": "OtherScientificTerm"}
  ],
  "relations": [
    {
      "head": "sentence analysis",
      "tail": "defeasible reasoning",
      "type": "TREATMENT-AS"
    },
    {
      "head": "Japanese sentence analyses",
      "tail": "argumentation system",
      "type": "USE-OF"
    },
    {
      "head": "defeasible reasoning",
      "tail": "argumentation system",
    

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.875, 1.0, 0.9333333333333333), 'relations': (0.3333333333333333, 0.125, 0.18181818181818182)}
7.687229437229437
8.003069153069154
7.718961038961039
Here are the extracted results:

{
  "ner": [
    {"text": "paper", "type": "Material"},
    {"text": "IE paradigm", "type": "Method"},
    {"text": "predicate-argument structures", "type": "OtherScientificTerm"},
    {"text": "novel", "type": "Generic"},
    {"text": "customizable", "type": "Generic"},
    {"text": "IE", "type": "Method"},
    {"text": "way", "type": "Generic"},
    {"text": "automatically identifying", "type": "Generic"},
    {"text": "predicate argument structures", "type": "OtherScientificTerm"},
    {"text": "central", "type": "Generic"},
    {"text": "IE paradigm", "type": "Method"},
    {"text": "extended set of features", "type": "OtherScientificTerm"},
    {"text": "inductive decision tree learning", "type": "Method"},
    {"text": "experimental results", "type": "Material"},
    {"text": "IE results", "

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.6666666666666666, 1.0, 0.8), 'relations': (0.0, 0.0, 0)}
8.353896103896103
9.003069153069154
8.51896103896104
Here are the extracted results:

{
  "ner": [
    {"text": "Named Entity task", "type": "OtherScientificTerm"},
    {"text": "corpora", "type": "OtherScientificTerm"},
    {"text": "statistical profile", "type": "OtherScientificTerm"},
    {"text": "algorithm", "type": "Method"},
    {"text": "lower bound estimation", "type": "OtherScientificTerm"},
    {"text": "Named Entity corpora", "type": "Material"},
    {"text": "cross-lingual comparisons", "type": "OtherScientificTerm"},
    {"text": "analysis", "type": "Method"}
  ],
  "relations": [
    {
      "head": "statistical profile",
      "tail": "Named Entity task",
      "type": "DESCRIPTION-OF"
    },
    {
      "head": "algorithm",
      "tail": "lower bound estimation",
      "type": "IMPLEMENTATION-OF"
    },
    {
      "head": "analysis",
      "tail": "Named Entity corpora",
      "type": "ANALYSIS-OF"
  

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.875, 1.0, 0.9333333333333333), 'relations': (0.5, 0.4, 0.4444444444444445)}
9.728896103896103
10.403069153069154
9.896738816738818
Here are the extracted results:

{
  "ner": [
    {"text": "PCFG-LA", "type": "Method"},
    {"text": "parse trees", "type": "OtherScientificTerm"},
    {"text": "PCFG", "type": "Method"},
    {"text": "non-terminal symbols", "type": "OtherScientificTerm"},
    {"text": "latent variables", "type": "OtherScientificTerm"},
    {"text": "CFG rules", "type": "OtherScientificTerm"},
    {"text": "parsed corpus", "type": "Material"},
    {"text": "EM-algorithm", "type": "Method"},
    {"text": "PCFG-LA", "type": "Method"},
    {"text": "Penn WSJ corpus", "type": "Material"},
    {"text": "PCFG parser", "type": "Method"}
  ],
  "relations": [
    {
      "head": "PCFG-LA",
      "tail": "parse trees",
      "type": "GENERATES"
    },
    {
      "head": "PCFG-LA",
      "tail": "CFG rules",
      "type": "INDUCES"
    },
    {
      "head": "EM-algorith

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (1.0, 0.6875, 0.8148148148148148), 'relations': (0.2, 0.07692307692307693, 0.1111111111111111)}
10.928896103896102
11.16749222999223
10.822664742664744
Here are the extracted results:

{
  "ner": [
    {"text": "convolutional neural networks", "type": "Method"},
    {"text": "stochastic attention-based models", "type": "Method"},
    {"text": "image locations", "type": "OtherScientificTerm"},
    {"text": "computational efficiency", "type": "Generic"},
    {"text": "posterior inference", "type": "OtherScientificTerm"},
    {"text": "stochastic gradient estimates", "type": "OtherScientificTerm"},
    {"text": "Wake-Sleep Recurrent Attention Model", "type": "Method"},
    {"text": "stochastic attention networks", "type": "Method"},
    {"text": "image classification", "type": "Material"},
    {"text": "caption generation", "type": "Material"}
  ],
  "relations": [
    {
      "head": "convolutional neural networks",
      "tail": "image locations",
      "type": "USES"
    },
   

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (1.0, 0.6666666666666666, 0.8), 'relations': (0.42857142857142855, 0.25, 0.3157894736842105)}
12.357467532467531
12.084158896658897
11.938454216348955
Here are the extracted results:

{
  "ner": [
    {"text": "classifying", "type": "Generic"},
    {"text": "high-dimensional sequence data", "type": "Material"},
    {"text": "traditional methods", "type": "OtherScientificTerm"},
    {"text": "HMMs", "type": "OtherScientificTerm"},
    {"text": "CRFs", "type": "OtherScientificTerm"},
    {"text": "training data", "type": "Material"},
    {"text": "dimensionality reduction", "type": "Method"},
    {"text": "low-dimensional representation", "type": "OtherScientificTerm"},
    {"text": "classification", "type": "Generic"},
    {"text": "Existing methods", "type": "OtherScientificTerm"},
    {"text": "supervised dimensionality reduction", "type": "Method"},
    {"text": "data", "type": "Material"},
    {"text": "neighborhood graph structure", "type": "OtherScientificTerm"},
    {"tex

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.7931034482758621, 0.92, 0.851851851851852), 'relations': (0.375, 0.3, 0.33333333333333326)}
13.525570980743392
13.304158896658898
13.12363940153414


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Here are the extracted results:

{
  "ner": [
    {"text": "Chart parsing", "type": "Method"},
    {"text": "directional", "type": "OtherScientificTerm"},
    {"text": "starting point", "type": "OtherScientificTerm"},
    {"text": "sentence", "type": "Material"},
    {"text": "chart", "type": "OtherScientificTerm"},
    {"text": "islands", "type": "OtherScientificTerm"},
    {"text": "fragments", "type": "OtherScientificTerm"},
    {"text": "heuristics", "type": "Method"}
  ],
  "relations": [
    {
      "head": "Chart parsing",
      "tail": "starting point",
      "type": "STARTS-FROM"
    },
    {
      "head": "Chart parsing",
      "tail": "sentence",
      "type": "EXTENDS-IN"
    },
    {
      "head": "chart",
      "tail": "islands",
      "type": "WORKS-OUTWARD-FROM"
    },
    {
      "head": "process",
      "tail": "fragments",
      "type": "EXTENDS-TO"
    },
    {
      "head": "heuristics",
      "tail": "missing fragments",
      "type": "PREDICTS"
    }
  ],
  "type

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.7777777777777778, 1.0, 0.8750000000000001), 'relations': (0.3333333333333333, 0.4, 0.3636363636363636)}
14.636682091854503
14.704158896658898
14.362275765170503
Here are the extracted results:

{
  "ner": [
    {"text": "single-image highlight removal method", "type": "Method"},
    {"text": "illumination-based constraints", "type": "OtherScientificTerm"},
    {"text": "image in-painting", "type": "OtherScientificTerm"},
    {"text": "highlight pixels", "type": "OtherScientificTerm"},
    {"text": "traditional inpainting", "type": "OtherScientificTerm"},
    {"text": "observed pixel colors", "type": "OtherScientificTerm"},
    {"text": "highlight color analysis", "type": "OtherScientificTerm"},
    {"text": "illumination color uniformity", "type": "OtherScientificTerm"},
    {"text": "underlying diffuse color", "type": "OtherScientificTerm"},
    {"text": "shading", "type": "OtherScientificTerm"},
    {"text": "textures", "type": "OtherScientificTerm"},
    {"text": "our met

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.9230769230769231, 0.9230769230769231, 0.9230769230769231), 'relations': (0.8, 0.5, 0.6153846153846154)}
16.359759014931427
16.12723581973582
15.900737303632042
Here are the extracted results:

{
  "ner": [
    {"text": "language genre", "type": "OtherScientificTerm"},
    {"text": "spoken language", "type": "OtherScientificTerm"},
    {"text": "lyrics in music", "type": "OtherScientificTerm"},
    {"text": "SMT-style stochastic transduction grammar", "type": "Method"},
    {"text": "hip hop lyrics", "type": "Material"},
    {"text": "challenge-response system", "type": "Method"},
    {"text": "rhyming lyrics", "type": "OtherScientificTerm"},
    {"text": "input", "type": "Generic"},
    {"text": "previous efforts", "type": "OtherScientificTerm"},
    {"text": "domain", "type": "Generic"},
    {"text": "hip hop lyrics", "type": "Material"},
    {"text": "novel feature", "type": "OtherScientificTerm"},
    {"text": "approach", "type": "Method"},
    {"text": "linguistic or pho

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.75, 1.0, 0.8571428571428571), 'relations': (0.14285714285714285, 0.1111111111111111, 0.125)}
17.25261615778857
17.23834693084693
16.8828801607749
Here are the extracted results:

{
  "ner": [
    {"text": "German pronouns", "type": "OtherScientificTerm"},
    {"text": "ensemble learning", "type": "Method"},
    {"text": "Boosting", "type": "Method"},
    {"text": "classifiers", "type": "OtherScientificTerm"},
    {"text": "decision-tree classifier", "type": "Method"},
    {"text": "system", "type": "Material"},
    {"text": "text", "type": "Material"},
    {"text": "preprocessing modules", "type": "Method"},
    {"text": "manual annotation process", "type": "Method"},
    {"text": "textual domain", "type": "Material"},
    {"text": "open-domain question answering", "type": "Task"},
    {"text": "text summarisation", "type": "Task"}
  ],
  "relations": [
    {
      "head": "ensemble learning",
      "tail": "German pronouns",
      "type": "APPLIES-TO"
    },
    {
      "he

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (1.0, 0.8571428571428571, 0.923076923076923), 'relations': (0.4444444444444444, 0.3076923076923077, 0.3636363636363637)}
18.697060602233012
18.403182095682094
18.169593447488186
Here are the extracted results:

{
  "ner": [
    {"text": "framework", "type": "Method"},
    {"text": "distance", "type": "OtherScientificTerm"},
    {"text": "concepts", "type": "OtherScientificTerm"},
    {"text": "distributional measures", "type": "OtherScientificTerm"},
    {"text": "word co-occurrences", "type": "OtherScientificTerm"},
    {"text": "categories", "type": "OtherScientificTerm"},
    {"text": "thesaurus", "type": "Material"},
    {"text": "concept-concept matrix", "type": "OtherScientificTerm"},
    {"text": "traditional distributional word-distance measures", "type": "OtherScientificTerm"},
    {"text": "word pairs", "type": "Generic"},
    {"text": "semantic distance", "type": "OtherScientificTerm"},
    {"text": "tasks", "type": "Generic"},
    {"text": "ranking", "type": "Generi

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Error in sample 20
'head'
Here are the extracted results:

{
  "ner": [
    {"text": "paper", "type": "Material"},
    {"text": "correlation", "type": "OtherScientificTerm"},
    {"text": "dependency relation paths", "type": "OtherScientificTerm"},
    {"text": "candidate answers", "type": "OtherScientificTerm"},
    {"text": "answer extraction", "type": "Method"},
    {"text": "correlation measure", "type": "Method"},
    {"text": "dependency relations", "type": "OtherScientificTerm"},
    {"text": "candidate answer", "type": "OtherScientificTerm"},
    {"text": "mapped question phrases", "type": "OtherScientificTerm"},
    {"text": "sentence", "type": "OtherScientificTerm"},
    {"text": "question", "type": "OtherScientificTerm"},
    {"text": "previous studies", "type": "Material"},
    {"text": "approximate phrase mapping algorithm", "type": "Method"},
    {"text": "mapping score", "type": "OtherScientificTerm"},
    {"text": "correlation measure", "type": "Method"},
    {"text": "

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Error in sample 21
'head'
Here are the extracted results in the required JSON-like structure:

{
  "ner": [
    {"text": "Hidden Markov Model", "type": "Method"},
    {"text": "HMM", "type": "Method"},
    {"text": "chunk tagger", "type": "Method"},
    {"text": "named entity recognition", "type": "Task"},
    {"text": "NE", "type": "Task"},
    {"text": "NER", "type": "Task"},
    {"text": "names", "type": "OtherScientificTerm"},
    {"text": "times", "type": "OtherScientificTerm"},
    {"text": "numerical quantities", "type": "OtherScientificTerm"},
    {"text": "HMM", "type": "Method"},
    {"text": "internal and external evidences", "type": "OtherScientificTerm"},
    {"text": "simple deterministic internal feature", "type": "OtherScientificTerm"},
    {"text": "capitalization and digitalization", "type": "OtherScientificTerm"},
    {"text": "internal semantic feature", "type": "OtherScientificTerm"},
    {"text": "important triggers", "type": "OtherScientificTerm"},
    {"text": "

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.9130434782608695, 1.0, 0.9545454545454545), 'relations': (0.25, 0.3, 0.2727272727272727)}
19.860104080493883
19.703182095682095
19.396866174760913
Here are the extracted results:

{
  "ner": [
    {"text": "algorithm", "type": "Method"},
    {"text": "redundancy elimination problem", "type": "OtherScientificTerm"},
    {"text": "semantic representation", "type": "OtherScientificTerm"},
    {"text": "scope ambiguity", "type": "OtherScientificTerm"},
    {"text": "underspecified semantic representation", "type": "OtherScientificTerm"},
    {"text": "chart representations", "type": "OtherScientificTerm"},
    {"text": "dominance graphs", "type": "OtherScientificTerm"},
    {"text": "large-scale grammars", "type": "Material"},
    {"text": "corpus", "type": "Material"},
    {"text": "USRs", "type": "OtherScientificTerm"}
  ],
  "relations": [
    {
      "head": "algorithm",
      "tail": "redundancy elimination problem",
      "type": "SOLVES"
    },
    {
      "head": "algori

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (1.0, 0.7692307692307693, 0.8695652173913044), 'relations': (0.5, 0.3333333333333333, 0.4)}
21.360104080493883
20.805746198246197
20.666431392152216
Here are the extracted results:

{
  "ner": [
    {"text": "SUMMIT", "type": "Material"},
    {"text": "project", "type": "OtherScientificTerm"},
    {"text": "phonetically-based spoken language understanding system", "type": "OtherScientificTerm"},
    {"text": "heuristic rules", "type": "OtherScientificTerm"},
    {"text": "knowledge engineering", "type": "OtherScientificTerm"},
    {"text": "speech knowledge", "type": "OtherScientificTerm"},
    {"text": "formal framework", "type": "OtherScientificTerm"},
    {"text": "mathematical tools", "type": "OtherScientificTerm"},
    {"text": "features", "type": "OtherScientificTerm"},
    {"text": "decision strategies", "type": "OtherScientificTerm"},
    {"text": "speech data", "type": "Material"},
    {"text": "system", "type": "Material"},
    {"text": "paper", "type": "Material"}
  

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.9230769230769231, 1.0, 0.9600000000000001), 'relations': (0.16666666666666666, 0.16666666666666666, 0.16666666666666666)}
22.449847670237475
21.972412864912865
21.79309805881888
Here are the extracted results:

{
  "ner": [
    {"text": "N-gram models", "type": "Method"},
    {"text": "statistical language modeling", "type": "FieldOfStudy"},
    {"text": "artificial neural networks", "type": "Method"},
    {"text": "language model", "type": "Entity"},
    {"text": "standard statistical methods", "type": "Method"}
  ],
  "relations": [
    {
      "head": "N-gram models",
      "tail": "statistical language modeling",
      "type": "RELATED_TO"
    },
    {
      "head": "artificial neural networks",
      "tail": "language model",
      "type": "USED_TO_CREATE"
    },
    {
      "head": "neural network",
      "tail": "standard statistical methods",
      "type": "COMPARES_TO"
    }
  ],
  "types": {
    "Method": "Methods, models, systems to use, or tools, components of a 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (1.0, 1.0, 1.0), 'relations': (1.0, 1.0, 1.0)}
24.449847670237475
23.972412864912865
23.79309805881888
Here are the extracted results:

{
  "ner": [
    {"text": "automatic phonetic transcriptions", "type": "OtherScientificTerm"},
    {"text": "manually verified phonetic transcriptions", "type": "OtherScientificTerm"},
    {"text": "corpus-based study", "type": "OtherScientificTerm"},
    {"text": "classification experiment", "type": "Method"},
    {"text": "transcription types", "type": "Generic"},
    {"text": "speech processes", "type": "OtherScientificTerm"},
    {"text": "canonical transcription", "type": "OtherScientificTerm"},
    {"text": "classifiers", "type": "Method"},
    {"text": "unknown transcriptions", "type": "OtherScientificTerm"},
    {"text": "read speech", "type": "Generic"},
    {"text": "telephone dialogues", "type": "Generic"},
    {"text": "situational settings", "type": "OtherScientificTerm"},
    {"text": "classification accuracy", "type": "Metric"}
 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (1.0, 0.8125, 0.896551724137931), 'relations': (0.4, 0.10526315789473684, 0.16666666666666666)}
25.849847670237473
24.8901760228076
24.856316449623478
Here are the extracted results:

{
  "ner": [
    {"text": "Sources of training data", "type": "OtherScientificTerm"},
    {"text": "language modeling", "type": "OtherScientificTerm"},
    {"text": "conversational speech", "type": "OtherScientificTerm"},
    {"text": "training data", "type": "OtherScientificTerm"},
    {"text": "text from the web", "type": "OtherScientificTerm"},
    {"text": "target recognition task", "type": "OtherScientificTerm"},
    {"text": "class-dependent interpolation", "type": "OtherScientificTerm"},
    {"text": "N-grams", "type": "OtherScientificTerm"},
    {"text": "paper", "type": "Material"}
  ],
  "relations": [
    {
      "head": "training data",
      "tail": "text from the web",
      "type": "SUPPLEMENTATION"
    },
    {
      "head": "training data",
      "tail": "class-dependent interpola

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.5555555555555556, 1.0, 0.7142857142857143), 'relations': (0.0, 0.0, 0)}
26.40540322579303
25.8901760228076
25.570602163909193
Here are the extracted results:

{
  "ner": [
    {"text": "bispectral photo-metric stereo", "type": "Method"},
    {"text": "fluorescence", "type": "OtherScientificTerm"},
    {"text": "shape reconstruction", "type": "Task"},
    {"text": "fluorescence", "type": "OtherScientificTerm"},
    {"text": "natural gems and corals", "type": "Material"},
    {"text": "fluorescent dyes used in clothing", "type": "Material"},
    {"text": "wavelength-shifting behavior", "type": "OtherScientificTerm"},
    {"text": "fluorescent materials", "type": "OtherScientificTerm"},
    {"text": "light", "type": "OtherScientificTerm"},
    {"text": "wavelengths", "type": "OtherScientificTerm"},
    {"text": "fluorescence", "type": "OtherScientificTerm"},
    {"text": "algorithms", "type": "OtherScientificTerm"},
    {"text": "computer vision", "type": "Field"},
    {"text":

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.9130434782608695, 0.875, 0.8936170212765957), 'relations': (0.18181818181818182, 0.18181818181818182, 0.18181818181818182)}
27.50026488587208
26.946994204625785
26.64603736700397
Here are the extracted results in the required JSON-like structure:

{
  "ner": [
    {"text": "speech-based depression detection", "type": "Material"},
    {"text": "depression detection", "type": "Material"},
    {"text": "noise", "type": "OtherScientificTerm"},
    {"text": "reverberation", "type": "OtherScientificTerm"},
    {"text": "MFCCs", "type": "OtherScientificTerm"},
    {"text": "DOCCs", "type": "OtherScientificTerm"},
    {"text": "2014 AudioVisual Emotion Recognition Challenge", "type": "Material"},
    {"text": "AVEC", "type": "Material"},
    {"text": "additive noise", "type": "OtherScientificTerm"},
    {"text": "evaluation metrics", "type": "OtherScientificTerm"},
    {"text": "artificial neural networks", "type": "Method"},
    {"text": "support vector regression", "type": "Method

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


{'ner': (0.875, 0.7777777777777778, 0.823529411764706), 'relations': (0.5, 0.2222222222222222, 0.30769230769230765)}
28.87526488587208
27.946994204625785
27.777259086460983
Here are the extracted results:

{
  "ner": [
    {"text": "approach", "type": "Method"},
    {"text": "intrinsic texture properties", "type": "OtherScientificTerm"},
    {"text": "albedo", "type": "OtherScientificTerm"},
    {"text": "shading", "type": "OtherScientificTerm"},
    {"text": "normal", "type": "OtherScientificTerm"},
    {"text": "scenes", "type": "Material"},
    {"text": "multiple view acquisition", "type": "Method"},
    {"text": "unknown illumination conditions", "type": "OtherScientificTerm"},
    {"text": "intrinsic textures", "type": "OtherScientificTerm"},
    {"text": "pixel-resolution surface textures", "type": "OtherScientificTerm"},
    {"text": "intrinsic appearance parameters", "type": "OtherScientificTerm"},
    {"text": "previous video relighting methods", "type": "Method"},
    {"text"

## Kết quả

In [15]:
total_precision /= 2*total_count
total_recall /= 2*total_count
total_f1 /= 2*total_count

print(f"Average Precision: {total_precision:.4f}")
print(f"Average Recall: {total_recall:.4f}")
print(f"Average F1 Score: {total_f1:.4f}")


Average Precision: 0.5607
Average Recall: 0.5389
Average F1 Score: 0.5370
