In [None]:
!pip install transformers datasets stanza

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting stanza
  Downloading stanza-1.10.1-py3-none-any.whl.metadata (13 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting emoji (from stanza)
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.3.0->stanza)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from tor

In [None]:
!unzip lessons.zip -d lessons

Archive:  lessons.zip
   creating: lessons/lessons/
  inflating: lessons/lessons/v3_l13.pkl  
  inflating: lessons/lessons/v3_l27.pkl  
  inflating: lessons/lessons/v3_l1.pkl  
  inflating: lessons/lessons/v3_l28.pkl  
  inflating: lessons/lessons/v3_l37.pkl  
  inflating: lessons/lessons/v3_l12.pkl  
  inflating: lessons/lessons/v3_l47.pkl  
  inflating: lessons/lessons/v3_l57.pkl  
  inflating: lessons/lessons/v3_l23.pkl  
  inflating: lessons/lessons/v3_l2.pkl  
  inflating: lessons/lessons/v3_l42.pkl  
  inflating: lessons/lessons/v3_l41.pkl  
  inflating: lessons/lessons/v3_l7.pkl  
  inflating: lessons/lessons/v3_l52.pkl  
  inflating: lessons/lessons/v3_l29.pkl  
  inflating: lessons/lessons/v3_l6.pkl  
  inflating: lessons/lessons/v3_l60.pkl  
  inflating: lessons/lessons/v3_l56.pkl  
  inflating: lessons/lessons/v3_l38.pkl  
  inflating: lessons/lessons/v3_l14.pkl  
  inflating: lessons/lessons/v3_l17.pkl  
  inflating: lessons/lessons/v3_l16.pkl  
  inflating: lessons/lessons

# Pretraining

In [None]:
import os
import pickle
import random
import torch
import stanza
import torch.nn as nn
from datasets import Dataset
from transformers import (
    T5Tokenizer, T5Config, Trainer, TrainingArguments,
    T5ForConditionalGeneration
)

# ──────── Device ────────
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ──────── Stanza setup ────────
# Fetch the English Stanza resources, then build the pipeline that `generate`
# uses to parse raw text into the POS / dependency / NER annotations from
# which combo ids are derived.
stanza.download("en", verbose=False)
nlp = stanza.Pipeline(
    lang="en",
    processors="tokenize,pos,lemma,depparse,ner",
    # NOTE(review): going by the option name this disables Stanza's sentence
    # splitting — confirm against the Stanza docs.
    tokenize_no_ssplit=True,
    use_gpu=torch.cuda.is_available()
)

# ──────── Combo‑ID utils ────────
combo2id = {}            # (pos, dep, ner, morph) -> integer id, filled lazily
combo_counter = 1        # next id to hand out; 0 is reserved for "unknown"
MAX_COMBO_ID = 1024      # ids are handed out only while the counter is below this
UNKNOWN_COMBO_ID = 0

def get_combo_id(pos, dep, ner, morph):
    """Return the integer id for a (pos, dep, ner, morph) combination.

    A combination seen for the first time is registered under the next free
    id. Once the counter has reached ``MAX_COMBO_ID`` no further combinations
    are registered and ``UNKNOWN_COMBO_ID`` is returned for them instead.
    """
    global combo_counter
    signature = (pos, dep, ner, morph)
    known = combo2id.get(signature)
    if known is not None:
        return known
    if combo_counter >= MAX_COMBO_ID:
        return UNKNOWN_COMBO_ID
    assigned = combo_counter
    combo2id[signature] = assigned
    combo_counter = assigned + 1
    return assigned

def get_char_offset_ner_map(doc):
    """Map every character offset covered by an entity in ``doc.ents`` to that entity's type.

    Offsets run from ``start_char`` (inclusive) to ``end_char`` (exclusive).
    When entities overlap, the one that comes later in ``doc.ents`` wins.
    """
    return {
        offset: entity.type
        for entity in doc.ents
        for offset in range(entity.start_char, entity.end_char)
    }

def extract_combo_ids_from_doc(doc, tokenizer, max_len=256):
    """Build one combo id per tokenizer sub-token for every word in ``doc``.

    Each word's combo id (from its upos, deprel, NER tag at the word's start
    offset, and morphological features) is repeated once per sub-token, where
    the sub-token count is the length of ``tokenizer(word.text).input_ids``
    minus one. The result is cut to ``max_len`` entries and is NOT padded.
    """
    ner_by_char = get_char_offset_ner_map(doc)
    per_subtoken = []
    all_words = (w for sentence in doc.sentences for w in sentence.words)
    for w in all_words:
        tag = ner_by_char.get(w.start_char or 0, "O")
        combo = get_combo_id(w.upos or "X", w.deprel or "dep", tag, w.feats or "")
        n_pieces = len(tokenizer(w.text).input_ids) - 1
        per_subtoken += [combo] * n_pieces
    return per_subtoken[:max_len]

# ──────── Simple span‑masking ────────
def simple_t5_mask(text):
    """Replace one random span of 1-3 whitespace-separated words with ``<extra_id_0>``.

    Returns ``(masked_text, target_text)`` where the target is the sentinel
    followed by the removed words. Texts with fewer than four words are
    returned unchanged as both input and target.
    """
    tokens = text.strip().split()
    n_tokens = len(tokens)
    if n_tokens < 4:
        return text, text

    sentinel = "<extra_id_0>"
    # Draw the span width first, then its start, so the span always fits.
    width = random.randint(1, min(3, n_tokens - 1))
    begin = random.randint(0, n_tokens - width)
    end = begin + width

    source = " ".join([*tokens[:begin], sentinel, *tokens[end:]])
    target = f"{sentinel} {' '.join(tokens[begin:end])}"
    return source, target

# ──────── Model ────────
class SyntaxT5(T5ForConditionalGeneration):
    """T5 whose encoder input is the token embedding fused with a "combo" embedding.

    For every input position the token embedding and the embedding of that
    position's combo id are concatenated and projected back to ``d_model``
    before being handed to the encoder. The decoder is unchanged.

    Combo ids reach the model either as the ``combo_ids`` keyword of
    ``forward`` / ``generate`` or through the ``stored_combo_ids`` attribute,
    which is used as a fallback when the keyword is absent.

    Changes relative to the earlier version of this class:

    * ``generate`` now runs the encoder on the fused embeddings. The stock
      ``generate`` calls the encoder module directly, so the fusion applied
      in ``forward`` was skipped at inference time and the model decoded
      from inputs it had never been trained on.
    * The custom ``prepare_inputs_for_generation`` is gone. It never trimmed
      ``decoder_input_ids`` when a key/value cache was present and indexed
      ``stored_combo_ids`` even when it was ``None``. Combo ids are only
      needed by the encoder, which runs once before decoding, so the
      inherited implementation is sufficient.
    * ``self.shared`` is no longer re-created after ``super().__init__``.
      The parent constructor already builds it and wires it into the encoder
      and decoder; replacing it afterwards discarded the parent's weight
      initialization and left ``lm_head`` tied to the old, unused embedding
      (when ``config.tie_word_embeddings`` is set).
    """

    def __init__(self, config, combo_vocab_size=MAX_COMBO_ID):
        super().__init__(config)
        self.combo_embed = nn.Embedding(combo_vocab_size, config.d_model)
        self.fuse_proj = nn.Linear(2 * config.d_model, config.d_model)
        # Optional fallback combo ids, shape (batch, seq); set by callers
        # that cannot pass `combo_ids` explicitly.
        self.stored_combo_ids = None

    def _fuse_inputs(self, input_ids=None, inputs_embeds=None, combo_ids=None):
        """Return encoder input embeddings with the combo embedding fused in.

        ``combo_ids`` is aligned to the token sequence length: extra entries
        are dropped and missing ones are filled with ``UNKNOWN_COMBO_ID``.
        ``None`` is treated as "all unknown".

        Raises:
            ValueError: if neither ``input_ids`` nor ``inputs_embeds`` is
                given, or if ``combo_ids`` has a different batch size.
        """
        if inputs_embeds is not None:
            tok_emb = inputs_embeds
        elif input_ids is not None:
            tok_emb = self.shared(input_ids)
        else:
            raise ValueError("SyntaxT5 needs either input_ids or inputs_embeds")

        batch, seq_len = tok_emb.shape[:2]
        if combo_ids is None:
            combo_ids = torch.full(
                (batch, seq_len), UNKNOWN_COMBO_ID,
                dtype=torch.long, device=tok_emb.device,
            )
        else:
            combo_ids = combo_ids.to(tok_emb.device)
            if combo_ids.shape[0] != batch:
                raise ValueError(
                    f"combo_ids batch size {combo_ids.shape[0]} does not match "
                    f"input batch size {batch}"
                )
            have = combo_ids.shape[1]
            if have > seq_len:
                combo_ids = combo_ids[:, :seq_len]
            elif have < seq_len:
                filler = combo_ids.new_full((batch, seq_len - have), UNKNOWN_COMBO_ID)
                combo_ids = torch.cat([combo_ids, filler], dim=1)

        cmb_emb = self.combo_embed(combo_ids)
        return self.fuse_proj(torch.cat([tok_emb, cmb_emb], dim=-1))

    def generate(self, inputs=None, *args, combo_ids=None, **kwargs):
        """Generate with the encoder run on combo-fused embeddings.

        Accepts everything the parent ``generate`` accepts plus an optional
        ``combo_ids`` tensor; when that is omitted ``stored_combo_ids`` is
        used. If the caller already supplies ``encoder_outputs`` (or gives
        no input ids at all) the call is forwarded unchanged.
        """
        input_ids = inputs if inputs is not None else kwargs.get("input_ids")
        if input_ids is not None and kwargs.get("encoder_outputs") is None:
            if combo_ids is None:
                combo_ids = self.stored_combo_ids
            with torch.no_grad():
                fused = self._fuse_inputs(input_ids=input_ids, combo_ids=combo_ids)
                # Supplying encoder_outputs makes the parent skip its own
                # (unfused) encoder pass.
                kwargs["encoder_outputs"] = self.encoder(
                    inputs_embeds=fused,
                    attention_mask=kwargs.get("attention_mask"),
                    return_dict=True,
                )
        return super().generate(inputs, *args, **kwargs)

    def forward(
        self, input_ids=None, inputs_embeds=None, attention_mask=None,
        decoder_input_ids=None, decoder_attention_mask=None,
        encoder_outputs=None, past_key_values=None, labels=None,
        use_cache=None, **kwargs
    ):
        """Run T5 with combo-fused encoder inputs.

        ``combo_ids`` may be passed as a keyword; otherwise
        ``stored_combo_ids`` is used, and if that is ``None`` too every
        position gets ``UNKNOWN_COMBO_ID``. Other extra keywords are
        accepted and ignored.
        """
        # Decoding step: the encoder has already run (on fused inputs when
        # coming from `generate`), so only the decoder side is needed.
        if encoder_outputs is not None:
            return super().forward(
                encoder_outputs=encoder_outputs,
                past_key_values=past_key_values,
                attention_mask=attention_mask,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
                labels=labels,
                use_cache=use_cache
            )

        # Training / full forward pass: fuse, then let T5 encode and decode.
        combo_ids = kwargs.get("combo_ids", self.stored_combo_ids)
        fused = self._fuse_inputs(
            input_ids=input_ids, inputs_embeds=inputs_embeds, combo_ids=combo_ids
        )
        return super().forward(
            inputs_embeds=fused,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            decoder_attention_mask=decoder_attention_mask,
            labels=labels,
            use_cache=use_cache,
            past_key_values=past_key_values
        )

# ──────── Data Collator ────────
class DataCollatorWithCombo:
    """Stack already-padded examples into a batch of ``torch.long`` tensors.

    Each example must provide equal-length lists under ``input_ids``,
    ``attention_mask``, ``labels`` and ``combo_ids``.
    """

    _FIELDS = ("input_ids", "attention_mask", "labels", "combo_ids")

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def __call__(self, batch):
        return {
            field: torch.tensor([ex[field] for ex in batch], dtype=torch.long)
            for field in self._FIELDS
        }

# ──────── Preprocess ────────
def preprocess(example, tokenizer, max_source_len=256, max_target_len=64):
    """Tokenize one masked example and attach padded combo ids and labels.

    Args:
        example: mapping with ``"input"`` (masked text), ``"output"`` (target
            text) and ``"combo_ids"`` (list of ints).
        tokenizer: callable tokenizer; must expose ``pad_token_id``.
        max_source_len: padded/truncated length of the input and combo ids.
        max_target_len: padded/truncated length of the target.

    Returns:
        The source encoding with two added keys: ``"combo_ids"`` (exactly
        ``max_source_len`` entries, padded with ``UNKNOWN_COMBO_ID``) and
        ``"labels"`` (target ids with padding replaced by -100 so the loss
        ignores it).
    """
    src = tokenizer(
        example["input"],
        padding="max_length",
        truncation=True,
        max_length=max_source_len
    )
    tgt = tokenizer(
        example["output"],
        padding="max_length",
        truncation=True,
        max_length=max_target_len
    )
    # Work on a copy: the previous `combo += ...` extended the caller's list
    # in place.
    combo = list(example["combo_ids"])[:max_source_len]
    combo.extend([UNKNOWN_COMBO_ID] * (max_source_len - len(combo)))
    src["combo_ids"] = combo
    pad_id = tokenizer.pad_token_id
    src["labels"] = [tok if tok != pad_id else -100 for tok in tgt["input_ids"]]
    return src

# ──────── Prediction Cleaning ────────
def clean_prediction(raw_pred, tokenizer):
    """Strip the tokenizer's pad and EOS token strings, then surrounding whitespace."""
    cleaned = raw_pred
    for special in (tokenizer.pad_token, tokenizer.eos_token):
        cleaned = cleaned.replace(special, "")
    return cleaned.strip()

# ──────── Evaluation ────────
def evaluate(model, tokenizer, dataset):
    """Greedy-decode every example and report exact-match accuracy.

    Each example needs ``"input"``, ``"output"`` and ``"combo_ids"``. The
    prediction is the decoded output with pad/EOS strings removed; it counts
    as correct only when it equals the stripped gold target.

    Returns:
        The accuracy as a float in [0, 1] (0.0 for an empty dataset). The
        same figure is printed.
    """
    total = len(dataset)
    if total == 0:
        # Nothing to score; avoid dividing by zero below.
        print("✅ Eval Accuracy: 0/0 = n/a (empty dataset)")
        return 0.0

    model.eval()
    correct = 0
    try:
        for ex in dataset:
            gold = ex["output"].strip()
            enc = tokenizer(
                ex["input"], return_tensors="pt",
                padding=True, truncation=True, max_length=256
            ).to(device)
            # Size the combo ids to the encoded input, which is not padded
            # to 256 here.
            seq_len = enc["input_ids"].shape[1]
            combo = list(ex["combo_ids"])[:seq_len]
            combo.extend([UNKNOWN_COMBO_ID] * (seq_len - len(combo)))
            model.stored_combo_ids = torch.tensor(
                [combo], dtype=torch.long, device=device
            )
            with torch.no_grad():
                out_ids = model.generate(
                    input_ids=enc["input_ids"],
                    attention_mask=enc["attention_mask"],
                    max_new_tokens=20,
                    do_sample=False,
                    use_cache=True
                )
            raw = tokenizer.decode(out_ids[0], skip_special_tokens=False)
            if clean_prediction(raw, tokenizer) == gold:
                correct += 1
    finally:
        # Do not leave the last example's combo ids on the model.
        model.stored_combo_ids = None

    accuracy = correct / total
    print(f"✅ Eval Accuracy: {correct}/{total} = {accuracy:.2f}")
    torch.cuda.empty_cache()
    return accuracy

# ──────── Training per‐level ────────
def train_level(dataset, tokenizer, model, args, level):
    """Train ``model`` on one curriculum lesson, then score it on the same lesson.

    The raw dataset (columns ``input``, ``output``, ``combo_ids``) is
    tokenized with ``preprocess``; training uses a fresh ``Trainer`` that is
    discarded afterwards.
    """
    print(f"\n🔁 Training curriculum level {level}")

    def _encode(example):
        return preprocess(example, tokenizer)

    encoded = dataset.map(
        _encode,
        remove_columns=["input", "output", "combo_ids"],
    )
    collator = DataCollatorWithCombo(tokenizer)
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=encoded,
        tokenizer=tokenizer,
        data_collator=collator,
    )
    trainer.train()

    # Evaluation runs on the untokenized lesson, i.e. on the training data.
    evaluate(model, tokenizer, dataset)

    del trainer
    torch.cuda.empty_cache()

# ──────── Setup model/tokenizer/args ────────
# Tokenizer and configuration come from the "t5-base" checkpoint. The model
# itself is constructed from the configuration only (there is no
# `from_pretrained` call for the weights here), so training starts from
# freshly initialized parameters.
tokenizer = T5Tokenizer.from_pretrained("t5-base")
config    = T5Config.from_pretrained("t5-base")
model     = SyntaxT5(config=config, combo_vocab_size=MAX_COMBO_ID).to(device)

# One epoch per lesson, no checkpoints, no Trainer-driven evaluation.
# `remove_unused_columns=False` keeps the extra `combo_ids` column in the
# batches handed to the collator.
args = TrainingArguments(
    output_dir="./t5_syntax_curriculum",
    per_device_train_batch_size=16,
    num_train_epochs=1,
    logging_steps=5,
    save_strategy="no",
    eval_strategy="no",
    report_to="none",
    remove_unused_columns=False
)

# ──────── Build curriculum from your lesson files ────────
# Directory the lessons archive was extracted into; each lesson is one
# `v<volume>_l<lesson>.pkl` file.
pkl_folder = "./lessons/lessons"
# Filled below with (file name, Dataset) pairs in training order.
curriculum = []

# NOTE(review): this import sits mid-cell; it would normally live with the
# other imports at the top of the cell.
import re

def extract_vol_lesson(filename):
    """Return ``(volume, lesson)`` as ints for names starting with ``v<N>_l<M>.pkl``.

    Names that do not match sort last via the ``(999, 999)`` fallback.
    """
    found = re.match(r'v(\d+)_l(\d+)\.pkl', filename)
    if found is None:
        return (999, 999)
    volume, lesson = found.groups()
    return (int(volume), int(lesson))

def _lesson_examples(docs):
    """Turn the parsed documents of one lesson into masked training rows."""
    rows = []
    for d in docs:
        masked, target = simple_t5_mask(d.text)
        rows.append({
            "input":     masked,
            "output":    target,
            "combo_ids": extract_combo_ids_from_doc(d, tokenizer, max_len=256),
        })
    return rows


all_files = os.listdir(pkl_folder)
# Lessons are ordered numerically by (volume, lesson), not alphabetically.
pkl_files = sorted(
    (name for name in all_files if re.match(r'v\d+_l\d+\.pkl', name)),
    key=extract_vol_lesson,
)

for fname in pkl_files:
    if not fname.endswith(".pkl"):
        continue
    lesson_path = os.path.join(pkl_folder, fname)
    # NOTE: pickle.load can execute arbitrary code; only open lesson files
    # from a trusted source.
    with open(lesson_path, "rb") as f:
        docs = pickle.load(f)  # list of stanza.Document
    examples = _lesson_examples(docs)
    ds = Dataset.from_list(examples)
    curriculum.append((fname, ds))
    print(f"✅ Loaded {fname} ({len(examples)} examples)")

# ──────── Run curriculum ────────
# Only the first MAX_LESSONS lessons are trained in this run.
MAX_LESSONS = 2
for i, (lesson_name, level_ds) in enumerate(curriculum[:MAX_LESSONS], start=1):
    print(f"▶︎ Lesson {lesson_name}")
    train_level(level_ds, tokenizer, model, args, i)

# ──────── Final quick test ────────
def generate(text):
    """Parse ``text`` with Stanza, then greedy-decode up to 20 new tokens for it.

    The combo ids derived from the parse are padded to 256 entries and
    stored on the model before decoding. Returns the decoded output with
    pad/EOS strings removed.
    """
    model.eval()
    encoded = tokenizer(
        text, return_tensors="pt",
        padding=True, truncation=True, max_length=256,
    ).to(device)

    parsed_combo = extract_combo_ids_from_doc(nlp(text), tokenizer, max_len=256)
    padded_combo = parsed_combo + [UNKNOWN_COMBO_ID] * (256 - len(parsed_combo))
    model.stored_combo_ids = torch.tensor([padded_combo], device=device)

    with torch.no_grad():
        out_ids = model.generate(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_new_tokens=20,
            do_sample=False,
            use_cache=True,
        )
    decoded = tokenizer.decode(out_ids[0], skip_special_tokens=False)
    return clean_prediction(decoded, tokenizer)

# Smoke test: decode one hand-written masked sentence. The input uses two
# sentinels, whereas `simple_t5_mask` only ever produces `<extra_id_0>`.
print("\n🧪 Final Test Generation:")
test_text = "They are <extra_id_0> the car at the <extra_id_1>."
print("INPUT :", test_text)
print("OUTPUT:", generate(test_text))


Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


✅ Loaded v3_l1.pkl (61 examples)
✅ Loaded v3_l10.pkl (59 examples)
✅ Loaded v3_l11.pkl (62 examples)
✅ Loaded v3_l12.pkl (71 examples)
✅ Loaded v3_l13.pkl (62 examples)
✅ Loaded v3_l14.pkl (60 examples)
✅ Loaded v3_l15.pkl (61 examples)
✅ Loaded v3_l16.pkl (65 examples)
✅ Loaded v3_l17.pkl (59 examples)
✅ Loaded v3_l18.pkl (50 examples)
✅ Loaded v3_l19.pkl (52 examples)
✅ Loaded v3_l2.pkl (62 examples)
✅ Loaded v3_l20.pkl (66 examples)
✅ Loaded v3_l21.pkl (73 examples)
✅ Loaded v3_l22.pkl (60 examples)
✅ Loaded v3_l23.pkl (64 examples)
✅ Loaded v3_l24.pkl (57 examples)
✅ Loaded v3_l25.pkl (67 examples)
✅ Loaded v3_l26.pkl (49 examples)
✅ Loaded v3_l27.pkl (63 examples)
✅ Loaded v3_l28.pkl (64 examples)
✅ Loaded v3_l29.pkl (60 examples)
✅ Loaded v3_l3.pkl (48 examples)
✅ Loaded v3_l30.pkl (66 examples)
✅ Loaded v3_l31.pkl (64 examples)
✅ Loaded v3_l32.pkl (62 examples)
✅ Loaded v3_l33.pkl (70 examples)
✅ Loaded v3_l34.pkl (60 examples)
✅ Loaded v3_l35.pkl (59 examples)
✅ Loaded v3_l36.p

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

  trainer = Trainer(
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Step,Training Loss


✅ Eval Accuracy: 0/61 = 0.00
▶︎ Lesson v3_l10.pkl

🔁 Training curriculum level 2


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

Step,Training Loss


✅ Eval Accuracy: 0/59 = 0.00

🧪 Final Test Generation:
INPUT : They are <extra_id_0> the car at the <extra_id_1>.
OUTPUT: <extra_id_0> <extra_id_0>


In [None]:
# Persist the model and tokenizer defined by the pretraining cell above.
# This cell depends on `model` and `tokenizer` already existing in the kernel.
# NOTE(review): the combo-id mapping (`combo2id`) is not written here —
# confirm whether it should be saved alongside the weights.
save_dir = "my_syntax_gpt_model"

model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)


('my_syntax_gpt_model/tokenizer_config.json',
 'my_syntax_gpt_model/special_tokens_map.json',
 'my_syntax_gpt_model/spiece.model',
 'my_syntax_gpt_model/added_tokens.json')

In [None]:
# Reload the model and tokenizer from the directory written by the save cell.
# Requires `SyntaxT5`, `T5Tokenizer` and `device` from the pretraining cell.
save_dir = "my_syntax_gpt_model"

# `from_pretrained` builds SyntaxT5 with its default `combo_vocab_size`.
model = SyntaxT5.from_pretrained(save_dir).to(device)
tokenizer = T5Tokenizer.from_pretrained(save_dir)


# TREC classification

In [None]:
import os, pickle, torch
from datasets import load_dataset, Dataset
from torch.utils.data import Dataset as TorchDataset
from transformers import (
    T5Tokenizer, T5Config, T5ForConditionalGeneration,
    Trainer, TrainingArguments
)
from sklearn.metrics import accuracy_score, recall_score, f1_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ─── Tokenizer & Model ───
# Each coarse TREC class is represented by one dedicated token, so a
# prediction is a single decoder step.
tokenizer = T5Tokenizer.from_pretrained("t5-base")
label_names = ["ABBR", "ENTY", "DESC", "HUM", "LOC", "NUM"]
label_tokens = [f"<LABEL_{i}>" for i in range(len(label_names))]
tokenizer.add_tokens(label_tokens)

# `model` comes from the reload cell above. The added tokens get ids past
# the original vocabulary, so make sure the embedding matrix covers them;
# grow it only when it is actually too small.
if len(tokenizer) > model.get_input_embeddings().num_embeddings:
    model.resize_token_embeddings(len(tokenizer))

label_map = dict(enumerate(label_tokens))              # int → token
inv_label_map = dict(zip(label_tokens, label_names))   # token → class name

# ─── Combo utils ───
# NOTE(review): these assignments start a fresh combo-id mapping, so ids
# handed out here are unrelated to the ones used in the pretraining cell —
# confirm that is intended.
combo2id = {}
combo_counter = 1
MAX_COMBO_ID = 1024
UNKNOWN_COMBO_ID = 0

def get_combo_id(pos, dep, ner, morph):
    """Look up, or lazily assign, the id of a (pos, dep, ner, morph) tuple.

    New tuples get the next counter value while the counter is below
    ``MAX_COMBO_ID``; after that, unseen tuples map to ``UNKNOWN_COMBO_ID``.
    """
    global combo_counter
    key = (pos, dep, ner, morph)
    try:
        return combo2id[key]
    except KeyError:
        pass
    if combo_counter >= MAX_COMBO_ID:
        return UNKNOWN_COMBO_ID
    combo2id[key] = combo_counter
    combo_counter += 1
    return combo2id[key]

def get_char_offset_ner_map(doc):
    """Return ``{char_offset: entity_type}`` for all characters inside ``doc.ents`` spans.

    Spans are half-open (``start_char`` included, ``end_char`` excluded);
    later entities overwrite earlier ones on overlapping offsets.
    """
    tags_by_offset = {}
    for entity in doc.ents:
        covered = range(entity.start_char, entity.end_char)
        tags_by_offset.update(dict.fromkeys(covered, entity.type))
    return tags_by_offset

def extract_combo_ids_from_doc(doc, tokenizer, max_len=64):
    """Return exactly ``max_len`` combo ids for ``doc``, one per tokenizer sub-token.

    Words are visited in order and each word's combo id is repeated once per
    sub-token (``len(tokenizer(word.text).input_ids) - 1``). Collection stops
    as soon as ``max_len`` ids are available; shorter results are
    right-padded with ``UNKNOWN_COMBO_ID``.
    """
    ner_by_char = get_char_offset_ner_map(doc)

    def _gather():
        gathered = []
        for sentence in doc.sentences:
            for word in sentence.words:
                tag = ner_by_char.get(word.start_char or 0, "O")
                combo = get_combo_id(
                    word.upos or "X", word.deprel or "dep", tag, word.feats or ""
                )
                pieces = len(tokenizer(word.text).input_ids) - 1
                gathered.extend([combo] * pieces)
                if len(gathered) >= max_len:
                    return gathered
            if len(gathered) >= max_len:
                return gathered
        return gathered

    clipped = _gather()[:max_len]
    padding = [UNKNOWN_COMBO_ID] * (max_len - len(clipped))
    return clipped + padding

# ─── Load Dataset & Preparsed .pkl ───
trec = load_dataset("CogComp/trec")
# NOTE: pickle.load can execute arbitrary code; only open these files if
# they come from a trusted source.
with open("trec_train_docs.pkl", "rb") as f: train_docs = pickle.load(f)
with open("trec_test_docs.pkl", "rb") as f:  test_docs  = pickle.load(f)


def _build_examples(docs, split, split_name):
    """Pair pre-parsed documents with the labels of ``split`` by position.

    The i-th document is assumed to belong to the i-th row of the split.
    The label column is read once instead of materializing one dataset row
    per document.

    Raises:
        ValueError: if there are more documents than rows in the split.
    """
    labels = split["coarse_label"]
    if len(docs) > len(labels):
        raise ValueError(
            f"{split_name}: {len(docs)} parsed docs but only {len(labels)} dataset rows"
        )
    if len(docs) < len(labels):
        print(f"Warning: {split_name} has {len(labels)} rows but only {len(docs)} parsed docs; "
              "using the first rows only.")
    return [
        {
            "text": doc.text,
            "coarse_label": label,
            "combo_ids": extract_combo_ids_from_doc(doc, tokenizer),
        }
        for doc, label in zip(docs, labels)
    ]


train_exs = _build_examples(train_docs, trec["train"], "train")
test_exs = _build_examples(test_docs, trec["test"], "test")

train_ds = Dataset.from_list(train_exs)
test_ds = Dataset.from_list(test_exs)

# ─── Preprocessing ───
def preprocess_clf(examples):
    """Tokenize a batch of questions and turn each class label into a one-token target.

    Args:
        examples: batched mapping with ``"text"``, ``"coarse_label"`` and
            ``"combo_ids"`` columns.

    Returns:
        The tokenized inputs (padded/truncated to 64) plus ``"labels"`` and
        ``"combo_ids"``. In each label sequence only the first id (the label
        token) is kept; every later position is -100 so the loss ignores it.
    """
    inputs = [f"{q}" for q in examples["text"]]
    targets = [label_map[l] for l in examples["coarse_label"]]

    model_inputs = tokenizer(inputs, max_length=64, padding="max_length", truncation=True)
    # Targets are tokenized with the same tokenizer call as the inputs; the
    # deprecated `as_target_tokenizer()` context manager is not needed.
    lbl = tokenizer(targets, max_length=1, padding="max_length", truncation=False)

    model_inputs["labels"] = [
        [seq[0]] + [-100] * (len(seq) - 1) for seq in lbl["input_ids"]
    ]
    model_inputs["combo_ids"] = examples["combo_ids"]
    return model_inputs

# Tokenize both splits in batches. The raw columns are dropped;
# `preprocess_clf` re-adds `combo_ids` to its output.
train_tok = train_ds.map(preprocess_clf, batched=True, remove_columns=["text", "coarse_label", "combo_ids"])
test_tok  = test_ds.map(preprocess_clf, batched=True, remove_columns=["text", "coarse_label", "combo_ids"])
# Have indexing return torch tensors so `collate_batch` can stack them.
train_tok.set_format("torch")
test_tok.set_format("torch")

# ─── Dataset wrapper ───
class ClfDataset(TorchDataset):
    """Thin torch ``Dataset`` view that exposes only the four model fields of each row."""

    _KEYS = ("input_ids", "attention_mask", "labels", "combo_ids")

    def __init__(self, ds):
        self.ds = ds

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        row = self.ds[i]
        return {key: row[key] for key in self._KEYS}

# Wrap the tokenized splits so the Trainer sees only the four model fields.
train_torch = ClfDataset(train_tok)
test_torch = ClfDataset(test_tok)

# ─── Data collator ───
def collate_batch(batch):
    """Stack the per-example tensors of a batch along a new leading dimension."""
    keys = ("input_ids", "attention_mask", "labels", "combo_ids")
    return {key: torch.stack([item[key] for item in batch]) for key in keys}

# ─── Train ───
# Fine-tune for ten epochs, evaluating on the test split after each one.
# No checkpoints are written. `remove_unused_columns=False` keeps
# `combo_ids` in the batches.
# NOTE(review): in the recorded run the validation loss rises after roughly
# epoch 3 while the training loss keeps falling — fewer epochs or early
# stopping may be worth considering.
args = TrainingArguments(
    output_dir="./t5_trec_labeltok",
    eval_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    save_strategy="no",
    logging_steps=5,
    report_to="none",
    remove_unused_columns=False,
    dataloader_pin_memory=False
)
# `model` is the one bound by an earlier cell; this cell does not create it.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_torch,
    eval_dataset=test_torch,
    data_collator=collate_batch
)

trainer.train()
# The returned metrics are not stored or displayed here.
trainer.evaluate()

# ─── Inference: Predict top-1 class ───
def classify(question: str, combo_ids: list[int]) -> str:
    """Predict the coarse TREC class of ``question``.

    A single forward pass is run with the decoder primed with its start
    token, and the label token with the highest score at that first decoder
    position is returned as a class name.

    The pass goes through ``model(...)`` with ``combo_ids`` — the same path
    used in training — rather than ``model.generate``, which runs the
    encoder without the combo fusion. Scores are compared only among the six
    label tokens, so the result is always one of ``label_names``.

    Args:
        question: the raw question text.
        combo_ids: per-sub-token combo ids for the question (64 entries, as
            produced by ``extract_combo_ids_from_doc``).
    """
    prompt = f"{question}"
    enc = tokenizer(prompt, return_tensors="pt", max_length=64, padding="max_length", truncation=True)
    input_ids = enc.input_ids.to(device)
    decoder_start = torch.full(
        (input_ids.size(0), 1), model.config.decoder_start_token_id,
        dtype=torch.long, device=device,
    )
    model.eval()
    with torch.no_grad():
        out = model(
            input_ids=input_ids,
            attention_mask=enc.attention_mask.to(device),
            decoder_input_ids=decoder_start,
            combo_ids=torch.tensor([combo_ids], dtype=torch.long, device=device),
        )
    label_ids = tokenizer.convert_tokens_to_ids(label_tokens)
    label_scores = out.logits[0, -1, label_ids]
    best_token = label_tokens[int(label_scores.argmax())]
    return inv_label_map[best_token]

# ─── Evaluation ───
# Collect gold and predicted class names for the whole test set.
y_true, y_pred = [], []
for ex in test_exs:
    y_true.append(label_names[ex["coarse_label"]])
    y_pred.append(classify(ex["text"], ex["combo_ids"]))

# Spot-check a handful of pairs instead of printing every example.
for gold, pred in list(zip(y_true, y_pred))[:10]:
    print(gold, pred)

# Macro averages are taken over the six real classes only, so a stray
# prediction string cannot add a phantom class; classes that are never
# predicted score 0 instead of raising a warning.
print("Accuracy:", accuracy_score(y_true, y_pred))
print("Recall  :", recall_score(y_true, y_pred, labels=label_names, average="macro", zero_division=0))
print("F1 Score:", f1_score(y_true, y_pred, labels=label_names, average="macro", zero_division=0))


Using device: cuda


Map:   0%|          | 0/5452 [00:00<?, ? examples/s]



Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,0.9647,0.807304
2,0.4889,0.464246
3,0.5526,0.465286
4,0.435,0.474992
5,0.0545,0.646216
6,0.2098,0.744881
7,0.0375,0.82681
8,0.2126,0.963507
9,0.0452,1.093731
10,0.0002,1.081455


NUM HUM
LOC HUM
HUM HUM
DESC HUM
NUM HUM
NUM HUM
HUM HUM
ENTY HUM
DESC HUM
DESC HUM
LOC HUM
HUM HUM
NUM HUM
HUM HUM
NUM HUM
NUM HUM
ENTY HUM
HUM HUM
DESC HUM
NUM HUM
HUM HUM
DESC HUM
LOC HUM
DESC HUM
DESC HUM
HUM HUM
DESC HUM
LOC HUM
LOC HUM
LOC HUM
NUM HUM
LOC HUM
DESC HUM
NUM HUM
NUM HUM
NUM HUM
LOC HUM
NUM HUM
NUM HUM
NUM HUM
ENTY HUM
DESC HUM
DESC HUM
DESC HUM
ENTY HUM
ENTY HUM
NUM HUM
DESC HUM
NUM HUM
HUM HUM
DESC HUM
HUM HUM
HUM HUM
DESC HUM
DESC HUM
DESC HUM
NUM HUM
LOC HUM
LOC HUM
NUM HUM
LOC HUM
HUM HUM
LOC HUM
ENTY HUM
LOC HUM
HUM HUM
ENTY HUM
DESC HUM
NUM HUM
LOC HUM
NUM HUM
NUM HUM
LOC HUM
HUM HUM
LOC HUM
DESC HUM
ENTY HUM
NUM HUM
NUM HUM
HUM HUM
DESC HUM
NUM HUM
HUM HUM
NUM HUM
NUM HUM
DESC HUM
DESC HUM
HUM HUM
DESC HUM
LOC HUM
ENTY HUM
DESC HUM
NUM HUM
NUM HUM
HUM HUM
LOC HUM
NUM HUM
DESC HUM
DESC HUM
ENTY HUM
DESC HUM
HUM HUM
DESC HUM
HUM HUM
LOC HUM
DESC HUM
NUM HUM
ENTY HUM
NUM HUM
LOC HUM
ENTY HUM
DESC HUM
NUM HUM
HUM HUM
ENTY HUM
HUM HUM
NUM HUM
ENTY HUM
NUM HUM
LOC 