# Tagging Balanced COPA

In [1]:
import calamancy
import pandas as pd

In [4]:
tagger = calamancy.Tagger("tl_calamancy_md-0.2.0")

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [3]:
df_bcopa = pd.read_csv("../datasets/translated/google/google_translated_bcopa.csv")
df_bcopa.head()

Unnamed: 0,id,premise,choice1,choice2,question,label,mirrored
0,1283,Hirap na hirap na makatulog ang batang lalaki.,Binuksan niya ang nightlight niya.,Nanood siya ng pelikulang horror.,sanhi,1,1
1,207,Pinalayas ang mga nangungupahan sa apartment.,Nakaligtaan nilang bayaran ang kanilang upa.,Nagkasundo sila ng kanilang kasero.,sanhi,1,0
2,1317,Hindi mapakali ang lalaki kinaumagahan.,Nag-camping siya sa kakahuyan.,Nakatulog siya sa kanyang sopa.,sanhi,2,1
3,1006,Nanalo ang pulitiko sa eleksyon.,Walang bumoto sa kanya.,Nagpatakbo siya ng mga negatibong patalastas s...,sanhi,2,1
4,170,Natapon ang alikabok mula sa mesa.,Tinanggal ko ang mga libro mula sa mesa.,Pinunasan ko ang mesa gamit ang isang tela.,sanhi,2,0


In [5]:
def get_sentence_form(text: str, tagger=tagger) -> int:
    
    data = list(tagger(text))
    first_index = next((i for i, (word, (pos, _)) in enumerate(data) if word == "ay" and pos == "PART"), None)

    # 2. Logic to determine KA vs DKA
    if first_index is None:
        
        # SCENARIO A: No "ay" found
        print("Structure: Karaniwang Ayos (KA)")
        print("Reason: No inversion marker 'ay' detected.")
        return 1
        
    else:
        # SCENARIO B: "ay" found
        # We must ensure 'ay' isn't the very first word (which would be an interjection like "Ay! nauntog ako")
        if first_index > 0:
            print("Structure: Di-Karaniwang Ayos (DKA)")
            return 0
            
        else:
            print("Structure: Ambiguous (Likely KA with Interjection)")
            print("Reason: Found 'ay' but it was at the start of the sentence.")
            return 2

## Google

In [7]:
df_bcopa["premise_form"] = df_bcopa["premise"].apply(get_sentence_form)

Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Di-Karaniwang Ayos (DKA)
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay'

In [8]:
df_bcopa["choice1_form"] = df_bcopa["choice1"].apply(get_sentence_form)


Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayo

In [9]:
df_bcopa["choice2_form"] = df_bcopa["choice2"].apply(get_sentence_form)

Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayos (KA)
Reason: No inversion marker 'ay' detected.
Structure: Karaniwang Ayo

In [10]:
df_bcopa.to_csv("../datasets/labeled/google/google_labeled_bcopa.csv", index=False)