In [2]:
import pandas as pd
import numpy as np
import pyarrow as pa

In [3]:
# Read the processed messages back from disk and preview the first rows
messages_path = '../data/processed/messages.parquet'
parquet_df = pd.read_parquet(messages_path)
print(parquet_df.head())

     ID                                         Clean_Text  \
0  7403  BARDEFU 2 IN 1 Multi purpose juicer ኳሊቲ የሆነ የጁ...   
1  7401  portable electrical water dispenser ባለ 3 press...   
2  7399  GROOMING SET ሶስት በአንድ የያዘ የፀጉር ማሽን እና ሼቨር የሚሰራ...   
3  7395  GROOMING SET ሶስት በአንድ የያዘ የፀጉር ማሽን እና ሼቨር የሚሰራ...   
4  7393  1L Water Bottle High Quality 1L water time sca...   

                                              Tokens  \
0  [BARDEFU, 2, IN, 1, Multi, purpose, juicer, ኳሊ...   
1  [portable, electrical, water, dispenser, ባለ, 3...   
2  [GROOMING, SET, ሶስት, በአንድ, የያዘ, የፀጉር, ማሽን, እና,...   
3  [GROOMING, SET, ሶስት, በአንድ, የያዘ, የፀጉር, ማሽን, እና,...   
4  [1L, Water, Bottle, High, Quality, 1L, water, ...   

                                           Processed  \
0  [BARDEFU, 2, IN, 1, Multi, purpose, juicer, ኳሊ...   
1  [portable, electrical, water, dispenser, ባለ, 3...   
2  [GROOMING, SET, ሶስት, በአንድ, የያዘ, የፀጉር, ማሽን, እና,...   
3  [GROOMING, SET, ሶስት, በአንድ, የያዘ, የፀጉር, ማሽን, እና,...   
4  [1L, Wa

In [None]:

# Select the first 30 fully populated messages to label. The explicit copy
# makes `subset` an independent frame, so the column assignment below cannot
# raise pandas' SettingWithCopyWarning or silently write to a temporary.
subset = parquet_df[['ID', 'Processed']].dropna().head(30).copy()

# Convert stringified token lists (if necessary). The emptiness guard avoids
# an IndexError from `.iloc[0]` when no rows survive `dropna()`.
if not subset.empty and isinstance(subset.iloc[0]['Processed'], str):
    import ast
    subset['Processed'] = subset['Processed'].apply(ast.literal_eval)

# Labeling function
def label_tokens(tokens, valid_labels=None):
    """Interactively collect one NER tag per token from standard input.

    Each token is shown as a prompt. Pressing Enter without typing anything
    assigns the tag 'O'. Any answer that is not in ``valid_labels`` is
    rejected and the same token is prompted again, so a typo can never end
    up in the returned labels.

    Args:
        tokens: Iterable of tokens to label, in message order.
        valid_labels: Optional collection of accepted tags. Defaults to
            B-Product, I-Product, B-LOC, I-LOC, B-PRICE, I-PRICE and O.
            It should contain 'O', otherwise the Enter shortcut is rejected
            like any other invalid answer.

    Returns:
        A list of ``(token, label)`` tuples in the same order as ``tokens``.
    """
    if valid_labels is None:
        valid_labels = ("B-Product", "I-Product", "B-LOC", "I-LOC",
                        "B-PRICE", "I-PRICE", "O")

    print("\nInstructions:")
    # Built from the accepted tags so the help text always matches validation
    print(" - Use: " + ", ".join(valid_labels))
    print(" - Press Enter to assign 'O' (Outside) to a token\n")

    labeled_tokens = []
    for tok in tokens:
        while True:
            # An empty answer is shorthand for the 'O' tag
            label = input(f"{tok}: ").strip() or "O"
            if label in valid_labels:
                break
            print(f"   '{label}' is not a valid tag, please try again.")
        labeled_tokens.append((tok, label))
    return labeled_tokens

# Save labeled data to CoNLL format: one "token tag" pair per line and a
# blank line between messages.
saved_count = 0
with open("../data/processed/amharic_ner_conll.txt", "w", encoding="utf-8") as f:
    for _, row in subset.iterrows():
        tokens = row['Processed']
        labeled = label_tokens(tokens)
        for token, tag in labeled:
            f.write(f"{token} {tag}\n")
        f.write("\n")  # Blank line separates messages
        # Flush per message so finished work survives an interrupted session
        f.flush()
        saved_count += 1

# Report the number of messages actually written instead of a fixed figure
print(f"✅ Done! Saved {saved_count} labeled messages to 'amharic_ner_conll.txt'")



Instructions:
 - Use: B-Product, I-Product, B-LOC, I-LOC, B-PRICE, I-PRICE, O
 - Press Enter to assign 'O' (Outside) to a token


Instructions:
 - Use: B-Product, I-Product, B-LOC, I-LOC, B-PRICE, I-PRICE, O
 - Press Enter to assign 'O' (Outside) to a token



In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd

# Messages to annotate: the first 30 rows that have both an ID and tokens
subset = (
    parquet_df[['ID', 'Processed']]
    .dropna()
    .head(30)
)

# Tag inventory offered in each dropdown (order is the order shown to the user)
labels_list = [
    'B-Product', 'I-Product',
    'B-LOC', 'I-LOC',
    'B-PRICE', 'I-PRICE',
    'O',
    'B-CONTACT', 'I-CONTACT',
]

# File path to save labeled data
output_path = '../data/processed/amharic_ner_conll_labeled.txt'

def label_message(tokens, idx=0):
    """Show one dropdown per token and save the chosen tags on submit.

    Args:
        tokens: Sequence of tokens belonging to the message being labeled.
        idx: Zero-based position of this message within the global
            ``subset``; used for the progress message and to find the next
            message to label.

    Side effects:
        Displays the widgets in the notebook. When "Submit Labels" is
        clicked, the token/tag pairs are appended to ``output_path`` in
        CoNLL format (blank line after the message), this message's input
        widgets are closed, and the next message of ``subset`` (if any) is
        rendered inside this message's output area.
    """
    # 'initial' description width keeps long tokens from being truncated
    dropdowns = [
        widgets.Dropdown(
            options=labels_list,
            value='O',
            description=str(tok),
            style={'description_width': 'initial'},
        )
        for tok in tokens
    ]

    btn = widgets.Button(description="Submit Labels")
    out = widgets.Output()

    def on_submit(b):
        # Ignore repeated clicks: a second submit would append the same
        # message to the file again and spawn a duplicate follow-up form.
        if btn.disabled:
            return
        btn.disabled = True

        labels = [dd.value for dd in dropdowns]
        with out:
            clear_output()
            for token, label in zip(tokens, labels):
                print(f"{token} : {label}")

            # Build the whole record first so a failure cannot leave a
            # half-written message in the file; trailing blank line
            # separates messages.
            record = "".join(
                f"{token} {label}\n" for token, label in zip(tokens, labels)
            ) + "\n"
            try:
                # Save current labeled tokens to file (append mode)
                with open(output_path, 'a', encoding='utf-8') as f:
                    f.write(record)
            except OSError as exc:
                # Let the user retry instead of silently losing the labels
                print(f"\nCould not save labels: {exc}")
                btn.disabled = False
                return

            print(f"\nLabeled message {idx+1}/{len(subset)} saved!")

            # Remove this message's inputs so finished forms do not pile up
            for finished in (*dropdowns, btn):
                finished.close()

            # Move to next message or finish
            if idx + 1 < len(subset):
                clear_output(wait=True)
                label_message(subset.iloc[idx+1]['Processed'], idx + 1)
            else:
                print("\n✅ All messages labeled and saved.")

    btn.on_click(on_submit)

    display(*dropdowns, btn, out)

# Truncate the output file before labeling starts (opening in 'w' mode empties it)
open(output_path, 'w', encoding='utf-8').close()

# Kick off the labeling chain with the first message of the subset
first_tokens = subset.iloc[0]['Processed']
label_message(first_tokens)


Dropdown(description='BARDEFU', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PR…

Dropdown(description='2', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE', …

Dropdown(description='IN', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='1', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE', …

Dropdown(description='Multi', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRIC…

Dropdown(description='purpose', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PR…

Dropdown(description='juicer', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRI…

Dropdown(description='ኳሊቲ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='የጁስ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='መፍጫ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='የጀርመን', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRIC…

Dropdown(description='ቴክኖሎጂ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRIC…

Dropdown(description='3', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE', …

Dropdown(description='ሌትር', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='ጁስ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='የሚፈጭ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='ጆግ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ያለው', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='የብና', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='እና', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='የቅመማ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='ቅመም', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='መፍጫ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='ያለው', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='8000Watt', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-P…

Dropdown(description='ምላጮቹ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='ጠንካራ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='የሆኑ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='ለቤት', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='እንዲሁም', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRIC…

Dropdown(description='ለስራ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='የሚሆን', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='አሪፍ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='እቃ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ለአጠቃቀም', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRI…

Dropdown(description='ቀላል', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='በረዶ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='ይፈጫል', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='ዋጋ6800ብር', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-P…

Dropdown(description='ውስን', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='ፍሬ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ያለን', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='Limited', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PR…

Dropdown(description='Stock', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRIC…

Dropdown(description='አድራሻ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='ቁ1', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='መገናኛ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='ታሜ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ጋስ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ህንፃ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='ጎን', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ስሪ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ኤም', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ሲቲ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ሞል', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ሁለተኛ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='ፎቅ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ቢሮ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ቁ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE', …

Dropdown(description='SL05Aከ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRI…

Dropdown(description='ሊፍቱ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='ፊት', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ለ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE', …

Dropdown(description='ፊት', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ቁ2', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ለቡ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='መዳህኒዓለም', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PR…

Dropdown(description='ቤተክርስቲያን', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-P…

Dropdown(description='ወደ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ሙዚቃ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='ቤት', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ከፍ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ብሎ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ዛም_ሞል', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRIC…

Dropdown(description='2ኛ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ፎቅ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ቢሮቁ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='214', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE'…

Dropdown(description='0909522840', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I…

Dropdown(description='0923350054', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I…

Dropdown(description='ለቡ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE',…

Dropdown(description='ቅርንጫፍ0973611819', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE…

Dropdown(description='በTelegram', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-…

Dropdown(description='ለማዘዝ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='ይጠቀሙ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='shager_onlinestore', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PR…

Dropdown(description='ለተጨማሪ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRIC…

Dropdown(description='ማብራሪያ', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRIC…

Dropdown(description='የቴሌግራም', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRI…

Dropdown(description='ገፃችን', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC', 'B-PRICE', 'I-PRICE…

Dropdown(description='httpstmeShageronlinestore', index=6, options=('B-Product', 'I-Product', 'B-LOC', 'I-LOC'…

Button(description='Submit Labels', style=ButtonStyle())

Output()