In [99]:
from tortus import Tortus
import pandas as pd
import os

In [101]:
def annotate_tweets(path, text_column = "cleaned_content", num_records=100, prev_annotations = None, additional_labels = None, tweets_longer_than_num = 10):
    """Load a tweet CSV, filter it, and start a Tortus annotation session.

    The CSV at ``path`` is read with its ``id`` column as the index. Rows are
    kept only when ``text_column`` contains at least ``tweets_longer_than_num``
    space characters. An ``annotate_text`` column combining ``rawContent`` and
    ``text_column`` as HTML is added; that column is what gets annotated.

    The save location mirrors the input location, with the first component of
    the input directory replaced by ``first_annotation`` (the directory is
    created if needed). If a file already exists there, it is loaded and used
    as the previous annotations, overriding the ``prev_annotations`` argument.

    Args:
        path: Path to the input CSV. Must contain ``id``, ``rawContent`` and
            ``text_column`` columns.
        text_column: Name of the cleaned-text column used for filtering and
            shown next to the raw text.
        num_records: Forwarded to ``Tortus`` as ``num_records``.
        prev_annotations: Forwarded to ``Tortus`` as ``annotations`` unless a
            saved annotation file already exists at the output path.
        additional_labels: Optional iterable of extra label names appended
            after the six built-in labels. ``None`` means no extra labels.
        tweets_longer_than_num: Minimum number of space characters a text must
            contain to be kept.

    Returns:
        A ``(tortus, output_path)`` tuple: the ``Tortus`` instance on which
        ``annotate()`` has been called, and the path where its annotations
        should be saved.
    """
    tweets = pd.read_csv(path, index_col = "id")
    # Count spaces as a cheap word-count proxy; rows with missing text compare
    # as False and are dropped.
    has_enough_spaces = tweets[text_column].str.count(' ').gt(tweets_longer_than_num - 1)
    # Explicit copy so the column assignment below targets an independent
    # frame rather than a filtered slice (avoids SettingWithCopyWarning).
    tweets = tweets[has_enough_spaces].copy()
    tweets["annotate_text"] = "<b>Raw:</b> " + tweets["rawContent"] + "<br><b>Clean</b>: " + tweets[text_column]

    # Mirror the input directory under 'first_annotation'. Normalising os.sep
    # is a no-op on POSIX and lets native Windows separators split correctly.
    dir_parts = os.path.dirname(path).replace(os.sep, '/').split('/')
    dir_parts[0] = 'first_annotation'
    outdirs = '/'.join(dir_parts)
    os.makedirs(outdirs, exist_ok = True)
    output_path = os.path.join(outdirs, os.path.basename(path))

    if os.path.exists(output_path):
        print(f"Annotations already exist for: {output_path}, adding to these annotations")
        # The saved file stores the unnamed DataFrame index as its first
        # column, which pandas reads back under the name "Unnamed: 0".
        prev_annotations = pd.read_csv(output_path, index_col = "Unnamed: 0")

    # Build a fresh list on every call; never share a mutable default.
    labels = [
        "full_standard_english",
        "not-syntactic_standard_english",
        "non_standard_english",
        "code-switched",
        "some_english",
        "not_english",
    ] + list(additional_labels or [])

    tortus = Tortus(tweets, "annotate_text", num_records=num_records, annotations=prev_annotations, labels=labels)
    tortus.annotate()
    return tortus, output_path

def save_annotations(tortus, output_path):
    """Write the annotations held by ``tortus`` to ``output_path`` as CSV.

    The DataFrame index is written as an unnamed first column, which is what
    ``annotate_tweets`` expects when it reloads the file via
    ``index_col="Unnamed: 0"``.
    """
    annotations = tortus.annotations
    annotations.to_csv(output_path)

In [102]:
# Start an annotation session over a small batch from one tweet file.
# `tortus` and `output_path` are kept as notebook globals for the save cell.
original_path = "data/Singapore/tweets_over_period/24400_tweets_over_period/0.8_to_0.9_english_words.csv"
tortus, output_path = annotate_tweets(
    path=original_path,
    num_records=10,
)

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

HTML(value='Click on the label corresponding with the text below. Each selection requires                 conf…

HTML(value='<h4><b>Raw:</b> John\nPallelai bedokwalk no49.misslohahhuat.misskitty.u are to take responsibility…

GridBox(children=(VBox(children=(Box(children=(Button(description='full_standard_english', layout=Layout(borde…

Output()

In [90]:
# Write the labels collected so far to disk; run this after annotating.
save_annotations(tortus=tortus, output_path=output_path)
# Bare last expression so the notebook renders the annotations table.
tortus.annotations

Unnamed: 0,id_column,annotate_text,label,annotated_at
0,11422,"<b>Raw:</b> Alors euh, la je suis dans un mood...",not_english,2022-11-17 16:55:28
1,5110,<b>Raw:</b> Normalize lang nang crush mo sakon...,some_english,2022-11-17 16:55:38
2,3500,<b>Raw:</b> Muntik na ah .pwede pala umulit yu...,not_english,2022-11-17 16:55:42
3,7616,<b>Raw:</b> @el2andi @AgusPratono3 @03__nakula...,not_english,2022-11-17 16:55:44
4,23432,<b>Raw:</b> @ydnxc Super ! J’ai un peu regardé...,not_english,2022-11-17 16:55:48
5,8828,<b>Raw:</b> Je pourrais ne jamais me lasser de...,not_english,2022-11-17 16:55:55
6,11263,<b>Raw:</b> Tengok??? Orang luar lebih memberi...,code-switched,2022-11-17 16:56:01
7,22090,<b>Raw:</b> John\nPallelai bedokwalk no49.miss...,code-switched,2022-11-17 16:56:10
8,12779,<b>Raw:</b> Ohh para dun sa nagblock sa akin k...,some_english,2022-11-17 16:56:16
9,15222,<b>Raw:</b> dua2 janji melayu... kata meet 7:1...,some_english,2022-11-17 16:56:29
