In [46]:
from tortus import Tortus
import pandas as pd
import os

In [47]:
def annotate_tweets(path, text_column = "cleaned_content", num_records=100, prev_annotations = None, additional_labels = None):
    """Start a Tortus annotation session for the tweets stored in a CSV file.

    The CSV at ``path`` is loaded with its ``id`` column as the index and must
    contain a ``rawContent`` column plus the column named by ``text_column``.
    Both texts are combined into a small HTML snippet (column
    ``annotate_text``) so the raw and cleaned tweet are shown side by side.

    Annotations are stored in a directory tree mirroring ``path`` whose first
    path component is replaced by ``first_annotation`` (created if missing).
    If a file already exists at that location it is loaded and handed to
    Tortus so that the session adds to it; in that case it takes precedence
    over any ``prev_annotations`` passed by the caller.

    Parameters
    ----------
    path : str
        '/'-separated path of the CSV file holding the tweets.
    text_column : str
        Name of the column holding the cleaned tweet text.
    num_records : int
        Forwarded to ``Tortus`` as ``num_records``.
    prev_annotations : pandas.DataFrame or None
        Earlier annotations forwarded to ``Tortus``; ignored when an
        annotation file already exists at the output location.
    additional_labels : iterable of str or None
        Extra labels appended after the six built-in ones.

    Returns
    -------
    tuple
        ``(tortus, output_path)``: the ``Tortus`` instance on which
        ``annotate()`` has been called, and the CSV path where its
        annotations should be saved.
    """
    df = pd.read_csv(path, index_col = "id")

    # Empty cells come back from the CSV as NaN; concatenating NaN with a
    # string would make the whole snippet NaN (or raise for an all-empty,
    # float-typed column), so substitute empty strings before building it.
    raw_text = df["rawContent"].fillna("").astype(str)
    clean_text = df[text_column].fillna("").astype(str)
    df["annotate_text"] = "<b>Raw:</b> " + raw_text + "<br><b>Clean</b>: " + clean_text

    # Mirror the input directory layout under 'first_annotation' by swapping
    # out the first path component (e.g. 'data/x/y' -> 'first_annotation/x/y').
    temp_path_list = os.path.dirname(path).split('/')
    temp_path_list[0] = 'first_annotation'
    outdirs = '/'.join(temp_path_list)
    basename = os.path.basename(path)
    os.makedirs(outdirs, exist_ok = True)
    output_path = os.path.join(outdirs, basename)

    if os.path.exists(output_path):
        print(f"Annotations already exist for: {output_path}, adding to these annotations")
        # The saved file carries the old index as an unnamed first column.
        prev_annotations = pd.read_csv(output_path, index_col = "Unnamed: 0")

    # Build a fresh list on every call so no state is shared between calls.
    labels = [
        "full_standard_english",
        "not-syntactic_standard_english",
        "non_standard_english",
        "code-switched",
        "some_english",
        "not_english",
    ]
    if additional_labels:
        labels.extend(additional_labels)

    tortus = Tortus(df, "annotate_text", num_records=num_records, annotations=prev_annotations, labels=labels)
    tortus.annotate()
    return tortus, output_path

def save_annotations(tortus, output_path):
    """Write the annotations held by ``tortus`` to ``output_path`` as CSV.

    ``tortus`` only needs an ``annotations`` attribute exposing ``to_csv``;
    the file is written with that method's default settings.
    """
    collected = tortus.annotations
    collected.to_csv(output_path)

In [48]:
# Tweet CSV to annotate; annotate_tweets derives the output location from
# this path by replacing its leading "data" component with "first_annotation".
original_path = "data/Singapore/tweets_over_period/24400_tweets_over_period/0.7_to_0.8_english_words.csv"
# Start an annotation session with num_records=10; `tortus` and `output_path`
# are kept as notebook globals so a later cell can save the results.
tortus, output_path = annotate_tweets(original_path, num_records = 10)

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

Output()

In [49]:
# Write the annotations collected in the session above to output_path.
save_annotations(tortus, output_path)
# Last expression of the cell: display the annotations that were just saved.
tortus.annotations

Unnamed: 0,id_column,annotate_text,label,annotated_at
0,20925,<b>Raw:</b> @Dziiingdziiing @DrDon97162070 @in...,code-switched,2022-11-17 15:13:02
1,12682,<b>Raw:</b> @Hazy961 Morning.. Me too cuti smp...,code-switched,2022-11-17 15:13:08
2,2754,<b>Raw:</b> @raphaelj9s Das ist halt immer die...,not_english,2022-11-17 15:13:12
3,7931,<b>Raw:</b> @amekoto6 Pete the catの日本語版は日本の書店で...,not_english,2022-11-17 15:13:16
4,19700,<b>Raw:</b> John\nPallelai bedokwalk no49.miss...,non_standard_english,2022-11-17 15:13:41
5,2533,<b>Raw:</b> I'm at Blk 210 Choa Chu Kang Centr...,non_standard_english,2022-11-17 15:13:46
6,22268,<b>Raw:</b> John\nPallelai bedokwalk no49.miss...,non_standard_english,2022-11-17 15:13:59
7,16014,<b>Raw:</b> my mr solo dolo 🥹<br><b>Clean</b>:...,non_standard_english,2022-11-17 15:14:06
8,22257,<b>Raw:</b> John\nPallelai bedokwalk no49.miss...,non_standard_english,2022-11-17 15:14:14
9,1877,<b>Raw:</b> Mas maganda pa din yung CLASSY at...,not_english,2022-11-17 15:14:29
