In [59]:
from tortus import Tortus
import pandas as pd
import os

In [60]:
def _first_annotation_dir(path):
    """Return the directory of ``path`` with its first component replaced by ``first_annotation``."""
    # Normalise the platform separator before splitting so Windows-style
    # paths are handled too; this is a no-op wherever os.sep is already "/".
    parts = os.path.dirname(path).replace(os.sep, '/').split('/')
    parts[0] = 'first_annotation'
    return '/'.join(parts)


def annotate_tweets(path, text_column="cleaned_content", num_records=100, prev_annotations=None, additional_labels=None):
    """Start a Tortus annotation session over the tweets stored in a CSV file.

    The CSV at ``path`` is loaded with its ``id`` column as the index, and an
    ``annotate_text`` column is built that shows the raw tweet next to the
    text from ``text_column``. The output location mirrors ``path`` with the
    first directory component replaced by ``first_annotation``; that directory
    is created if it does not exist yet.

    Parameters
    ----------
    path : str
        CSV file to annotate. It must contain the columns ``id``,
        ``rawContent`` and ``text_column``.
    text_column : str
        Column holding the cleaned text displayed after the raw text.
    num_records : int
        Forwarded to ``Tortus`` as ``num_records``.
    prev_annotations : pandas.DataFrame or None
        Forwarded to ``Tortus`` as ``annotations``. It is ignored when an
        annotation file already exists at the output path: that file is
        loaded and used instead.
    additional_labels : iterable of str or None
        Extra labels appended after the six built-in ones.

    Returns
    -------
    tuple
        ``(tortus, output_path)``: the Tortus object after ``annotate()`` has
        been called, and the CSV path where its annotations should be saved.
    """
    df = pd.read_csv(path, index_col="id")

    # An empty CSV field is read back as NaN, and ``str + NaN`` is NaN, which
    # would blank out the whole display text (raw tweet included). Treat a
    # missing value as an empty string so the other half is still shown.
    raw_text = df["rawContent"].fillna("")
    clean_text = df[text_column].fillna("")
    df["annotate_text"] = "<b>Raw:</b> " + raw_text + "<br><b>Clean</b>: " + clean_text

    outdirs = _first_annotation_dir(path)
    os.makedirs(outdirs, exist_ok=True)
    output_path = os.path.join(outdirs, os.path.basename(path))

    if os.path.exists(output_path):
        print(f"Annotations already exist for: {output_path}, adding to these annotations")
        # Annotations saved by a previous session take precedence over the argument.
        prev_annotations = pd.read_csv(output_path, index_col="Unnamed: 0")

    # Build a fresh list on every call instead of relying on a shared mutable default.
    labels = [
        "full_standard_english",
        "not-syntactic_standard_english",
        "non_standard_english",
        "code-switched",
        "some_english",
        "not_english",
    ] + list(additional_labels or [])

    tortus = Tortus(df, "annotate_text", num_records=num_records, annotations=prev_annotations, labels=labels)
    tortus.annotate()
    return tortus, output_path

def save_annotations(tortus, output_path):
    """Write the ``annotations`` table held by ``tortus`` to ``output_path`` as CSV.

    ``tortus`` is any object exposing an ``annotations`` attribute with a
    ``to_csv`` method; ``output_path`` is handed to that method unchanged.
    """
    collected = tortus.annotations
    collected.to_csv(output_path)

In [67]:
# CSV of tweets to annotate in this session (relative path).
original_path = "data/Singapore/tweets_over_period/24400_tweets_over_period/0.7_to_0.8_english_words.csv"
# Start annotating with num_records=10; keep both return values because the
# save cell needs `tortus` and `output_path`.
tortus, output_path = annotate_tweets(original_path, num_records = 10)

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

HTML(value='Click on the label corresponding with the text below. Each selection requires                 conf…

HTML(value='<h4><b>Raw:</b> Up in the “mountains” #Singapore @ Bukit Timah Nature Reserve https://t.co/kR7qPUF…

GridBox(children=(VBox(children=(Box(children=(Button(description='full_standard_english', layout=Layout(borde…

Output()

In [66]:
# Write the annotations collected so far to the path returned by annotate_tweets.
# NOTE(review): this cell's execution count (66) is lower than that of the
# annotate cell shown before it (67), and the table below has more rows than
# the 10 requested there -- presumably output from an earlier run; re-run the
# notebook top to bottom to confirm.
save_annotations(tortus, output_path)
# Show the annotations table as the cell output.
tortus.annotations

Unnamed: 0,id_column,annotate_text,label,annotated_at
0,20925,<b>Raw:</b> @Dziiingdziiing @DrDon97162070 @in...,code-switched,2022-11-17 15:13:02
1,12682,<b>Raw:</b> @Hazy961 Morning.. Me too cuti smp...,code-switched,2022-11-17 15:13:08
2,2754,<b>Raw:</b> @raphaelj9s Das ist halt immer die...,not_english,2022-11-17 15:13:12
3,7931,<b>Raw:</b> @amekoto6 Pete the catの日本語版は日本の書店で...,not_english,2022-11-17 15:13:16
4,19700,<b>Raw:</b> John\nPallelai bedokwalk no49.miss...,non_standard_english,2022-11-17 15:13:41
...,...,...,...,...
65,5170,<b>Raw:</b> @tommyaltinnit awh Tommy it's ok<b...,non_standard_english,2022-11-17 16:09:02
66,22350,<b>Raw:</b> ♡♡♡🇸🇬♡🇮🇳♡♡♡\n\nリトルインディア\n\n大好きになった...,not_english,2022-11-17 16:09:11
67,11640,<b>Raw:</b> Joyceline a fait un tweet sur une ...,not_english,2022-11-17 16:09:15
68,17856,<b>Raw:</b> My nightDuty stress relief..\n#Ast...,non_standard_english,2022-11-17 16:09:22
