In [1]:
from tortus import Tortus
import pandas as pd
import os

Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00\xa5\xaf~d\x00\…

HTML(value="<h2 style='text-align:center'>        easy text annotation in a Jupyter Notebook</h2>")

In [2]:
def annotate_tweets(path, text_column = "cleaned_content", num_records=100, prev_annotations = None, additional_labels = None, tweets_longer_than_num = 10):
    """Start a Tortus annotation session for the tweets stored in a CSV file.

    The CSV at ``path`` is loaded with its ``id`` column as the index, short
    tweets are filtered out, and an HTML ``annotate_text`` column combining
    the ``rawContent`` column with ``text_column`` is added for display.

    The save location mirrors ``path`` with the first component of its
    directory replaced by ``first_annotation`` (e.g. ``data/a/b.csv`` ->
    ``first_annotation/a/b.csv``); that directory is created if missing.

    Parameters
    ----------
    path : str or path-like
        CSV file to annotate. Must contain the columns ``id``,
        ``rawContent`` and ``text_column``.
    text_column : str
        Column holding the cleaned tweet text; used both for the length
        filter and for the "Clean" part of the displayed text.
    num_records : int
        Forwarded to ``Tortus`` as ``num_records``.
    prev_annotations : pandas.DataFrame or None
        Forwarded to ``Tortus`` as ``annotations``. NOTE: if a file already
        exists at the computed output path, it is read from disk and used
        instead, i.e. this argument is ignored in that case.
    additional_labels : iterable of str or None
        Extra labels appended after the six built-in ones. ``None`` (the
        default) means no extra labels.
    tweets_longer_than_num : int
        A tweet is kept only if ``text_column`` contains more than
        ``tweets_longer_than_num - 1`` space characters.

    Returns
    -------
    tuple
        ``(tortus, output_path)``: the ``Tortus`` instance on which
        ``annotate()`` has been called, and the CSV path where its
        annotations should be saved.
    """
    labels = [
        "full_standard_english",
        "not-syntactic_standard_english",
        "non_standard_english",
        "code-switched",
        "some_english",
        "not_english",
    ]
    # `None` instead of a shared mutable `[]` default; any iterable is accepted.
    if additional_labels is not None:
        labels.extend(additional_labels)

    tweets_raw = pd.read_csv(path, index_col = "id")
    # Length filter: spaces are counted as a proxy for the number of words.
    is_long_enough = tweets_raw[text_column].str.count(' ').gt(tweets_longer_than_num-1)
    # Explicit copy so the new column is added to an independent frame rather
    # than to a selection of the loaded one.
    tweets = tweets_raw[is_long_enough].copy()
    tweets["annotate_text"] = "<b>Raw:</b> " + tweets["rawContent"] + "<br><b>Clean</b>: " + tweets[text_column]

    # Swap the first directory component for 'first_annotation'. Separators
    # are normalised to '/' first so the split also works where os.sep is not
    # '/' (a no-op on POSIX).
    dir_parts = os.path.dirname(path).replace(os.sep, '/').split('/')
    dir_parts[0] = 'first_annotation'
    outdirs = '/'.join(dir_parts)
    os.makedirs(outdirs, exist_ok = True)
    output_path = os.path.join(outdirs, os.path.basename(path))

    # Annotations already on disk take precedence over the argument so that a
    # re-run continues from the saved state.
    if os.path.exists(output_path):
        print(f"Annotations already exist for: {output_path}, adding to these annotations")
        prev_annotations = pd.read_csv(output_path, index_col = "Unnamed: 0")

    tortus = Tortus(tweets, "annotate_text", num_records=num_records, annotations=prev_annotations, labels=labels)
    tortus.annotate()
    return tortus, output_path

def save_annotations(tortus, output_path):
    """Write ``tortus.annotations`` to ``output_path`` via its ``to_csv`` method.

    Parameters
    ----------
    tortus : object
        Anything exposing an ``annotations`` attribute with a ``to_csv``
        method (presumably a pandas DataFrame held by a Tortus instance).
    output_path : str or path-like
        Destination file; passed to ``to_csv`` unchanged.
    """
    annotations = tortus.annotations
    annotations.to_csv(output_path)

In [3]:
# Tweets CSV to label in this session; annotate_tweets derives the save
# location (returned as output_path) from this path.
original_path = "data/Singapore/tweets_over_period/24400_tweets_over_period/0.8_to_0.9_english_words.csv"
tortus, output_path = annotate_tweets(path=original_path, num_records=100)

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

Output()

In [4]:
# Write the labels collected so far to disk, then show them as the cell output.
save_annotations(tortus=tortus, output_path=output_path)
tortus.annotations

Unnamed: 0,id_column,annotate_text,label,annotated_at
0,6948,<b>Raw:</b> Have time that I just work for the...,code-switched,2022-11-18 12:28:22
1,14215,<b>Raw:</b> Good MEWning to this person who ma...,non_standard_english,2022-11-18 12:28:27
2,21662,<b>Raw:</b> StarMagicBeyondTheStars happening ...,full_standard_english,2022-11-18 12:28:35
3,6760,"<b>Raw:</b> Please go vot for L,he currently r...",non_standard_english,2022-11-18 12:28:41
4,23932,<b>Raw:</b> John\nPallelai bedokwalk no 49.mi...,not-syntactic_standard_english,2022-11-18 12:29:01
...,...,...,...,...
95,9622,<b>Raw:</b> Thank you for being part of the 5 ...,full_standard_english,2022-11-18 12:51:19
96,23521,<b>Raw:</b> Daw BGC ang SG hahaha times 5 lang...,not_english,2022-11-18 12:51:27
97,8934,"<b>Raw:</b> MewMew, I miss you! ❤️ I hope you ...",code-switched,2022-11-18 12:51:37
98,19103,<b>Raw:</b> @zonotrick These were on Pulau Ubi...,full_standard_english,2022-11-18 12:55:00
