In [91]:
from tortus import Tortus
import pandas as pd
import os

In [99]:
def annotate_tweets(path, text_column = "annotate_text", num_records=100, prev_annotations = None, additional_labels = None):
    """Start a second-pass tortus annotation session for the tweets in ``path``.

    Parameters
    ----------
    path : str
        CSV file to annotate. Its first directory component is replaced by
        ``"second_annotation"`` (via ``create_path``) to locate previously
        saved second-pass annotations.
    text_column : str
        Name of the column whose text is passed to ``Tortus`` for annotation.
    num_records : int
        Forwarded to ``Tortus`` as ``num_records``.
    prev_annotations : pandas.DataFrame or None
        Forwarded to ``Tortus`` as ``annotations``. Ignored when a saved
        second-pass file already exists, because that file is loaded instead.
    additional_labels : list of str or None
        Extra labels appended after the built-in label set.

    Returns
    -------
    Tortus
        The annotator object after ``annotate()`` has been called.
    """
    base_labels = [
        "full_standard_english",
        "not-syntactic_standard_english",
        "non_standard_english",
        "code-switched",
        "some_english",
        "not_english",
    ]
    # None replaces the former mutable [] default; any iterable of labels is accepted.
    labels = base_labels + list(additional_labels or [])

    df = pd.read_csv(path)
    # makedirs=True: the output directory is created here as a side effect.
    output_path = create_path(path, "second_annotation", True)

    if os.path.exists(output_path):
        print(f"Annotations already exist for: {output_path}, adding to these annotations")
        # Saved annotations take precedence over the prev_annotations argument.
        prev_annotations = pd.read_csv(output_path, index_col = "Unnamed: 0")

    # Use the caller-supplied text_column (it was previously hard-coded to "annotate_text").
    annotator = Tortus(df, text_column, num_records=num_records, annotations=prev_annotations, labels=labels, id_column = "id_column")
    annotator.annotate()
    return annotator

def create_path(path, new_first_dir_name, makedirs = False):
    """Return ``path`` with its first directory component swapped out.

    The directory part of ``path`` is split on ``'/'`` and its first piece is
    replaced by ``new_first_dir_name``; the file name is kept unchanged.

    Parameters
    ----------
    path : str
        Source path whose leading directory should be replaced.
    new_first_dir_name : str
        Directory name that takes the place of the first component.
    makedirs : bool
        When true, the resulting directory is created (no error if it
        already exists).

    Returns
    -------
    str
        The rewritten directory joined with the original file name.
    """
    directory, filename = os.path.split(path)
    # Discard the original leading component and keep everything after it.
    _, *remaining_dirs = directory.split('/')
    target_dir = '/'.join([new_first_dir_name, *remaining_dirs])
    if makedirs:
        os.makedirs(target_dir, exist_ok = True)
    return os.path.join(target_dir, filename)

def merge_and_save_annotations(tortus, first_annotation_path):
    """Save the second-pass annotations and a table merging both passes.

    Reads the first-pass annotations from ``first_annotation_path``, writes
    ``tortus.annotations`` under the ``"second_annotation"`` directory, reads
    the original data from the ``"data"`` directory, and writes the merged
    table under the ``"complete_annotation"`` directory. All three locations
    are derived from ``first_annotation_path`` with ``create_path``.

    Parameters
    ----------
    tortus : object
        Annotator exposing an ``annotations`` DataFrame.
    first_annotation_path : str
        Path of the first-pass annotation CSV.

    Returns
    -------
    pandas.DataFrame
        The merged table restricted to the selected output columns.
    """
    round_one = pd.read_csv(first_annotation_path, index_col = "Unnamed: 0").rename(
        columns = {'label': 'label_1', 'annotated_at': 'annotated_at_1'}
    )

    # Write the second pass with its original column names, then rename for merging.
    raw_round_two = tortus.annotations
    second_pass_path = create_path(first_annotation_path, "second_annotation", True)
    raw_round_two.to_csv(second_pass_path)
    round_two = raw_round_two.rename(
        columns = {'label': 'label_2', 'annotated_at': 'annotated_at_2'}
    )

    source_path = create_path(first_annotation_path, "data")
    source_tweets = pd.read_csv(source_path, index_col = "Unnamed: 0")

    # NOTE(review): the output recorded in this notebook shows the columns taken
    # from the original data empty; confirm that the original file's index
    # really holds the id_column values used as the join key here.
    combined = round_one.merge(source_tweets, how = "left", left_on = "id_column", right_index = True)
    combined = combined.merge(round_two, how = "left", on = "id_column")

    output_columns = [
        "id_column",
        "rawContent",
        "cleaned_content",
        "label_1",
        "label_2",
        "english_relative_frequency",
        "date",
        "annotated_at_1",
        "annotated_at_2",
    ]
    combined = combined[output_columns]

    complete_path = create_path(first_annotation_path, "complete_annotation", True)
    combined.to_csv(complete_path)

    return combined

In [100]:
# First-pass annotation file; create_path swaps its leading "first_annotation"
# directory to locate the second-pass, original-data and merged files.
first_annotation_path = "first_annotation/Singapore/tweets_over_period/24400_tweets_over_period/0.7_to_0.8_english_words.csv"
# Runs tortus.annotate() on that file; num_records = 1 is forwarded to Tortus.
tortus = annotate_tweets(first_annotation_path, num_records = 1)

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00#0\x00\x00\x06\xc4\x08\x06\x00\x00\x00…

Output()

In [101]:
# Writes the second-pass and merged CSVs; as the cell's last expression, the
# returned merged DataFrame is displayed as the cell output.
merge_and_save_annotations(tortus, first_annotation_path)

Unnamed: 0,id_column,rawContent,cleaned_content,label_1,label_2,english_relative_frequency,date,annotated_at_1,annotated_at_2
0,3589,,,full_standard_english,,,,2022-11-17 15:04:27,NaT
1,20614,,,full_standard_english,,,,2022-11-17 15:04:28,NaT
2,17487,,,full_standard_english,full_standard_english,,,2022-11-17 15:04:29,2022-11-17 15:04:47
