In [1]:
import datetime
import os

import ipywidgets as widgets
import pandas as pd
from IPython.display import display, clear_output

In [2]:
### This notebook has been created with the help of ChatGPT. ###

In [3]:
# Never truncate cell contents and never hide columns when a frame is displayed.
# (On older pandas releases, pass -1 instead of None for 'display.max_colwidth'.)
for _option_name in ('display.max_colwidth', 'display.max_columns'):
    pd.set_option(_option_name, None)

In [4]:
# Load the annotation table (point this at your own CSV) and turn every
# missing cell into an empty string.
df = pd.read_csv('data/full_isaiah_dataset.csv').fillna("")

# Store the 'dir_he' / 'dir_he_dss' flags as text, rewriting the float-style
# spellings "1.0" / "0.0" to "1" / "0".
for _flag_column in ("dir_he", "dir_he_dss"):
    _flag_as_text = df[_flag_column].astype(str)
    df[_flag_column] = _flag_as_text.replace("1.0", "1").replace("0.0", "0")

df.columns

FileNotFoundError: [Errno 2] No such file or directory: 'data/full_isaiah_dataset.csv'

### Step 3: Define Columns to Annotate and Create Widgets

In [5]:
# Every column name of the loaded frame, as a plain Python list
columns = df.columns.tolist()
# Read-only context columns shown above the annotation form.
columns_for_info = [
    # identification
    'verb_id', 'lex',
    # location
    'scroll', 'book', 'chapter', 'verse_num',
    # text
    'gcons_verb', 'gcons_verse', 'sign_info',
    # morphology
    'stem', 'tense',
]
# Columns the annotator steps through, in this order, for every row.
columns_to_annotate = [
    'gcons_clause', 'subject', 'complement',
    'cmpl_lex', 'dir_he',
    'cmpl_constr', 'cmpl_nt', 'cmpl_anim', 'cmpl_det', 'cmpl_indiv',
    'motion_type',
    # preposition_1 ... preposition_4
    *(f'preposition_{slot}' for slot in range(1, 5)),
]

In [6]:
def _make_button(description, tooltip, icon):
    """Build an enabled, default-styled button with the given label, tooltip and icon."""
    return widgets.Button(
        description=description,
        disabled=False,
        button_style='',
        tooltip=tooltip,
        icon=icon,
    )


# Free-text box holding the value of the cell currently being annotated.
annotation_input = widgets.Textarea(
    layout=widgets.Layout(width='100%', height='100px'),  # Tweak the size here if needed
    placeholder='Type your annotation here',
    value='',
    description='',
    disabled=False,
)

# Action buttons: store the annotation, or move between rows.
submit_button = _make_button('Submit', 'Click to submit annotation', 'check')
prev_row_button = _make_button('Previous Row', 'Go to the previous row', 'arrow-left')
next_row_button = _make_button('Next Row', 'Go to the next row', 'arrow-right')

### Step 4.1: Add functions for tracking your progress

In [7]:
# Label for the DataFrame being annotated today; it is written into the
# progress log and used as the prefix of the saved CSV snapshot filenames.
df_name = "biblical_dss"

In [8]:
def save_annotation_details(df, current_index, fully_annotated_rows):
    """Append a progress entry to data/annotation_df_history/annotation_tracks.txt.

    The entry records the current timestamp, the module-level ``df_name``,
    the positional cursor, the index label found at that position, and the
    number of fully annotated rows.

    Args:
        df: DataFrame being annotated; only its index is consulted.
        current_index: Positional (0-based) row the annotator is currently on.
        fully_annotated_rows: Count to record in the log entry.

    Raises:
        IndexError: If ``current_index`` is outside the frame.
    """
    file_path = 'data/annotation_df_history/annotation_tracks.txt'

    # Appending fails with FileNotFoundError on a fresh checkout unless the
    # history folder exists, so create it on demand.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    # Get the current date and time
    current_datetime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

    # Translate the positional cursor into the frame's index label
    current_row_index = df.index[current_index]

    details = f"Date and Time: {current_datetime}\nDataFrame: {df_name}\nCurrent Index: {current_index}\nCurrent Row Index: {current_row_index}\nFully Annotated Rows: {fully_annotated_rows}\n\n"

    # Explicit encoding keeps the log readable regardless of the platform default.
    with open(file_path, 'a', encoding='utf-8') as file:
        file.write(details)

        
def count_fully_annotated_rows(df, columns_to_annotate):
    """Count the rows that have a value in every one of ``columns_to_annotate``.

    A cell counts as missing when it is NaN/None or a blank (empty or
    whitespace-only) string. Blank strings must be included because this
    notebook replaces NaN with "" right after loading, so a plain ``dropna()``
    would report every row as fully annotated.

    NOTE(review): if some columns are legitimately left blank (for example an
    unused preposition slot), such rows are not counted as complete; confirm
    this is the intended meaning of "fully annotated".

    Args:
        df: DataFrame holding the annotations.
        columns_to_annotate: List of column names that must all be filled.

    Returns:
        int: Number of rows without any missing or blank annotation cell.
    """
    subset = df[columns_to_annotate]
    # Compare on the stringified, stripped value so "" and "   " both count as blank.
    is_blank = subset.apply(lambda col: col.astype(str).str.strip()).eq("")
    is_missing = subset.isna() | is_blank
    return int((~is_missing.any(axis=1)).sum())


def save_dataframe(df, df_name):
    """Write a timestamped CSV snapshot of ``df`` to data/annotation_df_history/.

    The file is named ``{df_name}_{YYYY-MM-DD_HH_MM}.csv`` and is written
    without the index. The history folder is created if it does not exist
    yet. The path of the saved file is printed.

    Args:
        df: DataFrame to save.
        df_name: Label used as the filename prefix.
    """
    output_dir = "data/annotation_df_history"

    # Without this, to_csv fails on a fresh checkout where the folder is missing.
    os.makedirs(output_dir, exist_ok=True)

    # Timestamp with minute resolution: two saves within the same minute
    # target the same file, so the later one overwrites the earlier one.
    current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M")

    # Construct the filename
    filename = f"{output_dir}/{df_name}_{current_date}.csv"

    # Save the DataFrame to a CSV file, leaving the index out
    df.to_csv(filename, index=False)

    print(f"DataFrame saved as {filename}")

### Step 4.2: Functions to Display Data and Widgets with Navigation and Iteration Logic

In [9]:
# Shared cursor for the display function and the button callbacks:
# start on the first row and on the first column to annotate.
current_index = current_column_index = 0

def display_row(row_index, col_index):
    """Show one row and the annotation form for one of its columns.

    Clears the current output, then displays the row's context columns, its
    annotation columns, a prompt naming the column being annotated, the text
    box pre-filled with the cell's current value, and the navigation buttons.
    Also records ``row_index`` / ``col_index`` as the module-level cursor.

    Args:
        row_index: Positional (0-based) row to show.
        col_index: Position within ``columns_to_annotate`` of the column to edit.

    Raises:
        IndexError: If either position is out of range; the cursor is left
            untouched in that case.
    """
    global current_index, current_column_index

    # Look everything up by position. Reading the cell by index label while
    # the displayed row is positional would disagree on any frame whose
    # index is not the default 0..n-1 range.
    column_name = columns_to_annotate[col_index]
    row = df.iloc[row_index]
    cell_value = df[column_name].iloc[row_index]

    # Commit the cursor only once the lookups above have succeeded.
    current_index, current_column_index = row_index, col_index

    # The text box needs a string: NaN becomes '', anything else is stringified.
    cell_value = '' if pd.isna(cell_value) else str(cell_value)

    clear_output(wait=True)

    # Set the value of the annotation input and display it along with other elements
    annotation_input.value = cell_value
    display(row[columns_for_info])
    display(row[columns_to_annotate])
    display(widgets.HTML(value=f"<b>Annotate '{column_name}':</b>"))
    display(annotation_input, submit_button)
    display(widgets.HBox([prev_row_button, next_row_button]))

def navigate_row(offset):
    """Move ``offset`` rows away from the current row and show it.

    The target row is displayed starting at the first column to annotate.
    Nothing happens when the target falls outside the frame.
    """
    target = current_index + offset
    if target < 0 or target >= len(df):
        return
    display_row(target, 0)

### Step 5: Handle Annotation Submission and Row Navigation

In [10]:
def on_submit_clicked(b):
    """Click handler for the Submit button.

    Stores the text box content in the current cell, then moves on to the
    next column to annotate in the same row. After the last column it prints
    a completion message instead.

    Args:
        b: The clicked button (unused).
    """
    column_name = columns_to_annotate[current_column_index]
    # Write by position: the cursor is positional everywhere else, and a
    # label-based write would silently append a row for an unknown label.
    df.iloc[current_index, df.columns.get_loc(column_name)] = annotation_input.value
    if current_column_index + 1 < len(columns_to_annotate):
        display_row(current_index, current_column_index + 1)
    else:
        print("All annotations for this row are complete.")

def on_prev_row_clicked(b):
    """Click handler for the 'Previous Row' button: step back one row (``b`` is unused)."""
    navigate_row(offset=-1)

def on_next_row_clicked(b):
    """Click handler for the 'Next Row' button: step forward one row (``b`` is unused)."""
    navigate_row(offset=1)

# Hook each button up to its click handler.
for _button, _handler in (
    (submit_button, on_submit_clicked),
    (prev_row_button, on_prev_row_clicked),
    (next_row_button, on_next_row_clicked),
):
    _button.on_click(_handler)

### Step 6: Annotate!

In [11]:
# Open the annotation form at the current cursor position.
display_row(row_index=current_index, col_index=current_column_index)

verb_id                                                         1895129
lex                                                                SWR[
scroll                                                           1Qisaa
book                                                             Isaiah
chapter                                                               1
verse_num                                                            16
gcons_verb                                                        HSJRW
gcons_verse         RXYW W HZKW W HSJRW RW< M<LLJKM M NGD <JNJ XDLW HR<
sign_info      0000 0 0000 0 00000 000 00000000 0 000 00000 0000 000 00
stem                                                              hifil
tense                                                              impv
Name: 5, dtype: object

gcons_clause     W HSJRW RW< M<LLJKM M NGD <JNJ
subject                                        
complement                           M NGD <JNJ
cmpl_lex                          no complement
dir_he                                        0
cmpl_constr                                    
cmpl_nt                                        
cmpl_anim                                      
cmpl_det                                       
cmpl_indiv                                     
motion_type                                    
preposition_1                                MN
preposition_2                                  
preposition_3                                  
preposition_4                                  
Name: 5, dtype: object

HTML(value="<b>Annotate 'cmpl_lex':</b>")

Textarea(value='no complement', layout=Layout(height='100px', width='100%'), placeholder='Type your annotation…

Button(description='Submit', icon='check', style=ButtonStyle(), tooltip='Click to submit annotation')

HBox(children=(Button(description='Previous Row', icon='arrow-left', style=ButtonStyle(), tooltip='Go to the p…

### Step 7: Save your progress in a text file

In [12]:
# Count the completed rows, then append a progress entry to the tracking log.
fully_annotated_rows = count_fully_annotated_rows(
    df=df,
    columns_to_annotate=columns_to_annotate,
)
save_annotation_details(
    df=df,
    current_index=current_index,
    fully_annotated_rows=fully_annotated_rows,
)

In [13]:
# Write a timestamped CSV snapshot; pass your own DataFrame and its label here.
save_dataframe(df=df, df_name=df_name)

DataFrame saved as data/annotation_df_history/biblical_dss_2023-11-30_16_58.csv
