In [30]:
import os
import re
import pandas as pd
import numpy as np
from ast import literal_eval

In [31]:
# Read in bounding box files
box_files = os.listdir("../bounding_boxes/word_coordinates_split_6_4")
box_files.sort()

# Read in AST files
annotated_files = os.listdir("annotations_6_4")
annotated_files.sort()

# Sanity check to test whether the files line up by index after sorting
for i, file in enumerate(box_files):
    box_name = re.sub("_boxes.csv", "", file)
    ann_name = re.sub("_annotated.csv", "", annotated_files[i])
    if box_name != ann_name:
        print("mismatch: ", box_name, ann_name)

In [36]:
#####################################
### FUNCTIONS USED TO MERGE FILES ###
#####################################


# function that takes in the original dataframe and the dataframe 
# only containing comments/strings. This merges the multi-line comments/strings,
# puts the merged string in the word column, and the original word in the word column
# returns the merged dataframe
def merge_words(og_df, new_df):
    # stops pandas from interpreting 'NaN' as a float
    new_df['word'] = new_df['word'].astype(str) 

    # actual joining based on the occurence number
    mid_df = new_df.groupby('occurrence')['word'].apply(' '.join).reset_index()
    mid_df.columns = ['occurrence', 'word']
    mid_df = pd.merge(og_df, mid_df, on='occurrence', how="left")
    
    # moving merged string to word column
    mid_df['word'] = mid_df['word_y'].where(mid_df['word_y'].notna(), mid_df['word_x'])
    mid_df = mid_df.drop(columns = ['word_x', 'word_y']) # dropping extra columns that get created
    
    return mid_df


# Using this function to merge the bounding boxes and AST files
def merge_files(bb, ast):
    # separating because merge based on word column for strings/comments 
    # and word + occurrence column for the rest
    strings = bb[bb['occurrence'] >= 100]
    therest = bb[bb['occurrence'] < 100]
    
    # two separate merges
    strdf = pd.merge(strings, ast, on='word', how='left')
    strdf = strdf.rename(columns = {'occurrence_x': 'occurrence'})
    # FIXME need to fix column names so occurrence doesn't get lost
    # print(strdf['occurrence_x'])
    # print(strdf['occurrence_y'])
    restdf = pd.merge(therest, ast, on=['word', 'occurrence'], how='left')

    # merging the merges, sorting to original order, and reordering the columns
    merged = pd.concat([strdf, restdf])
    merged = merged.sort_values(by=['y', 'x'])
    merged = merged[['word', 'occurrence', 'original', 'tree_parts', 'x', 'y', 'width', 'height',
                    'tobii_x', 'tobii_y', 'tobii_width', 'tobii_height']]
    
    return merged

# For cases where a comment or string isn't matched to something from the ast file,
# this function fills in that row with the contents above or below, and logs whenever that happens
def fillna_and_log(df, filename, method=None):
    df_before = df.copy()  # Copy of the DataFrame before filling
    df_after = df.copy()
    df_after['tree_parts'] = df['tree_parts'].fillna(method=method)  # DataFrame after filling tree parts column

    before = df_before['tree_parts']
    after = df_after['tree_parts']

    # Find the indices where a NaN was filled
    filled_indices = np.where(before.isna() & ~after.isna())[0]

    # For each index where a NaN was filled
    for idx in filled_indices:
        parts = literal_eval(df_after.iloc[idx, 3]) # interpreting tree parts as a list, and not a string
        kind = df_after.iloc[idx, 1] # whether it's a string or comment, or neither
        parts.pop() # removing last element. Not a perfect solution, but an ok one
        
        if kind >= 200: # if it's a comment
            parts.append('comment')
        elif kind >= 100 and kind < 200: # if it's a string
            parts.append('literal')
        else: # could be a random thing like ) or };
            parts = []
            #print(f"Tree parts missing for {df.iloc[idx, 0]}-------------")
        df_after.iat[idx, 3] = parts
        #print(f"Filled in tree parts for {df.iloc[idx, 0]} at index {idx} with value {parts}")
    return df_after


In [37]:
# Actual work of merging happens here 

for i, file in enumerate(box_files):
    print(file)
    boxes = pd.read_csv(f"../bounding_boxes/word_coordinates_split_6_4/{file}")
    ast = pd.read_csv(f"annotations_6_4/{annotated_files[i]}")
    name = re.sub("_boxes", "", file)
    # moving strings and comments to 'original' column
    boxes['original'] = boxes['word'].where(boxes['occurrence'] >= 100) #, boxes['original'])

    # filtering to get just comments and just strings
    comments = boxes[(boxes['occurrence'] >= 200)]
    strings = boxes[(boxes['occurrence'] >= 100) & (boxes['occurrence'] < 200)]
    
    merged_comments = merge_words(boxes, comments)
    merged_strings = merge_words(merged_comments, strings)

    # rearranging columns
    merged_strings = merged_strings[['word', 'occurrence', 'original', 'x', 'y', 'width', 
                                     'height', 'tobii_x', 'tobii_y', 'tobii_width', 'tobii_height']]

    # actual merging
    wholedf = merge_files(merged_strings, ast) 
    wholedf = fillna_and_log(wholedf, name, 'ffill') # for strings/comments that don't equal strings exactly
    wholedf.to_csv(f'fully_annotated/{name}', index=False)
    

BFSdist_boxes.csv
abstractMatrix3D_boxes.csv
actionLoadNotes_boxes.csv
actionPerformed_boxes.csv
Tree parts missing for ]-------------
Tree parts missing for ))-------------
Tree parts missing for ]-------------
addDbArgs_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

addDigit_boxes.csv
addErrors_boxes.csv
addLimit_boxes.csv
addPKColumn_boxes.csv
addRelativeHeaders_boxes.csv
addRotation_boxes.csv
addToTechnicalComment_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)


addUIDeployment_boxes.csv
add_boxes.csv
appendDeclarations_boxes.csv
Tree parts missing for )-------------
asMap_boxes.csv
atomToExpression_boxes.csv
capitalizeString_boxes.csv
checkDescendantM_boxes.csv
checkSetLayout_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

clearFieldersFromField_boxes.csv
clone_boxes.csv
close_boxes.csv
compareTo_boxes.csv
configBalanceRanking_boxes.csv
Tree parts missing for {	-------------
connectionErrorOccurred_boxes.csv
countQuery_boxes.csv
createCacheFile_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

createChecklistItem_boxes.csv
createDataset_boxes.csv
createGraphModel_boxes.csv
createNewServerProcess_boxes.csv
Tree parts missing for ))-------------
createServerChooser_boxes.csv
deleteCascade_boxes.csv
doOutput_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

documentLoadingCompleted_boxes.csv
draw_boxes.csv
encode_boxes.csv
equals_boxes.csv
evaluate_boxes.csv
exit_boxes.csv
exportXVRL_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)


fScore_boxes.csv
fetchString_boxes.csv
Tree parts missing for ]-------------
genIdents_boxes.csv
genSql_boxes.csv
getAllSourceRoots_boxes.csv
getAll_boxes.csv
getApplicableLaw_boxes.csv
getBackCommand12_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

getBackCommand22_boxes.csv
getBackCommand25_boxes.csv
getBackCommand2_boxes.csv
getClassNameForLookAndFeel_boxes.csv
getCollaborateCollection_boxes.csv
getConnectionPanel_boxes.csv
getEffect_boxes.csv
getFastestGenerator_boxes.csv
getFiles_boxes.csv
Tree parts missing for )-------------
getGlobalElements_boxes.csv
getImageWithSource_boxes.csv
Tree parts missing for ]-------------
getInstitutions_boxes.csv
getJSplitPane_boxes.csv
getMenuAdministracion_boxes.csv
getMessage_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)


getNAGString_boxes.csv
getOkCommand18_boxes.csv
getRelFeedItems_boxes.csv
getRemoteObject_boxes.csv
getRodinDBStatus_boxes.csv
getScopePartnerLinks_boxes.csv
getStatusFlagFields_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

getTargetServiceName_boxes.csv
getUnitCombo_boxes.csv
getUserNameFromCookie_boxes.csv
goToRegistration_boxes.csv
go_boxes.csv
handleHalt_boxes.csv
iamNear_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

iconify_boxes.csv
indexToLetter_boxes.csv
init2_boxes.csv
initOptionsMenu_boxes.csv
init_boxes.csv
Tree parts missing for };	-------------
invalidateSession_boxes.csv
isInvalidEmailLink_boxes.csv
Tree parts missing for )-------------
jbInit_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)


Tree parts missing for 0-------------
Tree parts missing for 0-------------
joinTableRender_boxes.csv
leaveAll_boxes.csv
liesBetween_boxes.csv
listen_boxes.csv
loadSystems_boxes.csv
load_boxes.csv
makeCenter_boxes.csv
makeUniqueParagraphForGlobalWithLength_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)


markStart_boxes.csv
messageSent_boxes.csv
Tree parts missing for ))-------------
modifyRoleInCache_boxes.csv
moveUpTarget_boxes.csv
onAttach_boxes.csv
openCursor_boxes.csv
openNewContainerWindow_boxes.csv
play_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

pluginRegistryContains_boxes.csv
populateNewList_boxes.csv
print2_boxes.csv
print_boxes.csv
process_boxes.csv
readFromFile_boxes.csv
readResponse_boxes.csv
refreshTreePanel_boxes.csv
removeRedundantOperations_boxes.csv
removeSelection_boxes.csv
removeService_boxes.csv
removeView_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

resetSchedConflict_boxes.csv
resolveValue_boxes.csv
saveCRCReply_boxes.csv
saveSetting_boxes.csv
saveToFile_boxes.csv
searchRecipe_boxes.csv
selectBracketingEntries_boxes.csv
Tree parts missing for )-------------
Tree parts missing for )-------------
setBinWidth_boxes.csv
setButtonText_boxes.csv
setCombo_Value_boxes.csv
setGenJarDir_boxes.csv
Tree parts missing for ]-------------
setNamedItem_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

Tree parts missing for )-------------
setPhoto_boxes.csv
setSecurityMode_boxes.csv
setSelectedScript_boxes.csv
setTopAttributes_boxes.csv
setUpMrj_boxes.csv
Tree parts missing for ))-------------
setUrls_boxes.csv
showLatestPlan_boxes.csv
show_boxes.csv
split_boxes.csv
store_boxes.csv
swapItems_boxes.csv
tellAgain_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

testAddCountryWithSequenceGenerator_boxes.csv
testCookieGreaterThan_boxes.csv
testGetClassification_boxes.csv
testGetCodigo_boxes.csv
testGetEmail_boxes.csv
testGetEvtIDs_boxes.csv
testInvoke_boxes.csv
testLogin_boxes.csv
testMoveRFWithNullContainer_boxes.csv
testNegativeParseCases_boxes.csv
testOccThm_boxes.csv
testOneTwoThreeCreateCycle_boxes.csv
testSetChkEvtID_boxes.csv
testSetEvtID_boxes.csv
testSetExample_boxes.csv
testSetWelcomeMsg_boxes.csv
testValidateSeparationCount_boxes.csv
test_10_bug2689872_boxes.csv
toString_boxes.csv
updateGain_boxes.csv
updateSchema_boxes.csv
visitRetStmt_boxes.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['word'] = new_df['word'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

wndPreferences_boxes.csv
