In [1]:
# Prediction files compared in this notebook, both resolved relative to the
# notebook's working directory.
# Reference predictions that the "best" model is compared against.
base_model_path = "../labels/res/dev-base.txt"
# Predictions of the model under analysis.
best_model_path = "../labels/res/dev.txt"

In [2]:
import sys
import os
import pandas as pd
sys.path.append(os.path.abspath(".."))

from utils.load_data import DontPatronizeMe

# Dataset locations. DATA_DIR keeps its trailing slash because the file paths
# below are built by plain string concatenation.
DATA_DIR = '../data/'
TEST_PATH = DATA_DIR + 'task4_test.tsv'

# Load the task 1 data through the project loader and keep a handle on the
# resulting frame.
dpm = DontPatronizeMe(DATA_DIR, TEST_PATH)
dpm.load_task1()
data = dpm.train_task1_df

# Dev-split paragraph ids. par_id is cast to str so it can be used as the join
# key (presumably the loader stores ids as strings -- TODO confirm).
devids = pd.read_csv(DATA_DIR + 'dev_semeval_parids-labels.csv')
devids = devids.assign(par_id=devids['par_id'].astype(str))

# 'label_y' is presumably the right-hand frame's 'label' column, renamed by
# pandas' default merge suffixes because both frames carry a 'label' -- verify.
cols = ['par_id', 'text', 'label_y', 'keyword']

# Left join keeps every dev id, in the order of the ids file.
devdf = pd.merge(devids, data, on='par_id', how='left')[cols]

In [3]:
# Prediction files are read as headerless, single-column CSVs (one value per row).
baseline_preds = pd.read_csv(base_model_path, header=None, names=['baseline_pred'])
best_preds = pd.read_csv(best_model_path, header=None, names=['best_pred'])

# Predictions are attached to devdf by position, so all three must have the
# same number of rows.
n_dev = len(devdf)
n_baseline = len(baseline_preds)
n_best = len(best_preds)
if not (n_baseline == n_dev and n_best == n_dev):
    raise ValueError(
        "Prediction lengths must match devdf length. "
        f"devdf={n_dev}, baseline={n_baseline}, best={n_best}"
    )

# Build the analysis frame in one chain: a fresh 0..n-1 index lines the rows up
# with the prediction frames, then gold labels and correctness flags are derived.
analysis_df = (
    devdf
    .reset_index(drop=True)
    .assign(
        baseline_pred=baseline_preds['baseline_pred'].astype(int),
        best_pred=best_preds['best_pred'].astype(int),
        gold=lambda frame: frame['label_y'].astype(int),
        baseline_correct=lambda frame: frame['baseline_pred'].eq(frame['gold']),
        best_correct=lambda frame: frame['best_pred'].eq(frame['gold']),
    )
)

analysis_df.head()

Unnamed: 0,par_id,text,label_y,keyword,baseline_pred,best_pred,gold,baseline_correct,best_correct
0,4046,We also know that they can benefit by receivin...,1,hopeless,0,0,1,False,False
1,1279,Pope Francis washed and kissed the feet of Mus...,1,refugee,1,1,1,True,True
2,8330,Many refugees do n't want to be resettled anyw...,1,refugee,0,0,1,False,False
3,4063,"""Budding chefs , like """" Fred """" , """" Winston ...",1,in-need,1,1,1,True,True
4,4089,"""In a 90-degree view of his constituency , one...",1,homeless,0,0,1,False,False


In [16]:
# Keep only examples whose gold label is 1. Despite the name, tp_df holds every
# gold-positive row, whether or not either model predicted it correctly.
tp_df = analysis_df.loc[analysis_df['gold'].eq(1)].copy()

# Per-row correctness flags for each model on the gold-positive subset.
baseline_ok = tp_df['baseline_correct']
best_ok = tp_df['best_correct']

# Four mutually exclusive agreement buckets; key order drives the print order.
cases = {
    'both_correct': tp_df[baseline_ok & best_ok],
    'both_incorrect': tp_df[~(baseline_ok | best_ok)],
    'best_only_correct': tp_df[best_ok & ~baseline_ok],
    'baseline_only_correct': tp_df[baseline_ok & ~best_ok],
}

print(f"Total ground-truth positive examples: {len(tp_df)}")
for case_name, case_df in cases.items():
    n_examples_in_case = len(case_df)
    print(f"{case_name}: {n_examples_in_case} examples")

Total ground-truth positive examples: 199
both_correct: 107 examples
both_incorrect: 65 examples
best_only_correct: 20 examples
baseline_only_correct: 7 examples


In [None]:
# Number of examples previewed per case (table rows and printed full texts).
N_EXAMPLES = 5

# Columns shown in the per-case preview table.
display_cols = [
    'par_id', 'keyword', 'text', 'gold', 'baseline_pred', 'best_pred',
    'baseline_correct', 'best_correct',
]

# For each agreement bucket: print a banner, show a preview table, print the
# untruncated text of the previewed rows, then report the mean text length.
for case_name, case_df in cases.items():
    banner = '=' * 100
    print('\n' + banner)
    print(f"{case_name}")
    print(banner)

    # Slice once so the table and the printed texts always cover the same rows.
    preview = case_df.head(N_EXAMPLES)
    display(preview[display_cols])

    # The table truncates long strings, so print each previewed text in full.
    for sample_num, text in enumerate(preview['text'], start=1):
        print(f"Example {sample_num}")
        print(f"Text: {text}")
        print()

    # Whitespace-token count over the WHOLE bucket (not just the preview).
    # The .str accessor is vectorised and skips missing texts instead of
    # raising on them the way a Python-level x.split() would.
    avg_length = case_df['text'].str.split().str.len().mean()
    print(f"Average text length: {avg_length:.2f} words")


both_correct


Unnamed: 0,par_id,keyword,text,gold,baseline_pred,best_pred,baseline_correct,best_correct
1,1279,refugee,Pope Francis washed and kissed the feet of Mus...,1,1,1,True,True
3,4063,in-need,"""Budding chefs , like """" Fred """" , """" Winston ...",1,1,1,True,True
6,4177,hopeless,The Word of God is truth that 's living and ab...,1,1,1,True,True
7,3963,in-need,"Chantelle Owens , Mrs Planet 2016 , hosted the...",1,1,1,True,True
9,369,women,""""""" People do n't understand the hurt , people...",1,1,1,True,True


Example 1
Text: Pope Francis washed and kissed the feet of Muslim , Orthodox , Hindu and Catholic refugees in a moving ceremony during Holy Thursday Mass at a refugee centre on the outskirts of Rome . ( March 24 )

Example 2
Text: "Budding chefs , like "" Fred "" , "" Winston "" and "" Angela "" in the kitchen are either homeless or recently re-housed - and desperately in need of a helping hand to enable them to re-build their shattered lives ."

Example 3
Text: The Word of God is truth that 's living and able to penetrate human souls ( Heb. 4:12 ) . Consider how powerful Scripture is : it can change hearts , save lives from eternal condemnation , and give hope to the hopeless .

Example 4
Text: Chantelle Owens , Mrs Planet 2016 , hosted the day and the ladies had the chance to share their compassion for those in need .

Example 5
Text: """ People do n't understand the hurt , people do n't understand the pain . I 've read about women with their children sleeping in cars , sleeping in h

Unnamed: 0,par_id,keyword,text,gold,baseline_pred,best_pred,baseline_correct,best_correct
0,4046,hopeless,We also know that they can benefit by receivin...,1,0,0,False,False
2,8330,refugee,Many refugees do n't want to be resettled anyw...,1,0,0,False,False
4,4089,homeless,"""In a 90-degree view of his constituency , one...",1,0,0,False,False
8,2001,poor-families,t is remiss not to mention here that not all s...,1,0,0,False,False
10,773,immigrant,Sheepherding in America has always been an imm...,1,0,0,False,False


Example 1
Text: We also know that they can benefit by receiving counseling from someone who can help them understand that their feelings are normal and that their situation is not hopeless ; someone who can help them put their situation in perspective and help them communicate with others who could provide support ; someone knowledgeable about resources they can access ; someone who can help them plan for their needs and the needs of their child by developing either a parenting plan or an adoption plan .

Example 2
Text: Many refugees do n't want to be resettled anywhere , let alone in the US .

Example 3
Text: "In a 90-degree view of his constituency , one can see a high rise and a flyover while underneath it , homeless people sleep on pavements . "" As you can see , my constituency is not confined to one socio-economic group , "" he said , adding that apart from water , another major concern for him is to improve the standard of public schools in the area ."

Example 4
Text: t is rem

Unnamed: 0,par_id,keyword,text,gold,baseline_pred,best_pred,baseline_correct,best_correct
31,1847,hopeless,TREVOR HAGAN/WINNIPEG FREE PRESS John Donovan ...,1,0,1,False,True
54,3213,immigrant,Hollywood star Leo Di Caprio urges help for re...,1,0,1,False,True
60,4560,hopeless,Hundreds of thousand Africans are graduating p...,1,0,1,False,True
75,1674,disabled,"Kyle really your a pig , lol youre also very i...",1,0,1,False,True
115,1090,homeless,Mum living in homeless shelter has ' nowhere t...,1,0,1,False,True


Example 1
Text: TREVOR HAGAN/WINNIPEG FREE PRESS John Donovan , northern region director of the Addictions Foundation of Manitoba : ? Many of them are feeling pretty hopeless. ?

Example 2
Text: Hollywood star Leo Di Caprio urges help for reuniting immigrant children with their families

Example 3
Text: Hundreds of thousand Africans are graduating per year . Different from 1980s and early 1990s when college outpours got immediately absorbed in the labour market , many today are jobless and hopeless .

Example 4
Text: Kyle really your a pig , lol youre also very ignorant do nt like over weight women , well have u looked in the mirror recently your FAT , YOU AND YOUR BOSS SHOULD BE SACKED never to return to radio , , how dare you say go for the disabled , your more disabled than the disabled olympians , they have a genuine heart , you do nt have one ,

Example 5
Text: Mum living in homeless shelter has ' nowhere to bring boys on Christmas day ' <h> ' panicked '

Average text length: 51.6

Unnamed: 0,par_id,keyword,text,gold,baseline_pred,best_pred,baseline_correct,best_correct
5,432,refugee,He depicts demonstrations by refugees at the b...,1,1,0,True,False
33,8839,homeless,""""""" A vagrant even told me that he did not wan...",1,1,0,True,False
58,2196,refugee,""""""" It 's the largest humanitarian tragedy of ...",1,1,0,True,False
64,9980,hopeless,From mother ( played by Bollywood actress Seem...,1,1,0,True,False
79,4056,hopeless,President Muhammadu Buhari is on a divine miss...,1,1,0,True,False


Example 1
Text: He depicts demonstrations by refugees at the border post , their catastrophic living conditions and the desperate attempt of several hundred to cross a river a few kilometres from the camp to get into Macedonia on 14 March 2016 .

Example 2
Text: """ A vagrant even told me that he did not want to return to the streets ever again because of my movie . I mean , how much money can you pay to get a homeless person back on the right track ? """

Example 3
Text: """ It 's the largest humanitarian tragedy of our time , "" Ninette Kelley , the UN high commissioner for refugees ' representative to Lebanon , told the Guardian earlier this month in an interview . "" We do not want the world to forget that people are suffering here . """

Example 4
Text: From mother ( played by Bollywood actress Seema Biswas of Bandit Queen fame ) who rejected him and a society that offered no respite , Siva was , in a nutshell , a hopeless street vagabond .

Example 5
Text: President Muhammadu Buh