In [None]:
!pip install sentence-transformers -q
!pip install metric4coref -q

In [None]:
from google.colab import files, drive
import pandas as pd
import re
from tqdm.auto import tqdm
import json


drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import nltk
from sklearn.metrics.pairwise import cosine_similarity
from metric4coref import muc, ceaf
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('bert-base-nli-mean-tokens') 


# metrics

In [None]:
def evaluate_muc_ceaf_bcubed_f1(df, answer_aspect, answer_objects):
  muc_objects, muc_aspect = [], []
  ceaf_objects, ceaf_aspect = [], []

  for i in range(len(df)):
    objs = [df.iloc[i,1].lower(), df.iloc[i,2].lower()]
    objs = [obj+'_' if len(obj)==1 else obj for obj in objs]
    aspect = df.iloc[i,3].lower()

    pred_objects = [ans.lower() for ans in answer_objects[i]]
    pred_objects = [obj+'_' if len(obj)==1 else obj for obj in pred_objects]
    pred_aspect = answer_aspect[i].lower()

    if aspect == '':
      split_aspect = [['_', '_']]
    elif len(aspect) == 1:
      split_aspect = [[i for i in aspect]]
      split_aspect[0].append('_')
    else:
      split_aspect = [[i for i in aspect]]

    if pred_aspect == '':
      split_pred_aspect = [['_', '_']]
    elif len(pred_aspect) == 1:
      split_pred_aspect = [[i for i in pred_aspect]]
      split_pred_aspect[0].append('_')
    else:
      split_pred_aspect = [[i for i in pred_aspect]]

    split_objs = [[i for i in obj] for obj in objs]
    if pred_objects == []:
      split_pred_objs = [['_', '_'], ['_', '_']]
    elif len(pred_objects) == 1:
      split_pred_objs = [[i for i in obj] for obj in pred_objects]
      split_pred_objs.append(['_', '_'])
    else:
      split_pred_objs = [[i for i in obj] for obj in pred_objects]

    muc_objects.append(muc(split_pred_objs, split_objs))
    muc_aspect.append(muc(split_pred_aspect, split_aspect))

    ceaf_objects.append(ceaf(split_pred_objs, split_objs))
    ceaf_aspect.append(ceaf(split_pred_aspect, split_aspect))


  print('MUC')
  print('Objects:')
  print(f'''  precision: {sum([m[0] for m in muc_objects])/len(muc_objects)}
  recall: {sum([m[1] for m in muc_objects])/len(muc_objects)}
  f-score: {sum([m[2] for m in muc_objects])/len(muc_objects)}''')
  print('Aspect:')
  print(f'''  precision: {sum([m[0] for m in muc_aspect])/len(muc_aspect)}
  recall: {sum([m[1] for m in muc_aspect])/len(muc_aspect)}
  f-score: {sum([m[2] for m in muc_aspect])/len(muc_aspect)}''')
  
  print('\nCEAF')
  print('Objects:')
  print(f'''  precision: {sum([c[0] for c in ceaf_objects])/len(ceaf_objects)}
  recall: {sum([c[1] for c in ceaf_objects])/len(ceaf_objects)}
  f-score: {sum([c[2] for c in ceaf_objects])/len(ceaf_objects)}''')
  print('Aspect:')
  print(f'''  precision: {sum([c[0] for c in ceaf_aspect])/len(ceaf_aspect)}
  recall: {sum([c[1] for c in ceaf_aspect])/len(ceaf_aspect)}
  f-score: {sum([c[2] for c in ceaf_aspect])/len(ceaf_aspect)}''')
  



In [None]:
def evaluate_full_match(df, answer_aspect, answer_objects):
  correct_answers = 0
  correct_aspects = 0

  for i in range(len(df)):
    objs = [df.iloc[i,1].lower(), df.iloc[i,2].lower()]
    aspect = df.iloc[i,3].lower()

    pred_aspect = answer_aspect[i].lower()
    pred_objects = [ans.lower() for ans in answer_objects[i]]

    objs = [re.sub('[^a-z0-9]+the ', ' ', obj) for obj in objs]
    objs = [re.sub(' +', ' ', obj) for obj in objs]

    if set(objs) == set(pred_objects):
      correct_answers += 1

    if pred_aspect == aspect:
      correct_aspects += 1

  print(f'Objects: {correct_answers/len(df)}')
  print(f'Aspect: {correct_aspects/len(df)}')

In [None]:
def cosine_sim(text1, text2):
    emb1 = [model.encode(text1)]
    emb2 = [model.encode(text2)]
    return cosine_similarity(emb1, emb2)[0][0]

In [None]:
unnes_asps = ['better', 'difference', 'worse', 'different', 'win', 'best', 
              'worst', 'differences', 'bad', 'good', 'common', 'same', 
              'alike', 'like', 'likes', 'do', '-', 'than', 'as']

In [None]:
def delete_unnes_aspects(df, unnes_asps):
  df['aspect'].fillna('', inplace=True)
  for asp in unnes_asps:
    df['aspect'].replace(asp, '', inplace=True)
  return df

In [None]:
def evaluate_edit_distance(df, answer_aspect, answer_objects):
  object_distance = []
  aspect_distance = []

  for i in range(len(df)):
    objs = [df.iloc[i,1].lower(), df.iloc[i,2].lower()]
    aspect = df.iloc[i,3].lower()

    pred_aspect = answer_aspect[i].lower()    
    pred_objects = [ans.lower() for ans in answer_objects[i]]

    if set(objs) == set(pred_objects):
      object_distance.append(0)
    elif len(objs) == len(pred_objects):
      ed_obj1 = nltk.edit_distance(objs[0], pred_objects[0], transpositions=False)
      ed_obj2 = nltk.edit_distance(objs[1], pred_objects[1], transpositions=False)
      object_distance.append((ed_obj1+ed_obj2)/2)

    if pred_aspect: 
      aspect_distance.append(nltk.edit_distance(pred_aspect, aspect, transpositions=False))
    if not pred_aspect and not aspect:
      aspect_distance.append(0)

  print(f'Objects: {sum(object_distance)/len(object_distance)}')
  print(f'Aspect: {sum(aspect_distance)/len(aspect_distance)}')

In [None]:
def evaluate_full_match(df, answer_aspect, answer_objects):
  correct_answers = 0
  correct_aspects = 0

  for i in range(len(df)):
    objs = [df.iloc[i,1].lower(), df.iloc[i,2].lower()]
    aspect = df.iloc[i,3].lower()

    pred_aspect = answer_aspect[i].lower()
    pred_objects = [ans.lower() for ans in answer_objects[i]]

    objs = [re.sub('[^a-z0-9]+the ', ' ', obj) for obj in objs]
    objs = [re.sub(' +', ' ', obj) for obj in objs]

    if set(objs) == set(pred_objects):
      correct_answers += 1

    if pred_aspect == aspect:
      correct_aspects += 1

  print(f'Objects: {correct_answers/len(df)}')
  print(f'Aspect: {correct_aspects/len(df)}')

In [None]:
def evaluate_cos_sim(df, answer_aspect, answer_objects):
  cos_objects = []
  cos_aspects = []

  for i in tqdm(range(len(df))):
    objs = [df.iloc[i,1].lower(), df.iloc[i,2].lower()]
    aspect = df.iloc[i,3].lower()

    pred_aspect = answer_aspect[i].lower()
    pred_objs = [ans.lower() for ans in answer_objects[i]]

    if len(objs) == len(pred_objs):
      ob1_cos = cosine_sim(pred_objs[0], objs[0])
      ob2_cos = cosine_sim(pred_objs[1], objs[1])
      cos_objects.append((ob1_cos+ob2_cos)/2)
    else:
      cos_objects.append(0)

    if aspect and pred_aspect:
      cos_aspects.append(cosine_sim(pred_aspect, aspect))
    elif not aspect and not pred_aspect:
      cos_aspects.append(1)
    else:
      cos_aspects.append(0)

  print(f'Objects: {sum(cos_objects)/len(cos_objects)}')
  print(f'Aspect: {sum(cos_aspects)/len(cos_aspects)}')

In [None]:
def evaluate_cos_sim_found(df, answer_aspect, answer_objects):
  cos_objects = []
  cos_aspects = []

  for i in tqdm(range(len(df))):
    objs = [df.iloc[i,1].lower(), df.iloc[i,2].lower()]
    aspect = df.iloc[i,3].lower()

    pred_aspect = answer_aspect[i].lower()
    pred_objs = [ans.lower() for ans in answer_objects[i]]

    if len(objs) == len(pred_objs):
      ob1_cos = cosine_sim(pred_objs[0], objs[0])
      ob2_cos = cosine_sim(pred_objs[1], objs[1])
      cos_objects.append((ob1_cos+ob2_cos)/2) 
    if len(pred_objs) == 1:
      ob1_cos = cosine_sim(pred_objs[0], objs[0])
      ob2_cos = cosine_sim(pred_objs[0], objs[1])
      cos_objects.append(max(ob1_cos, ob2_cos))

    if aspect and pred_aspect:
      cos_aspects.append(cosine_sim(pred_aspect, aspect))
    elif not aspect and not pred_aspect:
      cos_aspects.append(1)

  print(f'Objects: {sum(cos_objects)/len(cos_objects)}')
  print(f'Aspect: {sum(cos_aspects)/len(cos_aspects)}')

In [None]:
def evaluate_full_match_relaxed(df, answer_aspect, answer_objects):
  correct_answers = 0
  correct_aspects = 0

  for i in range(len(df)):
    objs = [df.iloc[i,1].lower(), df.iloc[i,2].lower()]
    aspect = df.iloc[i,3].lower()

    pred_aspect = answer_aspect[i].lower()
    pred_objects = [ans.lower() for ans in answer_objects[i]]

    objs = [re.sub('[^a-z0-9]+the ', ' ', obj) for obj in objs]
    objs = [re.sub(' +', ' ', obj) for obj in objs]
  
    for obj in pred_objects:
      if obj in objs:
        correct_answers += 0.5

    if pred_aspect == aspect:
      correct_aspects += 1

  print(f'Objects: {correct_answers/len(df)}')
  print(f'Aspect: {correct_aspects/len(df)}')

In [None]:
def evaluate_cos_sim_relaxed(df, answer_aspect, answer_objects):
  cos_objects = []
  cos_aspects = []

  for i in tqdm(range(len(df))):
    objs = [df.iloc[i,1].lower(), df.iloc[i,2].lower()]
    aspect = df.iloc[i,3].lower()

    pred_aspect = answer_aspect[i].lower()
    pred_objs = [ans.lower() for ans in answer_objects[i]]

    # if one obects predicted
    if len(pred_objs) == 1:
      max_cos = 0
      for obj in objs:
        obj_cos = cosine_sim(pred_objs[0], obj)
        if obj_cos > max_cos:
          max_cos = obj_cos
      cos_objects.append(max_cos*0.5)

  # if two objects predicted
    elif len(objs) == len(pred_objs):
      ob1_cos = cosine_sim(pred_objs[0], objs[0])
      ob2_cos = cosine_sim(pred_objs[1], objs[1])
      cos_objects.append((ob1_cos+ob2_cos)/2)

  # if no objects predicted
    else:
      cos_objects.append(0)

    if aspect and pred_aspect:
      cos_aspects.append(cosine_sim(pred_aspect, aspect))
    elif not aspect and not pred_aspect:
      cos_aspects.append(1)
    else:
      cos_aspects.append(0)

  print(f'Objects: {sum(cos_objects)/len(cos_objects)}')
  print(f'Aspect: {sum(cos_aspects)/len(cos_aspects)}')

# Dolly

## zero shot

In [None]:
dolly_zeroshot_df = pd.read_csv('./drive/My Drive/dolly_zeroshot.csv', sep='\t', encoding='utf-8')

In [None]:
dolly_zeroshot_df = delete_unnes_aspects(dolly_zeroshot_df, unnes_asps)

In [None]:
def clear(text):
  clear_text = re.sub("""[^ \'\"a-zа-яё0-9,\:\n]""", '', text.lower())
  return clear_text

In [None]:
clear_answers = []

for i in range(len(dolly_zeroshot_df)):
  words = ['and', 'the', 'objects', 'are', 'aspect', 'is', '\n']

  answer = dolly_zeroshot_df.iloc[i,4].lower()
  sentence = dolly_zeroshot_df.iloc[i,0].lower()

  clear_sentence = clear(sentence)
  words.extend(clear_sentence.split(' '))
  clear_answer = clear(answer)
  clear_answer = re.sub(clear_sentence, '', clear_answer)

  answer_lines = clear_answer.split('\n')
  new_answer = []

  for line in answer_lines:
    line_words = line.split(' ')

    for word in line_words:
      clear_word = re.sub('[^a-zа-яё0-9]', '', word)
      if clear_word in words:
        new_answer.append(word)

    new_answer.append('\n')
  new_answer = re.sub('  ', '\n', ' '.join(new_answer))
  new_answer = re.sub('objects [a-z]+ are', 'objects are', new_answer)
  new_answer = re.sub(' ?the ', ' ', new_answer) # убрать и в ответе
  new_answer = re.sub('objects +are', 'objects are', new_answer)
  new_answer = re.sub('aspect +is', 'aspect is', new_answer)
  new_answer = re.sub('aspect of is', 'aspect is', new_answer)
  new_answer = re.sub('objects of +are', 'objects are', new_answer)
  new_answer = re.sub('objects in +are', 'objects are', new_answer)

  clear_answers.append(new_answer)


In [None]:
answer_objects = []
answer_aspect = []

for answer in clear_answers:
  new_objects = []
  aspect = ''
  
  if 'aspect is' in answer:
    aspect = answer.split('aspect is')[1]
    objects = re.sub('objects are', '', answer.split('aspect is')[0])
    objects = re.sub('[\' ]and[\' ]', '##[SEP]##', objects)
    objects = objects.split('##[SEP]##')

    new_objects = []
    for obj in objects:
      obj = re.sub('\n+', '\n', obj)
      obj = obj.split('\n')
      for o in obj:
        if_real = re.findall('[a-z0-9]+', o)
        if if_real:
          o = o.strip(':').strip(' ').strip(':').strip('\'').strip(',').strip(' ').strip('\'')
          o = re.sub(' +', ' ', o)
          new_objects.append(o)

    new_objects = new_objects[:2]
    aspect = aspect.split('\n')[0].strip(' ').strip(':').strip('\'')

  answer_objects.append(new_objects)
  answer_aspect.append(aspect)



In [None]:
clear_answer_aspect = []
for a in answer_aspect:
  if a in unnes_asps:
    clear_answer_aspect.append('')
  else:
    clear_answer_aspect.append(a)   

answer_aspect = clear_answer_aspect 

In [None]:
dolly_zeroshot_df['aspect'].fillna('', inplace=True)

### MUC, CEAF

In [None]:
evaluate_muc_ceaf_bcubed_f1(dolly_zeroshot_df, answer_aspect, answer_objects)

MUC
Objects:
  precision: 0.23962926407956733
  recall: 0.26416288609735544
  f-score: 0.22212915320429308
Aspect:
  precision: 0.4444782845543873
  recall: 0.47129780592215953
  f-score: 0.4448340105874929

CEAF
Objects:
  precision: 0.19508140568873977
  recall: 0.199291691169092
  f-score: 0.18110407510990284
Aspect:
  precision: 0.24139596911993477
  recall: 0.2636915542959594
  f-score: 0.24261458963960045


### edit_distance

In [None]:
evaluate_edit_distance(dolly_zeroshot_df, answer_aspect, answer_objects)

Objects: 10.818548387096774
Aspect: 8.007057546145495


### strict

Полное совпадение

In [None]:
evaluate_full_match(dolly_zeroshot_df, answer_aspect, answer_objects)

Objects: 0.06444226365718024
Aspect: 0.4252535165194635


Косинусная близость

In [None]:
evaluate_cos_sim(dolly_zeroshot_df, answer_aspect, answer_objects) 

  0%|          | 0/3057 [00:00<?, ?it/s]

Objects: 0.21910809277318294
Aspect: 0.4765690020268791


In [None]:
evaluate_cos_sim_found(dolly_zeroshot_df, answer_aspect, answer_objects)

  0%|          | 0/3057 [00:00<?, ?it/s]

Objects: 0.7683682315603703
Aspect: 0.9362926987121911


### relaxed

Полное совпадение

In [None]:
evaluate_full_match_relaxed(dolly_zeroshot_df, answer_aspect, answer_objects)

Objects: 0.0979718678442918
Aspect: 0.4252535165194635


Косинусная близость

In [None]:
evaluate_cos_sim_relaxed(dolly_zeroshot_df, answer_aspect, answer_objects)

  0%|          | 0/3057 [00:00<?, ?it/s]

Objects: 0.24691526279082515
Aspect: 0.4765690020268791


# NeoGPT

## few shot

In [None]:
neo_fewshot_df = pd.read_csv('./drive/My Drive/neogpt_fewshot.csv', sep='\t', encoding='utf-8')

In [None]:
neo_fewshot_df = delete_unnes_aspects(neo_fewshot_df, unnes_asps)

In [None]:
answer_objects = []
answer_aspect = []

for i in range(len(neo_fewshot_df)):
  sent = neo_fewshot_df.iloc[i, 0]
  ans = neo_fewshot_df.iloc[i, 4]
  start_ans = ans[885:]
  lines = start_ans.split('\n')[:3]

  obj_line = ''
  asp_line = ''

  for l in lines:
    if 'Objects:' in l:
      obj_line = l
    if 'Aspect:' in l:
      asp_line = l

  if "'" in obj_line:

    obj_line = re.sub("n't", 'n’t', obj_line) 
    obj_line = re.sub("y're", 'y’re', obj_line)
    obj_line = re.sub("r's", 'r’s', obj_line)
    obj_line = re.sub("n's", 'n’s', obj_line)

    obj_line = re.sub('Objects:', '', obj_line)
    obj_line = re.sub("[^\']+$", '', obj_line.strip(' '))
    obj_line = re.findall("(\'[^\']+\')+,? ?", obj_line)


    try:
      objects = eval('['+','.join(obj_line)+']')
    except:
      print(objects)
      objects = []


  aspect = re.sub('Aspect:', '', asp_line.strip(' ')).strip(' ')

  answer_aspect.append(aspect) 
  answer_objects.append(objects[:2]) 

In [None]:
clear_answer_aspect = []
for a in answer_aspect:
  if a in unnes_asps:
    clear_answer_aspect.append('')
  else:
    clear_answer_aspect.append(a)   

answer_aspect = clear_answer_aspect 

### MUC, CEAF

In [None]:
evaluate_muc_ceaf_bcubed_f1(neo_fewshot_df, answer_aspect, answer_objects)

MUC
Objects:
  precision: 0.7925926228263231
  recall: 0.571089102717853
  f-score: 0.6041202583477795
Aspect:
  precision: 0.39119540644710693
  recall: 0.4212292002974707
  f-score: 0.3699521845101842

CEAF
Objects:
  precision: 0.7121806951764321
  recall: 0.5456507028833348
  f-score: 0.5855248529496002
Aspect:
  precision: 0.3211435842283013
  recall: 0.33636357397933186
  f-score: 0.3088975551398772


### edit_distance

In [None]:
evaluate_edit_distance(neo_fewshot_df, answer_aspect, answer_objects)

Objects: 6.751677852348993
Aspect: 7.625531914893617


### strict

Полное совпадение

In [None]:
evaluate_full_match(neo_fewshot_df, answer_aspect, answer_objects)

Objects: 0.276
Aspect: 0.274


Косинусная близость

In [None]:
evaluate_cos_sim(neo_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.6946715359836817
Aspect: 0.49603297978639604


In [None]:
evaluate_cos_sim_found(neo_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.7828716721856045
Aspect: 0.8350723565427542


### relaxed

Полное совпадение

In [None]:
evaluate_full_match_relaxed(neo_fewshot_df, answer_aspect, answer_objects)

Objects: 0.392
Aspect: 0.274


Косинусная близость

In [None]:
evaluate_cos_sim_relaxed(neo_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.7379887324124574
Aspect: 0.49603297978639604


# GPT-2

## few shot

In [None]:
gpt2_fewshot_df = pd.read_csv('./drive/My Drive/gpt2_fewshot.csv', sep='\t', encoding='utf-8')

In [None]:
gpt2_fewshot_df = delete_unnes_aspects(gpt2_fewshot_df, unnes_asps)

In [None]:
answer_objects = []
answer_aspect = []

for i in range(len(gpt2_fewshot_df)):
  sent = gpt2_fewshot_df.iloc[i, 0]
  ans = gpt2_fewshot_df.iloc[i, 4]
  start_ans = ans[955:] 
  lines = start_ans.split('\n')[:7]

  obj_line = ''
  asp_line = ''

  for l in lines:
    if 'Objects:' in l:
      obj_line = l
    if 'Aspect:' in l:
      asp_line = l

  if "'" in obj_line:

    obj_line = re.sub("n't", 'n’t', obj_line)  
    obj_line = re.sub("y're", 'y’re', obj_line)
    obj_line = re.sub("r's", 'r’s', obj_line)
    obj_line = re.sub("n's", 'n’s', obj_line)

    obj_line = re.sub('Objects:', '', obj_line)
    obj_line = re.sub("[^\']+$", '', obj_line.strip(' '))
    obj_line = re.findall("(\'[^\']+\')+,? ?", obj_line)

    try:
      objects = eval('['+','.join(obj_line)+']')
    except:
      objects = []

  else:
    objects = obj_line.split(',')
  aspect = re.sub('Aspect:', '', asp_line.strip(' ')).strip(' ')

  answer_aspect.append(aspect)
  answer_objects.append(objects)

In [None]:
clear_answer_aspect = []
for a in answer_aspect:
  if a in unnes_asps:
    clear_answer_aspect.append('')
  else:
    clear_answer_aspect.append(a)   

answer_aspect = clear_answer_aspect 

### MUC, CEAF

In [None]:
evaluate_muc_ceaf_bcubed_f1(gpt2_fewshot_df, answer_aspect, answer_objects)

MUC
Objects:
  precision: 0.6003954909639969
  recall: 0.5154569556651308
  f-score: 0.49657592201109435
Aspect:
  precision: 0.3151160670113458
  recall: 0.29264490822988726
  f-score: 0.272327764698042

CEAF
Objects:
  precision: 0.5191492559275426
  recall: 0.47487744127969544
  f-score: 0.463551219528617
Aspect:
  precision: 0.29389381045572477
  recall: 0.2803705393850202
  f-score: 0.2662387005613715


### edit_distance

In [None]:
evaluate_edit_distance(gpt2_fewshot_df, answer_aspect, answer_objects)

Objects: 7.58879781420765
Aspect: 9.435950413223141


### strict

Полное совпадение

In [None]:
evaluate_full_match(gpt2_fewshot_df, answer_aspect, answer_objects)

Objects: 0.162
Aspect: 0.2


Косинусная близость

In [None]:
evaluate_cos_sim(gpt2_fewshot_df, answer_aspect, answer_objects) 

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.5528916113376617
Aspect: 0.39701573592424394


In [None]:
evaluate_cos_sim_found(gpt2_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.7410191232948217
Aspect: 0.7694103409384572


### relaxed

Полное совпадение

In [None]:
evaluate_full_match_relaxed(gpt2_fewshot_df, answer_aspect, answer_objects)

Objects: 0.323
Aspect: 0.2


Косинусная близость

In [None]:
evaluate_cos_sim_relaxed(gpt2_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.6047172772884369
Aspect: 0.39701573592424394


# LLaMA

## zero shot

In [None]:
llama_zeroshot_df = pd.read_csv('./drive/My Drive/llama_zeroshot.csv', sep='\t', encoding='utf-8')

In [None]:
llama_zeroshot_df = delete_unnes_aspects(llama_zeroshot_df, unnes_asps)

In [None]:
starts = {}
for i in range(len(llama_zeroshot_df)):
  answer = llama_zeroshot_df.iloc[i,4]
  start = ' '.join(answer.split(' ')[:2])
  if start in starts.keys():
    starts[start].append(i)
  else:
    starts[start] = [i]

In [None]:
starts.keys()

dict_keys(['The objects', 'What are', '\\begin{itemize} \\item', 'HIV stands', 'Do deaf', 'What place', 'Who should', 'Who would', 'Billie Joe', 'Procedural Due', '### Instruction:', 'What exactly', 'IRS sefine', 'Cold air', 'Would you', 'Humans prefer', 'We bury', 'The USA', 'Bond investors', 'Wealth is', 'When talking', 'Iraq is', 'Convection ovens', 'Are records', 'BMI is', "Netflix's New", 'Salt trucks', "Aren't r", 'People in', 'In the', 'Bartenders and', 'Cooked food', 'The actual', 'There are', 'Sound travels', 'Was the', 'Are the', 'What cars', 'Can anyone', 'What type', 'Who is', 'What works', 'Are you', 'PCDATA and', 'People with', 'Airplanes take', 'x265 format', 'The core', 'What caused', 'Sleeping for', 'Malwarebytes Anti', "Aren't telescopes", 'Vegetarians and', 'Ireland and', 'Do you', "Aren't electric", 'Don\'t "', 'Would riding', 'What web', 'Do most', 'Passion and', 'Employer and', 'Do percoset', 'What TV', 'Humans and', 'What You', 'What game', 'Computer architecture

In [None]:
for i in range(len(llama_zeroshot_df)):
  answer = llama_zeroshot_df.iloc[i,4]
  if '\\begin{itemize}' in answer:
    answer = re.sub('\\\\begin{itemize}', '', answer)
    answer = re.sub('\\\\item', '', answer)
    answer = re.sub('\\\\end{itemize}', '', answer)
    llama_zeroshot_df.iloc[i,4] = answer.strip(' ')
  

In [None]:
for i in range(len(llama_zeroshot_df)):
  answer = llama_zeroshot_df.iloc[i,4]
  if '\\end{code}' in answer:
    answer = answer.split('\\end{code}')[0].strip(' ')
    llama_zeroshot_df.iloc[i,4] = answer

In [None]:
answer_objects = []
answer_aspect = []

for i in range(len(llama_zeroshot_df)):
  answer = llama_zeroshot_df.iloc[i,4].lower()
  answer = re.sub('n\'t', 'nt', answer)
  answer = re.sub('y\'re', 'yre', answer)
  answer = re.sub('t\'s', 'ts', answer)
  objects = []
  asp = ''
  a = re.findall("[\'s] ?are ?\'?[^\.\?\!]*", answer) 
  for item in a:
    if 'and' in item:
      item = re.sub('[\'s] ?are ?\'?', '###', item)
      item = ' '.join(item.split('###')[1:])
      item = re.sub('[\' ]and[ \']', '###', item)
      objects = item.split('###')
      if len(objects) > 1:
        if objects[1][:8] == ' the asp':
          objects = [objects[0]]
      

  b = re.findall("[\'n] ?is ?\'?[^\.]*", answer)
  for item in b:
    item = re.sub('[\' ]is[\' ]', '###', item)
    item = ' '.join(item.split('###')[1:])
    asp = item

  objects = [obj.strip(' ').strip('\'') for obj in objects]
  asp = asp.strip(' ').strip('\'')

  answer_objects.append(objects[:2])
  answer_aspect.append(asp)


In [None]:
clear_answer_aspect = []
for a in answer_aspect:
  if a in unnes_asps:
    clear_answer_aspect.append('')
  else:
    clear_answer_aspect.append(a)   

answer_aspect = clear_answer_aspect 

### MUC, CEAF

In [None]:
evaluate_muc_ceaf_bcubed_f1(llama_zeroshot_df, answer_aspect, answer_objects)

MUC
Objects:
  precision: 0.5837961301855369
  recall: 0.5982216280764697
  f-score: 0.5487790854174059
Aspect:
  precision: 0.4771576797663178
  recall: 0.5612893743183209
  f-score: 0.47461481741248807

CEAF
Objects:
  precision: 0.4804917650288479
  recall: 0.4794943318818906
  f-score: 0.4552751527545003
Aspect:
  precision: 0.3259584996932001
  recall: 0.38813750297470734
  f-score: 0.32732445381237263


### edit_distance

In [None]:
evaluate_edit_distance(llama_zeroshot_df, answer_aspect, answer_objects)

Objects: 7.646606334841629
Aspect: 19.51785028790787


### strict

Полное совпадение

In [None]:
evaluate_full_match(llama_zeroshot_df, answer_aspect, answer_objects)

Objects: 0.29506051684658163
Aspect: 0.4118416748446189


Косинусная близость

In [None]:
evaluate_cos_sim(llama_zeroshot_df, answer_aspect, answer_objects) 

  0%|          | 0/3057 [00:00<?, ?it/s]

Objects: 0.6174171389168019
Aspect: 0.5749024662360096


In [None]:
evaluate_cos_sim_found(llama_zeroshot_df, answer_aspect, answer_objects)

  0%|          | 0/3057 [00:00<?, ?it/s]

Objects: 0.8523624507784039
Aspect: 0.8666059365303161


### relaxed

Полное совпадение

In [None]:
evaluate_full_match_relaxed(llama_zeroshot_df, answer_aspect, answer_objects)

Objects: 0.354105332024861
Aspect: 0.4118416748446189


Косинусная близость

In [None]:
evaluate_cos_sim_relaxed(llama_zeroshot_df, answer_aspect, answer_objects)

  0%|          | 0/3057 [00:00<?, ?it/s]

Objects: 0.6187599418056647
Aspect: 0.5749024662360096


# T5

## few shot+

In [None]:
t5_fewshot_large_df = pd.read_csv('./drive/My Drive/t5_fewshot_big.csv', sep='\t', encoding='utf-8')

In [None]:
t5_fewshot_large_df = delete_unnes_aspects(t5_fewshot_large_df, unnes_asps)

In [None]:
answer_aspect = []
answer_objects = []

for i in range(len(t5_fewshot_large_df)):
  answer = t5_fewshot_large_df.iloc[i, 4]
  lines = answer.split('Objects')

  asps = []
  objs = []

  for l in lines:

    if 'Aspect' in l:
      aspect = l.split('Aspect')[1].strip(':').strip(' ')
      if aspect and aspect not in asps:
        asps.append(aspect.split(' Wh')[0].strip('\''))

      obj = l.split('Aspect')[0].strip(':').strip(' ')
      obj = re.sub('[\' ],[ \']', '\',\'', obj)
      if obj:
        obj = obj.split(',')
        for o in obj:
          if o not in objs:
            objs.append(o.strip('\''))
  if asps:
    answer_aspect.append(asps[0]) 
  else:
    answer_aspect.append('')
  answer_objects.append(objs) 



In [None]:
clear_answer_aspect = []
for a in answer_aspect:
  if a in unnes_asps:
    clear_answer_aspect.append('')
  else:
    clear_answer_aspect.append(a)   

answer_aspect = clear_answer_aspect 

### MUC, CEAF

In [None]:
evaluate_muc_ceaf_bcubed_f1(t5_fewshot_large_df, answer_aspect, answer_objects)

MUC
Objects:
  precision: 0.7806698138102716
  recall: 0.7685025361264732
  f-score: 0.711594713220498
Aspect:
  precision: 0.36329584867662246
  recall: 0.4145039413912354
  f-score: 0.35102016969256766

CEAF
Objects:
  precision: 0.44891587511997183
  recall: 0.6726700738715234
  f-score: 0.4988518558849245
Aspect:
  precision: 0.2991233473948296
  recall: 0.34597046023234584
  f-score: 0.29523168981043124


### edit_distance

In [None]:
evaluate_edit_distance(t5_fewshot_large_df, answer_aspect, answer_objects)

Objects: 3.8754578754578755
Aspect: 10.34020618556701


### strict

Полное совпадение

In [None]:
evaluate_full_match(t5_fewshot_large_df, answer_aspect, answer_objects)

Objects: 0.356
Aspect: 0.284


Косинусная близость

In [None]:
evaluate_cos_sim(t5_fewshot_large_df, answer_aspect, answer_objects) 

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.24649174040555955
Aspect: 0.46939751715958117


In [None]:
evaluate_cos_sim_found(t5_fewshot_large_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.7927860410884022
Aspect: 0.7797300949494704


### relaxed

Полное совпадение

In [None]:
evaluate_full_match_relaxed(t5_fewshot_large_df, answer_aspect, answer_objects)

Objects: 0.961
Aspect: 0.284


Косинусная близость

In [None]:
evaluate_cos_sim_relaxed(t5_fewshot_large_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.25009163677692414
Aspect: 0.46939751715958117


## few shot

In [None]:
t5_fewshot_df = pd.read_csv('./drive/My Drive/t5_fewshot.csv', sep='\t', encoding='utf-8')

In [None]:
t5_fewshot_df = delete_unnes_aspects(t5_fewshot_df, unnes_asps)

In [None]:
answer_aspect = []
answer_objects = []

for i in range(len(t5_fewshot_df)):
  answer = t5_fewshot_df.iloc[i, 4]
  lines = answer.split('Objects')

  asps = []
  objs = []

  for l in lines:

    if 'Aspect' in l:
      aspect = l.split('Aspect')[1].strip(':').strip(' ')
      if aspect and aspect not in asps:
        asps.append(aspect.split(' Wh')[0].strip('\''))

      obj = l.split('Aspect')[0].strip(':').strip(' ')
      obj = re.sub('[\' ],[ \']', '\',\'', obj)
      if obj:
        obj = obj.split(',')
        for o in obj:
          if o not in objs:
            objs.append(o.strip('\''))

  if asps:
    answer_aspect.append(asps[0]) 
  else:
    answer_aspect.append('') 
  answer_objects.append(objs)



In [None]:
clear_answer_aspect = []
for a in answer_aspect:
  if a in unnes_asps:
    clear_answer_aspect.append('')
  else:
    clear_answer_aspect.append(a)   

answer_aspect = clear_answer_aspect 

### MUC, CEAF

In [None]:
evaluate_muc_ceaf_bcubed_f1(t5_fewshot_df, answer_aspect, answer_objects)

MUC
Objects:
  precision: 0.8486767545819883
  recall: 0.7252750444825334
  f-score: 0.7248236230433327
Aspect:
  precision: 0.4055111325711769
  recall: 0.4433006119037392
  f-score: 0.3861198951243622

CEAF
Objects:
  precision: 0.6730975921818193
  recall: 0.644654583346271
  f-score: 0.6239526844838434
Aspect:
  precision: 0.3300141100290674
  recall: 0.35904912496951463
  f-score: 0.3207179109290249


### edit_distance

In [None]:
evaluate_edit_distance(t5_fewshot_df, answer_aspect, answer_objects)

Objects: 4.643356643356643
Aspect: 8.560165975103734


### strict

Полное совпадение

In [None]:
evaluate_full_match(t5_fewshot_df, answer_aspect, answer_objects)

Objects: 0.418
Aspect: 0.318


Косинусная близость

In [None]:
evaluate_cos_sim(t5_fewshot_df, answer_aspect, answer_objects) 

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.6995942766666412
Aspect: 0.4976346382051706


In [None]:
evaluate_cos_sim_found(t5_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.8629814709348632
Aspect: 0.8052340424031887


### relaxed

Полное совпадение

In [None]:
evaluate_full_match_relaxed(t5_fewshot_df, answer_aspect, answer_objects)

Objects: 0.649
Aspect: 0.318


Косинусная близость

In [None]:
evaluate_cos_sim_relaxed(t5_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.7001676155328751
Aspect: 0.4976346382051706


# Flan-T5

## zero shot

In [None]:
flant5_zeroshot_df = pd.read_csv('./drive/My Drive/flant5_zeroshot.csv', sep='\t', encoding='utf-8')

In [None]:
words = ['and', 'or', 'vs', 'versus', 'than']
separators = [',', '&', '/']

In [None]:
answer_objects = []

for i in range(len(flant5_zeroshot_df)):
  answer = flant5_zeroshot_df.iloc[i,4].lower()

  if 'and' in answer:
    objects = answer.split(' and ')
  elif 'or' in answer:
    objects = answer.split(' or ')
  elif 'vs' in answer:
    objects = answer.split(' vs ')
  elif 'versus' in answer:
    objects = answer.split(' versus ')
  elif 'than' in answer:
    objects = answer.split(' than ')

  elif ',' in answer:
    objects = answer.split(',')
    objects = [obj.strip(' ') for obj in objects]
  elif '&' in answer:
    objects = answer.split('&')
    objects = [obj.strip(' ') for obj in objects]
  elif '/' in answer:
    objects = answer.split('/')
    objects = [obj.strip(' ') for obj in objects]

  elif len(answer.split(' ')) % 2 == 0:
    objects_ = answer.split(' ')
    obj1 = ' '.join(objects_[:int(len(answer.split(' '))/2)])
    obj2 = ' '.join(objects_[int(len(answer.split(' '))/2):])
    objects = [obj1, obj2]

  else:
    objects = []
  
  answer_objects.append(objects)

### MUC, CEAF

In [None]:
muc_objects = []
ceaf_objects = []

for i in range(len(flant5_zeroshot_df)):
  objs = [flant5_zeroshot_df.iloc[i,1].lower(), flant5_zeroshot_df.iloc[i,2].lower()]
  objs = [obj+'_' if len(obj)==1 else obj for obj in objs]

  pred_objects = [ans.lower() for ans in answer_objects[i]]
  pred_objects = [obj+'_' if len(obj)==1 else obj for obj in pred_objects]

  split_objs = [[i for i in obj] for obj in objs]
  if pred_objects == []:
    split_pred_objs = [['_', '_'], ['_', '_']]
  elif len(pred_objects) == 1:
    split_pred_objs = [[i for i in obj] for obj in pred_objects]
    split_pred_objs.append(['_', '_'])
  else:
    split_pred_objs = [[i for i in obj] for obj in pred_objects]

  muc_objects.append(muc(split_pred_objs, split_objs))
  ceaf_objects.append(ceaf(split_pred_objs, split_objs))

print('MUC')
print(f'''precision: {sum([m[0] for m in muc_objects])/len(muc_objects)}
recall: {sum([m[1] for m in muc_objects])/len(muc_objects)}
f-score: {sum([m[2] for m in muc_objects])/len(muc_objects)}''')
  
print('\nCEAF')
print(f'''precision: {sum([c[0] for c in ceaf_objects])/len(ceaf_objects)}
recall: {sum([c[1] for c in ceaf_objects])/len(ceaf_objects)}
f-score: {sum([c[2] for c in ceaf_objects])/len(ceaf_objects)}''')

MUC
precision: 0.6632730944075892
recall: 0.7507671274079546
f-score: 0.6162572704993214

CEAF
precision: 0.5555744922430175
recall: 0.568095864820556
f-score: 0.517266530844181


### edit_distance

In [None]:
object_distance = []

for i in range(len(flant5_zeroshot_df)):
  objs = [flant5_zeroshot_df.iloc[i,1].lower(), flant5_zeroshot_df.iloc[i,2].lower()]

  pred_objects = [ans.lower() for ans in answer_objects[i]]

  if set(objs) == set(pred_objects):
    object_distance.append(0)
  elif len(objs) == len(pred_objects):
    ed_obj1 = nltk.edit_distance(objs[0], pred_objects[0], transpositions=False)
    ed_obj2 = nltk.edit_distance(objs[1], pred_objects[1], transpositions=False)
    object_distance.append((ed_obj1+ed_obj2)/2)
  # else: # добавить, если считаем и те, которые не нашлись
  #   object_distance.append((len(objs[0])+len(objs[1]))/2)


print(sum(object_distance)/len(object_distance))


8.919293820933165


### strict

Полное совпадение

In [None]:
correct_answers = 0

for i in range(len(flant5_zeroshot_df)):
  objs = [flant5_zeroshot_df.iloc[i,1].lower(), flant5_zeroshot_df.iloc[i,2].lower()]


  pred_objects = [ans.lower() for ans in answer_objects[i]]


  if set(objs) == set(pred_objects):
    correct_answers += 1

print(correct_answers/len(flant5_zeroshot_df))


0.20543016028786393


Косинусная близость

In [None]:
cos_objects = []

for i in tqdm(range(len(flant5_zeroshot_df))):
  objs = [flant5_zeroshot_df.iloc[i,1].lower(), flant5_zeroshot_df.iloc[i,2].lower()]
  pred_objs = [ans.lower() for ans in answer_objects[i]]

  if len(objs) == len(pred_objs):
    ob1_cos = cosine_sim(pred_objs[0], objs[0])
    ob2_cos = cosine_sim(pred_objs[1], objs[1])
    cos_objects.append((ob1_cos+ob2_cos)/2)
  else:
    cos_objects.append(0)


print(sum(cos_objects)/len(cos_objects))


  0%|          | 0/3057 [00:00<?, ?it/s]

0.5955132797145944


In [None]:
# если не учитывать ненайденные 

cos_objects = []

for i in tqdm(range(len(flant5_zeroshot_df))):
  objs = [flant5_zeroshot_df.iloc[i,1].lower(), flant5_zeroshot_df.iloc[i,2].lower()]

  pred_objs = [ans.lower() for ans in answer_objects[i]]

  if len(objs) == len(pred_objs):
    ob1_cos = cosine_sim(pred_objs[0], objs[0])
    ob2_cos = cosine_sim(pred_objs[1], objs[1])
    cos_objects.append((ob1_cos+ob2_cos)/2) # 0.835279246163902, 0.9263402552981126, если без того что ниже
  if len(pred_objs) == 1:
    ob1_cos = cosine_sim(pred_objs[0], objs[0])
    ob2_cos = cosine_sim(pred_objs[0], objs[1])
    cos_objects.append(max(ob1_cos, ob2_cos))



print(sum(cos_objects)/len(cos_objects))


  0%|          | 0/3057 [00:00<?, ?it/s]

0.7571863607395135


### relaxed

Полное совпадение

In [None]:
correct_answers = 0

for i in range(len(flant5_zeroshot_df)):
  objs = [flant5_zeroshot_df.iloc[i,1].lower(), flant5_zeroshot_df.iloc[i,2].lower()]


  pred_objects = [ans.lower() for ans in answer_objects[i]]

  for o in pred_objects:
    if o in objs:
      correct_answers += 0.5


eval_df['flant5_zero_1'] = [correct_answers/len(flant5_zeroshot_df), 0]

print(correct_answers/len(flant5_zeroshot_df))


0.32106640497219496


Косинусная близость

In [None]:
cos_objects = []

for i in tqdm(range(len(flant5_zeroshot_df))):
  objs = [flant5_zeroshot_df.iloc[i,1].lower(), flant5_zeroshot_df.iloc[i,2].lower()]

  pred_objs = [ans.lower() for ans in answer_objects[i]]

  # if one obects predicted
  if len(pred_objs) == 1:
    max_cos = 0
    for obj in objs:
      obj_cos = cosine_sim(pred_objs[0], obj)
      if obj_cos > max_cos:
        max_cos = obj_cos
    cos_objects.append(max_cos*0.5)

  # if two objects predicted
  elif len(objs) == len(pred_objs):
    ob1_cos = cosine_sim(pred_objs[0], objs[0])
    ob2_cos = cosine_sim(pred_objs[1], objs[1])
    cos_objects.append((ob1_cos+ob2_cos)/2)

  # if no objects predicted
  else:
    cos_objects.append(0)


print(sum(cos_objects)/len(cos_objects))

  0%|          | 0/3057 [00:00<?, ?it/s]

0.6565346721270953


## few shot

In [None]:
flant5_fewshot_df = pd.read_csv('./drive/My Drive/flant5xl_fewshot.csv', sep='\t', encoding='utf-8')

In [None]:
flant5_fewshot_df = delete_unnes_aspects(flant5_fewshot_df, unnes_asps)

In [None]:
answer_objects = []
answer_aspect = []
for i in range(len(flant5_fewshot_df)):
  answer = flant5_fewshot_df.iloc[i,4]
  if 'Aspect' in answer:
    aspect = answer.split('Aspect')[1].strip(' ').strip(':')
    objj = answer.split('Aspect')[0]
    objj = re.sub('Objects', '', objj).strip(' ').strip(':')
  else:
    aspect = ''
    if 'Objects' in answer:
      obj = re.sub('Objects', '', answer).strip(' ').strip(':')
    else:
      obj = ''
  if "'" in objj:
    obj = re.sub("' ?, '", "','", objj)
    obj = re.sub("'", '', obj).strip(' ').strip('\'').split("','")
  else:
    obj = re.sub(' ?, ', ',', objj).strip(' ').strip('\'').split(',')

  answer_objects.append(obj)
  answer_aspect.append(aspect.strip(' '))

In [None]:
clear_answer_aspect = []
for a in answer_aspect:
  if a in unnes_asps:
    clear_answer_aspect.append('')
  else:
    clear_answer_aspect.append(a)   

answer_aspect = clear_answer_aspect 

### MUC, CEAF

In [None]:
evaluate_muc_ceaf_bcubed_f1(flant5_fewshot_df, answer_aspect, answer_objects)

MUC
Objects:
  precision: 0.764060021321861
  recall: 0.7912010197185368
  f-score: 0.7243328431483609
Aspect:
  precision: 0.6833814707404906
  recall: 0.6242309679677882
  f-score: 0.629668329280108

CEAF
Objects:
  precision: 0.6362241026235022
  recall: 0.5906614364062405
  f-score: 0.5917628493228793
Aspect:
  precision: 0.4773852481415366
  recall: 0.43068851055216667
  f-score: 0.44012712445590363


### edit-distance

In [None]:
evaluate_edit_distance(flant5_fewshot_df, answer_aspect, answer_objects)

Objects: 4.4
Aspect: 3.011013215859031


### strict

Полное совпадение

In [None]:
evaluate_full_match(flant5_fewshot_df, answer_aspect, answer_objects)

Objects: 0.378
Aspect: 0.59


Косинусная близость

In [None]:
evaluate_cos_sim(flant5_fewshot_df, answer_aspect, answer_objects) 

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.5596370953321457
Aspect: 0.7040185940563679


In [None]:
evaluate_cos_sim_found(flant5_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.8273048615912226
Aspect: 0.9263402553373261


### relaxed

Полное совпадение

In [None]:
evaluate_full_match_relaxed(flant5_fewshot_df, answer_aspect, answer_objects)

Objects: 0.435
Aspect: 0.59


Косинусная близость

In [None]:
evaluate_cos_sim_relaxed(flant5_fewshot_df, answer_aspect, answer_objects)

  0%|          | 0/500 [00:00<?, ?it/s]

Objects: 0.6794067958146334
Aspect: 0.7040185940563679
