# Begin

In [None]:
!conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
# !pip install torchvision 

In [None]:
!pip install pycocoevalcap

In [1]:
import pandas as pd
import inspect
import math
import os
import json

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

# import losses
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.meteor.meteor import Meteor
from pycocotools.coco import COCO
from pycocoevalcap.eval import COCOEvalCap


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def calc_scores(ref, hypo):
    """
    ref, dictionary of reference sentences (id, sentence)
    hypo, dictionary of hypothesis sentences (id, sentence)
    score, dictionary of scores
    """
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(),"METEOR"),
        (Rouge(), "ROUGE_L"),
        (Cider(), "CIDEr")
    ]
    final_scores = {}
    for scorer, method in scorers:
        score, scores = scorer.compute_score(ref, hypo)
        if type(score) == list:
            for m, s in zip(method, score):
                final_scores[m] = s
        else:
            final_scores[method] = score
    return final_scores 

In [3]:
def score(ref, hypo):
    scorers = [
        (Bleu(4),["Bleu_1","Bleu_2","Bleu_3","Bleu_4"])

    ]
    final_scores = {}
    for scorer,method in scorers:
        score,scores = scorer.compute_score(ref,hypo)
        if type(score)==list:
            for m,s in zip(method,score):
                final_scores[m] = s
        else:
            final_scores[method] = score

    return final_scores

In [10]:
path_original_pt = ('C:/Users/rbdf/Documents/Codes/Projeto3/Coco_translation/COCO_annotation_pt.json')
path_generated = ('C:/Users/rbdf/Documents/Codes/Projeto3/Coco_translation/final/COCO_annotations_gen_beamsearch.csv')
path_COCO_val = ('C:/Users/rbdf/Documents/Codes/Projeto3/Coco_translation/COCO_Annotations_val_pt.json')

In [8]:
df_COCO_val = pd.read_json(path_COCO_val)
df_COCO_val

Unnamed: 0.1,Unnamed: 0,image_id,id,caption,caption_pt
0,0,179765,38,A black Honda motorcycle parked in front of a ...,Uma motocicleta Honda preta estacionada em fre...
1,1,179765,182,A Honda motorcycle parked in a grass driveway,Uma motocicleta Honda estacionada em uma entra...
2,2,190236,401,An office cubicle with four different types of...,Um cubículo de escritório com quatro tipos dif...
3,3,331352,441,A small closed toilet in a cramped space.,Um pequeno banheiro fechado em um espaço apert...
4,4,517069,447,Two women waiting at a bench next to a street.,Duas mulheres esperando em um banco ao lado de...
...,...,...,...,...,...
25009,25009,9590,821635,A group of men sipping drinks and talking at a...,Um grupo de homens bebendo bebidas e conversan...
25010,25010,84664,822557,"A plate of food with some eggs, potatoes, brea...","Um prato de comida com alguns ovos, batatas, p..."
25011,25011,331569,824852,The strawberries was sitting beside the tall g...,Os morangos estavam sentados ao lado do copo a...
25012,25012,231237,825902,A bunch of small red flowers in a barnacle enc...,Um monte de pequenas flores vermelhas em um va...


In [11]:
df_gen = pd.read_csv(path_generated)
df_gen

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,image_id,id,caption,caption_pt,gen_caption
0,0,0,179765,38,A black Honda motorcycle parked in front of a ...,Uma motocicleta Honda preta estacionada em fre...,Uma motocicleta preta estacionada em um quinta...
1,1,1,179765,182,A Honda motorcycle parked in a grass driveway,Uma motocicleta Honda estacionada em uma entra...,Uma motocicleta preta estacionada em um quinta...
2,2,2,190236,401,An office cubicle with four different types of...,Um cubículo de escritório com quatro tipos dif...,Uma mesa de computador com dois monitores e um...
3,3,3,331352,441,A small closed toilet in a cramped space.,Um pequeno banheiro fechado em um espaço apert...,Um banheiro branco com um vaso sanitário branc...
4,4,4,517069,447,Two women waiting at a bench next to a street.,Duas mulheres esperando em um banco ao lado de...,Uma mulher sentada em um banco de parque olhan...
...,...,...,...,...,...,...,...
25009,25009,25009,9590,821635,A group of men sipping drinks and talking at a...,Um grupo de homens bebendo bebidas e conversan...,Um grupo de homens sentados em uma mesa de mad...
25010,25010,25010,84664,822557,"A plate of food with some eggs, potatoes, brea...","Um prato de comida com alguns ovos, batatas, p...",Um prato branco coberto com diferentes tipos d...
25011,25011,25011,331569,824852,The strawberries was sitting beside the tall g...,Os morangos estavam sentados ao lado do copo a...,Um smoothie de banana e morango está em um prato.
25012,25012,25012,231237,825902,A bunch of small red flowers in a barnacle enc...,Um monte de pequenas flores vermelhas em um va...,Um vaso de flores em uma mesa com uma flor.


In [None]:
# df_gen['caption_pt'][0]
score(df_gen['caption_pt'][0], df_gen['gen_caption'][0] )

In [None]:
type(df_gen['caption_pt'][0])

In [12]:
dataDir='.'
dataType='val2014'
algName = 'fakecap'
annFile='%s/annotations/captions_%s.json'%(dataDir,dataType)
annFile

'./annotations/captions_val2014.json'

In [64]:
subtypes=['results', 'evalImgs', 'eval']
[resFile, evalImgsFile, evalFile]= \
['%s/results/captions_%s_%s_%s.json'%(dataDir,dataType,algName,subtype) for subtype in subtypes]
resFile

'./results/captions_val2014_fakecap_results.json'

In [None]:
original_COCO_path = 'C:/Users/rbdf/Documents/Codes/Projeto3/Coco_translation/captions_train2017.json'
with open(original_COCO_path) as json_data:
    COCO_original = json.load(json_data)
COCO_original.keys()

In [None]:
# path_original_pt
# with open(path_original_pt) as json_data:
#     COCO_original_pt = json.load(json_data)
# COCO_original_pt.keys()

# Gerando json para as métricas

Mudando o formato dos dataset para o Original do COCO

In [13]:
path_val_coco_original = 'C:/Users/rbdf/Documents/Codes/Projeto3/Coco_translation/Original/captions_val2017.json'

In [54]:
with open(path_val_coco_original) as json_data:
    coco_val_original = json.load(json_data)

In [55]:
coco_val_original.keys()


dict_keys(['info', 'licenses', 'images', 'annotations'])

In [56]:
coco_val_original['annotations'][0]

{'image_id': 179765,
 'id': 38,
 'caption': 'A black Honda motorcycle parked in front of a garage.'}

In [57]:
import copy

coco_val_gen = copy.deepcopy(coco_val_original)
coco_val_pt = copy.deepcopy(coco_val_original)

In [58]:
for i in range(len(coco_val_original['annotations'])):
    coco_val_gen['annotations'][i]['caption'] = df_gen['gen_caption'].iloc[i]
    coco_val_pt['annotations'][i]['caption'] = df_gen['caption_pt'].iloc[i]

In [59]:
# df_gen['gen_caption'].iloc[0]
coco_val_gen['annotations'][0]

{'image_id': 179765,
 'id': 38,
 'caption': 'Uma motocicleta preta estacionada em um quintal com um carro lateral.'}

In [60]:
coco_val_pt['annotations'][0]

{'image_id': 179765,
 'id': 38,
 'caption': 'Uma motocicleta Honda preta estacionada em frente a uma garagem.'}

In [84]:
# import json

# with open('COCO_val_gen.json', 'w') as fp:
#     json.dump(coco_val_gen, fp)
    
# with open('COCO_val_pt.json', 'w') as fp:
#     json.dump(coco_val_pt, fp)

# with open('captions_val2014_fakecap_results.json', 'r') as j:
     # contents = json.loads(j.read())
    
# with open("captions_val2014_fakecap_results.json", encoding='utf-8', errors='ignore') as json_data:
     # data = json.load(json_data, strict=False)
    
# with open("captions_val2014_fakecap_results.json", "r") as read_file:
   # data = json.load(read_file)
    
with open('captions_val2014_fakecap_results.json', 'r') as j:
     contents = json.loads(j.read())
     # print(contents)

In [86]:
contents[0]

{'image_id': 404464,
 'caption': 'black and white photo of a man standing in front of a building'}

# Testing functions

In [70]:
# score(coco_val_pt['annotations'][0], coco_val_gen['annotations'][0])

In [71]:
coco = COCO('COCO_val_pt.json')
cocoRes = coco.loadRes(coco_val_gen['annotations'])

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!


In [92]:
# create cocoEval object by taking coco and cocoRes
cocoEval = COCOEvalCap(coco, cocoRes)
# evaluate on a subset of images by setting
# cocoEval.params['image_id'] = cocoRes.getImgIds()
# please remove this line when evaluating the full validation set
cocoEval.params['image_id'] = cocoRes.getImgIds()
len(cocoEval.params['image_id'])

# evaluate results
# SPICE will take a few minutes the first time, but speeds up due to caching
# cocoEval.evaluate()

5000

In [75]:
# print output evaluation scores
for metric, score in cocoEval.eval.items():
    print(metric, score)

In [98]:
coco_val_pt['annotations'][0]['caption']

{'image_id': 179765,
 'id': 38,
 'caption': 'Uma motocicleta Honda preta estacionada em frente a uma garagem.'}

In [104]:
score(contents,contents)

AttributeError: 'list' object has no attribute 'keys'

In [106]:
scorers = [
        # (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        # (Rouge(), "ROUGE_L"),
        # (Cider(), "CIDEr")        
    ]
score_dict = {}
for scorer, method in scorers:
    print('computing %s score...' % (scorer.method()))
    score, scores = scorer.compute_score(contents, contents)

computing METEOR score...


AttributeError: 'list' object has no attribute 'keys'

In [None]:
Meteor().compute_score(contents[0], contents[0])

In [None]:
df_original = df_gen[['image_id', 'id', 'caption_pt']]
df_original

In [None]:
df_generated = df_gen[['image_id', 'id']]
df_generated['caption_pt'] = df_gen['gen_caption']
df_generated

In [14]:
# # salvando
# df_list = df_original.to_dict(orient='records')

# with open('COCO_Annotations_pt_original.json', 'w') as f:
#     json.dump(df_list, f)

In [None]:
# # salvando
# df_list = df_generated.to_dict(orient='records')

# with open('COCO_Annotations_pt_generated.json', 'w') as f:
#     json.dump(df_list, f)

In [None]:
json_original = 'COCO_Annotations_pt_original.json'
json_generated = 'COCO_Annotations_pt_generated.json'

In [None]:

# with open(json_original) as json_data:
#     coco_pt_original = json.load(json_data)
    

# with open(json_generated) as json_data:
#     coco_pt_generated = json.load(json_data)

In [None]:
len(coco_pt_original)

In [None]:
# df_gen['caption_pt'][0]
# score(coco_pt_original[0], coco_pt_generated[0] )

In [None]:
# COCO_original
coco = COCO(original_COCO_path)
# cocoRes = coco.loadRes(resFile)

In [None]:
# type(COCO_original)
COCO_original.keys()
# len(COCO_original['annotations'])

In [None]:
# len(COCO_original['annotations'])
COCO_original['annotations'][0]