# Load generated captions and their metadata

In [13]:
import pandas as pd

# Read the table of generated and reference captions from the CSV file that
# sits in the current working directory.
test_caption_gen = pd.read_csv(filepath_or_buffer="./test_caption_gen.csv")

# Evaluation Metrics
- BLEU
- ROUGE-1 and ROUGE-L
- METEOR

In [14]:
import evaluate

# Load the three caption-quality metrics from the `evaluate` library in one
# pass; names are bound in the same order as the metric identifiers.
bleu, rouge, meteor = (
    evaluate.load(metric_name) for metric_name in ("bleu", "rouge", "meteor")
)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/hyeongkyunkim/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/hyeongkyunkim/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/hyeongkyunkim/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [15]:
from tqdm import tqdm
from nltk.tokenize import word_tokenize

# Per-row score accumulators; one value is appended for every caption pair.
bleu_score_lst, rouge1_score_lst, rougeL_score_lst, met_score_lst = [], [], [], []

# Walk the generated/reference captions in row order, showing a progress bar.
caption_pairs = zip(test_caption_gen["gen_caption"], test_caption_gen["ref_caption"])
for gen_caption, ref_caption in tqdm(caption_pairs, total=len(test_caption_gen)):
    # Every metric is called with single-element lists: one caption pair per call.
    predictions = [gen_caption]
    references = [ref_caption]

    # BLEU with smoothing enabled, tokenised by NLTK's word_tokenize.
    bleu_out = bleu.compute(
        predictions=predictions,
        references=references,
        smooth=True,
        tokenizer=word_tokenize,
    )
    bleu_score_lst.append(bleu_out["bleu"])

    # ROUGE-1 and ROUGE-L without aggregation; element 0 of each result is
    # taken because only one caption pair was passed in.
    rouge_out = rouge.compute(
        predictions=predictions,
        references=references,
        rouge_types=["rouge1", "rougeL"],
        use_aggregator=False,
    )
    rouge1_score_lst.append(rouge_out["rouge1"][0])
    rougeL_score_lst.append(rouge_out["rougeL"][0])

    # METEOR on the same single caption pair.
    meteor_out = meteor.compute(predictions=predictions, references=references)
    met_score_lst.append(meteor_out["meteor"])

# Attach the collected scores as new columns, in this fixed column order.
for column_name, column_scores in (
    ("BLEU", bleu_score_lst),
    ("ROUGE-1", rouge1_score_lst),
    ("ROUGE-L", rougeL_score_lst),
    ("METEOR", met_score_lst),
):
    test_caption_gen[column_name] = column_scores

test_caption_gen.head()

100%|██████████| 1199/1199 [00:20<00:00, 58.23it/s]


Unnamed: 0,image_id,gen_caption,ref_caption,chartType,chartElement,dataType,BLEU,ROUGE-1,ROUGE-L,METEOR
0,0,This statistic shows the market shares of inte...,The statistic depicts a breakdown of the inte...,bar,FullCover,SingleColumn,0.229303,0.631579,0.442105,0.362734
1,1,This statistic shows Kia's operating profit fr...,This statistic shows Kia Motors' operating pr...,line,FullCover,SingleColumn,0.248913,0.676923,0.584615,0.654699
2,2,This statistic shows the best-paid Indian acto...,"In 2019, the Hindi film star Akshay Kumar was...",bar,FullCover,SingleColumn,0.039385,0.314815,0.203704,0.119522
3,3,This statistic shows the annual exchange rate ...,This statistic displays the annual exchange r...,line,FullCover,SingleColumn,0.003807,0.270916,0.199203,0.120063
4,4,This statistic shows the most common baby name...,What are typical names given to baby boys in ...,bar,FullCover,SingleColumn,0.070199,0.421053,0.252632,0.252264


## Overall Performance

Evaluate on the subset of the dataset whose `chartElement` is either __FullCover (SingleColumn)__ or __OnePlot (MultiColumn)__.

In [4]:
# Chart-element variants that make up the "overall" evaluation subset.
chartElement = ["FullCover", "OnePlot"]

# Keep only rows with one of those variants, and only the id, data-type and
# metric columns, selected in a single .loc call.
in_overall_subset = test_caption_gen["chartElement"].isin(chartElement)
overall_perf_data = test_caption_gen.loc[
    in_overall_subset,
    ["image_id", "dataType", "BLEU", "ROUGE-1", "ROUGE-L", "METEOR"],
]

# Split the subset by its dataType value.
overall_single_perf_data = overall_perf_data[overall_perf_data["dataType"].eq("SingleColumn")]
overall_multi_perf_data = overall_perf_data[overall_perf_data["dataType"].eq("MultiColumn")]

In [5]:
# Summary statistics (rows of describe()) and metric columns shown in each report.
summary_rows = ["mean", "min", "50%", "max"]
metric_cols = ["BLEU", "ROUGE-1", "ROUGE-L", "METEOR"]

# Print one heading plus one summary table per frame, in this order.
for heading, perf_frame in (
    ("Overall Performance of Single Column Data", overall_single_perf_data),
    ("Overall Performance of Multi Column Data", overall_multi_perf_data),
    ("Overall Performance", overall_perf_data),
):
    print(heading)
    summary = perf_frame.describe().loc[summary_rows, metric_cols]
    print(f"{summary}\n")

Overall Performance of Single Column Data
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.267242  0.536979  0.451379  0.441117
min   0.001760  0.192893  0.113636  0.082645
50%   0.229637  0.541176  0.441518  0.409099
max   0.935370  0.945946  0.945946  0.974934

Overall Performance of Multi Column Data
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.242866  0.515102  0.428061  0.418457
min   0.000066  0.156863  0.098039  0.074998
50%   0.188290  0.500000  0.405405  0.382908
max   0.894470  0.945946  0.945946  0.959059

Overall Performance
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.255085  0.526068  0.439749  0.429816
min   0.000066  0.156863  0.098039  0.074998
50%   0.202802  0.518519  0.424242  0.394318
max   0.935370  0.945946  0.945946  0.974934



In [6]:
# Rank the single-column rows by the mean of their four metric scores,
# highest first. A descriptive name is used instead of `_`, which IPython
# reserves for the previous cell's output.
single_mean_scores = (
    overall_single_perf_data[["BLEU", "ROUGE-1", "ROUGE-L", "METEOR"]]
    .mean(axis=1)
    .sort_values(ascending=False)
)

# Position 4 in the ranking, i.e. the fifth-highest mean score.
# NOTE(review): the result is stored in `best` although it is not the
# top-ranked row; confirm that rank 4 is the intended example.
example_rank = 4

# The ranking's index carries row *labels* of test_caption_gen, so the row is
# looked up with .loc. Using .iloc would treat the label as a position and is
# only right while the frame keeps an untouched 0..n-1 index.
best = test_caption_gen.loc[single_mean_scores.index[example_rank]]
print(best)
print()
print(f"Generated caption\n{best['gen_caption']}\n\nReference caption\n{best['ref_caption']}")

image_id                                                      162
gen_caption     This statistic shows gross domestic product (G...
ref_caption      The statistic shows gross domestic product (G...
chartType                                                    line
chartElement                                            FullCover
dataType                                             SingleColumn
BLEU                                                     0.819075
ROUGE-1                                                  0.915033
ROUGE-L                                                  0.888889
METEOR                                                     0.8885
Name: 162, dtype: object

Generated caption
This statistic shows gross domestic product (GDP) per capita in Bulgaria from 1984 to 2019, with projections up until 2024. GDP is the total value of all goods and services produced in a country in a year. It is considered to be a very important indicator of the economic strength of a country an

In [7]:
# Rank the multi-column rows by the mean of their four metric scores,
# highest first. A descriptive name is used instead of `_`, which IPython
# reserves for the previous cell's output.
multi_mean_scores = (
    overall_multi_perf_data[["BLEU", "ROUGE-1", "ROUGE-L", "METEOR"]]
    .mean(axis=1)
    .sort_values(ascending=False)
)

# Position 4 in the ranking, i.e. the fifth-highest mean score.
# NOTE(review): the result is stored in `best` although it is not the
# top-ranked row; confirm that rank 4 is the intended example.
example_rank = 4

# The ranking's index carries row *labels* of test_caption_gen, so the row is
# looked up with .loc. Using .iloc would treat the label as a position and is
# only right while the frame keeps an untouched 0..n-1 index.
best = test_caption_gen.loc[multi_mean_scores.index[example_rank]]
print(best)
print()
print(f"Generated caption\n{best['gen_caption']}\n\nReference caption\n{best['ref_caption']}")

image_id                                                      836
gen_caption     This statistic shows the total population of S...
ref_caption      This statistic shows the total population of ...
chartType                                                    line
chartElement                                              OnePlot
dataType                                              MultiColumn
BLEU                                                     0.802447
ROUGE-1                                                  0.918919
ROUGE-L                                                  0.918919
METEOR                                                   0.945694
Name: 835, dtype: object

Generated caption
This statistic shows the total population of Somalia from 2009 to 2019, by gender. In 2019, Somalia's female population amounted to approximately 7.56 million, while the male population amounted to approximately 6.56 million inhabitants. 

Reference caption
 This statistic shows the total popula

## Effect of graph elements

Evaluate the effect of graph elements on graph captioning quality

.

- Single Column Element Variables
    - Title
    - Axis Label
    - Grid

.

- Multi Column Element Variables
    - One vs Subplots

In [8]:
def _metric_scores_for(chart_element):
    """Return the four metric columns of rows whose chartElement equals `chart_element`."""
    is_variant = test_caption_gen["chartElement"] == chart_element
    return test_caption_gen.loc[is_variant, ["BLEU", "ROUGE-1", "ROUGE-L", "METEOR"]]


# One score table per chart-element variant.
single_fullcover_data = _metric_scores_for("FullCover")
single_notitle_data = _metric_scores_for("NoTitle")
single_noaxislabel_data = _metric_scores_for("NoAxisLabel")
single_nogrid_data = _metric_scores_for("NoGrid")

In [9]:
# Rows of describe() shown for every chart-element variant.
element_summary_rows = ["mean", "min", "50%", "max"]

# Print one heading plus one summary table per variant, in this order.
for heading, score_frame in (
    ("Single Column : Full Cover", single_fullcover_data),
    ("Single Column : No Title", single_notitle_data),
    ("Single Column : No Axis Label", single_noaxislabel_data),
    ("Single Column : No Grid", single_nogrid_data),
):
    print(heading)
    element_summary = score_frame.describe().loc[element_summary_rows]
    print(f"{element_summary}\n")

Single Column : Full Cover
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.267242  0.536979  0.451379  0.441117
min   0.001760  0.192893  0.113636  0.082645
50%   0.229637  0.541176  0.441518  0.409099
max   0.935370  0.945946  0.945946  0.974934

Single Column : No Title
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.083562  0.293311  0.240739  0.227331
min   0.000873  0.077922  0.057143  0.046593
50%   0.067114  0.288908  0.230769  0.214353
max   0.567802  0.711864  0.628571  0.672950

Single Column : No Axis Label
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.263523  0.535754  0.451878  0.442444
min   0.001760  0.192893  0.113636  0.085456
50%   0.210206  0.535047  0.443275  0.421257
max   0.935370  0.945946  0.945946  0.974934

Single Column : No Grid
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.268740  0.538726  0.453640  0.443518
min   0.001760  0.192893  0.113636  0.085456
50%   0.233717  0.542193  0.446934  0.421777
max   0.935370  0.945946  0.945946  

In [10]:
# Metric columns kept for the multi-column comparison.
score_columns = ["BLEU", "ROUGE-1", "ROUGE-L", "METEOR"]
chart_element = test_caption_gen["chartElement"]

# Rows drawn as a single plot versus rows drawn as sub-plots.
multi_oneplot_data = test_caption_gen.loc[chart_element.eq("OnePlot"), score_columns]
multi_subplots_data = test_caption_gen.loc[chart_element.eq("SubPlot"), score_columns]

In [11]:
# Rows of describe() shown for both multi-column layouts.
layout_summary_rows = ["mean", "min", "50%", "max"]

# Print one heading plus one summary table per layout, in this order.
for heading, score_frame in (
    ("Multi Column : One plot", multi_oneplot_data),
    ("Multi Column : Sub plots", multi_subplots_data),
):
    print(heading)
    layout_summary = score_frame.describe().loc[layout_summary_rows]
    print(f"{layout_summary}\n")

Multi Column : One plot
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.242866  0.515102  0.428061  0.418457
min   0.000066  0.156863  0.098039  0.074998
50%   0.188290  0.500000  0.405405  0.382908
max   0.894470  0.945946  0.945946  0.959059

Multi Column : Sub plots
          BLEU   ROUGE-1   ROUGE-L    METEOR
mean  0.234401  0.514414  0.428158  0.415818
min   0.000061  0.140704  0.100503  0.074415
50%   0.186255  0.518934  0.416940  0.390969
max   0.894470  0.934783  0.934783  0.959059

