In [1]:
# Load IPython's autoreload extension and select mode 2, which reloads all
# imported modules before each cell executes. This lets edits to local modules
# (such as `analysis`, imported below) take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import logging
import sys
import warnings
import pandas as pd
import numpy as np
import scipy
import scipy.stats
from analysis import ReportAnalysis

# Silence every warning for the rest of the session so report output stays
# readable. NOTE(review): this is a blanket filter; it also hides deprecation
# and numerical warnings raised by pandas/scipy.
warnings.filterwarnings('ignore')

# Send all records (DEBUG and above) that reach the root logger to stdout so
# they appear in the cell output.
logger = logging.getLogger()
# Use the supported setter: assigning `logger.level` directly skips the
# bookkeeping that setLevel() performs (level validation and invalidation of
# the logging module's cached isEnabledFor() results).
logger.setLevel(logging.DEBUG)

# Reuse the stdout handler when this cell has already run in the current
# kernel. Calling addHandler() unconditionally would attach one more handler
# per re-run and print every log line that many times.
stream_handler = next(
    (
        handler
        for handler in logger.handlers
        if isinstance(handler, logging.StreamHandler)
        and getattr(handler, 'stream', None) is sys.stdout
    ),
    None,
)
if stream_handler is None:
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

# Render floats in displayed DataFrames with thousands separators and two
# decimal places. Recipe taken from:
# https://www.kite.com/python/answers/how-to-display-float-values-in-a-pandas-dataframe-to-two-decimal-places-in-python
pd.options.display.float_format = "{:,.2f}".format

# Stack Overflow

In [3]:
# DS android


In [4]:
# Collect the metrics of every technique into one ReportAnalysis instance and
# display the resulting table as the cell output.
_analysis = ReportAnalysis()

# IR-style techniques: (results directory, has_filters flag passed to the report).
_ir_runs = (
    ('output/answerbot', False),
    ('output/answerbot_text', False),
    ('output/w2v', True),
)
for _ir_dir, _with_filters in _ir_runs:
    _analysis.report_IR_metrics(_ir_dir, source_type='so', has_filters=_with_filters)

# BERT-based techniques, one results directory per training dataset.
for _bert_dir in ('output/bert_ds_android', 'output/bert_ds_synthetic'):
    _analysis.report_BERT_metrics(_bert_dir, source_type='so')

# Last expression of the cell: rendered by the notebook as the table below.
_analysis.pd_table()

Unnamed: 0,technique,precision,recall,f1-score
0,answerbot,0.62,0.62,0.59
1,answerbot_text,0.54,0.53,0.52
2,w2v,0.41,0.47,0.36
3,w2v w/ frame-elements,0.54,0.52,0.47
4,w2v w/ frame-associations,0.44,0.48,0.43
5,bert_ds_android,0.58,0.64,0.57
6,bert_ds_android w/ frame-elements,0.57,0.64,0.55
7,bert_ds_android w/ frame-associations,0.58,0.65,0.56
8,bert_ds_synthetic,0.58,0.64,0.56
9,bert_ds_synthetic w/ frame-elements,0.56,0.63,0.53


### Effect size comparison

In [5]:
def _recall_only(load_metrics, results_path, source_type='so'):
    """Load one results file and return only the middle of its three metric lists.

    Replaces the repeated ``_, recall, _ = loader(...)`` pattern. Binding ``_``
    at cell level shadows IPython's last-output variable for the rest of the
    session, so the throwaway values are kept local to this function instead.

    Args:
        load_metrics: Bound loader method, e.g.
            ``_analysis.get_fold_metrics_to_list`` or
            ``_analysis.get_ir_metrics_to_list``.
        results_path: Path of the JSON results file to read.
        source_type: Value forwarded as the loader's ``type`` keyword.

    Returns:
        The second of the three values produced by the loader.

    Raises:
        ValueError: If the loader does not yield exactly three values (same
            failure mode as the tuple unpacking this helper replaces).
    """
    # Assumes the loader yields (precision, recall, f1) in that order, matching
    # the columns of the report table — TODO confirm against ReportAnalysis.
    _first, recall, _third = load_metrics(results_path, type=source_type)
    return recall


# BERT trained on the Android dataset: base / `_fe` / `_fa` result files.
bert_recall_base = _recall_only(_analysis.get_fold_metrics_to_list, 'output/bert_ds_android_base.json')
bert_recall_fe = _recall_only(_analysis.get_fold_metrics_to_list, 'output/bert_ds_android_fe.json')
bert_recall_fa = _recall_only(_analysis.get_fold_metrics_to_list, 'output/bert_ds_android_fa.json')

# BERT trained on the synthetic dataset: base / `_fe` / `_fa` result files.
syn_bert_recall_base = _recall_only(_analysis.get_fold_metrics_to_list, 'output/bert_ds_synthetic_base.json')
syn_bert_recall_fe = _recall_only(_analysis.get_fold_metrics_to_list, 'output/bert_ds_synthetic_fe.json')
syn_bert_recall_fa = _recall_only(_analysis.get_fold_metrics_to_list, 'output/bert_ds_synthetic_fa.json')

# word2vec results are read through the IR loader rather than the fold loader.
w2v_recall_base = _recall_only(_analysis.get_ir_metrics_to_list, 'output/w2v_base.json')
w2v_recall_fe = _recall_only(_analysis.get_ir_metrics_to_list, 'output/w2v_fe.json')
w2v_recall_fa = _recall_only(_analysis.get_ir_metrics_to_list, 'output/w2v_fa.json')


# AnswerBot: full variant and the text-only variant, also via the IR loader.
answerbot_recall_base = _recall_only(_analysis.get_ir_metrics_to_list, 'output/answerbot_base.json')
answerbot_recall_text = _recall_only(_analysis.get_ir_metrics_to_list, 'output/answerbot_text_base.json')

In [6]:
#### AnswerBot & BERT_android

In [7]:
# Two-sided Mann-Whitney U test between the AnswerBot recall list
# (output/answerbot_base.json) and the BERT-android `_fa` recall list
# (output/bert_ds_android_fa.json).
# NOTE(review): this test treats the two samples as independent; if both lists
# hold scores for the same folds, a paired test may be a better fit — confirm.
scipy.stats.mannwhitneyu(answerbot_recall_base, bert_recall_fa, alternative='two-sided')

MannwhitneyuResult(statistic=36.0, pvalue=0.3074894566186813)

In [8]:
# Effect size between answerbot_recall_base and bert_recall_fa via
# ReportAnalysis.cohend (presumably Cohen's d; implementation not shown here).
# NOTE(review): a negative value presumably means the first argument has the
# lower mean — confirm the sign convention in `analysis`.
_analysis.cohend(answerbot_recall_base, bert_recall_fa)

-0.2794033939673506

In [9]:
#### AnswerBot & BERT_synthetic

In [10]:
# Two-sided Mann-Whitney U test between the AnswerBot recall list
# (output/answerbot_base.json) and the BERT-synthetic base recall list
# (output/bert_ds_synthetic_base.json).
scipy.stats.mannwhitneyu(answerbot_recall_base, syn_bert_recall_base, alternative='two-sided')

MannwhitneyuResult(statistic=40.0, pvalue=0.47267559351158717)

In [11]:
# Effect size between answerbot_recall_base and syn_bert_recall_base via
# ReportAnalysis.cohend (presumably Cohen's d; implementation not shown here).
_analysis.cohend(answerbot_recall_base, syn_bert_recall_base)

-0.20714669536114358

In [12]:
#### AnswerBot & word2vec

In [13]:
# Two-sided Mann-Whitney U test between the AnswerBot recall list
# (output/answerbot_base.json) and the word2vec `_fe` recall list
# (output/w2v_fe.json).
scipy.stats.mannwhitneyu(answerbot_recall_base, w2v_recall_fe, alternative='two-sided')

MannwhitneyuResult(statistic=69.0, pvalue=0.16197241048012612)

In [14]:
# Effect size between answerbot_recall_base and w2v_recall_fe via
# ReportAnalysis.cohend (presumably Cohen's d; implementation not shown here).
_analysis.cohend(answerbot_recall_base, w2v_recall_fe)

0.7248316252905125

### Effect size comparison - without metadata (AnswerBot text-only variant)

In [15]:
#### AnswerBot & BERT_android

In [16]:
# Two-sided Mann-Whitney U test between the text-only AnswerBot recall list
# (output/answerbot_text_base.json) and the BERT-android `_fa` recall list
# (output/bert_ds_android_fa.json).
scipy.stats.mannwhitneyu(answerbot_recall_text, bert_recall_fa, alternative='two-sided')

MannwhitneyuResult(statistic=9.0, pvalue=0.0022022199424970783)

In [17]:
# Effect size between answerbot_recall_text and bert_recall_fa via
# ReportAnalysis.cohend (presumably Cohen's d; implementation not shown here).
_analysis.cohend(answerbot_recall_text, bert_recall_fa)

-1.7054538144862432

In [18]:
#### AnswerBot & BERT_synthetic

In [19]:
# Two-sided Mann-Whitney U test between the text-only AnswerBot recall list
# (output/answerbot_text_base.json) and the BERT-synthetic base recall list
# (output/bert_ds_synthetic_base.json).
scipy.stats.mannwhitneyu(answerbot_recall_text, syn_bert_recall_base, alternative='two-sided')

MannwhitneyuResult(statistic=14.0, pvalue=0.00728455700947966)

In [20]:
# Effect size between answerbot_recall_text and syn_bert_recall_base via
# ReportAnalysis.cohend (presumably Cohen's d; implementation not shown here).
_analysis.cohend(answerbot_recall_text, syn_bert_recall_base)

-1.6886687246851708

In [21]:
#### AnswerBot & word2vec

In [22]:
# Two-sided Mann-Whitney U test between the text-only AnswerBot recall list
# (output/answerbot_text_base.json) and the word2vec `_fe` recall list
# (output/w2v_fe.json).
scipy.stats.mannwhitneyu(answerbot_recall_text, w2v_recall_fe, alternative='two-sided')

MannwhitneyuResult(statistic=53.0, pvalue=0.8501067391385259)

In [23]:
# Effect size between answerbot_recall_text and w2v_recall_fe via
# ReportAnalysis.cohend (presumably Cohen's d; implementation not shown here).
_analysis.cohend(answerbot_recall_text, w2v_recall_fe)

0.05066666084743783