# Cohen's Kappa Statistics

### Agreement Rate Calculation

**Note:** this notebook assumes the use of **Python 3**.

## Preamble: Setting Up the Django Environment

In [2]:
%load preamble_directives.py

## Weighted Cohen's Kappa Function

$\kappa = 1 - \frac{\sum W * X}{\sum W * M}$, where $*$ denotes element-wise matrix multiplication.

$X$: the matrix of observed scores.

$M$: the matrix of scores expected from agreement by chance.

$W$: the weight matrix.


In [3]:
from django.contrib.auth.models import User
from source_code_analysis.models import SoftwareProject

In [4]:
from evaluations import Judge

## Calculate Agreement Score (Function Definition)

In [5]:
from evaluations import calculate_agreement_scores

## Cohen's Kappa Function (definition)

In [6]:
from evaluations import cohens_kappa

## Cohen's Kappa 3 WITHOUT logs

In [8]:
# Unweighted and weighted Cohen's kappa for each project, one output row each.
# calculate_agreement_scores is called without `k`, i.e. with its default
# number of codes.
# Each case is (row template, first judge, second judge, project arguments);
# the project arguments are unpacked into Judge so that CoffeeMaker keeps its
# two-argument call (no explicit version).
kappa3_cases = (
    ('CoffeeMaker & %.3f & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('CoffeeMaker',)),
    ('JFreechart (0.6.0) & %.3f & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.6.0')),
    ('JFreechart (0.7.1) & %.3f & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.7.1')),
    ('JHotDraw (7.4.1) & %.3f & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('JHotDraw', '7.4.1')),
)

for row_template, first_judge, second_judge, project_args in kappa3_cases:
    j1 = Judge(first_judge, *project_args)
    j2 = Judge(second_judge, *project_args)
    J = calculate_agreement_scores(j1, j2)
    unweighted_k = cohens_kappa(J)
    weighted_k = cohens_kappa(J, weighted=True)
    print(row_template % (unweighted_k, weighted_k))

CoffeeMaker & 1.000 & 1.000
JFreechart (0.6.0) & 1.000 & 1.000
JFreechart (0.7.1) & 1.000 & 1.000
JHotDraw (7.4.1) & 0.999 & 0.999


## Cohen's Kappa 3 WITH logs

In [56]:
# Same computation as the cell without logs, but cohens_kappa is called with
# log=True and every project is introduced by a banner.
# Each case is (banner text, first judge, second judge, project arguments).
banner_rule = '-'*80
kappa3_logged_cases = (
    ('\t\t CoffeeMaker',
     'leonardo.nole', 'rossella.linsalata', ('CoffeeMaker',)),
    ('\t\t JfreeChart 0.6.0',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.6.0')),
    ('\t\t JfreeChart 0.7.1',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.7.1')),
    ('\t\t JHotDraw',
     'leonardo.nole', 'rossella.linsalata', ('JHotDraw', '7.4.1')),
)

for banner_text, first_judge, second_judge, project_args in kappa3_logged_cases:
    print(banner_rule)
    print(banner_text)
    print(banner_rule)
    j1 = Judge(first_judge, *project_args)
    j2 = Judge(second_judge, *project_args)
    J = calculate_agreement_scores(j1, j2)
    unweighted_k = cohens_kappa(J, log=True)
    weighted_k = cohens_kappa(J, weighted=True, log=True)
    print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))

--------------------------------------------------------------------------------
		 CoffeeMaker
--------------------------------------------------------------------------------
J: 
 [[19  0  2]
 [ 0  0  0]
 [ 0  0 26]]
X: 
 [[ 0.40425532  0.          0.04255319]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.55319149]]
W: 
 [[ 0.  1.  1.]
 [ 1.  0.  1.]
 [ 1.  1.  0.]]
AG: 
 [[ 21.   0.  26.]
 [ 19.   0.  28.]]
J_sum: 
 [[ 0.44680851  0.          0.55319149]
 [ 0.40425532  0.          0.59574468]]
M: 
 [[ 0.18062472  0.          0.26618379]
 [ 0.          0.          0.        ]
 [ 0.2236306   0.          0.32956089]]
J: 
 [[19  0  2]
 [ 0  0  0]
 [ 0  0 26]]
X: 
 [[ 0.40425532  0.          0.04255319]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.55319149]]
W: 
 [[ 0.  1.  4.]
 [ 1.  0.  1.]
 [ 4.  1.  0.]]
AG: 
 [[ 21.   0.  26.]
 [ 19.   0.  28.]]
J_sum: 
 [[ 0.44680851  0.          0.55319149]
 [ 0.40425532  0.          0.59574468]]
M: 
 [

## Cohen's Kappa 5 WITHOUT logs

In [57]:
# Unweighted and weighted Cohen's kappa with k=5 passed to
# calculate_agreement_scores, one output row per project.
# Each case is (row template, first judge, second judge, project arguments).
kappa5_cases = (
    ('CoffeeMaker & %.3f & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('CoffeeMaker',)),
    ('JFreechart (0.6.0) & %.3f & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.6.0')),
    ('JFreechart (0.7.1) & %.3f & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.7.1')),
    ('JHotDraw (7.4.1) & %.3f & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('JHotDraw', '7.4.1')),
)

for row_template, first_judge, second_judge, project_args in kappa5_cases:
    j1 = Judge(first_judge, *project_args)
    j2 = Judge(second_judge, *project_args)
    J = calculate_agreement_scores(j1, j2, k=5)
    unweighted_k = cohens_kappa(J)
    weighted_k = cohens_kappa(J, weighted=True)
    print(row_template % (unweighted_k, weighted_k))

CoffeeMaker & 0.282 & 0.807
JFreechart (0.6.0) & 0.202 & 0.657
JFreechart (0.7.1) & 0.163 & 0.669
JHotDraw (7.4.1) & 0.585 & 0.564


## Cohen's Kappa 5 WITH logs

In [58]:
# k=5 variant of the logged run: banner per project, then both kappa values
# computed with log=True.
# Each case is (banner text, first judge, second judge, project arguments).
banner_rule = '-'*80
kappa5_logged_cases = (
    ('\t\t CoffeeMaker',
     'leonardo.nole', 'rossella.linsalata', ('CoffeeMaker',)),
    ('\t\t JFreechart 0.6.0',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.6.0')),
    ('\t\t JFreechart 0.7.1',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.7.1')),
    ('\t\t JHotDraw',
     'leonardo.nole', 'rossella.linsalata', ('JHotDraw', '7.4.1')),
)

for banner_text, first_judge, second_judge, project_args in kappa5_logged_cases:
    print(banner_rule)
    print(banner_text)
    print(banner_rule)
    j1 = Judge(first_judge, *project_args)
    j2 = Judge(second_judge, *project_args)
    J = calculate_agreement_scores(j1, j2, k=5)
    unweighted_k = cohens_kappa(J, log=True)
    weighted_k = cohens_kappa(J, weighted=True, log=True)
    print('Kappa: %.3f & %.3f' % (unweighted_k, weighted_k))

--------------------------------------------------------------------------------
		 CoffeeMaker
--------------------------------------------------------------------------------
J: 
 [[ 1  5  0  1  0]
 [ 0 13  0  0  1]
 [ 0  0  0  0  0]
 [ 0  0  0  0  5]
 [ 0  0  0 12  9]]
X: 
 [[ 0.0212766   0.10638298  0.          0.0212766   0.        ]
 [ 0.          0.27659574  0.          0.          0.0212766 ]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.10638298]
 [ 0.          0.          0.          0.25531915  0.19148936]]
W: 
 [[ 0.  1.  1.  1.  1.]
 [ 1.  0.  1.  1.  1.]
 [ 1.  1.  0.  1.  1.]
 [ 1.  1.  1.  0.  1.]
 [ 1.  1.  1.  1.  0.]]
AG: 
 [[  7.  14.   0.   5.  21.]
 [  1.  18.   0.  13.  15.]]
J_sum: 
 [[ 0.14893617  0.29787234  0.          0.10638298  0.44680851]
 [ 0.0212766   0.38297872  0.          0.27659574  0.31914894]]
M: 
 [[ 0.00316885  0.05703938  0.          0.04119511  0.04753282]
 [ 0.00633771  0.1

## Cohen's Kappa 2 WITHOUT logs

In [59]:
# NOTE: in this case (k=2) the weighted and unweighted kappa are exactly the
# same, so only the unweighted value is computed and printed.
# Each case is (row template, first judge, second judge, project arguments).
kappa2_cases = (
    ('CoffeeMaker & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('CoffeeMaker',)),
    ('JFreechart (0.6.0) & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.6.0')),
    ('JFreechart (0.7.1) & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.7.1')),
    ('JHotDraw (7.4.1) & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('JHotDraw', '7.4.1')),
)

for row_template, first_judge, second_judge, project_args in kappa2_cases:
    j1 = Judge(first_judge, *project_args)
    j2 = Judge(second_judge, *project_args)
    J = calculate_agreement_scores(j1, j2, k=2)
    unweighted_k = cohens_kappa(J)
    print(row_template % (unweighted_k,))

CoffeeMaker & 0.913
JFreechart (0.6.0) & 0.912
JFreechart (0.7.1) & 0.975
JHotDraw (7.4.1) & 0.686


## Cohen's Kappa 2 WITH logs

In [60]:
# NOTE: in this case (k=2) the weighted and unweighted kappa are exactly the
# same, so only the unweighted value is computed (here with log=True).
# Each case is (row template, first judge, second judge, project arguments).
kappa2_logged_cases = (
    ('CoffeeMaker & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('CoffeeMaker',)),
    ('JFreechart (0.6.0) & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.6.0')),
    ('JFreechart (0.7.1) & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.7.1')),
    ('JHotDraw (7.4.1) & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('JHotDraw', '7.4.1')),
)

for row_template, first_judge, second_judge, project_args in kappa2_logged_cases:
    j1 = Judge(first_judge, *project_args)
    j2 = Judge(second_judge, *project_args)
    J = calculate_agreement_scores(j1, j2, k=2)
    unweighted_k = cohens_kappa(J, log=True)
    print(row_template % (unweighted_k,))

J: 
 [[19  2]
 [ 0 26]]
X: 
 [[ 0.40425532  0.04255319]
 [ 0.          0.55319149]]
W: 
 [[ 0.  1.]
 [ 1.  0.]]
AG: 
 [[ 21.  26.]
 [ 19.  28.]]
J_sum: 
 [[ 0.44680851  0.55319149]
 [ 0.40425532  0.59574468]]
M: 
 [[ 0.18062472  0.26618379]
 [ 0.2236306   0.32956089]]
CoffeeMaker & 0.913
J: 
 [[ 47   0]
 [  8 406]]
X: 
 [[ 0.10195228  0.        ]
 [ 0.01735358  0.88069414]]
W: 
 [[ 0.  1.]
 [ 1.  0.]]
AG: 
 [[  47.  414.]
 [  55.  406.]]
J_sum: 
 [[ 0.10195228  0.89804772]
 [ 0.11930586  0.88069414]]
M: 
 [[ 0.0121635   0.08978877]
 [ 0.10714235  0.79090537]]
JFreechart (0.6.0) & 0.912
J: 
 [[ 65   0]
 [  3 520]]
X: 
 [[ 0.11054422  0.        ]
 [ 0.00510204  0.88435374]]
W: 
 [[ 0.  1.]
 [ 1.  0.]]
AG: 
 [[  65.  523.]
 [  68.  520.]]
J_sum: 
 [[ 0.11054422  0.88945578]
 [ 0.11564626  0.88435374]]
M: 
 [[ 0.01278403  0.09776019]
 [ 0.10286223  0.78659355]]
JFreechart (0.7.1) & 0.975
J: 
 [[808   0]
 [289 705]]
X: 
 [[ 0.44839068  0.        ]
 [ 0.16037736  0.39123196]]
W: 
 [[ 0.  1.]

## Calculate the Mean Precision of Judges' evaluations

In [8]:
from evaluations import mean_precision

## Mean Precision "Coherent"

In [61]:
# Mean precision of the two judges on the evaluations stored at index 2 of
# `three_codes_evaluations` (the "Coherent" group according to this section's
# heading). mean_precision returns three values, printed as one row per project.
# NOTE(review): the third value `f` is presumably an F-measure combining the
# two precisions -- confirm against evaluations.mean_precision.
coherent_index = 2
coherent_cases = (
    ('CoffeeMaker & %.3f & %.3f & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('CoffeeMaker',)),
    ('JFreeChart (0.6.0) & %.3f & %.3f & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.6.0')),
    ('JFreeChart (0.7.1) & %.3f & %.3f & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.7.1')),
    ('JHotDraw (7.4.1) & %.3f & %.3f & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('JHotDraw', '7.4.1')),
)

for row_template, first_judge, second_judge, project_args in coherent_cases:
    j1 = Judge(first_judge, *project_args)
    j2 = Judge(second_judge, *project_args)
    j1_eval = j1.three_codes_evaluations[coherent_index]
    j2_eval = j2.three_codes_evaluations[coherent_index]
    pj1, pj2, f = mean_precision(j1_eval, j2_eval)
    print(row_template % (pj1, pj2, f))

CoffeeMaker & 1.000 & 0.929 & 0.963
JFreeChart (0.6.0) & 0.981 & 1.000 & 0.990
JFreeChart (0.7.1) & 0.994 & 1.000 & 0.997
JHotDraw (7.4.1) & 0.708 & 1.000 & 0.829


## Mean Precision "Non Coherent"

In [62]:
# Mean precision of the two judges on the evaluations stored at index 0 of
# `three_codes_evaluations` (the "Non Coherent" group according to this
# section's heading). One output row per project.
# NOTE(review): the third value `f` is presumably an F-measure combining the
# two precisions -- confirm against evaluations.mean_precision.
non_coherent_index = 0
non_coherent_cases = (
    ('CoffeeMaker & %.3f & %.3f & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('CoffeeMaker',)),
    ('JFreeChart (0.6.0) & %.3f & %.3f & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.6.0')),
    ('JFreeChart (0.7.1) & %.3f & %.3f & %.3f',
     'leonardo.nole', 'antonio.petrone', ('JFreechart', '0.7.1')),
    ('JHotDraw (7.4.1) & %.3f & %.3f & %.3f',
     'leonardo.nole', 'rossella.linsalata', ('JHotDraw', '7.4.1')),
)

for row_template, first_judge, second_judge, project_args in non_coherent_cases:
    j1 = Judge(first_judge, *project_args)
    j2 = Judge(second_judge, *project_args)
    j1_eval = j1.three_codes_evaluations[non_coherent_index]
    j2_eval = j2.three_codes_evaluations[non_coherent_index]
    pj1, pj2, f = mean_precision(j1_eval, j2_eval)
    print(row_template % (pj1, pj2, f))

CoffeeMaker & 0.905 & 1.000 & 0.950
JFreeChart (0.6.0) & 1.000 & 0.855 & 0.922
JFreeChart (0.7.1) & 1.000 & 0.956 & 0.977
JHotDraw (7.4.1) & 1.000 & 0.737 & 0.848


### Check the Differences

In [5]:
from source_code_analysis.models import AgreementEvaluation, SoftwareProject
from django.contrib.auth.models import User

#### CoffeeMaker

In [6]:
# List the AgreementEvaluation primary keys of the CoffeeMaker methods on which
# the two judges gave opposite two-code evaluations.
j1 = Judge('leonardo.nole', 'CoffeeMaker')
j2 = Judge('rossella.linsalata', 'CoffeeMaker')

j1_evals = j1.two_codes_evaluations
j2_evals = j2.two_codes_evaluations

# neg_diff: method ids in j1's group 0 and in j2's group 1;
# pos_diff: method ids in j1's group 1 and in j2's group 0.
neg_diff = j1_evals[0].intersection(j2_evals[1])
pos_diff = j1_evals[1].intersection(j2_evals[0])

leo = User.objects.get(username='leonardo.nole')
ros = User.objects.get(username='rossella.linsalata')

# Output order: NEG for J1, NEG for J2, then POS for J1, POS for J2.
for diff_ids in (neg_diff, pos_diff):
    for judge_tag, evaluator in (('J1:', leo), ('J2:', ros)):
        evaluation_pks = []
        for meth_id in diff_ids:
            ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id,
                                                      evaluator=evaluator)
            evaluation_pks.append(str(ag_eval.pk))
        print(judge_tag, ','.join(evaluation_pks))

J1: 
J2: 
J1: 
J2: 


#### JFreeChart 0.6.0

In [7]:
# List the AgreementEvaluation primary keys of the JFreechart 0.6.0 methods on
# which the two judges gave opposite two-code evaluations.
j1 = Judge('leonardo.nole', 'JFreechart', '0.6.0')
j2 = Judge('antonio.petrone', 'JFreechart', '0.6.0')

j1_evals = j1.two_codes_evaluations
j2_evals = j2.two_codes_evaluations

# neg_diff: method ids in j1's group 0 and in j2's group 1;
# pos_diff: method ids in j1's group 1 and in j2's group 0.
neg_diff = j1_evals[0].intersection(j2_evals[1])
pos_diff = j1_evals[1].intersection(j2_evals[0])

leo = User.objects.get(username='leonardo.nole')
anto = User.objects.get(username='antonio.petrone')

# Output order: NEG for J1, NEG for J2, then POS for J1, POS for J2.
for diff_ids in (neg_diff, pos_diff):
    for judge_tag, evaluator in (('J1:', leo), ('J2:', anto)):
        evaluation_pks = []
        for meth_id in diff_ids:
            ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id,
                                                      evaluator=evaluator)
            evaluation_pks.append(str(ag_eval.pk))
        print(judge_tag, ','.join(evaluation_pks))

J1: 
J2: 
J1: 
J2: 


#### JFreeChart 0.7.1

In [8]:
# List the AgreementEvaluation primary keys of the JFreechart 0.7.1 methods on
# which the two judges gave opposite two-code evaluations.
j1 = Judge('leonardo.nole', 'JFreechart', '0.7.1')
j2 = Judge('antonio.petrone', 'JFreechart', '0.7.1')

j1_evals = j1.two_codes_evaluations
j2_evals = j2.two_codes_evaluations

# neg_diff: method ids in j1's group 0 and in j2's group 1;
# pos_diff: method ids in j1's group 1 and in j2's group 0.
neg_diff = j1_evals[0].intersection(j2_evals[1])
pos_diff = j1_evals[1].intersection(j2_evals[0])

leo = User.objects.get(username='leonardo.nole')
anto = User.objects.get(username='antonio.petrone')

# Output order: NEG for J1, NEG for J2, then POS for J1, POS for J2.
for diff_ids in (neg_diff, pos_diff):
    for judge_tag, evaluator in (('J1:', leo), ('J2:', anto)):
        evaluation_pks = []
        for meth_id in diff_ids:
            ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id,
                                                      evaluator=evaluator)
            evaluation_pks.append(str(ag_eval.pk))
        print(judge_tag, ','.join(evaluation_pks))

J1: 
J2: 
J1: 
J2: 


#### JHotDraw 7.4.1

In [10]:
# List the AgreementEvaluation primary keys of the JHotDraw 7.4.1 methods on
# which the two judges gave opposite two-code evaluations.
j1 = Judge('leonardo.nole', 'JHotDraw', '7.4.1')
j2 = Judge('rossella.linsalata', 'JHotDraw', '7.4.1')

j1_evals = j1.two_codes_evaluations
j2_evals = j2.two_codes_evaluations

# neg_diff: method ids in j1's group 0 and in j2's group 1;
# pos_diff: method ids in j1's group 1 and in j2's group 0.
neg_diff = j1_evals[0].intersection(j2_evals[1])
pos_diff = j1_evals[1].intersection(j2_evals[0])

leo = User.objects.get(username='leonardo.nole')
# The second JHotDraw judge is rossella.linsalata. The user is bound to `ros`
# (not `anto`) so the notebook-level `anto` is not silently rebound to a
# different person.
ros = User.objects.get(username='rossella.linsalata')

# Output order: NEG for J1, NEG for J2, then POS for J1, POS for J2.
for diff_ids in (neg_diff, pos_diff):
    for judge_tag, evaluator in (('J1:', leo), ('J2:', ros)):
        evaluation_pks = []
        for meth_id in diff_ids:
            ag_eval = AgreementEvaluation.objects.get(reference_method__id=meth_id,
                                                      evaluator=evaluator)
            evaluation_pks.append(str(ag_eval.pk))
        print(judge_tag, ','.join(evaluation_pks))

J1: 
J2: 
J1: 
J2: 


### TEST: Lexical overlap considering ONLY the Intersection agreement

In [33]:
from collections import defaultdict

from sklearn.feature_extraction.text import TfidfVectorizer


# Judge pair for each project, indexed by the project's position in `projects`.
judges_combinations = (('leonardo.nole', 'rossella.linsalata'),
                       ('leonardo.nole', 'rossella.linsalata'),
                       ('leonardo.nole', 'antonio.petrone'),
                       ('leonardo.nole', 'antonio.petrone'),)

CODES_Labels = ('NC', 'DK', 'CO')
stats_results = defaultdict(list)

# NOTE(review): `projects`, `np`, `median`, `var` and `std` are not defined in
# this cell; presumably they come from the preamble / an earlier cell -- confirm.
for pno, project in enumerate(projects):

    # Only the project at index 1 is processed by this cell.
    # NOTE(review): looks like a leftover filter from an interactive run -- confirm.
    if pno != 1:
        continue

    # Get Methods
    code_methods = project.code_methods.all()

    # Populate the document collection: for every method, its normalized comment
    # goes in an even row and its normalized code in the following odd row.
    document_collection = list()
    method_ids_map = dict()  # method.id --> row index of its comment in the Tfidf matrix
    for mno, method in enumerate(code_methods):
        clexicon_info = method.lexical_info
        document_collection.append(clexicon_info.normalized_comment)
        document_collection.append(clexicon_info.normalized_code)
        method_ids_map[method.id] = mno * 2

    vectorizer = TfidfVectorizer(input='content', sublinear_tf=True, lowercase=False)
    tfidf_values = vectorizer.fit_transform(document_collection)

    j1_usrname, j2_usrname = judges_combinations[pno]
    j1 = Judge(j1_usrname, project.name, project.version)
    j2 = Judge(j2_usrname, project.name, project.version)

    j1_evals = j1.three_codes_evaluations
    j2_evals = j2.three_codes_evaluations

    project_stats = list()

    # Methods on which both judges chose the same code, over the three codes.
    method_ids = list()
    for code in range(3):
        method_ids.extend(j1_evals[code].intersection(j2_evals[code]))

    # Comment-vs-code similarity: dot product of the two Tfidf rows of a method.
    cosine_sim_vals = list()
    for mid in method_ids:
        i = method_ids_map[mid]
        # The original used print(...) as the assertion message; print returns
        # None, so a failure would have raised AssertionError(None).
        assert i % 2 == 0, 'odd Tfidf row index {} for method {}'.format(i, mid)
        dotprod = tfidf_values[i].dot(tfidf_values[i+1].T)[0,0]
        cosine_sim_vals.append(dotprod)
        if dotprod == 1.0:
            print('MID: ', mid)

    vals = np.array(cosine_sim_vals)
    if vals.size == 0:
        # min()/max() raise ValueError on an empty array; report and move on.
        print('{proj} ({ver}): no methods to compare'.format(proj=project.name.title(),
                                                             ver=project.version))
        continue

    print('{proj} ({ver}) & {total} & {min:.3} & {max:.3} & {median:.3} & {mean:.3} & {variance:.3} & {devstd:.3} \\\\'.format(
        proj=project.name.title(),
        ver=project.version,
        total=vals.size,
        min=vals.min(),
        max=vals.max(),
        median=median(vals),
        mean=vals.mean(),
        variance=var(vals),
        devstd=std(vals)))

MID:  980
Jhotdraw (7.4.1) & 2189 & 0.0 & 1.0 & 0.408 & 0.397 & 0.0635 & 0.252 \\


### TEST: Lexical overlap considering ONLY the Methods where Judges Did not Agree on their Coherence Value

In [30]:
from collections import defaultdict

from sklearn.feature_extraction.text import TfidfVectorizer


# Judge pair for each project, indexed by the project's position in `projects`.
judges_combinations = (('leonardo.nole', 'rossella.linsalata'),
                       ('leonardo.nole', 'rossella.linsalata'),
                       ('leonardo.nole', 'antonio.petrone'),
                       ('leonardo.nole', 'antonio.petrone'),)

CODES_Labels = ('NC', 'DK', 'CO')
stats_results = defaultdict(list)

# NOTE(review): `projects`, `np`, `median`, `var` and `std` are not defined in
# this cell; presumably they come from the preamble / an earlier cell -- confirm.
for pno, project in enumerate(projects):

    # Get Methods
    code_methods = project.code_methods.all()

    # Populate the document collection: for every method, its normalized comment
    # goes in an even row and its normalized code in the following odd row.
    document_collection = list()
    method_ids_map = dict()  # method.id --> row index of its comment in the Tfidf matrix
    for mno, method in enumerate(code_methods):
        clexicon_info = method.lexical_info
        document_collection.append(clexicon_info.normalized_comment)
        document_collection.append(clexicon_info.normalized_code)
        method_ids_map[method.id] = mno * 2

    vectorizer = TfidfVectorizer(input='content', sublinear_tf=True, lowercase=False)
    tfidf_values = vectorizer.fit_transform(document_collection)

    j1_usrname, j2_usrname = judges_combinations[pno]
    j1 = Judge(j1_usrname, project.name, project.version)
    j2 = Judge(j2_usrname, project.name, project.version)

    j1_evals = j1.three_codes_evaluations
    j2_evals = j2.three_codes_evaluations

    project_stats = list()

    # Methods on which both judges chose the same code, over the three codes.
    # Held in a set: it is only used for membership tests below, and a list
    # made the loop over all methods quadratic.
    method_ids = set()
    for code in range(3):
        method_ids.update(j1_evals[code].intersection(j2_evals[code]))

    # Comment-vs-code similarity for every method NOT in the agreed set.
    cosine_sim_vals = list()
    for mid, i in method_ids_map.items():
        if mid not in method_ids:
            cosine_sim_vals.append(tfidf_values[i].dot(tfidf_values[i+1].T)[0,0])

    vals = np.array(cosine_sim_vals)
    if vals.size == 0:
        # min()/max() raise ValueError on an empty array (e.g. the judges agreed
        # on every method); report and move on to the next project.
        print('{proj} ({ver}): no methods to compare'.format(proj=project.name.title(),
                                                             ver=project.version))
        continue

    print('{proj} ({ver}) & {total} & {min:.3} & {max:.3} & {median:.3} & {mean:.3} & {variance:.3} & {devstd:.3} \\\\'.format(
        proj=project.name.title(),
        ver=project.version,
        total=vals.size,
        min=vals.min(),
        max=vals.max(),
        median=median(vals),
        mean=vals.mean(),
        variance=var(vals),
        devstd=std(vals)))

Coffeemaker (1.0) & 2 & 0.628 & 0.703 & 0.665 & 0.665 & 0.0014 & 0.0375 \\
Jhotdraw (7.4.1) & 504 & 0.0 & 0.927 & 0.396 & 0.397 & 0.0612 & 0.247 \\
Jfreechart (0.6.0) & 14 & 0.0 & 0.582 & 0.266 & 0.251 & 0.0553 & 0.235 \\
Jfreechart (0.7.1) & 11 & 0.0 & 0.258 & 0.0 & 0.0792 & 0.0112 & 0.106 \\
