# AMT Label for Sentences of Movie Reviews

## Data Results

The results of all labeling batches were saved in a single CSV file that includes a document ID and a sentence ID (both obtained by splitting the docid).

## Objective

We want to determine how resolving conflicts will affect the label distribution of the results 

## Methods

### 1. Default to Neutral

This method resolves every conflict by defaulting to the neutral label: whenever the two answers disagree, whether or not one of them is neutral, the label becomes neutral.

### 2. Default to Neutral with Coin Tie-breaker

This method returns neutral if at least one answer is neutral; if neither answer is neutral, it flips a coin to choose the label.

### 3. Default to Label with Coin Tie-breaker

This method returns the non-neutral label when one of the two conflicting answers is neutral; when both answers are non-neutral and disagree, it flips a coin to choose between them.


In [1]:
## Imports 
%matplotlib inline

# Local path to the 'structured' code directory; it is not referenced in the
# cells shown here.
STRUCTURED = '/Users/maru/MyCode/structured'
# Directory that holds the AMT results CSV.  DATA is assigned twice: the
# '/Users/maru/...' value overrides the 'C:/Users/...' one, so only the last
# assignment takes effect.
DATA= 'C:/Users/mramire8/Dropbox/My Papers/Structured Reading/Code/Data/sample3_v3_merge/'
DATA= '/Users/maru/Dropbox/My Papers/Structured Reading/Code/Data/sample3_v3_merge/'


import sys
import os

import numpy as np
import nltk
from sklearn import metrics
import matplotlib.pyplot as plt
import matplotlib as mpl

# Select matplotlib's 'bmh' style sheet for any plots drawn after this point.
mpl.style.use('bmh')

In [2]:
def load_data_results(filename):
    """Load a CSV file into a column-oriented mapping.

    The first row of the file supplies the column names (the
    ``csv.DictReader`` default).  Every following row appends one value to
    each column's list, so ``results[column][i]`` is the value of ``column``
    in the i-th data row.

    :param filename: path of a comma-delimited CSV file that uses ``"`` as
        the quote character.
    :return: a ``collections.defaultdict`` mapping each column name to the
        list of its values in file order, as produced by ``csv.DictReader``.
        Looking up a column that is not present yields an empty list.
    """
    import csv
    import sys
    from collections import defaultdict

    results = defaultdict(list)
    # The csv module expects a binary-mode file on Python 2 but a text-mode
    # file opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(filename, 'rb')
    else:
        csvfile = open(filename, 'r', newline='')
    with csvfile:
        sents = csv.DictReader(csvfile, delimiter=',', quotechar='"')
        for row in sents:
            for k, v in row.items():
                results[k].append(v)

    return results

# Load the AMT labeling results as a mapping: column name -> list of values.
amt = load_data_results(DATA + "amt.results.csv")


In [3]:
# Show the column names available in the loaded results, alphabetically.
print "\n".join(sorted(amt.keys()))

Agreement
Answer
Answer1
Answer2
DOCID
Date
HITID
ID
SENTID
TARGET
TEXT
Worker1
Worker2


In [4]:
def label_distribution(targets, label=None):
    '''Count how many times each distinct value occurs in targets.

    The label argument is accepted but not used.  The result is a
    collections.Counter keyed by the values found in targets.
    '''
    import collections
    return collections.Counter(targets)

def answer_to_label(ans):
    '''Translate an answer into its numeric label code.

    Integers are returned untouched.  Any other value is searched for the
    words 'Negative', 'Positive' and 'Neutral', in that order; the first one
    found gives the code 0, 1 or 2 respectively.  If none is found the code
    is 3.
    '''
    if isinstance(ans, int):
        return ans
    # enumerate() supplies the codes 0, 1, 2 in the same priority order.
    for code, keyword in enumerate(('Negative', 'Positive', 'Neutral')):
        if keyword in ans:
            return code
    return 3

def to_label(targets):
    '''Return a list with the answer_to_label code of every item in targets.'''
    codes = []
    for answer in targets:
        codes.append(answer_to_label(answer))
    return codes
    
def print_dist(dist):
    '''Format a label distribution as text, one line per label.

    dist maps each label to its count (for example the Counter returned by
    label_distribution).  Each line reads "label: count - fraction", where
    fraction is the count divided by the sum of all counts, shown with three
    decimals.  An empty dist gives an empty string.
    '''
    # The total is the same for every entry, so compute it once instead of
    # once per formatted line.
    total = sum(dist.values())
    return "\n".join("%s: %s - %.3f" % (k, v, 1. * v / total) for k, v in dist.items())


def to_answers(data, conflict_solver, rnd):
    '''Resolve every (Answer1, Answer2) pair in data into a single label.

    data must provide the 'Answer1' and 'Answer2' columns as parallel
    sequences.  Each pair is passed to solve_conflict together with
    conflict_solver and rnd, in column order.  The resolved labels are
    returned as a numpy array.
    '''
    pairs = zip(data['Answer1'], data['Answer2'])
    resolved = [solve_conflict(first, second, conflict_solver, rnd)
                for first, second in pairs]
    return np.array(resolved)

def solve_conflict(a1, a2, conflict_fn, rnd):
    '''Turn two answers into one label code.

    The answers are compared as given, before any conversion.  If they
    differ, both are converted with answer_to_label and the result of
    conflict_fn(code1, code2, rnd) is returned.  If they are equal, the
    answer_to_label code of a1 is returned and conflict_fn is not called.
    '''
    if a1 != a2:
        first_code, second_code = answer_to_label(a1), answer_to_label(a2)
        return conflict_fn(first_code, second_code, rnd)
    return answer_to_label(a1)

def solver_allneutral(a1, a2, rnd):
    '''Conflict solver: any disagreement becomes neutral.

    Returns a1 when both codes are equal; otherwise returns the code that
    answer_to_label gives for 'Neutral'.  rnd is not used; it is accepted so
    that this solver can be called the same way as the others.
    '''
    if a1 == a2:
        return a1
    return answer_to_label('Neutral')

def solver_neutral(a1, a2, rnd):
    '''Conflict solver: neutral wins, otherwise flip a coin.

    If the two codes add up to 2 or more, the neutral code 2 is returned.
    Otherwise rnd.randint(2) is returned (0 or 1 with a
    numpy.random.RandomState).  For two different codes taken from 0, 1, 2,
    a sum below 2 happens only for the negative/positive pair.
    '''
    total = a1 + a2
    if total >= 2:
        # at least one of the answers is neutral
        return 2
    # negative vs. positive: decide by coin flip
    return rnd.randint(2)

def solver_label(a1, a2, rnd):
    '''Conflict solver: prefer a non-neutral label, otherwise flip a coin.

    If the two codes add up to 2 or more, the smaller code is returned; for
    a pair that contains the neutral code 2 that is the other answer.
    Otherwise rnd.randint(2) is returned (0 or 1 with a
    numpy.random.RandomState).
    '''
    if a1 + a2 >= 2:
        # a neutral answer is present: keep the other label
        return min(a1, a2)
    # negative vs. positive: decide by coin flip
    return rnd.randint(2)


In [5]:
def accuracy(true, pred):
    '''Accuracy of pred against true, ignoring neutral predictions.

    Positions where pred is 2 or greater are dropped from both sequences
    before scoring with sklearn's metrics.accuracy_score.

    true and pred are parallel sequences of label codes; lists are accepted
    as well as numpy arrays.
    '''
    # Coerce to arrays so the comparison and the boolean-mask indexing below
    # also work for plain lists.
    true = np.asarray(true)
    pred = np.asarray(pred)
    nonneutral = pred < 2
    return metrics.accuracy_score(true[nonneutral], pred[nonneutral])


## Label Distribution 

We tested the conflict resolution methods and computed the label distribution. Base is the original distribution before resolving conflicts. 

In [7]:
# print amt['Answer1']
# Label codes of the 'Answer' column; `base` is not used again in the cells
# shown here.
base =  to_label(amt['Answer'])
print "\n === True Distribution ==="
# Distribution of the raw TARGET values (strings as read from the CSV).
c = label_distribution(amt['TARGET'])
print print_dist(c)
print "\n=== Base distribution ==="
# NOTE: `true` temporarily holds the 'Answer' label codes here; it is rebound
# to the TARGET labels a few lines below.
true = to_label(amt['Answer'])
print print_dist(label_distribution(true))
# Every solver run gets a freshly seeded RandomState, so any coin flips are
# reproducible.  solver_allneutral never draws from it.
rnd = np.random.RandomState(123)
allneu = to_answers(amt, solver_allneutral, rnd)

# TARGET values are converted to int first, so to_label passes them through
# unchanged; this is the reference labeling used by accuracy() below.
true = np.array(to_label(int(a) for a in amt['TARGET']))

print "\n== All neutral =="
print print_dist(label_distribution(allneu))
print "Accuracy:", accuracy(true, allneu)

rnd = np.random.RandomState(555)
neu = to_answers(amt, solver_neutral, rnd)
print "\n== Neutral-Coin neutral =="
print print_dist(label_distribution(neu))
print "Accuracy:", accuracy(true, neu)

rnd = np.random.RandomState(123)
lbls = to_answers(amt, solver_label, rnd)
print "\n== Label-Coin neutral =="
print print_dist(label_distribution(lbls))
print "Accuracy:", accuracy(true, lbls)



 === True Distribution ===
1: 1460 - 0.512
0: 1392 - 0.488

=== Base distribution ===
0: 682 - 0.239
1: 702 - 0.246
2: 719 - 0.252
3: 749 - 0.263

== All neutral ==
0: 682 - 0.239
1: 702 - 0.246
2: 1468 - 0.515
Accuracy: 0.865606936416

== Neutral-Coin neutral ==
0: 736 - 0.258
1: 753 - 0.264
2: 1363 - 0.478
Accuracy: 0.840161182001

== Label-Coin neutral ==
0: 1076 - 0.377
1: 1057 - 0.371
2: 719 - 0.252
Accuracy: 0.791842475387


## Confusion Matrix

Confusion matrix of the labels after resolving conflicts, for each method.

In [8]:
# Confusion Matrix of each method
# print amt['Answer1']
def print_cm(cm):
    '''Print a confusion matrix as a right-aligned text table.

    cm is an iterable of rows.  Rows are labelled 'neg', 'pos', 'neu' in
    that order and each row is expected to hold one value per label.  Values
    are shown with two decimals.  Nothing is returned.
    '''
    labels = ['neg', 'pos', 'neu']

    # One 9-character right-aligned field per label, plus a leading field
    # for the row name.
    row_format = "{:>9}" * (len(labels) + 1)
    # print(...) with a single argument produces the same output under
    # Python 2 and Python 3.
    print(row_format.format("", *labels))
    for lbl, row in zip(labels, cm):
        print(row_format.format(lbl, *["{:.2f}".format(r) for r in row]))

# For each resolution method, print the confusion matrix of the TARGET labels
# (rows) against the resolved labels (columns), first as raw counts and then
# divided by the grand total so every entry is a fraction of all sentences.
# labels=[0,1,2] fixes the row/column order to neg, pos, neu.
print "\n=== CONFUSION MATRIX ==="
print "Predicted -->\n"
print "\n{0:^45}".format("== All neutral ==")
cm = metrics.confusion_matrix(true, allneu, labels=[0,1,2])
print_cm(cm)
print 
print_cm(1. * cm / cm.sum())

print "\n{0:^45}".format("== Neutral-Coin neutral ==")
cm = metrics.confusion_matrix(true, neu, labels=[0,1,2])
print_cm(cm)
print 
print_cm(1. * cm / cm.sum())

cm = metrics.confusion_matrix(true, lbls, labels=[0,1,2])
print "\n{0:^45}".format("== Label-Coin neutral ==")
print_cm(cm)
print 
print_cm(1. * cm / cm.sum())



=== CONFUSION MATRIX ===
Predicted -->


              == All neutral ==              
               neg      pos      neu
      neg   607.00   111.00   674.00
      pos    75.00   591.00   794.00
      neu     0.00     0.00     0.00

               neg      pos      neu
      neg     0.21     0.04     0.24
      pos     0.03     0.21     0.28
      neu     0.00     0.00     0.00

         == Neutral-Coin neutral ==          
               neg      pos      neu
      neg   642.00   144.00   606.00
      pos    94.00   609.00   757.00
      neu     0.00     0.00     0.00

               neg      pos      neu
      neg     0.23     0.05     0.21
      pos     0.03     0.21     0.27
      neu     0.00     0.00     0.00

          == Label-Coin neutral ==           
               neg      pos      neu
      neg   870.00   238.00   284.00
      pos   206.00   819.00   435.00
      neu     0.00     0.00     0.00

               neg      pos      neu
      neg     0.31     0.08     0.10
 

In [24]:
from collections import defaultdict

# ordered[DOCID][sentence key] -> sentence TEXT, filled in ID-sorted order.
ordered = defaultdict(lambda: {})

# argsort over the ID strings; the printed result ends at 'D9S6', i.e. the
# order is lexicographic rather than numeric.
order = np.argsort(amt['ID'])
print np.array(amt['ID'])[order]

for i in order:
    # [1:] drops the first character of SENTID (presumably the leading 'S',
    # leaving the sentence number as a string -- TODO confirm).
    ordered[amt['DOCID'][i]][amt['SENTID'][i][1:]] = amt['TEXT'][i]


['D0S0' 'D0S1' 'D0S2' ..., 'D9S4' 'D9S5' 'D9S6']


In [30]:
# Sentence keys of document 'D1', converted to int and sorted numerically.
# NOTE(review): the recorded output below shows strings in lexicographic
# order, so it appears to come from an earlier version of this cell.
sorted(int(k) for k in ordered['D1'].keys())

['0', '1', '10', '11', '12', '13', '2', '3', '4', '5', '6', '7', '8', '9']

In [53]:
# Sentence keys of document 'D3', kept as strings but ordered by their
# integer value.
sorted(ordered['D3'], key=lambda x: int(x))

['0', '1', '2', '3', '4', '5', '6', '7']

In [63]:
# Sentence texts of document 'D1', listed in numeric order of their keys.
[ordered['D1'][txt] for txt in sorted(ordered['D1'], key=lambda x: int(x))]

["Usually musicals in the 1940's were of a set formula - and if you studied films you know what I'm talking about - a certain running lenghth, very 'showy' performances that were great on the surface but never got into the real personalities of the characters etc.",
 "THIS ONE IS DIFFERENT - and light years better and well worth it's nomination for best picture of the year - 1945 (although had no chance of beating the eventual winner - Lost Weekend).",
 "Gene Kelly was probably in the best form of his career - yes I know about 'American in Paris' and 'Singing in the Rain'.",
 'This one is different.',
 "He really gets into his character of a 'sea wolf' thinking (at first) that 'picking up any girl while on leave' is nothing more than a lark.",
 "And if you had to make up a 'story' to get her - so be it - until.",
 "Sort of like the Music Man when he gets 'his foot caught in the door'.",
 "The eventual hilarity of the film stems mostly from his and his new pal (Sinatra)'s attempt to mak

In [69]:
# Number of distinct DOCID values in the results.
len(np.unique(amt['DOCID']))

250