In [30]:
import numpy as np

In [1]:
# Table rows pasted from LaTeX source: a feature-set name followed by two
# numeric columns, separated by '&'. The parsing cell reads the first number as
# the `cb` count and the second as the `sc` count.
# This is not a raw string, so each trailing `\\` is a single backslash at runtime.
latex = """
BOW Unigrams 				& 1,617.8  & 	1,560.6 \\
BOW Bigrams 					& 20,957.4 & 	19,481.4 \\
BOW Trigrams 				& 46,318.2 & 	43,600.4 \\

Positional Unigrams 			&  8,305.0 & 	7,878.8 \\
Positional Bigrams			& 23,740.4 &  	23,424.8 \\
Positional Trigrams 			& 18,619.8 & 	20,119.2 \\

Positional Stemmed Unigrams 	& 6,486.6 	& 	6,363.4 \\
Positional Stemmed Bigrams 	& 23,263.6 	& 	23,344.4 \\
Positional Stemmed Trigrams	& 18,941.6 	& 	20,337.0 \\

BOW POS 					    & 40.6 		& 	41.6 \\
Positional POS				& 321.8 		& 	323.0 \\

Dependency Parsed Relations	& 4,848.0 	& 	4,711.2 \\
Brown Cluster Labels 		& 512.8 		& 	493.2 \\
"""

In [51]:
# Parse every non-blank table row into a feature-set name and its two counts.
cb, sc = [], []                        # counts in table order, one list per numeric column
cb_key2count, sc_key2count = {}, {}    # the same counts, keyed by feature-set name

keys = []                              # feature-set names in table order
for line in latex.split("\n"):
    row = line.strip()
    if not row:
        # Skip blank and whitespace-only separator lines.
        continue

    # Remove the trailing LaTeX row terminator only when it is actually present,
    # so a row written without it does not silently lose its last digit.
    cells = row.rstrip("\\").split("&")
    key = cells[0].strip()
    numbers = [float(cell.strip().replace(",", "")) for cell in cells[1:]]

    # Exactly two numeric columns are expected; any other count raises ValueError here.
    cb_nos, sc_nos = numbers

    keys.append(key)
    cb.append(cb_nos)
    sc.append(sc_nos)
    cb_key2count[key] = cb_nos
    sc_key2count[key] = sc_nos

    print(key, "\t", "|".join(map(str, numbers)))

BOW Unigrams 	 1617.8|1560.6
BOW Bigrams 	 20957.4|19481.4
BOW Trigrams 	 46318.2|43600.4
Positional Unigrams 	 8305.0|7878.8
Positional Bigrams 	 23740.4|23424.8
Positional Trigrams 	 18619.8|20119.2
Positional Stemmed Unigrams 	 6486.6|6363.4
Positional Stemmed Bigrams 	 23263.6|23344.4
Positional Stemmed Trigrams 	 18941.6|20337.0
BOW POS 	 40.6|41.6
Positional POS 	 321.8|323.0
Dependency Parsed Relations 	 4848.0|4711.2
Brown Cluster Labels 	 512.8|493.2


In [35]:
# Feature-set names in the order they were parsed from the table.
keys

['BOW Unigrams',
 'BOW Bigrams',
 'BOW Trigrams',
 'Positional Unigrams',
 'Positional Bigrams',
 'Positional Trigrams',
 'Positional Stemmed Unigrams',
 'Positional Stemmed Bigrams',
 'Positional Stemmed Trigrams',
 'BOW POS',
 'Positional POS',
 'Dependency Parsed Relations',
 'Brown Cluster Labels']

## Compute Total Feature Counts

In [81]:
# Grand total of the parsed counts over every feature set, one per column.
sum_cb, sum_sc = np.sum(cb), np.sum(sc)
(sum_cb, sum_sc)

(173973.59999999998, 171679.00000000003)

## Compute Percentage Reduction in Feature Counts

In [82]:
def correct_keys(k):
    """Expand the abbreviated prefixes in a key to their full spelling.

    Every "Pos." becomes "Positional" and every "Dep." becomes "Dependency";
    all other text is returned unchanged.
    """
    expanded = k
    # Applied in this order, one substitution after the other.
    for short, full in (("Pos.", "Positional"), ("Dep.", "Dependency")):
        expanded = expanded.replace(short, full)
    return expanded

# Feature sets listed for the cb column, written with abbreviated prefixes and
# expanded to the full table key names by correct_keys.
cb_selected = [
    correct_keys(name)
    for name in (
        "Pos. Stemmed Unigrams",
        "Pos. Stemmed Bigrams",
        "BOW Unigrams",
        "Brown Cluster Labels",
        "Pos. Stemmed Trigrams",
        "Dep. Parsed Relations",
    )
]

# Feature sets listed for the sc column, expanded the same way.
sc_selected = [
    correct_keys(name)
    for name in (
        "Pos. Stemmed Unigrams",
        "Pos. Stemmed Bigrams",
        "Brown Cluster Labels",
        "BOW Unigrams",
        "Dep. Parsed Relations",
        "Pos. Unigrams",
    )
]

In [83]:
# Every selected cb key must be unique and must match a key parsed from the table.
assert len(set(cb_selected).intersection(keys)) == len(cb_selected), \
    "cb_selected has duplicate or unknown keys; unknown: {unknown}".format(
        unknown=sorted(set(cb_selected).difference(keys)))

In [84]:
# Every selected sc key must be unique and must match a key parsed from the table.
assert len(set(sc_selected).intersection(keys)) == len(sc_selected), \
    "sc_selected has duplicate or unknown keys; unknown: {unknown}".format(
        unknown=sorted(set(sc_selected).difference(keys)))

In [86]:
def total_selected(selected_keys, tally):
    """Add up the tally entries for the given keys.

    selected_keys: iterable of keys to include in the total.
    tally: mapping from key to its numeric count.

    Returns the accumulated total (integer 0 when no keys are given).
    Raises AssertionError naming the first key that is absent from tally.
    """
    running = 0
    for name in selected_keys:
        assert name in tally, "Key: {key} is missing".format(key=name)
        running += tally[name]
    return running

In [104]:
# Re-display the grand totals; they are the denominators of the ratios computed below.
sum_cb, sum_sc

(173973.59999999998, 171679.00000000003)

In [102]:
# Summed count of the selected cb feature sets, and its share of the cb grand total.
total_sel_cb = total_selected(selected_keys=cb_selected, tally=cb_key2count)
pct_cb = total_sel_cb / sum_cb

Total: 55670.40	 Percent Inc. :0.32	 Percent Red. :0.68


In [105]:
# Summed count of the selected sc feature sets, and its share of the sc grand total.
total_sel_sc = total_selected(selected_keys=sc_selected, tally=sc_key2count)
pct_sc = total_sel_sc / sum_sc

In [111]:
# One shared template for both report lines. The "Percent" values are printed as
# fractions of 1: the included share and its complement.
summary_fmt = "All: {all:.1f}\t Selected: {total:.2f}\t Percent Inc. :{pct_inc:.2f}\t Percent Red. :{pct_red:.2f}"

for heading, grand_total, selected_total, share in (
    ("CB", sum_cb, total_sel_cb, pct_cb),
    ("\nSC", sum_sc, total_sel_sc, pct_sc),
):
    print(heading)
    print(summary_fmt.format(all=grand_total, total=selected_total,
                             pct_inc=share, pct_red=1 - share))

CB
All: 173973.6	 Selected: 55670.40	 Percent Inc. :0.32	 Percent Red. :0.68

SC
All: 171679.0	 Selected: 44351.60	 Percent Inc. :0.26	 Percent Red. :0.74
