In [1126]:
import os
import sys

import pandas as pd
from IPython.display import display, Markdown, display_html

# Make the project's ../src directory (resolved against the current working
# directory) importable so the `settings` and `utils` imports below resolve.
# Needs `os` to be imported at the top of this cell.
module_path = os.path.abspath('../src')
if module_path not in sys.path:
    # Guarded so that re-running the cell does not add duplicate entries.
    sys.path.append(module_path)
import settings
import utils

In [1063]:
def display_results_summary(methods=None):
    """Display the phase 1 and phase 2 statistics of every topic.

    For each topic returned by ``utils.get_topics()`` this renders a Markdown
    heading with the readable topic name, the phase 1 table, the output of
    ``utils.plot_phase1_stats``, the phase 2 table and a horizontal rule.

    Parameters
    ----------
    methods : optional
        Methods forwarded to ``utils.get_all_stats``. When omitted,
        ``settings.STAT_METHODS`` is used, as ``phase1_to_latex`` does.

    Returns
    -------
    None
    """
    # This function used to read a module-level ``stat_methods`` that no cell
    # of the notebook defines, so it failed with NameError on a fresh kernel.
    if methods is None:
        methods = settings.STAT_METHODS
    for topic in utils.get_topics():
        display(Markdown("# {} ".format(utils.get_readable_topic(topic))))
        phase1, phase2 = utils.get_all_stats(topic, methods)
        display(Markdown("## Phase 1 "))
        display(phase1)
        utils.plot_phase1_stats(topic)
        display(Markdown("## Phase 2 "))
        display(phase2)
        display(Markdown('---'))

def display_features(methods=None):
    """Display the feature summary of every method for every topic.

    For each topic returned by ``utils.get_topics()`` this renders a Markdown
    heading with the readable topic name, then one sub-heading and one
    ``utils.summarise_feats(topic, method)`` table per method, and finally a
    horizontal rule.

    Parameters
    ----------
    methods : optional
        Methods to summarise. When omitted, the module-level ``feat_methods``
        is used, which keeps existing ``display_features()`` calls unchanged.

    Returns
    -------
    None
    """
    if methods is None:
        # NOTE(review): ``feat_methods`` is not defined by any visible cell of
        # this notebook; it must exist in the kernel before calling without
        # arguments. Pass ``methods`` explicitly to avoid the hidden state.
        methods = feat_methods
    for topic in utils.get_topics():
        display(Markdown("# {} ".format(utils.get_readable_topic(topic))))
        for method in methods:
            display(Markdown("### {}".format(method)))
            display(utils.summarise_feats(topic, method))
        display(Markdown('---'))
        
def phase1_to_latex():
    """Render the phase 1 / phase 2 statistics of each topic in the notebook.

    Walks over ``utils.get_topics()`` and, per topic, shows a Markdown title,
    the phase 1 table, the ``utils.plot_phase1_stats`` output, the phase 2
    table and a closing horizontal rule. Statistics are computed for
    ``settings.STAT_METHODS``. Despite the name, nothing is converted to
    LaTeX here. Returns None.
    """
    def show_markdown(text):
        # Small shortcut for the repeated display(Markdown(...)) pairing.
        display(Markdown(text))

    for name in utils.get_topics():
        show_markdown("# {} ".format(utils.get_readable_topic(name)))
        stats_phase1, stats_phase2 = utils.get_all_stats(name, settings.STAT_METHODS)
        show_markdown("## Phase 1 ")
        display(stats_phase1)
        utils.plot_phase1_stats(name)
        show_markdown("## Phase 2 ")
        display(stats_phase2)
        show_markdown('---')

In [1163]:
# Build one summary row per method: pool the last three stat columns over all
# topics, then reduce every column with utils.get_confidence_interval.
method_rows = []
for method in settings.STAT_METHODS:
    # Collect the per-topic frames and concatenate once. Growing a frame with
    # DataFrame.append inside a loop copies it on every iteration, and the
    # method was removed in pandas 2.0.
    method_stats_pd = pd.concat(
        [utils.load_results_stats(topic, method)[0].iloc[:, -3:]
         for topic in settings.TOPICS]
    )
    method_rows.append(
        method_stats_pd.apply(utils.get_confidence_interval, axis=0).rename(method)
    )

# Each named Series becomes one row labelled with its method. Columns are put
# in alphabetical order (F1 Score, Precision, Recall in the recorded output)
# so that the positional `[2, 1, 0]` column selections in the cells below pick
# Recall, Precision, F1 Score regardless of the pandas version in use.
stat_summary_pd = pd.DataFrame(method_rows).sort_index(axis=1)

In [1167]:
# LaTeX source of the headline table: summary rows 0, 1, 2, 4 and 6 with the
# three metric columns taken in reverse positional order.
stat_summary_pd.iloc[[0, 1, 2, 4, 6]].iloc[:, [2, 1, 0]].to_latex()

u'\\begin{tabular}{llll}\n\\toprule\n{} &           Recall &        Precision &         F1 Score \\\\\n\\midrule\nfirehose     &            1.000 &  0.004 +\\textbackslash- 0.001 &  0.007 +\\textbackslash- 0.002 \\\\\ntopk         &  0.272 +\\textbackslash- 0.023 &  0.084 +\\textbackslash- 0.011 &  0.122 +\\textbackslash- 0.014 \\\\\ngurobi\\_cilp  &  0.865 +\\textbackslash- 0.021 &  0.036 +\\textbackslash- 0.009 &  0.068 +\\textbackslash- 0.016 \\\\\ngurobi\\_wilp  &  0.739 +\\textbackslash- 0.028 &  0.048 +\\textbackslash- 0.010 &  0.088 +\\textbackslash- 0.018 \\\\\ngurobi\\_cailp &  0.642 +\\textbackslash- 0.038 &  0.480 +\\textbackslash- 0.061 &  0.524 +\\textbackslash- 0.047 \\\\\n\\bottomrule\n\\end{tabular}\n'

In [1186]:
# Print the LaTeX row for summary row 6, swapping the escaped "+\-" that
# to_latex emits for a proper $\pm$. The replacement is a raw string because
# "\p" is not a valid escape sequence in a normal string literal.
print(stat_summary_pd.iloc[[6], [2, 1, 0]].to_latex(header=False, index=False).replace('+\\textbackslash-', r'$\pm$'))

\begin{tabular}{lll}
\toprule
 0.642 $\pm$ 0.038 &  0.480 $\pm$ 0.061 &  0.524 $\pm$ 0.047 \\
\bottomrule
\end{tabular}



In [1185]:
# Show summary row 6 as a one-row frame, metric columns in reverse positional order.
stat_summary_pd.iloc[[6]].iloc[:, [2, 1, 0]]

Unnamed: 0,Recall,Precision,F1 Score
gurobi_cailp,0.642 +\- 0.038,0.480 +\- 0.061,0.524 +\- 0.047


# Selected Features

Display the common features selected across the k folds:

In [1205]:
# Render, for every topic, one feature summary table per method.
display_features()

# Natural Disaster 

### topk

Unnamed: 0,Feature,Type
0,storm,term
1,hurricane,term
2,earthquake,term
3,philippines,term
4,magnitude,term
5,typhoon,term
6,relief,term
7,yolandaph,hashtag
8,philippines,hashtag
9,california,term


### gurobi_cilp

Unnamed: 0,Feature,Type
0,and,term
1,at,term
2,california,term
3,empty_tweet,term
4,for,term
5,from,term
6,in,term
7,is,term
8,my,term
9,of,term


### greedy_cilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,of,term
3,to,term
4,in,term
5,empty_tweet,term
6,for,term
7,is,term
8,and,term
9,from,term


### gurobi_wilp

Unnamed: 0,Feature,Type
0,rt,term


### greedy_wilp

Unnamed: 0,Feature,Type
0,rt,term


### gurobi_cailp

Unnamed: 0,Feature,Type
0,earthquake,term
1,magnitude,term
2,philippines,term
3,storm,term
4,typhoon,term


### greedy_cailp

Unnamed: 0,Feature,Type
0,philippines,term
1,storm,term
2,typhoon,term
3,california,term
4,magnitude,term
5,relief,term
6,earthquake,term
7,hurricane,term
8,philippines,hashtag
9,gabriele_corno,mention


---

# Social Issues 

### topk

Unnamed: 0,Feature,Type
0,police,term
1,deray,mention
2,protesters,term
3,natedrug,mention
4,cops,term
5,protest,term
6,antoniofrench,mention
7,prolife,hashtag
8,debt,term
9,justice,term


### gurobi_cilp

Unnamed: 0,Feature,Type
0,amp,term
1,and,term
2,at,term
3,for,term
4,in,term
5,is,term
6,it,term
7,not,term
8,of,term
9,on,term


### greedy_cilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,to,term
3,in,term
4,is,term
5,of,term
6,for,term
7,and,term
8,on,term
9,you,term


### gurobi_wilp

Unnamed: 0,Feature,Type
0,police,term
1,rt,term
2,deray,mention


### greedy_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,police,term
2,deray,mention


### gurobi_cailp

Unnamed: 0,Feature,Type
0,black,term
1,cop,term
2,cops,term
3,justice,term
4,police,term
5,protest,term
6,protesters,term
7,deray,mention


### greedy_cailp

Unnamed: 0,Feature,Type
0,police,term
1,black,term
2,protesters,term
3,cops,term
4,protest,term
5,justice,term
6,deray,mention
7,cop,term
8,natedrug,mention
9,brown,term


---

# Space 

### topk

Unnamed: 0,Feature,Type
0,nasa,mention
1,tokiohotel,mention
2,moon,term
3,science,term
4,loc_houston_tx,location
5,30secondstomars,mention
6,loc_houston,location
7,solar,term
8,space,term
9,philae2014,mention


### gurobi_cilp

Unnamed: 0,Feature,Type
0,and,term
1,at,term
2,by,term
3,empty_tweet,term
4,for,term
5,in,term
6,is,term
7,of,term
8,on,term
9,rt,term


### greedy_cilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,to,term
3,in,term
4,for,term
5,of,term
6,and,term
7,empty_tweet,term
8,on,term
9,is,term


### gurobi_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,nasa,mention


### greedy_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,nasa,mention


### gurobi_cailp

Unnamed: 0,Feature,Type
0,moon,term
1,solar,term
2,space,term
3,texas,hashtag
4,loc_houston_tx,location
5,nasa,mention


### greedy_cailp

Unnamed: 0,Feature,Type
0,nasa,mention
1,space,term
2,star,term
3,moon,term
4,loc_houston_tx,location
5,summer,hashtag
6,science,term
7,solar,term
8,earth,term
9,beach,hashtag


---

# Soccer 

### topk

Unnamed: 0,Feature,Type
0,lfc,mention
1,ynwa,hashtag
2,brazil,hashtag
3,soccer,term
4,mcfc,mention
5,liverpool,term
6,ussoccer,mention
7,supporting,term
8,brazil,term
9,easportsfifa,mention


### gurobi_cilp

Unnamed: 0,Feature,Type
0,and,term
1,at,term
2,by,term
3,empty_tweet,term
4,for,term
5,from,term
6,in,term
7,is,term
8,it,term
9,new,term


### greedy_cilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,to,term
3,for,term
4,in,term
5,and,term
6,on,term
7,you,term
8,is,term
9,of,term


### gurobi_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,to,term
3,fifaworldcup,mention
4,lfc,mention


### greedy_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,lfc,mention
3,fifaworldcup,mention
4,to,term


### gurobi_cailp

Unnamed: 0,Feature,Type
0,cup,term
1,liverpool,term
2,team,term
3,lfc,mention


### greedy_cailp

Unnamed: 0,Feature,Type
0,cup,term
1,lfc,mention
2,liverpool,term
3,goal,term
4,match,term
5,football,term
6,team,term
7,league,term
8,vs,term
9,afc,hashtag


---

# Human Disasters 

### topk

Unnamed: 0,Feature,Type
0,iss,hashtag
1,camilacabello97,mention
2,israel,term
3,gaza,term
4,underattack,term
5,israeli,term
6,iraq,hashtag
7,palestine,term
8,palestinian,term
9,war,term


### gurobi_cilp

Unnamed: 0,Feature,Type
0,amp,term
1,and,term
2,by,term
3,empty_tweet,term
4,for,term
5,from,term
6,in,term
7,is,term
8,not,term
9,now,term


### greedy_cilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,in,term
3,to,term
4,of,term
5,for,term
6,on,term
7,is,term
8,empty_tweet,term
9,and,term


### gurobi_wilp

Unnamed: 0,Feature,Type
0,gaza,term
1,in,term
2,israel,term
3,israeli,term
4,rt,term
5,iraq,hashtag
6,iss,hashtag


### greedy_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,in,term
2,iraq,hashtag
3,iss,hashtag
4,gaza,term
5,israel,term
6,israeli,term


### gurobi_cailp

Unnamed: 0,Feature,Type
0,against,term
1,children,term
2,gaza,term
3,israel,term
4,israeli,term
5,killed,term
6,iraq,hashtag
7,iss,hashtag


### greedy_cailp

Unnamed: 0,Feature,Type
0,israel,term
1,israeli,term
2,iraq,hashtag
3,gaza,term
4,children,term
5,killed,term
6,iss,hashtag
7,war,term
8,flight,term
9,against,term


---

# Tennis 

### topk

Unnamed: 0,Feature,Type
0,tennis,term
1,wimbledon,mention
2,murray,term
3,usopen,mention
4,murray,hashtag
5,wimbledon,term
6,djokovic,term
7,djokernole,mention
8,nadal,term
9,andy_murray,mention


### gurobi_cilp

Unnamed: 0,Feature,Type
0,and,term
1,at,term
2,for,term
3,go,term
4,in,term
5,is,term
6,it,term
7,murray,term
8,of,term
9,on,term


### greedy_cilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,to,term
3,is,term
4,tennis,term
5,on,term
6,in,term
7,for,term
8,and,term
9,murray,term


### gurobi_wilp

Unnamed: 0,Feature,Type
0,federer,term
1,rt,term
2,tennis,term
3,wimbledon,mention


### greedy_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,federer,term
2,wimbledon,mention
3,tennis,term


### gurobi_cailp

Unnamed: 0,Feature,Type
0,djokovic,term
1,final,term
2,match,term
3,murray,term
4,serena,term
5,tennis,term
6,murray,hashtag
7,usopen,mention
8,wimbledon,mention


### greedy_cailp

Unnamed: 0,Feature,Type
0,tennis,term
1,murray,term
2,wimbledon,mention
3,final,term
4,match,term
5,usopen,mention
6,set,term
7,nadal,term
8,djokovic,term
9,atp,hashtag


---

# Health 

### topk

Unnamed: 0,Feature,Type
0,fitness,hashtag
1,foxtramedia,mention
2,libcrib,hashtag
3,p2,hashtag
4,wellness,hashtag
5,ebola,term
6,stoprush,hashtag
7,tntweeters,hashtag
8,anxiety,hashtag
9,virus,term


### gurobi_cilp

Unnamed: 0,Feature,Type
0,amp,term
1,and,term
2,at,term
3,by,term
4,for,term
5,from,term
6,in,term
7,is,term
8,of,term
9,on,term


### greedy_cilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,to,term
3,in,term
4,for,term
5,of,term
6,and,term
7,job,hashtag
8,on,term
9,with,term


### gurobi_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,fitness,hashtag
3,libcrib,hashtag
4,p2,hashtag


### greedy_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,p2,hashtag
2,fitness,hashtag
3,the,term
4,libcrib,hashtag


### gurobi_cailp

Unnamed: 0,Feature,Type
0,health,term
1,fitness,hashtag
2,libcrib,hashtag
3,p2,hashtag
4,tcot,hashtag
5,foxtramedia,mention


### greedy_cailp

Unnamed: 0,Feature,Type
0,fitness,hashtag
1,health,term
2,p2,hashtag
3,tcot,hashtag
4,ebola,term
5,news,hashtag
6,wellness,hashtag
7,foxtramedia,mention
8,tntweeters,hashtag
9,libcrib,hashtag


---

# LGBT 

### topk

Unnamed: 0,Feature,Type
0,tlot,hashtag
1,gop,hashtag
2,libcrib,hashtag
3,lnyhbt,hashtag
4,obama,term
5,jjauthor,mention
6,lgbt,hashtag
7,obamacare,hashtag
8,rednationrising,hashtag
9,nra,hashtag


### gurobi_cilp

Unnamed: 0,Feature,Type
0,amp,term
1,and,term
2,at,term
3,by,term
4,empty_tweet,term
5,for,term
6,in,term
7,is,term
8,of,term
9,on,term


### greedy_cilp

Unnamed: 0,Feature,Type
0,rt,term
1,the,term
2,to,term
3,in,term
4,for,term
5,of,term
6,on,term
7,is,term
8,and,term
9,tlot,hashtag


### gurobi_wilp

Unnamed: 0,Feature,Type
0,obama,term
1,rt,term
2,the,term
3,lnyhbt,hashtag
4,nj2as,hashtag
5,rednationrising,hashtag
6,tlot,hashtag


### greedy_wilp

Unnamed: 0,Feature,Type
0,rt,term
1,tlot,hashtag
2,nj2as,hashtag
3,obama,term
4,the,term
5,lnyhbt,hashtag
6,rednationrising,hashtag


### gurobi_cailp

Unnamed: 0,Feature,Type
0,gop,term
1,obama,term
2,gop,hashtag
3,nj2as,hashtag
4,obamacare,hashtag
5,rednationrising,hashtag
6,tlot,hashtag
7,jjauthor,mention


### greedy_cailp

Unnamed: 0,Feature,Type
0,obama,term
1,tlot,hashtag
2,nj2as,hashtag
3,gop,hashtag
4,obamacare,hashtag
5,nra,hashtag
6,lnyhbt,hashtag
7,tntweeters,hashtag
8,jjauthor,mention
9,libcrib,hashtag


---

# LaTeX

In [1206]:
# Export the phase 2 statistics of the 'Social_issue' topic as LaTeX rows.
_, phase2 = utils.get_all_stats('Social_issue', settings.STAT_METHODS)
# Keep rows 0, 1, 3, 5 and 7 and give them the display labels used in the table.
ph_to_lat = phase2.iloc[[0, 1, 3, 5, 7],:]
ph_to_lat.index = ['Firehose', 'TopK', 'Greedy CILP','Greedy WILP', 'Greedy CAILP']
# The replacement is a raw string: "\p" is not a valid escape sequence in a
# normal string literal (the resulting text is unchanged).
print(ph_to_lat.to_latex(header=False, index=True).replace('+\\textbackslash-', r'$\pm$'))

\begin{tabular}{lll}
\toprule
Firehose     &  0.678 $\pm$ 0.003 &  0.730 $\pm$ 0.035 \\
TopK         &  0.574 $\pm$ 0.005 &  0.741 $\pm$ 0.029 \\
Greedy CILP  &  0.684 $\pm$ 0.003 &  0.733 $\pm$ 0.035 \\
Greedy WILP  &  1.386 $\pm$ 0.004 &  0.523 $\pm$ 0.018 \\
Greedy CAILP &  0.624 $\pm$ 0.005 &  0.714 $\pm$ 0.026 \\
\bottomrule
\end{tabular}



In [1201]:
# Inspect the configured topic identifiers.
settings.TOPICS

['Natr_Disaster',
 'Social_issue',
 'Space',
 'Soccer',
 'Human_Disaster',
 'Tennis',
 'Health',
 'LGBT']

In [1116]:
# Show the relabelled phase 2 table as a DataFrame rather than LaTeX source.
# NOTE(review): the recorded output differs from the LaTeX printed above and
# appears to come from an earlier run; re-run this cell to refresh it.
ph_to_lat

Unnamed: 0,Test AveP,P@100
Firehose,0.647 +\- 0.002,0.592 +\- 0.010
TopK,0.504 +\- 0.010,0.630 +\- 0.065
Greedy CILP,0.651 +\- 0.002,0.594 +\- 0.011
Greedy WILP,0.654 +\- 0.001,0.596 +\- 0.007
Greedy CAILP,0.544 +\- 0.015,0.592 +\- 0.027


In [1207]:
# Stack every feature table loaded for 'Social_issue' / 'greedy_wilp' into one
# frame (presumably one table per fold; confirm against utils).
feats = utils.load_results_features('Social_issue', 'greedy_wilp')
feat_frames = list(feats)
# Concatenate once instead of calling DataFrame.append per item (quadratic,
# and removed in pandas 2.0). An empty result stays an empty frame.
df = pd.concat(feat_frames) if feat_frames else pd.DataFrame()

In [1208]:
# Count how many of the stacked tables contain each feature.
df['Feature'].value_counts()

police                  6
deray                   5
rt                      5
natedrug                3
protesters              3
cops                    3
wilson                  3
nypd                    2
racial                  2
protestors              2
darren                  2
gas                     2
murdered                2
peaceful                2
jury                    2
looting                 2
the                     2
stl                     2
protesting              2
protest                 2
officials               1
democracy               1
loc_san_francisco_ca    1
square                  1
riots                   1
civil                   1
ignorance               1
handsupunited_          1
march                   1
blacks                  1
                       ..
bipartisanism           1
keegannyc               1
antoniofrench           1
mike                    1
emergency               1
browns                  1
looters                 1
missouri    

In [1204]:
# Display the stacked feature frame.
# NOTE(review): the recorded output appears to predate the latest rebuild of
# `df` (its features do not match the value counts above); re-run to refresh.
df

Unnamed: 0,Feature,Type
0,rt,term
1,in,term
2,iraq,hashtag
3,children,term
4,iss,hashtag
5,gaza,term
6,israel,term
7,assad,hashtag
8,chemical,term
9,obama,hashtag
