## Imports

In [None]:
import pandas as pd
import os
import glob

## Configuration

*input_dir:* The path to the directory containing the annotated seed words. Please make sure to put a '/' (slash) at the end. For example: `path/to/annotated/seed_words/`.

*output_dir:* The path to the directory where you want to save the selected seed words. Please make sure to put a '/' (slash) at the end. For example: `path/to/output/`.

In [None]:
# Directory that is searched for the annotated seed-word CSV files.
# Should end with '/'.
input_dir = "results/annotated/"
# Directory that receives the selected seed-word lists
# (positive.txt, negative.txt, neutral.txt). Should end with '/'.
output_dir = "results/selected/"

## Directory Setup (Optional)
Creates directories according to the configuration if not already created manually.

In [None]:
# Create the configured directories (including missing parents).
# exist_ok=True makes the call a no-op when the directory is already there and
# removes the race between a separate existence check and the creation.
# Note: if a path exists but is not a directory, FileExistsError is raised here
# instead of the problem surfacing later when files are read or written.
os.makedirs(input_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

## Seed Word Selection

### Load annotated seed words

In [None]:
# Collect every CSV file directly inside input_dir.
# os.path.join yields the same pattern whether or not input_dir ends with a
# path separator; sorting makes the load order reproducible, because glob
# returns its matches in arbitrary order.
annotation_file_names = sorted(glob.glob(os.path.join(input_dir, "*.csv")))
print("found {} annotations".format(len(annotation_file_names)))
# One DataFrame per annotation file, indexed by its "word" column.
annotations = [
    pd.read_csv(annotation_file_name, index_col="word")
    for annotation_file_name in annotation_file_names
]
print("loaded {} annotations".format(len(annotations)))

### Select seed words
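This is based on a majority vote: each word gets its most frequent label, words whose top labels are tied are skipped, and a missing annotation counts as "neutral".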

In [None]:
# Put all loaded annotations side by side, one row per word; any missing
# annotation is treated as "neutral".
annotations_df = pd.concat(annotations, axis=1).fillna("neutral")

pos_words, neg_words, neu_words = [], [], []
# Maps each recognised label to the list that collects its words.
words_by_label = {
    "positive": pos_words,
    "negative": neg_words,
    "neutral": neu_words,
}

# mode(axis=1) gives the most frequent label(s) per word; more than one
# non-missing entry in a row means several labels tie for first place.
for word, modes in annotations_df.mode(axis=1).iterrows():
    winners = modes.dropna()
    if len(winners) > 1:
        # Tie between labels: no clear winner, so the word is skipped.
        continue
    bucket = words_by_label.get(winners[0])
    if bucket is not None:
        # Labels other than the three known ones are ignored.
        bucket.append(word)

print("number of positive:", len(pos_words))
print("number of negative:", len(neg_words))
print("number of neutral:", len(neu_words))

### Save selected seed words

In [None]:
# Write each word list to <output_dir>/<label>.txt, one word per line
# (no trailing newline). os.path.join builds the same path whether or not
# output_dir ends with a path separator.
for label, words in (
    ("positive", pos_words),
    ("negative", neg_words),
    ("neutral", neu_words),
):
    out_path = os.path.join(output_dir, "{}.txt".format(label))
    with open(out_path, mode="wt", encoding="utf-8") as out_file:
        out_file.write("\n".join(words))