diff --git a/corpus_handlers/bnc.py b/corpus_handlers/bnc.py new file mode 100644 index 0000000..df1a288 --- /dev/null +++ b/corpus_handlers/bnc.py @@ -0,0 +1,8 @@ +import nltk +from nltk import word_tokenize +from nltk import sent_tokenize +from nltk import pos_tag + +import bs4 as BeautifulSoup + +bnc_dir = 'This is a sent' \ No newline at end of file diff --git a/corpus_handlers/bnc2014_spoken.py b/corpus_handlers/bnc2014_spoken.py new file mode 100644 index 0000000..e69de29 diff --git a/corpus_handlers/brown.py b/corpus_handlers/brown.py new file mode 100644 index 0000000..e69de29 diff --git a/corpus_handlers/reuters.py b/corpus_handlers/reuters.py new file mode 100644 index 0000000..e69de29