Skip to content

Commit

Permalink
Implement Korpus.get_all_texts (#59)
Browse files Browse the repository at this point in the history
  • Loading branch information
lovit committed Sep 9, 2020
1 parent bec77cb commit ab5e00a
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 7 deletions.
6 changes: 5 additions & 1 deletion Korpora/korpora.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,11 @@ def cleaning(self, raw_documents: List[str], **kargs):
raise NotImplementedError('Implement this function')

def get_all_texts(self):
    """Collect the texts of every ``KorpusData`` attribute of this object.

    Instance attributes are visited in sorted attribute-name order, and the
    result of each attribute's ``get_all_texts()`` is appended in that order.
    Attributes that are not ``KorpusData`` instances are ignored.

    Returns:
        list: the concatenated texts; empty when no attribute is a
        ``KorpusData`` instance.
    """
    texts = []
    # Sorting by attribute name keeps the output order deterministic.
    for _name, data in sorted(vars(self).items()):
        if isinstance(data, KorpusData):
            texts.extend(data.get_all_texts())
    return texts

def save(self, root_dir):
"""save prorce` to `sentences`"""
Expand Down
3 changes: 0 additions & 3 deletions Korpora/korpora_chatbot_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,6 @@ def cleaning(self, examples):
labels = [int(label) for label in labels]
return questions, answers, labels

def get_all_texts(self):
    """Return the ``texts`` attribute of the ``train`` data."""
    train_data = self.train
    return train_data.texts

def get_all_pairs(self):
    """Return whatever ``get_all_pairs()`` of the ``train`` data returns."""
    train_data = self.train
    pairs = train_data.get_all_pairs()
    return pairs

Expand Down
3 changes: 0 additions & 3 deletions Korpora/korpora_korean_petitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,6 @@ def parse(json_line):
contents, categories, begins, ends, num_agrees, titles = zip(*separated_lines)
return contents, categories, begins, ends, num_agrees, titles

def get_all_texts(self):
    """Return the ``texts`` attribute of the ``train`` data."""
    train_data = self.train
    return train_data.texts

def get_all_categories(self):
    """Return the ``categories`` attribute of the ``train`` data."""
    train_data = self.train
    return train_data.categories

Expand Down
6 changes: 6 additions & 0 deletions Korpora/korpora_kornli.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ def cleaning(self, raw_lines: List[str]):
texts, pairs, labels = zip(*separated_lines)
return texts, pairs, labels

def get_all_texts(self):
    """Concatenate the texts of the four KorNLI parts.

    The parts are joined with ``+`` in this order: ``multinli_train``,
    ``snli_train``, ``xnli_dev``, ``xnli_test``.
    """
    combined = self.multinli_train.get_all_texts()
    combined = combined + self.snli_train.get_all_texts()
    combined = combined + self.xnli_dev.get_all_texts()
    combined = combined + self.xnli_test.get_all_texts()
    return combined

def get_all_pairs(self):
return (self.multinli_train.get_all_pairs() +
self.snli_train.get_all_pairs() +
Expand Down
3 changes: 3 additions & 0 deletions Korpora/korpora_korsts.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ def cleaning(self, raw_lines: List[str]):
genres, filenames, years, _, labels, texts, pairs = zip(*separated_lines)
return texts, pairs, labels, genres, filenames, years

def get_all_texts(self):
    """Concatenate the texts of ``train``, ``dev`` and ``test``, in that order."""
    train_texts = self.train.get_all_texts()
    dev_texts = self.dev.get_all_texts()
    test_texts = self.test.get_all_texts()
    return train_texts + dev_texts + test_texts

def get_all_pairs(self):
    """Concatenate the pairs of ``train``, ``dev`` and ``test``, in that order."""
    train_pairs = self.train.get_all_pairs()
    dev_pairs = self.dev.get_all_pairs()
    test_pairs = self.test.get_all_pairs()
    return train_pairs + dev_pairs + test_pairs

Expand Down
3 changes: 3 additions & 0 deletions Korpora/korpora_question_pair.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ def cleaning(self, examples, is_train):
_, texts, pairs, labels, _ = zip(*examples)
return texts, pairs, labels

def get_all_texts(self):
    """Concatenate the texts of ``train`` and ``test``, in that order."""
    train_texts = self.train.get_all_texts()
    test_texts = self.test.get_all_texts()
    return train_texts + test_texts

def get_all_pairs(self):
    """Concatenate the pairs of ``train`` and ``test``, in that order."""
    train_pairs = self.train.get_all_pairs()
    test_pairs = self.test.get_all_pairs()
    return train_pairs + test_pairs

Expand Down

0 comments on commit ab5e00a

Please sign in to comment.