Skip to content

Commit

Permalink
Unify description of NSMC data (#27, #59)
Browse files: browse the repository at this point in the history
  • Loading branch information
lovit committed Sep 8, 2020
1 parent b4ffecd commit dd4574b
Showing 1 changed file with 16 additions and 14 deletions.
30 changes: 16 additions & 14 deletions Korpora/korpora_nsmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,22 @@
},
]

# Human-readable summary of the NSMC (Naver sentiment movie corpus) dataset,
# kept at module level.
# NOTE(review): the author credit previously read "KakaoBrain", which does not
# match the repository cited below (github.com/e9t/nsmc); it now credits the
# owner of that repository. Confirm the exact attribution wording upstream.
description = """ Author : e9t@github
Repository : https://github.com/e9t/nsmc
References : www.lucypark.kr/docs/2015-pyconkr/#39
Naver sentiment movie corpus v1.0
This is a movie review dataset in the Korean language.
Reviews were scraped from Naver Movies.
The dataset construction is based on the method noted in
[Large movie review dataset][^1] from Maas et al., 2011.
[^1]: http://ai.stanford.edu/~amaas/data/sentiment/"""

# License notice for the corpus (CC0 1.0), kept at module level.
# The name is left as `license` so existing references keep working.
license = """ CC0 1.0 Universal (CC0 1.0) Public Domain Dedication
Details in https://creativecommons.org/publicdomain/zero/1.0/"""


class NSMCData(KorpusData):
labels: List[str]
Expand All @@ -35,20 +51,6 @@ def __getitem__(self, index):

class NSMC(Korpus):
def __init__(self, root_dir=None, force_download=False):
description = """ Reference: https://github.com/e9t/nsmc
Naver sentiment movie corpus v1.0
This is a movie review dataset in the Korean language.
Reviews were scraped from Naver Movies.
The dataset construction is based on the method noted in
[Large movie review dataset][^1] from Maas et al., 2011.
[^1]: http://ai.stanford.edu/~amaas/data/sentiment/"""

license = """ CC0 1.0 Universal (CC0 1.0) Public Domain Dedication
Details in https://creativecommons.org/publicdomain/zero/1.0/"""

super().__init__(description, license)

if root_dir is None:
Expand Down

0 comments on commit dd4574b

Please sign in to comment.