update 0.1.0/support windows

taishi-i committed Sep 2, 2018
1 parent 29c8b20 commit 41149ba
Showing 3 changed files with 49 additions and 38 deletions.
67 changes: 35 additions & 32 deletions README.md
@@ -1,16 +1,16 @@
<p align="center"><img width="50%" src="/nagisa/data/nagisa_image.jpg"/></p>
![Alt text](/nagisa/data/nagisa_image.jpg 'nagisa')

--------------------------------------------------------------------------------
---

[![Build Status](https://travis-ci.org/taishi-i/nagisa.svg?branch=master)](https://travis-ci.org/taishi-i/nagisa)
[![Documentation Status](https://readthedocs.org/projects/nagisa/badge/?version=latest)](https://nagisa.readthedocs.io/en/latest/?badge=latest)
[![PyPI](https://img.shields.io/pypi/v/nagisa.svg)](https://pypi.python.org/pypi/nagisa)

Nagisa is a Python module for Japanese word segmentation/POS-tagging.
It is designed to be a simple and easy-to-use tool.

This tool has the following features.
- Based on recurrent neural networks.
- The word segmentation model uses character- and word-level features [[池田+]](http://www.anlp.jp/proceedings/annual_meeting/2017/pdf_dir/B6-2.pdf).
- The POS-tagging model uses tag dictionary information [[Inoue+]](http://www.aclweb.org/anthology/K17-1042).

@@ -21,13 +21,16 @@ For more details refer to the following links.
Installation
=============

Python 2.7.x or 3.5+ is required.
This tool uses [DyNet](https://github.com/clab/dynet) (the Dynamic Neural Network Toolkit) to compute its neural networks.
You can install nagisa by using the following command.
```bash
pip install nagisa
```

If you use nagisa on Windows, please use Python 3.5+.
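The platform rules above (Python 3.5+ everywhere, Python 2.7 only outside Windows) can be captured in a small guard. The sketch below is illustrative only and not part of nagisa; the helper name `check_supported` is hypothetical:

```python
import os
import sys

# Hypothetical guard (not part of nagisa): Python 3.5+ works everywhere,
# while Python 2.7 is acceptable only on non-Windows platforms.
def check_supported(platform=os.name, version=tuple(sys.version_info)):
    """Return True if this interpreter/platform combination is supported."""
    if version >= (3, 5):
        return True
    # 'nt' is os.name on Windows, where Python 2.7 is not supported.
    return version[:2] == (2, 7) and platform != 'nt'

if not check_supported():
    raise RuntimeError('nagisa on Windows requires Python 3.5+')
```

Such a check lets an unsupported interpreter fail with a clear message instead of an obscure import error.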


Usage
======
Basic usage.
@@ -37,53 +40,53 @@ import nagisa
# Sample of word segmentation and POS-tagging for Japanese
text = 'Pythonで簡単に使えるツールです'
words = nagisa.tagging(text)
print(words)
#=> Python/名詞 で/助詞 簡単/形状詞 に/助動詞 使える/動詞 ツール/名詞 です/助動詞

# Get a list of words
print(words.words)
#=> ['Python', 'で', '簡単', 'に', '使える', 'ツール', 'です']

# Get a list of POS-tags
print(words.postags)
#=> ['名詞', '助詞', '形状詞', '助動詞', '動詞', '名詞', '助動詞']

# The nagisa.wakati method is faster than the nagisa.tagging method.
words = nagisa.wakati(text)
print(words)
#=> ['Python', 'で', '簡単', 'に', '使える', 'ツール', 'です']
```


Post-processing functions.
```python
# Extracting all nouns from a text
words = nagisa.extract(text, extract_postags=['名詞'])
print(words)
#=> Python/名詞 ツール/名詞

# Filtering specific POS-tags from a text
words = nagisa.filter(text, filter_postags=['助詞', '助動詞'])
print(words)
#=> Python/名詞 簡単/形状詞 使える/動詞 ツール/名詞

# A list of available POS-tags
print(nagisa.tagger.postags)
#=> ['補助記号', '名詞', ... , 'URL']

# A word can be forcibly recognized as a single word.
text = 'ニューラルネットワークを使ってます。'
print(nagisa.tagging(text))
#=> ニューラル/名詞 ネットワーク/名詞 を/助詞 使っ/動詞 て/助動詞 ます/助動詞 。/補助記号

# If a word is included in the single_word_list, it is recognized as a single word.
tagger_nn = nagisa.Tagger(single_word_list=['ニューラルネットワーク'])
print(tagger_nn.tagging(text))
#=> ニューラルネットワーク/名詞 を/助詞 使っ/動詞 て/助動詞 ます/助動詞 。/補助記号

# Nagisa is good at capturing URLs and kaomoji from input text.
url = 'https://github.com/taishi-i/nagisaでコードを公開中(๑¯ω¯๑)'
words = nagisa.tagging(url)
print(words)
#=> https://github.com/taishi-i/nagisa/URL で/助詞 コード/名詞 を/助詞 公開/名詞 中/接尾辞 (๑ ̄ω ̄๑)/補助記号
```
2 changes: 1 addition & 1 deletion nagisa/__init__.py
@@ -1,7 +1,7 @@
import utils
from nagisa.tagger import Tagger

version = '0.0.8'
version = '0.1.0'
# Initialize instance
tagger = Tagger()
# Functions
18 changes: 13 additions & 5 deletions setup.py
@@ -1,5 +1,6 @@
# -*- coding:utf-8 -*-

import io
import os
import sys

@@ -11,8 +12,14 @@
    from pypandoc import convert
    long_description = convert('README.md', 'rst')
except ImportError:
    with open('README.md') as f:
        long_description = f.read()
    if os.name == 'nt':
        if sys.version_info.major == 2:
            f = io.open(readme, 'r', encoding='utf_8_sig')
        else:
            f = open(readme, 'r', encoding='utf_8_sig')
    else:
        f = open(readme, 'r')
    long_description = f.read()
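The `utf_8_sig` branch above matters because some Windows editors prepend a UTF-8 byte-order mark to text files; decoding such a file as plain `utf-8` leaves a stray U+FEFF at the start of `long_description`. A small self-contained demonstration (using a temporary file, not the real README):

```python
import codecs
import io
import os
import tempfile

# Write a file that starts with a UTF-8 BOM, as some Windows editors do.
path = os.path.join(tempfile.mkdtemp(), 'README.md')
with open(path, 'wb') as f:
    f.write(codecs.BOM_UTF8 + b'nagisa')

# Plain utf-8 keeps the BOM as a U+FEFF character ...
with io.open(path, 'r', encoding='utf-8') as f:
    raw = f.read()      # '\ufeffnagisa'

# ... while utf_8_sig strips it, which is what the branch above relies on.
with io.open(path, 'r', encoding='utf_8_sig') as f:
    clean = f.read()    # 'nagisa'
```

`io.open` is used so the same code also runs under Python 2, mirroring the version check in the diff.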


classifiers = [
@@ -22,6 +29,7 @@
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Operating System :: Unix',
'Operating System :: Microsoft :: Windows',
'Topic :: Text Processing :: Linguistic',
'Topic :: Software Development :: Libraries :: Python Modules'
]
@@ -49,7 +57,7 @@ def __len__(self):
def extensions():
    from Cython.Build import cythonize
    import numpy
    extensions = [Extension('utils',
                            ['nagisa/utils.pyx'],
                            include_dirs = [numpy.get_include()])]
    return cythonize(extensions)
@@ -59,11 +67,11 @@ def extensions():
    packages=['nagisa'],
    author = 'Taishi Ikeda',
    author_email = 'taishi.ikeda.0323@gmail.com',
    version = '0.0.9',
    version = '0.1.0',
    description = 'A Japanese tokenizer based on recurrent neural networks',
    long_description = long_description,
    url = 'https://github.com/taishi-i/nagisa',
    download_url = 'https://github.com/taishi-i/nagisa/archive/0.0.9.tar.gz',
    download_url = 'https://github.com/taishi-i/nagisa/archive/0.1.0.tar.gz',
    license = 'MIT License',
    platforms = 'Unix',
    setup_requires=['six', 'cython', 'numpy',],
