Permalink
Browse files

initial import

  • Loading branch information...
0 parents commit 737bf7ec7c2e116657284a8d5be6fb63bc89605b @kmike kmike committed Aug 19, 2012
Showing with 18,013 additions and 0 deletions.
  1. +19 −0 .hgignore
  2. 0 CHANGES.rst
  3. +18 −0 LICENSE
  4. +9 −0 MANIFEST.in
  5. +77 −0 README.rst
  6. +1 −0 lib/AUTHORS
  7. +10 −0 lib/COPYING
  8. +23 −0 lib/dawgdic/base-types.h
  9. +43 −0 lib/dawgdic/base-unit.h
  10. +70 −0 lib/dawgdic/bit-pool.h
  11. +149 −0 lib/dawgdic/completer.h
  12. +383 −0 lib/dawgdic/dawg-builder.h
  13. +84 −0 lib/dawgdic/dawg-unit.h
  14. +137 −0 lib/dawgdic/dawg.h
  15. +332 −0 lib/dawgdic/dictionary-builder.h
  16. +60 −0 lib/dawgdic/dictionary-extra-unit.h
  17. +71 −0 lib/dawgdic/dictionary-unit.h
  18. +229 −0 lib/dawgdic/dictionary.h
  19. +103 −0 lib/dawgdic/guide-builder.h
  20. +35 −0 lib/dawgdic/guide-unit.h
  21. +124 −0 lib/dawgdic/guide.h
  22. +71 −0 lib/dawgdic/link-table.h
  23. +69 −0 lib/dawgdic/object-pool.h
  24. +61 −0 lib/dawgdic/ranked-completer-candidate.h
  25. +58 −0 lib/dawgdic/ranked-completer-node.h
  26. +222 −0 lib/dawgdic/ranked-completer.h
  27. +182 −0 lib/dawgdic/ranked-guide-builder.h
  28. +62 −0 lib/dawgdic/ranked-guide-link.h
  29. +35 −0 lib/dawgdic/ranked-guide-unit.h
  30. +124 −0 lib/dawgdic/ranked-guide.h
  31. +42 −0 setup.py
  32. +1,345 −0 src/_base_types.cpp
  33. +14 −0 src/_base_types.pxd
  34. +1,348 −0 src/_dawg.cpp
  35. +46 −0 src/_dawg.pxd
  36. +1,351 −0 src/_dawg_builder.cpp
  37. +36 −0 src/_dawg_builder.pxd
  38. +1,365 −0 src/_dictionary.cpp
  39. +61 −0 src/_dictionary.pxd
  40. +1,371 −0 src/_dictionary_builder.cpp
  41. +12 −0 src/_dictionary_builder.pxd
  42. +1,348 −0 src/_dictionary_unit.cpp
  43. +31 −0 src/_dictionary_unit.pxd
  44. +5,221 −0 src/dawg.cpp
  45. +111 −0 src/dawg.pyx
  46. +1,356 −0 src/iostream.cpp
  47. +20 −0 src/iostream.pxd
  48. +2 −0 tests/__init__.py
  49. +62 −0 tests/test_dawg.py
  50. +8 −0 tox.ini
  51. +2 −0 update_cpp.sh
@@ -0,0 +1,19 @@
+^build
+^MANIFEST$
+^dist
+\.so$
+\.o$
+\.lo$
+
+\.svn
+\.cvsignore
+
+^src/.*\.html$
+
+^stuff/
+\.rej$
+\.pyc$
+^.tox
+\.orig$
+\.prof$
+\.coverage$
No changes.
18 LICENSE
@@ -0,0 +1,18 @@
+Copyright (c) Mikhail Korobov, 2012
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is furnished
+to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR
+A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,9 @@
+include README.rst
+include CHANGES.rst
+include LICENSE
+include tox.ini
+include update_cpp.sh
+include lib/COPYING
+
+recursive-include src *.cpp *.pxd *.pyx
+recursive-include lib/dawgdic *.h
@@ -0,0 +1,77 @@
+DAWG
+====
+
+This package provides a DAWG-based, dictionary-like
+read-only object for Python (2.x and 3.x).
+
+Based on the `dawgdic`_ C++ library.
+
+.. _dawgdic: https://code.google.com/p/dawgdic/
+
+Installation
+============
+
+TODO
+
+Usage
+=====
+
+Create a new DAWG::
+
+ >>> import dawg
+ >>> d = dawg.IntDict({u'key1': value1, u'key2': value2, u'key3': value3})
+
+TODO
+
+Contributing
+============
+
+Development happens at github and bitbucket:
+
+* https://github.com/kmike/DAWG
+* https://bitbucket.org/kmike/DAWG
+
+The main issue tracker is at github: https://github.com/kmike/DAWG/issues
+
+Feel free to submit ideas, bugs, pull requests (git or hg) or
+regular patches.
+
+If you find a bug in the C++ part, please report it to the original
+`bug tracker <https://code.google.com/p/dawgdic/issues/list>`_.
+
+
+Running tests and benchmarks
+----------------------------
+
+Make sure `tox`_ is installed and run
+
+::
+
+ $ tox
+
+from the source checkout. Tests should pass under python 2.6, 2.7, 3.2 and 3.3.
+
+.. note::
+
+ At the time of writing, the latest pip release (1.1) does not
+ support Python 3.3; in order to run tox tests under Python 3.3
+ find the "virtualenv_support" directory in site-packages
+ (of the env you run tox from) and place an sdist zip/tarball of the newer
+ pip (from github) there.
+
+.. _cython: http://cython.org
+.. _tox: http://tox.testrun.org
+
+Authors & Contributors
+----------------------
+
+* Mikhail Korobov <kmike84@gmail.com>
+
+This module is based on the `dawgdic`_ C++ library by
+Susumu Yata & contributors.
+
+License
+=======
+
+The wrapper code is licensed under the MIT License.
+The bundled `dawgdic`_ C++ library is licensed under the BSD license.
@@ -0,0 +1 @@
+Susumu Yata <syata@acm.org>
@@ -0,0 +1,10 @@
+Copyright (c) 2009-2012, Susumu Yata
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+- Neither the name of the University of Tokushima nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,23 @@
+#ifndef DAWGDIC_BASE_TYPES_H
+#define DAWGDIC_BASE_TYPES_H
+
+#include <cstddef>
+
+namespace dawgdic {
+
+// Character types (signed-ness of char is platform-dependent); treated as 8-bit characters.
+typedef char CharType;
+typedef unsigned char UCharType;
+
+// Signed integer type; assumed to be 32 bits wide (the width of int is platform-dependent).
+typedef int ValueType;
+
+// Unsigned integer type; assumed to be 32 bits wide (the width of unsigned int is platform-dependent).
+typedef unsigned int BaseType;
+
+// Alias of std::size_t: typically a 32 or 64-bit unsigned integer.
+typedef std::size_t SizeType;
+
+} // namespace dawgdic
+
+#endif // DAWGDIC_BASE_TYPES_H
@@ -0,0 +1,43 @@
+#ifndef DAWGDIC_BASE_UNIT_H
+#define DAWGDIC_BASE_UNIT_H
+
+#include "base-types.h"
+
+namespace dawgdic {
+
+// Unit for building a dawg. Wraps one BaseType value (base_) and exposes
+// the fields that are packed into it.
+class BaseUnit {
+ public:
+  BaseUnit() : base_(0) {}
+
+  // Raw access to the packed value.
+  void set_base(BaseType base) { base_ = base; }
+  BaseType base() const { return base_; }
+
+  // Field decoders.
+  // child(): the packed value without its two lowest bits.
+  BaseType child() const { return base_ >> 2; }
+  // has_sibling(): true when the lowest bit is set.
+  bool has_sibling() const { return (base_ & 1) != 0; }
+  // value(): the packed value without its lowest bit, converted to ValueType.
+  ValueType value() const { return static_cast<ValueType>(base_ >> 1); }
+  // is_state(): true when the second lowest bit is set.
+  bool is_state() const { return (base_ & 2) != 0; }
+
+ private:
+  BaseType base_;
+
+  // Copyable.
+};
+
+} // namespace dawgdic
+
+#endif // DAWGDIC_BASE_UNIT_H
@@ -0,0 +1,70 @@
+#ifndef DAWGDIC_BIT_POOL_H
+#define DAWGDIC_BIT_POOL_H
+
+#include "object-pool.h"
+
+namespace dawgdic {
+
+// This class works as an array of bit flags with compact memory management.
+// Flags are stored eight per UCharType element of pool_, and size_ counts the
+// bits handed out by Allocate().
+// NOTE(review): BLOCK_SIZE is not forwarded to pool_ in this class; confirm
+// whether it was meant to be passed on to ObjectPool.
+template <SizeType BLOCK_SIZE = 1 << 10>
+class BitPool {
+ public:
+  BitPool() : pool_(), size_(0) {}
+
+  // Accessors.
+  // Sets the flag at index when bit is true, clears it otherwise.
+  // No bounds check is made: index should be an ID returned by Allocate().
+  void set(SizeType index, bool bit) {
+    SizeType pool_index = PoolIndex(index);
+    UCharType bit_flag = BitFlag(index);
+    if (bit) {
+      pool_[pool_index] |= bit_flag;
+    } else {
+      pool_[pool_index] &= ~bit_flag;
+    }
+  }
+  // Returns the flag at index (no bounds check, see set()).
+  bool get(SizeType index) const {
+    SizeType pool_index = PoolIndex(index);
+    UCharType bit_flag = BitFlag(index);
+    return (pool_[pool_index] & bit_flag) != 0;
+  }
+
+  // Deletes all bits and frees memory.
+  void Clear() {
+    pool_.Clear();
+    size_ = 0;
+  }
+
+  // Swaps bit pools.
+  // Both the storage and the bit count are exchanged; swapping only the
+  // storage would leave each object's size_ out of sync with its pool_, so a
+  // later Allocate() could return IDs of stale bits or skip growing the pool.
+  void Swap(BitPool *bit_pool) {
+    pool_.Swap(&bit_pool->pool_);
+    SizeType other_size = bit_pool->size_;
+    bit_pool->size_ = size_;
+    size_ = other_size;
+  }
+
+  // Allocates memory for a new bit and returns its ID.
+  // Note: Allocated bits are filled with false.
+  SizeType Allocate() {
+    SizeType pool_index = PoolIndex(size_);
+    // A new byte is needed only when the bit falls past the last stored byte;
+    // it is zeroed so that all eight bits in it start out false.
+    if (pool_index == pool_.size()) {
+      pool_.Allocate();
+      pool_[pool_index] = '\0';
+    }
+    return size_++;
+  }
+
+ private:
+  ObjectPool<UCharType> pool_;
+  SizeType size_;
+
+  // Disallows copies.
+  BitPool(const BitPool &);
+  BitPool &operator=(const BitPool &);
+
+  // Index of the byte in pool_ that holds the given bit.
+  static SizeType PoolIndex(SizeType index) {
+    return index / 8;
+  }
+  // Mask selecting the given bit inside its byte. Takes SizeType like every
+  // other index in this class.
+  static UCharType BitFlag(SizeType index) {
+    return static_cast<UCharType>(1u << (index % 8));
+  }
+};
+
+} // namespace dawgdic
+
+#endif // DAWGDIC_BIT_POOL_H
Oops, something went wrong.

0 comments on commit 737bf7e

Please sign in to comment.