Permalink
Browse files

Some more 2to3 work.

No longer needing to worry about whether Python has boolean values is one
really nice thing about making this 2.6+.
  • Loading branch information...
1 parent c8d2712 commit da0415ea951659a620cb630d06196bf7268a07d3 @sigmavirus24 sigmavirus24 committed Nov 29, 2012
Showing with 48 additions and 38 deletions.
  1. +2 −1 .gitignore
  2. +27 −16 charade/charsetgroupprober.py
  3. +11 −9 charade/charsetprober.py
  4. +6 −2 charade/codingstatemachine.py
  5. +2 −10 charade/constants.py
View
@@ -1,5 +1,6 @@
*.pyc
*.egg-info
+*.swp
PKG-INFO
/dist
-/build
+/build
@@ -1,40 +1,42 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
-#
+#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
-#
+#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
-#
+#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
-#
+#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-import constants, sys
-from charsetprober import CharSetProber
+from charade import constants
+import sys
+from .charsetprober import CharSetProber
+
class CharSetGroupProber(CharSetProber):
def __init__(self):
CharSetProber.__init__(self)
self._mActiveNum = 0
self._mProbers = []
self._mBestGuessProber = None
-
+
def reset(self):
CharSetProber.reset(self)
self._mActiveNum = 0
@@ -48,16 +50,20 @@ def reset(self):
def get_charset_name(self):
if not self._mBestGuessProber:
self.get_confidence()
- if not self._mBestGuessProber: return None
-# self._mBestGuessProber = self._mProbers[0]
+ if not self._mBestGuessProber:
+ return None
+ # self._mBestGuessProber = self._mProbers[0]
return self._mBestGuessProber.get_charset_name()
def feed(self, aBuf):
for prober in self._mProbers:
- if not prober: continue
- if not prober.active: continue
+ if not prober:
+ continue
+ if not prober.active:
+ continue
st = prober.feed(aBuf)
- if not st: continue
+ if not st:
+ continue
if st == constants.eFoundIt:
self._mBestGuessProber = prober
return self.get_state()
@@ -78,18 +84,23 @@ def get_confidence(self):
bestConf = 0.0
self._mBestGuessProber = None
for prober in self._mProbers:
- if not prober: continue
+ if not prober:
+ continue
if not prober.active:
if constants._debug:
- sys.stderr.write(prober.get_charset_name() + ' not active\n')
+ sys.stderr.write(prober.get_charset_name() + ' not '
+ 'active\n')
continue
cf = prober.get_confidence()
if constants._debug:
- sys.stderr.write('%s confidence = %s\n' % (prober.get_charset_name(), cf))
+ sys.stderr.write('%s confidence = %s\n' %
+ (prober.get_charset_name(), cf))
if bestConf < cf:
bestConf = cf
self._mBestGuessProber = prober
- if not self._mBestGuessProber: return 0.0
+
+ if not self._mBestGuessProber:
+ return 0.0
return bestConf
# else:
# self._mBestGuessProber = self._mProbers[0]
@@ -1,11 +1,11 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
-#
+#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
-#
+#
# Contributor(s):
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
@@ -14,27 +14,29 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
-#
+#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
-#
+#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-import constants, re
+from charade import constants
+import re
+
class CharSetProber:
def __init__(self):
pass
-
+
def reset(self):
self._mState = constants.eDetecting
-
+
def get_charset_name(self):
return None
@@ -50,11 +52,11 @@ def get_confidence(self):
def filter_high_bit_only(self, aBuf):
aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf)
return aBuf
-
+
def filter_without_english_letters(self, aBuf):
aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf)
return aBuf
-
+
def filter_with_english_letters(self, aBuf):
# TODO
return aBuf
@@ -25,7 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
-from constants import eStart, eError, eItsMe
+from .constants import eStart, eError
+
class CodingStateMachine:
def __init__(self, sm):
@@ -47,8 +48,11 @@ def next_state(self, c):
if self._mCurrentState == eStart:
self._mCurrentBytePos = 0
self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
+
# from byte's class and stateTable, we get its next state
- self._mCurrentState = self._mModel['stateTable'][self._mCurrentState * self._mModel['classFactor'] + byteCls]
+ next_state = (self._mCurrentState * self._mModel['classFactor'] +
+ byteCls)
+ self._mCurrentState = self._mModel['stateTable'][next_state]
self._mCurrentBytePos += 1
return self._mCurrentState
View
@@ -14,12 +14,12 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
-#
+#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
-#
+#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -37,11 +37,3 @@
eItsMe = 2
SHORTCUT_THRESHOLD = 0.95
-
-import __builtin__
-if not hasattr(__builtin__, 'False'):
- False = 0
- True = 1
-else:
- False = __builtin__.False
- True = __builtin__.True

0 comments on commit da0415e

Please sign in to comment.