Permalink
Browse files

Fixed rchardet old syntax, and stray ruby-debug require, sorry

  • Loading branch information...
1 parent 3cfc798 commit b588643e6c9e7c34bad445371307ea0e47728d4e @mikel committed Feb 7, 2010
View
@@ -1,3 +1,8 @@
+=== 1.2.7.1 / 2010-02-07
+
+* Removed a stray require of ruby-debug
+* Re-indented RChardet and replaced the old colon-terminated if/for syntax
+
=== 1.2.3.1 / 2008-04-11
* Closed #19429 - Installing TMail on Windows with the gem
View
@@ -41,7 +41,7 @@ def attachments
end
private
- require 'ruby-debug'
+
def attachment(part)
if part.multipart?
part.attachments
@@ -6,7 +6,8 @@
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
-# Contributor(s):
+# Contributor(s)
+
# Jeff Hodges
# Mark Pilgrim - port to Python
#
@@ -49,34 +50,34 @@ def reset
def feed(aStr, aCharLen)
# # """feed a character with known length"""
if aCharLen == 2
- # we only care about 2-bytes character in our distribution analysis
- order = get_order(aStr)
+ # we only care about 2-bytes character in our distribution analysis
+ order = get_order(aStr)
else
- order = -1
+ order = -1
end
if order >= 0
- @_mTotalChars += 1
- # order is valid
- if order < @_mTableSize:
- if 512 > @_mCharToFreqOrder[order]:
- @_mFreqChars += 1
- end
- end
+ @_mTotalChars += 1
+ # order is valid
+ if order < @_mTableSize
+ if 512 > @_mCharToFreqOrder[order]
+ @_mFreqChars += 1
+ end
+ end
end
end
def get_confidence
# """return confidence based on existing data"""
# if we didn't receive any character in our consideration range, return negative answer
if @_mTotalChars <= 0
- return SURE_NO
+ return SURE_NO
end
- if @_mTotalChars != @_mFreqChars:
- r = @_mFreqChars / ((@_mTotalChars - @_mFreqChars) * @_mTypicalDistributionRatio)
- if r < SURE_YES
- return r
- end
+ if @_mTotalChars != @_mFreqChars
+ r = @_mFreqChars / ((@_mTotalChars - @_mFreqChars) * @_mTypicalDistributionRatio)
+ if r < SURE_YES
+ return r
+ end
end
# normalize confidence (we don't want to be 100% sure)
@@ -111,9 +112,9 @@ def get_order(aStr)
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
if aStr[0..0] >= "\xC4"
- return 94 * (aStr[0] - 0xC4) + aStr[1] - 0xA1
+ return 94 * (aStr[0] - 0xC4) + aStr[1] - 0xA1
else
- return -1
+ return -1
end
end
end
@@ -132,9 +133,9 @@ def get_order(aStr)
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
if aStr[0..0] >= "\xB0"
- return 94 * (aStr[0] - 0xB0) + aStr[1] - 0xA1
+ return 94 * (aStr[0] - 0xB0) + aStr[1] - 0xA1
else
- return -1
+ return -1
end
end
end
@@ -153,9 +154,9 @@ def get_order(aStr)
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
if (aStr[0..0] >= "\xB0") and (aStr[1..1] >= "\xA1")
- return 94 * (aStr[0] - 0xB0) + aStr[1] - 0xA1
+ return 94 * (aStr[0] - 0xB0) + aStr[1] - 0xA1
else
- return -1
+ return -1
end
end
end
@@ -174,13 +175,13 @@ def get_order(aStr)
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
# no validation needed here. State machine has done that
if aStr[0..0] >= "\xA4"
- if aStr[1..1] >= "\xA1"
- return 157 * (aStr[0] - 0xA4) + aStr[1] - 0xA1 + 63
- else
- return 157 * (aStr[0] - 0xA4) + aStr[1] - 0x40
- end
+ if aStr[1..1] >= "\xA1"
+ return 157 * (aStr[0] - 0xA4) + aStr[1] - 0xA1 + 63
+ else
+ return 157 * (aStr[0] - 0xA4) + aStr[1] - 0x40
+ end
else
- return -1
+ return -1
end
end
end
@@ -200,15 +201,15 @@ def get_order(aStr)
# no validation needed here. State machine has done that
aStr = aStr[0..1].join if aStr.class == Array
if (aStr[0..0] >= "\x81") and (aStr[0..0] <= "\x9F")
- order = 188 * (aStr[0] - 0x81)
+ order = 188 * (aStr[0] - 0x81)
elsif (aStr[0..0] >= "\xE0") and (aStr[0..0] <= "\xEF")
- order = 188 * (aStr[0] - 0xE0 + 31)
+ order = 188 * (aStr[0] - 0xE0 + 31)
else
- return -1
+ return -1
end
order = order + aStr[1] - 0x40
if aStr[1..1] > "\x7F"
- order =- 1
+ order =- 1
end
return order
end
@@ -227,10 +228,10 @@ def get_order(aStr)
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
- if aStr[0..0] >= "\xA0":
- return 94 * (aStr[0] - 0xA1) + aStr[1] - 0xa1
+ if aStr[0..0] >= "\xA0"
+ return 94 * (aStr[0] - 0xA1) + aStr[1] - 0xa1
else
- return -1
+ return -1
end
end
end
@@ -6,7 +6,7 @@
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
-# Contributor(s):
+# Contributor(s)
# Jeff Hodges - port to Ruby
# Mark Pilgrim - port to Python
#
@@ -40,71 +40,71 @@ def reset
super
@_mActiveNum = 0
- for prober in @_mProbers:
- if prober
- prober.reset()
- prober.active = true
- @_mActiveNum += 1
- end
+ for prober in @_mProbers
+ if prober
+ prober.reset()
+ prober.active = true
+ @_mActiveNum += 1
+ end
end
@_mBestGuessProber = nil
end
def get_charset_name
if not @_mBestGuessProber
- get_confidence()
- return nil unless @_mBestGuessProber
- # self._mBestGuessProber = self._mProbers[0]
+ get_confidence()
+ return nil unless @_mBestGuessProber
+ # self._mBestGuessProber = self._mProbers[0]
end
return @_mBestGuessProber.get_charset_name()
end
def feed(aBuf)
for prober in @_mProbers
- next unless prober
- next unless prober.active
- st = prober.feed(aBuf)
- next unless st
- if st == EFoundIt
- @_mBestGuessProber = prober
- return get_state()
- elsif st == ENotMe
- prober.active = false
- @_mActiveNum -= 1
- if @_mActiveNum <= 0
- @_mState = ENotMe
- return get_state()
- end
- end
+ next unless prober
+ next unless prober.active
+ st = prober.feed(aBuf)
+ next unless st
+ if st == EFoundIt
+ @_mBestGuessProber = prober
+ return get_state()
+ elsif st == ENotMe
+ prober.active = false
+ @_mActiveNum -= 1
+ if @_mActiveNum <= 0
+ @_mState = ENotMe
+ return get_state()
+ end
+ end
end
return get_state()
end
def get_confidence()
st = get_state()
if st == EFoundIt
- return 0.99
+ return 0.99
elsif st == ENotMe
- return 0.01
+ return 0.01
end
bestConf = 0.0
@_mBestGuessProber = nil
for prober in @_mProbers
- next unless prober
- unless prober.active
- $stderr << "#{prober.get_charset_name()} not active\n" if $debug
- next
- end
- cf = prober.get_confidence()
- $stderr << "#{prober.get_charset_name} confidence = #{cf}\n" if $debug
- if bestConf < cf
- bestConf = cf
- @_mBestGuessProber = prober
- end
+ next unless prober
+ unless prober.active
+ $stderr << "#{prober.get_charset_name()} not active\n" if $debug
+ next
+ end
+ cf = prober.get_confidence()
+ $stderr << "#{prober.get_charset_name} confidence = #{cf}\n" if $debug
+ if bestConf < cf
+ bestConf = cf
+ @_mBestGuessProber = prober
+ end
end
return 0.0 unless @_mBestGuessProber
return bestConf
- # else:
+ # else
# self._mBestGuessProber = self._mProbers[0]
# return self._mBestGuessProber.get_confidence()
end
@@ -44,8 +44,8 @@ def next_state(c)
# if it is first byte, we also get byte length
byteCls = @_mModel['classTable'][c[0]]
if @_mCurrentState == EStart
- @_mCurrentBytePos = 0
- @_mCurrentCharLen = @_mModel['charLenTable'][byteCls]
+ @_mCurrentBytePos = 0
+ @_mCurrentCharLen = @_mModel['charLenTable'][byteCls]
end
# from byte's class and stateTable, we get its next state
@_mCurrentState = @_mModel['stateTable'][@_mCurrentState * @_mModel['classFactor'] + byteCls]
@@ -30,21 +30,19 @@ module CharDet
class EscCharSetProber < CharSetProber
def initialize
super()
- @_mCodingSM = [
- CodingStateMachine.new(HZSMModel),
- CodingStateMachine.new(ISO2022CNSMModel),
- CodingStateMachine.new(ISO2022JPSMModel),
- CodingStateMachine.new(ISO2022KRSMModel)
- ]
+ @_mCodingSM = [ CodingStateMachine.new(HZSMModel),
+ CodingStateMachine.new(ISO2022CNSMModel),
+ CodingStateMachine.new(ISO2022JPSMModel),
+ CodingStateMachine.new(ISO2022KRSMModel) ]
reset()
end
def reset
super()
- for codingSM in @_mCodingSM:
- next if not codingSM
- codingSM.active = true
- codingSM.reset()
+ for codingSM in @_mCodingSM
+ next if not codingSM
+ codingSM.active = true
+ codingSM.reset()
end
@_mActiveSM = @_mCodingSM.length
@_mDetectedCharset = nil
@@ -56,35 +54,36 @@ def get_charset_name
def get_confidence
if @_mDetectedCharset
- return 0.99
+ return 0.99
else
- return 0.00
+ return 0.00
end
end
def feed(aBuf)
aBuf.each_byte do |b|
- c = b.chr
- for codingSM in @_mCodingSM
- next unless codingSM
- next unless codingSM.active
- codingState = codingSM.next_state(c)
- if codingState == EError
- codingSM.active = false
- @_mActiveSM -= 1
- if @_mActiveSM <= 0
- @_mState = ENotMe
- return get_state()
- end
- elsif codingState == EItsMe
- @_mState = EFoundIt
- @_mDetectedCharset = codingSM.get_coding_state_machine()
- return get_state()
- end
- end
+ c = b.chr
+ for codingSM in @_mCodingSM
+ next unless codingSM
+ next unless codingSM.active
+ codingState = codingSM.next_state(c)
+ if codingState == EError
+ codingSM.active = false
+ @_mActiveSM -= 1
+ if @_mActiveSM <= 0
+ @_mState = ENotMe
+ return get_state()
+ end
+ elsif codingState == EItsMe
+ @_mState = EFoundIt
+ @_mDetectedCharset = codingSM.get_coding_state_machine()
+ return get_state()
+ end
+ end
end
-
return get_state()
+
end
+
end
end
Oops, something went wrong.

0 comments on commit b588643

Please sign in to comment.