Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Fixed rchardet old syntax, and stray ruby-debug require, sorry

  • Loading branch information...
commit b588643e6c9e7c34bad445371307ea0e47728d4e 1 parent 3cfc798
@mikel authored
Showing with 453 additions and 450 deletions.
  1. +5 −0 CHANGES
  2. +1 −1  lib/tmail/attachments.rb
  3. +37 −36 lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb
  4. +39 −39 lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb
  5. +2 −2 lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb
  6. +30 −31 lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb
  7. +22 −22 lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb
  8. +29 −29 lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb
  9. +31 −31 lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb
  10. +13 −13 lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb
  11. +22 −22 lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb
  12. +7 −9 lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb
  13. +36 −36 lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb
  14. +38 −38 lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb
  15. +13 −15 lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb
  16. +23 −23 lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb
  17. +79 −78 lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb
  18. +22 −22 lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb
  19. +3 −2 lib/tmail/version.rb
  20. +1 −1  tmail.gemspec
View
5 CHANGES
@@ -1,3 +1,8 @@
+=== 1.2.7.1 / 2010-02-07
+
+* Removed stray ruby-debug require from lib/tmail/attachments.rb
+* Re-indented RChardet and removed old trailing-colon syntax (e.g. "if cond:")
+
=== 1.2.3.1 / 2008-04-11
* Closed #19429 - Installing TMail on Windows with the gem
View
2  lib/tmail/attachments.rb
@@ -41,7 +41,7 @@ def attachments
end
private
- require 'ruby-debug'
+
def attachment(part)
if part.multipart?
part.attachments
View
73 lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb
@@ -6,7 +6,8 @@
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
-# Contributor(s):
+# Contributor(s)
+
# Jeff Hodges
# Mark Pilgrim - port to Python
#
@@ -49,19 +50,19 @@ def reset
def feed(aStr, aCharLen)
# # """feed a character with known length"""
if aCharLen == 2
- # we only care about 2-bytes character in our distribution analysis
- order = get_order(aStr)
+ # we only care about 2-bytes character in our distribution analysis
+ order = get_order(aStr)
else
- order = -1
+ order = -1
end
if order >= 0
- @_mTotalChars += 1
- # order is valid
- if order < @_mTableSize:
- if 512 > @_mCharToFreqOrder[order]:
- @_mFreqChars += 1
- end
- end
+ @_mTotalChars += 1
+ # order is valid
+ if order < @_mTableSize
+ if 512 > @_mCharToFreqOrder[order]
+ @_mFreqChars += 1
+ end
+ end
end
end
@@ -69,14 +70,14 @@ def get_confidence
# """return confidence based on existing data"""
# if we didn't receive any character in our consideration range, return negative answer
if @_mTotalChars <= 0
- return SURE_NO
+ return SURE_NO
end
- if @_mTotalChars != @_mFreqChars:
- r = @_mFreqChars / ((@_mTotalChars - @_mFreqChars) * @_mTypicalDistributionRatio)
- if r < SURE_YES
- return r
- end
+ if @_mTotalChars != @_mFreqChars
+ r = @_mFreqChars / ((@_mTotalChars - @_mFreqChars) * @_mTypicalDistributionRatio)
+ if r < SURE_YES
+ return r
+ end
end
# normalize confidence (we don't want to be 100% sure)
@@ -111,9 +112,9 @@ def get_order(aStr)
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
if aStr[0..0] >= "\xC4"
- return 94 * (aStr[0] - 0xC4) + aStr[1] - 0xA1
+ return 94 * (aStr[0] - 0xC4) + aStr[1] - 0xA1
else
- return -1
+ return -1
end
end
end
@@ -132,9 +133,9 @@ def get_order(aStr)
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
if aStr[0..0] >= "\xB0"
- return 94 * (aStr[0] - 0xB0) + aStr[1] - 0xA1
+ return 94 * (aStr[0] - 0xB0) + aStr[1] - 0xA1
else
- return -1
+ return -1
end
end
end
@@ -153,9 +154,9 @@ def get_order(aStr)
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
if (aStr[0..0] >= "\xB0") and (aStr[1..1] >= "\xA1")
- return 94 * (aStr[0] - 0xB0) + aStr[1] - 0xA1
+ return 94 * (aStr[0] - 0xB0) + aStr[1] - 0xA1
else
- return -1
+ return -1
end
end
end
@@ -174,13 +175,13 @@ def get_order(aStr)
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
# no validation needed here. State machine has done that
if aStr[0..0] >= "\xA4"
- if aStr[1..1] >= "\xA1"
- return 157 * (aStr[0] - 0xA4) + aStr[1] - 0xA1 + 63
- else
- return 157 * (aStr[0] - 0xA4) + aStr[1] - 0x40
- end
+ if aStr[1..1] >= "\xA1"
+ return 157 * (aStr[0] - 0xA4) + aStr[1] - 0xA1 + 63
+ else
+ return 157 * (aStr[0] - 0xA4) + aStr[1] - 0x40
+ end
else
- return -1
+ return -1
end
end
end
@@ -200,15 +201,15 @@ def get_order(aStr)
# no validation needed here. State machine has done that
aStr = aStr[0..1].join if aStr.class == Array
if (aStr[0..0] >= "\x81") and (aStr[0..0] <= "\x9F")
- order = 188 * (aStr[0] - 0x81)
+ order = 188 * (aStr[0] - 0x81)
elsif (aStr[0..0] >= "\xE0") and (aStr[0..0] <= "\xEF")
- order = 188 * (aStr[0] - 0xE0 + 31)
+ order = 188 * (aStr[0] - 0xE0 + 31)
else
- return -1
+ return -1
end
order = order + aStr[1] - 0x40
if aStr[1..1] > "\x7F"
- order =- 1
+ order =- 1
end
return order
end
@@ -227,10 +228,10 @@ def get_order(aStr)
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
# no validation needed here. State machine has done that
- if aStr[0..0] >= "\xA0":
- return 94 * (aStr[0] - 0xA1) + aStr[1] - 0xa1
+ if aStr[0..0] >= "\xA0"
+ return 94 * (aStr[0] - 0xA1) + aStr[1] - 0xa1
else
- return -1
+ return -1
end
end
end
View
78 lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb
@@ -6,7 +6,7 @@
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
-# Contributor(s):
+# Contributor(s)
# Jeff Hodges - port to Ruby
# Mark Pilgrim - port to Python
#
@@ -40,42 +40,42 @@ def reset
super
@_mActiveNum = 0
- for prober in @_mProbers:
- if prober
- prober.reset()
- prober.active = true
- @_mActiveNum += 1
- end
+ for prober in @_mProbers
+ if prober
+ prober.reset()
+ prober.active = true
+ @_mActiveNum += 1
+ end
end
@_mBestGuessProber = nil
end
def get_charset_name
if not @_mBestGuessProber
- get_confidence()
- return nil unless @_mBestGuessProber
- # self._mBestGuessProber = self._mProbers[0]
+ get_confidence()
+ return nil unless @_mBestGuessProber
+ # self._mBestGuessProber = self._mProbers[0]
end
return @_mBestGuessProber.get_charset_name()
end
def feed(aBuf)
for prober in @_mProbers
- next unless prober
- next unless prober.active
- st = prober.feed(aBuf)
- next unless st
- if st == EFoundIt
- @_mBestGuessProber = prober
- return get_state()
- elsif st == ENotMe
- prober.active = false
- @_mActiveNum -= 1
- if @_mActiveNum <= 0
- @_mState = ENotMe
- return get_state()
- end
- end
+ next unless prober
+ next unless prober.active
+ st = prober.feed(aBuf)
+ next unless st
+ if st == EFoundIt
+ @_mBestGuessProber = prober
+ return get_state()
+ elsif st == ENotMe
+ prober.active = false
+ @_mActiveNum -= 1
+ if @_mActiveNum <= 0
+ @_mState = ENotMe
+ return get_state()
+ end
+ end
end
return get_state()
end
@@ -83,28 +83,28 @@ def feed(aBuf)
def get_confidence()
st = get_state()
if st == EFoundIt
- return 0.99
+ return 0.99
elsif st == ENotMe
- return 0.01
+ return 0.01
end
bestConf = 0.0
@_mBestGuessProber = nil
for prober in @_mProbers
- next unless prober
- unless prober.active
- $stderr << "#{prober.get_charset_name()} not active\n" if $debug
- next
- end
- cf = prober.get_confidence()
- $stderr << "#{prober.get_charset_name} confidence = #{cf}\n" if $debug
- if bestConf < cf
- bestConf = cf
- @_mBestGuessProber = prober
- end
+ next unless prober
+ unless prober.active
+ $stderr << "#{prober.get_charset_name()} not active\n" if $debug
+ next
+ end
+ cf = prober.get_confidence()
+ $stderr << "#{prober.get_charset_name} confidence = #{cf}\n" if $debug
+ if bestConf < cf
+ bestConf = cf
+ @_mBestGuessProber = prober
+ end
end
return 0.0 unless @_mBestGuessProber
return bestConf
- # else:
+ # else
# self._mBestGuessProber = self._mProbers[0]
# return self._mBestGuessProber.get_confidence()
end
View
4 lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb
@@ -44,8 +44,8 @@ def next_state(c)
# if it is first byte, we also get byte length
byteCls = @_mModel['classTable'][c[0]]
if @_mCurrentState == EStart
- @_mCurrentBytePos = 0
- @_mCurrentCharLen = @_mModel['charLenTable'][byteCls]
+ @_mCurrentBytePos = 0
+ @_mCurrentCharLen = @_mModel['charLenTable'][byteCls]
end
# from byte's class and stateTable, we get its next state
@_mCurrentState = @_mModel['stateTable'][@_mCurrentState * @_mModel['classFactor'] + byteCls]
View
61 lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb
@@ -30,21 +30,19 @@ module CharDet
class EscCharSetProber < CharSetProber
def initialize
super()
- @_mCodingSM = [
- CodingStateMachine.new(HZSMModel),
- CodingStateMachine.new(ISO2022CNSMModel),
- CodingStateMachine.new(ISO2022JPSMModel),
- CodingStateMachine.new(ISO2022KRSMModel)
- ]
+ @_mCodingSM = [ CodingStateMachine.new(HZSMModel),
+ CodingStateMachine.new(ISO2022CNSMModel),
+ CodingStateMachine.new(ISO2022JPSMModel),
+ CodingStateMachine.new(ISO2022KRSMModel) ]
reset()
end
def reset
super()
- for codingSM in @_mCodingSM:
- next if not codingSM
- codingSM.active = true
- codingSM.reset()
+ for codingSM in @_mCodingSM
+ next if not codingSM
+ codingSM.active = true
+ codingSM.reset()
end
@_mActiveSM = @_mCodingSM.length
@_mDetectedCharset = nil
@@ -56,35 +54,36 @@ def get_charset_name
def get_confidence
if @_mDetectedCharset
- return 0.99
+ return 0.99
else
- return 0.00
+ return 0.00
end
end
def feed(aBuf)
aBuf.each_byte do |b|
- c = b.chr
- for codingSM in @_mCodingSM
- next unless codingSM
- next unless codingSM.active
- codingState = codingSM.next_state(c)
- if codingState == EError
- codingSM.active = false
- @_mActiveSM -= 1
- if @_mActiveSM <= 0
- @_mState = ENotMe
- return get_state()
- end
- elsif codingState == EItsMe
- @_mState = EFoundIt
- @_mDetectedCharset = codingSM.get_coding_state_machine()
- return get_state()
- end
- end
+ c = b.chr
+ for codingSM in @_mCodingSM
+ next unless codingSM
+ next unless codingSM.active
+ codingState = codingSM.next_state(c)
+ if codingState == EError
+ codingSM.active = false
+ @_mActiveSM -= 1
+ if @_mActiveSM <= 0
+ @_mState = ENotMe
+ return get_state()
+ end
+ elsif codingState == EItsMe
+ @_mState = EFoundIt
+ @_mDetectedCharset = codingSM.get_coding_state_machine()
+ return get_state()
+ end
+ end
end
-
return get_state()
+
end
+
end
end
View
44 lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb
@@ -48,33 +48,33 @@ def get_charset_name
def feed(aBuf)
aLen = aBuf.length
for i in (0...aLen)
- codingState = @_mCodingSM.next_state(aBuf[i..i])
- if codingState == EError
- $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
- @_mState = ENotMe
- break
- elsif codingState == EItsMe
- @_mState = EFoundIt
- break
- elsif codingState == EStart:
- charLen = @_mCodingSM.get_current_charlen()
- if i == 0
- @_mLastChar[1] = aBuf[0..0]
- @_mContextAnalyzer.feed(@_mLastChar, charLen)
- @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
- else
- @_mContextAnalyzer.feed(aBuf[i-1...i+1], charLen)
- @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
- end
- end
+ codingState = @_mCodingSM.next_state(aBuf[i..i])
+ if codingState == EError
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
+ @_mState = ENotMe
+ break
+ elsif codingState == EItsMe
+ @_mState = EFoundIt
+ break
+ elsif codingState == EStart
+ charLen = @_mCodingSM.get_current_charlen()
+ if i == 0
+ @_mLastChar[1] = aBuf[0..0]
+ @_mContextAnalyzer.feed(@_mLastChar, charLen)
+ @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
+ else
+ @_mContextAnalyzer.feed(aBuf[i-1...i+1], charLen)
+ @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
+ end
+ end
end
@_mLastChar[0] = aBuf[aLen-1..aLen-1]
if get_state() == EDetecting
- if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
- @_mState = EFoundIt
- end
+ if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
+ @_mState = EFoundIt
+ end
end
return get_state()
View
58 lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb
@@ -215,34 +215,34 @@ def feed(aBuf)
# so the word boundary detection works properly. [MAP]
if get_state() == ENotMe
- # Both model probers say it's not them. No reason to continue.
- return ENotMe
+ # Both model probers say it's not them. No reason to continue.
+ return ENotMe
end
aBuf = filter_high_bit_only(aBuf)
for cur in aBuf.split(' ')
- if cur == ' '
- # We stand on a space - a word just ended
- if @_mBeforePrev != ' '
- # next-to-last char was not a space so self._mPrev is not a 1 letter word
- if is_final(@_mPrev)
- # case (1) [-2:not space][-1:final letter][cur:space]
- @_mFinalCharLogicalScore += 1
- elsif is_non_final(@_mPrev)
- # case (2) [-2:not space][-1:Non-Final letter][cur:space]
- @_mFinalCharVisualScore += 1
- end
- end
- else
- # Not standing on a space
- if (@_mBeforePrev == ' ') and (is_final(@_mPrev)) and (cur != ' ')
- # case (3) [-2:space][-1:final letter][cur:not space]
- @_mFinalCharVisualScore += 1
- end
- end
- @_mBeforePrev = @_mPrev
- @_mPrev = cur
+ if cur == ' '
+ # We stand on a space - a word just ended
+ if @_mBeforePrev != ' '
+ # next-to-last char was not a space so self._mPrev is not a 1 letter word
+ if is_final(@_mPrev)
+ # case (1) [-2:not space][-1:final letter][cur:space]
+ @_mFinalCharLogicalScore += 1
+ elsif is_non_final(@_mPrev)
+ # case (2) [-2:not space][-1:Non-Final letter][cur:space]
+ @_mFinalCharVisualScore += 1
+ end
+ end
+ else
+ # Not standing on a space
+ if (@_mBeforePrev == ' ') and (is_final(@_mPrev)) and (cur != ' ')
+ # case (3) [-2:space][-1:final letter][cur:not space]
+ @_mFinalCharVisualScore += 1
+ end
+ end
+ @_mBeforePrev = @_mPrev
+ @_mPrev = cur
end
# Forever detecting, till the end or until both model probers return eNotMe (handled above)
@@ -254,24 +254,24 @@ def get_charset_name
# If the final letter score distance is dominant enough, rely on it.
finalsub = @_mFinalCharLogicalScore - @_mFinalCharVisualScore
if finalsub >= MIN_FINAL_CHAR_DISTANCE
- return LOGICAL_HEBREW_NAME
+ return LOGICAL_HEBREW_NAME
end
if finalsub <= -MIN_FINAL_CHAR_DISTANCE
- return VISUAL_HEBREW_NAME
+ return VISUAL_HEBREW_NAME
end
# It's not dominant enough, try to rely on the model scores instead.
modelsub = @_mLogicalProber.get_confidence() - @_mVisualProber.get_confidence()
if modelsub > MIN_MODEL_DISTANCE
- return LOGICAL_HEBREW_NAME
+ return LOGICAL_HEBREW_NAME
end
if modelsub < -MIN_MODEL_DISTANCE
- return VISUAL_HEBREW_NAME
+ return VISUAL_HEBREW_NAME
end
# Still no good, back to final letter distance, maybe it'll save the day.
if finalsub < 0.0
- return VISUAL_HEBREW_NAME
+ return VISUAL_HEBREW_NAME
end
# (finalsub > 0 - Logical) or (don't know what to do) default to Logical.
@@ -281,7 +281,7 @@ def get_charset_name
def get_state
# Remain active as long as any of the model probers are active.
if (@_mLogicalProber.get_state() == ENotMe) and (@_mVisualProber.get_state() == ENotMe)
- return ENotMe
+ return ENotMe
end
return EDetecting
end
View
62 lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb
@@ -144,22 +144,22 @@ def feed(aBuf, aLen)
# this character will simply our logic and improve performance.
i = @_mNeedToSkipCharNum
while i < aLen
- order, charLen = get_order(aBuf[i...i+2])
- i += charLen
- if i > aLen
- @_mNeedToSkipCharNum = i - aLen
- @_mLastCharOrder = -1
- else
- if (order != -1) and (@_mLastCharOrder != -1):
- @_mTotalRel += 1
- if @_mTotalRel > MAX_REL_THRESHOLD:
- @_mDone = true
- break
- end
- @_mRelSample[jp2CharContext[@_mLastCharOrder][order]] += 1
- end
- @_mLastCharOrder = order
- end
+ order, charLen = get_order(aBuf[i...i+2])
+ i += charLen
+ if i > aLen
+ @_mNeedToSkipCharNum = i - aLen
+ @_mLastCharOrder = -1
+ else
+ if (order != -1) and (@_mLastCharOrder != -1)
+ @_mTotalRel += 1
+ if @_mTotalRel > MAX_REL_THRESHOLD
+ @_mDone = true
+ break
+ end
+ @_mRelSample[jp2CharContext[@_mLastCharOrder][order]] += 1
+ end
+ @_mLastCharOrder = order
+ end
end
end
@@ -169,10 +169,10 @@ def got_enough_data
def get_confidence
# This is just one way to calculate confidence. It works well for me.
- if @_mTotalRel > MINIMUM_DATA_THRESHOLD:
- return (@_mTotalRel - @_mRelSample[0]) / @_mTotalRel
+ if @_mTotalRel > MINIMUM_DATA_THRESHOLD
+ return (@_mTotalRel - @_mRelSample[0]) / @_mTotalRel
else
- return DONT_KNOW
+ return DONT_KNOW
end
end
@@ -188,15 +188,15 @@ def get_order(aStr)
# find out current char's byte length
aStr = aStr[0..1].join if aStr.class == Array
if ((aStr[0..0] >= "\x81") and (aStr[0..0] <= "\x9F")) or ((aStr[0..0] >= "\xE0") and (aStr[0..0] <= "\xFC"))
- charLen = 2
+ charLen = 2
else
- charLen = 1
+ charLen = 1
end
# return its order if it is hiragana
if aStr.length > 1
- if (aStr[0..0] == "\202") and (aStr[1..1] >= "\x9F") and (aStr[1..1] <= "\xF1")
- return aStr[1] - 0x9F, charLen
- end
+ if (aStr[0..0] == "\202") and (aStr[1..1] >= "\x9F") and (aStr[1..1] <= "\xF1")
+ return aStr[1] - 0x9F, charLen
+ end
end
return -1, charLen
@@ -208,19 +208,19 @@ def get_order(aStr)
return -1, 1 unless aStr
# find out current char's byte length
aStr = aStr[0..1].join if aStr.class == Array
- if (aStr[0..0] == "\x8E") or ((aStr[0..0] >= "\xA1") and (aStr[0..0] <= "\xFE")):
- charLen = 2
+ if (aStr[0..0] == "\x8E") or ((aStr[0..0] >= "\xA1") and (aStr[0..0] <= "\xFE"))
+ charLen = 2
elsif aStr[0..0] == "\x8F"
- charLen = 3
+ charLen = 3
else
- charLen = 1
+ charLen = 1
end
# return its order if it is hiragana
if aStr.length > 1
- if (aStr[0..0] == "\xA4") and (aStr[1..1] >= "\xA1") and (aStr[1..1] <= "\xF3")
- return aStr[1] - 0xA1, charLen
- end
+ if (aStr[0..0] == "\xA4") and (aStr[1..1] >= "\xA1") and (aStr[1..1] <= "\xF3")
+ return aStr[1] - 0xA1, charLen
+ end
end
return -1, charLen
View
26 lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb
@@ -110,15 +110,15 @@ def get_charset_name
def feed(aBuf)
aBuf = filter_with_english_letters(aBuf)
aBuf.each_byte do |b|
- c = b.chr
- charClass = Latin1_CharToClass[c[0]]
- freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + charClass]
- if freq == 0
- @_mState = ENotMe
- break
- end
- @_mFreqCounter[freq] += 1
- @_mLastCharClass = charClass
+ c = b.chr
+ charClass = Latin1_CharToClass[c[0]]
+ freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + charClass]
+ if freq == 0
+ @_mState = ENotMe
+ break
+ end
+ @_mFreqCounter[freq] += 1
+ @_mLastCharClass = charClass
end
return get_state()
@@ -126,17 +126,17 @@ def feed(aBuf)
def get_confidence
if get_state() == ENotMe
- return 0.01
+ return 0.01
end
total = @_mFreqCounter.inject{|a,b| a+b}
if total < 0.01
- confidence = 0.0
+ confidence = 0.0
else
- confidence = (@_mFreqCounter[3] / total) - (@_mFreqCounter[1] * 20.0 / total)
+ confidence = (@_mFreqCounter[3] / total) - (@_mFreqCounter[1] * 20.0 / total)
end
if confidence < 0.0
- confidence = 0.0
+ confidence = 0.0
end
# lower the confidence of latin1 so that other more accurate detector
# can take priority.
View
44 lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb
@@ -40,10 +40,10 @@ def initialize
def reset
super
if @_mCodingSM
- @_mCodingSM.reset()
+ @_mCodingSM.reset()
end
if @_mDistributionAnalyzer
- @_mDistributionAnalyzer.reset()
+ @_mDistributionAnalyzer.reset()
end
@_mLastChar = "\x00\x00"
end
@@ -54,30 +54,30 @@ def get_charset_name
def feed(aBuf)
aLen = aBuf.length
for i in (0...aLen)
- codingState = @_mCodingSM.next_state(aBuf[i..i])
- if codingState == EError
- $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
- @_mState = ENotMe
- break
- elsif codingState == EItsMe
- @_mState = EFoundIt
- break
- elsif codingState == EStart
- charLen = @_mCodingSM.get_current_charlen()
- if i == 0
- @_mLastChar[1] = aBuf[0..0]
- @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
- else
- @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
- end
- end
+ codingState = @_mCodingSM.next_state(aBuf[i..i])
+ if codingState == EError
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
+ @_mState = ENotMe
+ break
+ elsif codingState == EItsMe
+ @_mState = EFoundIt
+ break
+ elsif codingState == EStart
+ charLen = @_mCodingSM.get_current_charlen()
+ if i == 0
+ @_mLastChar[1] = aBuf[0..0]
+ @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
+ else
+ @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
+ end
+ end
end
@_mLastChar[0] = aBuf[aLen-1..aLen-1]
if get_state() == EDetecting
- if @_mDistributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
- @_mState = EFoundIt
- end
+ if @_mDistributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
+ @_mState = EFoundIt
+ end
end
return get_state()
end
View
16 lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb
@@ -32,15 +32,13 @@ module CharDet
class MBCSGroupProber < CharSetGroupProber
def initialize
super
- @_mProbers = [
- UTF8Prober.new,
- SJISProber.new,
- EUCJPProber.new,
- GB2312Prober.new,
- EUCKRProber.new,
- Big5Prober.new,
- EUCTWProber.new
- ]
+ @_mProbers = [ UTF8Prober.new,
+ SJISProber.new,
+ EUCJPProber.new,
+ GB2312Prober.new,
+ EUCKRProber.new,
+ Big5Prober.new,
+ EUCTWProber.new ]
reset()
end
end
View
72 lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb
@@ -73,10 +73,10 @@ module CharDet
Big5CharLenTable = [0, 1, 1, 2, 0]
Big5SMModel = {'classTable' => BIG5_cls,
- 'classFactor' => 5,
- 'stateTable' => BIG5_st,
- 'charLenTable' => Big5CharLenTable,
- 'name' => 'Big5'
+ 'classFactor' => 5,
+ 'stateTable' => BIG5_st,
+ 'charLenTable' => Big5CharLenTable,
+ 'name' => 'Big5'
}
# EUC-JP
@@ -127,10 +127,10 @@ module CharDet
EUCJPCharLenTable = [2, 2, 2, 3, 1, 0]
EUCJPSMModel = {'classTable' => EUCJP_cls,
- 'classFactor' => 6,
- 'stateTable' => EUCJP_st,
- 'charLenTable' => EUCJPCharLenTable,
- 'name' => 'EUC-JP'
+ 'classFactor' => 6,
+ 'stateTable' => EUCJP_st,
+ 'charLenTable' => EUCJPCharLenTable,
+ 'name' => 'EUC-JP'
}
# EUC-KR
@@ -178,10 +178,10 @@ module CharDet
EUCKRCharLenTable = [0, 1, 2, 0]
EUCKRSMModel = {'classTable' => EUCKR_cls,
- 'classFactor' => 4,
- 'stateTable' => EUCKR_st,
- 'charLenTable' => EUCKRCharLenTable,
- 'name' => 'EUC-KR'
+ 'classFactor' => 4,
+ 'stateTable' => EUCKR_st,
+ 'charLenTable' => EUCKRCharLenTable,
+ 'name' => 'EUC-KR'
}
# EUC-TW
@@ -233,10 +233,10 @@ module CharDet
EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3]
EUCTWSMModel = {'classTable' => EUCTW_cls,
- 'classFactor' => 7,
- 'stateTable' => EUCTW_st,
- 'charLenTable' => EUCTWCharLenTable,
- 'name' => 'x-euc-tw'
+ 'classFactor' => 7,
+ 'stateTable' => EUCTW_st,
+ 'charLenTable' => EUCTWCharLenTable,
+ 'name' => 'x-euc-tw'
}
# GB2312
@@ -293,10 +293,10 @@ module CharDet
GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2]
GB2312SMModel = {'classTable' => GB2312_cls,
- 'classFactor' => 7,
- 'stateTable' => GB2312_st,
- 'charLenTable' => GB2312CharLenTable,
- 'name' => 'GB2312'
+ 'classFactor' => 7,
+ 'stateTable' => GB2312_st,
+ 'charLenTable' => GB2312CharLenTable,
+ 'name' => 'GB2312'
}
# Shift_JIS
@@ -347,10 +347,10 @@ module CharDet
SJISCharLenTable = [0, 1, 1, 2, 0, 0]
SJISSMModel = {'classTable' => SJIS_cls,
- 'classFactor' => 6,
- 'stateTable' => SJIS_st,
- 'charLenTable' => SJISCharLenTable,
- 'name' => 'Shift_JIS'
+ 'classFactor' => 6,
+ 'stateTable' => SJIS_st,
+ 'charLenTable' => SJISCharLenTable,
+ 'name' => 'Shift_JIS'
}
# UCS2-BE
@@ -403,10 +403,10 @@ module CharDet
UCS2BECharLenTable = [2, 2, 2, 0, 2, 2]
UCS2BESMModel = {'classTable' => UCS2BE_cls,
- 'classFactor' => 6,
- 'stateTable' => UCS2BE_st,
- 'charLenTable' => UCS2BECharLenTable,
- 'name' => 'UTF-16BE'
+ 'classFactor' => 6,
+ 'stateTable' => UCS2BE_st,
+ 'charLenTable' => UCS2BECharLenTable,
+ 'name' => 'UTF-16BE'
}
# UCS2-LE
@@ -459,10 +459,10 @@ module CharDet
UCS2LECharLenTable = [2, 2, 2, 2, 2, 2]
UCS2LESMModel = {'classTable' => UCS2LE_cls,
- 'classFactor' => 6,
- 'stateTable' => UCS2LE_st,
- 'charLenTable' => UCS2LECharLenTable,
- 'name' => 'UTF-16LE'
+ 'classFactor' => 6,
+ 'stateTable' => UCS2LE_st,
+ 'charLenTable' => UCS2LECharLenTable,
+ 'name' => 'UTF-16LE'
}
# UTF-8
@@ -534,9 +534,9 @@ module CharDet
UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6]
UTF8SMModel = {'classTable' => UTF8_cls,
- 'classFactor' => 16,
- 'stateTable' => UTF8_st,
- 'charLenTable' => UTF8CharLenTable,
- 'name' => 'UTF-8'
+ 'classFactor' => 16,
+ 'stateTable' => UTF8_st,
+ 'charLenTable' => UTF8CharLenTable,
+ 'name' => 'UTF-8'
}
end
View
76 lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb
@@ -57,51 +57,51 @@ def reset
def get_charset_name
if @_mNameProber
- return @_mNameProber.get_charset_name()
+ return @_mNameProber.get_charset_name()
else
- return @_mModel['charsetName']
+ return @_mModel['charsetName']
end
end
def feed(aBuf)
if not @_mModel['keepEnglishLetter']
- aBuf = filter_without_english_letters(aBuf)
+ aBuf = filter_without_english_letters(aBuf)
end
aLen = aBuf.length
if not aLen
- return get_state()
+ return get_state()
end
aBuf.each_byte do |b|
- c = b.chr
- order = @_mModel['charToOrderMap'][c[0]]
- if order < SYMBOL_CAT_ORDER
- @_mTotalChar += 1
- end
- if order < SAMPLE_SIZE
- @_mFreqChar += 1
- if @_mLastOrder < SAMPLE_SIZE
- @_mTotalSeqs += 1
- if not @_mReversed
- @_mSeqCounters[@_mModel['precedenceMatrix'][(@_mLastOrder * SAMPLE_SIZE) + order]] += 1
- else # reverse the order of the letters in the lookup
- @_mSeqCounters[@_mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + @_mLastOrder]] += 1
- end
- end
- end
- @_mLastOrder = order
+ c = b.chr
+ order = @_mModel['charToOrderMap'][c[0]]
+ if order < SYMBOL_CAT_ORDER
+ @_mTotalChar += 1
+ end
+ if order < SAMPLE_SIZE
+ @_mFreqChar += 1
+ if @_mLastOrder < SAMPLE_SIZE
+ @_mTotalSeqs += 1
+ if not @_mReversed
+ @_mSeqCounters[@_mModel['precedenceMatrix'][(@_mLastOrder * SAMPLE_SIZE) + order]] += 1
+ else # reverse the order of the letters in the lookup
+ @_mSeqCounters[@_mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + @_mLastOrder]] += 1
+ end
+ end
+ end
+ @_mLastOrder = order
end
if get_state() == EDetecting
- if @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
- cf = get_confidence()
- if cf > POSITIVE_SHORTCUT_THRESHOLD
- $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
- @_mState = EFoundIt
- elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
- $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
- @_mState = ENotMe
- end
- end
+ if @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
+ cf = get_confidence()
+ if cf > POSITIVE_SHORTCUT_THRESHOLD
+ $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
+ @_mState = EFoundIt
+ elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
+ $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
+ @_mState = ENotMe
+ end
+ end
end
return get_state()
@@ -110,13 +110,13 @@ def feed(aBuf)
def get_confidence
r = 0.01
if @_mTotalSeqs > 0
- # print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
- r = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs / @_mModel['mTypicalPositiveRatio']
- # print r, self._mFreqChar, self._mTotalChar
- r = r * @_mFreqChar / @_mTotalChar
- if r >= 1.0
- r = 0.99
- end
+ # print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
+ r = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs / @_mModel['mTypicalPositiveRatio']
+ # print r, self._mFreqChar, self._mTotalChar
+ r = r * @_mFreqChar / @_mTotalChar
+ if r >= 1.0
+ r = 0.99
+ end
end
return r
end
View
28 lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb
@@ -31,21 +31,19 @@ module CharDet
class SBCSGroupProber < CharSetGroupProber
def initialize
super
- @_mProbers = [
- SingleByteCharSetProber.new(Win1251CyrillicModel),
- SingleByteCharSetProber.new(Koi8rModel),
- SingleByteCharSetProber.new(Latin5CyrillicModel),
- SingleByteCharSetProber.new(MacCyrillicModel),
- SingleByteCharSetProber.new(Ibm866Model),
- SingleByteCharSetProber.new(Ibm855Model),
- SingleByteCharSetProber.new(Latin7GreekModel),
- SingleByteCharSetProber.new(Win1253GreekModel),
- SingleByteCharSetProber.new(Latin5BulgarianModel),
- SingleByteCharSetProber.new(Win1251BulgarianModel),
- SingleByteCharSetProber.new(Latin2HungarianModel),
- SingleByteCharSetProber.new(Win1250HungarianModel),
- SingleByteCharSetProber.new(TIS620ThaiModel),
- ]
+ @_mProbers = [ SingleByteCharSetProber.new(Win1251CyrillicModel),
+ SingleByteCharSetProber.new(Koi8rModel),
+ SingleByteCharSetProber.new(Latin5CyrillicModel),
+ SingleByteCharSetProber.new(MacCyrillicModel),
+ SingleByteCharSetProber.new(Ibm866Model),
+ SingleByteCharSetProber.new(Ibm855Model),
+ SingleByteCharSetProber.new(Latin7GreekModel),
+ SingleByteCharSetProber.new(Win1253GreekModel),
+ SingleByteCharSetProber.new(Latin5BulgarianModel),
+ SingleByteCharSetProber.new(Win1251BulgarianModel),
+ SingleByteCharSetProber.new(Latin2HungarianModel),
+ SingleByteCharSetProber.new(Win1250HungarianModel),
+ SingleByteCharSetProber.new(TIS620ThaiModel) ]
hebrewProber = HebrewProber.new()
logicalHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrewProber)
visualHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, true, hebrewProber)
View
46 lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb
@@ -48,33 +48,33 @@ def get_charset_name
def feed(aBuf)
aLen = aBuf.length
for i in (0...aLen)
- codingState = @_mCodingSM.next_state(aBuf[i..i])
- if codingState == EError
- $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
- @_mState = ENotMe
- break
- elsif codingState == EItsMe
- @_mState = EFoundIt
- break
- elsif codingState == EStart
- charLen = @_mCodingSM.get_current_charlen()
- if i == 0
- @_mLastChar[1] = aBuf[0..0]
- @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
- @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
- else
- @_mContextAnalyzer.feed(aBuf[i + 1 - charLen ... i + 3 - charLen], charLen)
- @_mDistributionAnalyzer.feed(aBuf[i - 1 ... i + 1], charLen)
- end
- end
+ codingState = @_mCodingSM.next_state(aBuf[i..i])
+ if codingState == EError
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
+ @_mState = ENotMe
+ break
+ elsif codingState == EItsMe
+ @_mState = EFoundIt
+ break
+ elsif codingState == EStart
+ charLen = @_mCodingSM.get_current_charlen()
+ if i == 0
+ @_mLastChar[1] = aBuf[0..0]
+ @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
+ @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
+ else
+ @_mContextAnalyzer.feed(aBuf[i + 1 - charLen ... i + 3 - charLen], charLen)
+ @_mDistributionAnalyzer.feed(aBuf[i - 1 ... i + 1], charLen)
+ end
+ end
end
@_mLastChar[0] = aBuf[aLen - 1.. aLen-1]
- if get_state() == EDetecting:
- if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
- @_mState = EFoundIt
- end
+ if get_state() == EDetecting
+ if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
+ @_mState = EFoundIt
+ end
end
return get_state()
View
157 lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb
@@ -51,10 +51,10 @@ def reset
@_mInputState = EPureAscii
@_mLastChar = ''
if @_mEscCharSetProber
- @_mEscCharSetProber.reset()
+ @_mEscCharSetProber.reset()
end
for prober in @_mCharSetProbers
- prober.reset()
+ prober.reset()
end
end
@@ -65,101 +65,102 @@ def feed(aBuf)
return if not aLen
if not @_mGotData
- # If the data starts with BOM, we know it is UTF
- if aBuf[0...3] == "\xEF\xBB\xBF"
- # EF BB BF UTF-8 with BOM
- @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
- elsif aBuf[0...4] == "\xFF\xFE\x00\x00"
- # FF FE 00 00 UTF-32, little-endian BOM
- @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
- elsif aBuf[0...4] == "\x00\x00\xFE\xFF"
- # 00 00 FE FF UTF-32, big-endian BOM
- @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
- elsif aBuf[0...4] == "\xFE\xFF\x00\x00"
- # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
- @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
- elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
- # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
- @result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
- elsif aBuf[0...2] == "\xFF\xFE"
- # FF FE UTF-16, little endian BOM
- @result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
- elsif aBuf[0...2] == "\xFE\xFF"
- # FE FF UTF-16, big endian BOM
- @result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
- end
+ # If the data starts with BOM, we know it is UTF
+ if aBuf[0...3] == "\xEF\xBB\xBF"
+ # EF BB BF UTF-8 with BOM
+ @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
+ elsif aBuf[0...4] == "\xFF\xFE\x00\x00"
+ # FF FE 00 00 UTF-32, little-endian BOM
+ @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
+ elsif aBuf[0...4] == "\x00\x00\xFE\xFF"
+ # 00 00 FE FF UTF-32, big-endian BOM
+ @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
+ elsif aBuf[0...4] == "\xFE\xFF\x00\x00"
+ # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
+ @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
+ elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
+ # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
+ @result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
+ elsif aBuf[0...2] == "\xFF\xFE"
+ # FF FE UTF-16, little endian BOM
+ @result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
+ elsif aBuf[0...2] == "\xFE\xFF"
+ # FE FF UTF-16, big endian BOM
+ @result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
+ end
end
-
+
@_mGotData = true
- if @result['encoding'] and (@result['confidence'] > 0.0):
- @done = true
- return
+ if @result['encoding'] and (@result['confidence'] > 0.0)
+ @done = true
+ return
end
- if @_mInputState == EPureAscii:
- if @_highBitDetector =~ (aBuf)
- @_mInputState = EHighbyte
- elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
- @_mInputState = EEscAscii
- end
+
+ if @_mInputState == EPureAscii
+ if @_highBitDetector =~ (aBuf)
+ @_mInputState = EHighbyte
+ elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
+ @_mInputState = EEscAscii
+ end
end
-
+
@_mLastChar = aBuf[-1..-1]
if @_mInputState == EEscAscii
- if not @_mEscCharSetProber
- @_mEscCharSetProber = EscCharSetProber.new()
- end
- if @_mEscCharSetProber.feed(aBuf) == EFoundIt
- @result = {'encoding' => self._mEscCharSetProber.get_charset_name(),
- 'confidence' => @_mEscCharSetProber.get_confidence()
- }
- @done = true
- end
+ if not @_mEscCharSetProber
+ @_mEscCharSetProber = EscCharSetProber.new()
+ end
+ if @_mEscCharSetProber.feed(aBuf) == EFoundIt
+ @result = {'encoding' => self._mEscCharSetProber.get_charset_name(),
+ 'confidence' => @_mEscCharSetProber.get_confidence()
+ }
+ @done = true
+ end
elsif @_mInputState == EHighbyte
- if not @_mCharSetProbers or @_mCharSetProbers.empty?
- @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
- end
- for prober in @_mCharSetProbers
- if prober.feed(aBuf) == EFoundIt
- @result = {'encoding' => prober.get_charset_name(),
- 'confidence' => prober.get_confidence()}
- @done = true
- break
- end
- end
+ if not @_mCharSetProbers or @_mCharSetProbers.empty?
+ @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
+ end
+ for prober in @_mCharSetProbers
+ if prober.feed(aBuf) == EFoundIt
+ @result = {'encoding' => prober.get_charset_name(),
+ 'confidence' => prober.get_confidence()}
+ @done = true
+ break
+ end
+ end
end
-
+
end
-
+
def close
return if @done
if not @_mGotData
- $stderr << "no data received!\n" if $debug
- return
+ $stderr << "no data received!\n" if $debug
+ return
end
@done = true
-
- if @_mInputState == EPureAscii:
- @result = {'encoding' => 'ascii', 'confidence' => 1.0}
- return @result
+
+ if @_mInputState == EPureAscii
+ @result = {'encoding' => 'ascii', 'confidence' => 1.0}
+ return @result
end
-
- if @_mInputState == EHighbyte:
- confidences = {}
+
+ if @_mInputState == EHighbyte
+ confidences = {}
@_mCharSetProbers.each{ |prober| confidences[prober] = prober.get_confidence }
- maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
- if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
- @result = {'encoding' => maxProber.get_charset_name(),
- 'confidence' => maxProber.get_confidence()}
- return @result
- end
+ maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
+ if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
+ @result = {'encoding' => maxProber.get_charset_name(),
+ 'confidence' => maxProber.get_confidence()}
+ return @result
+ end
end
if $debug
- $stderr << "no probers hit minimum threshhold\n" if $debug
- for prober in @_mCharSetProbers[0]._mProbers
- next if not prober
- $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
- end
+ $stderr << "no probers hit minimum threshhold\n" if $debug
+ for prober in @_mCharSetProbers[0]._mProbers
+ next if not prober
+ $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
+ end
end
end
end
View
44 lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb
@@ -48,25 +48,25 @@ def get_charset_name
def feed(aBuf)
aBuf.each_byte do |b|
- c = b.chr
- codingState = @_mCodingSM.next_state(c)
- if codingState == EError
- @_mState = ENotMe
- break
- elsif codingState == EItsMe
- @_mState = EFoundIt
- break
- elsif codingState == EStart
- if @_mCodingSM.get_current_charlen() >= 2
- @_mNumOfMBChar += 1
- end
- end
+ c = b.chr
+ codingState = @_mCodingSM.next_state(c)
+ if codingState == EError
+ @_mState = ENotMe
+ break
+ elsif codingState == EItsMe
+ @_mState = EFoundIt
+ break
+ elsif codingState == EStart
+ if @_mCodingSM.get_current_charlen() >= 2
+ @_mNumOfMBChar += 1
+ end
+ end
end
- if get_state() == EDetecting:
- if get_confidence() > SHORTCUT_THRESHOLD
- @_mState = EFoundIt
- end
+ if get_state() == EDetecting
+ if get_confidence() > SHORTCUT_THRESHOLD
+ @_mState = EFoundIt
+ end
end
return get_state()
@@ -75,12 +75,12 @@ def feed(aBuf)
def get_confidence
unlike = 0.99
if @_mNumOfMBChar < 6
- for i in (0...@_mNumOfMBChar)
- unlike = unlike * ONE_CHAR_PROB
- end
- return 1.0 - unlike
+ for i in (0...@_mNumOfMBChar)
+ unlike = unlike * ONE_CHAR_PROB
+ end
+ return 1.0 - unlike
else
- return unlike
+ return unlike
end
end
end
View
5 lib/tmail/version.rb
@@ -32,8 +32,9 @@ module TMail
module VERSION
MAJOR = 1
MINOR = 2
- TINY = 6
+ TINY = 7
+ MICRO = 1
- STRING = [MAJOR, MINOR, TINY].join('.')
+ STRING = [MAJOR, MINOR, TINY, MICRO].join('.')
end
end
View
2  tmail.gemspec
@@ -2,7 +2,7 @@
Gem::Specification.new do |s|
s.name = %q{tmail}
- s.version = "1.2.6"
+ s.version = "1.2.7.1"
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Mikel Lindsaar <raasdnil AT gmail.com>"]
Please sign in to comment.
Something went wrong with that request. Please try again.