Skip to content

Commit

Permalink
More reliable removal of trailing NL characters from lines read from file
Browse files Browse the repository at this point in the history
  • Loading branch information
RobH123 committed Oct 21, 2017
1 parent d3aedba commit 23948bd
Show file tree
Hide file tree
Showing 19 changed files with 106 additions and 78 deletions.
10 changes: 5 additions & 5 deletions BCVBible.py
Expand Up @@ -28,10 +28,10 @@

from gettext import gettext as _

LastModifiedDate = '2017-10-04' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "BCVBible"
ProgName = "BCV Bible handler"
ProgVersion = '0.19'
ProgVersion = '0.20'
ProgNameVersion = '{} v{}'.format( ShortProgName, ProgVersion )
ProgNameVersionDate = '{} {} {}'.format( ProgNameVersion, _("last modified"), LastModifiedDate )

Expand Down Expand Up @@ -261,7 +261,7 @@ def loadMetadata( self, metadataFilepath ):
elif line[:3] == 'ï»¿': # 0xEF,0xBB,0xBF
logging.info( "loadMetadata2: Detected Unicode Byte Order Marker (BOM) in {}".format( metadataFilepath ) )
line = line[3:] # Remove the UTF-8 Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line = line[:-1] # Remove trailing newline character
if line and line[-1]=='\n': line = line[:-1] # Remove trailing newline character
line = line.strip() # Remove leading and trailing whitespace
if not line: continue # Just discard blank lines
lastLine = line
Expand Down Expand Up @@ -428,7 +428,7 @@ def loadBookMetadata( self, metadataFilepath ):
if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
logging.info( exp("loadBookMetadata: Detected Unicode Byte Order Marker (BOM) in {}").format( metadataFilepath ) )
line = line[1:] # Remove the Byte Order Marker (BOM)
if line[-1]=='\n': line = line[:-1] # Remove trailing newline character
if line and line[-1]=='\n': line = line[:-1] # Remove trailing newline character
line = line.strip() # Remove leading and trailing whitespace
if not line: continue # Just discard blank lines
lastLine = line
Expand Down Expand Up @@ -531,7 +531,7 @@ def doaddLine( originalMarker, originalText ):
if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
logging.info( exp("loadBCVBibleBook: Detected Unicode Byte Order Marker (BOM) in {}").format( metadataFilepath ) )
line = line[1:] # Remove the Byte Order Marker (BOM)
if line[-1]=='\n': line = line[:-1] # Remove trailing newline character
if line and line[-1]=='\n': line = line[:-1] # Remove trailing newline character
#print( CV, "line", line )
assert line and line[0]=='\\'
ixEQ = line.find( '=' )
Expand Down
10 changes: 5 additions & 5 deletions BibleOrgSysGlobals.py
Expand Up @@ -77,7 +77,7 @@

from gettext import gettext as _

LastModifiedDate = '2017-10-16' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "BOSGlobals"
ProgName = "BibleOrgSys Globals"
ProgVersion = '0.74'
Expand Down Expand Up @@ -531,7 +531,7 @@ def fileCompare( filename1, filename2, folder1=None, folder2=None, printFlag=Tru
if printFlag and verbosityLevel > 2:
print( " fileCompare: Detected Unicode Byte Order Marker (BOM) in file1" )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines
lines1.append( line )
lineCount, lines2 = 0, []
Expand All @@ -542,7 +542,7 @@ def fileCompare( filename1, filename2, folder1=None, folder2=None, printFlag=Tru
if printFlag and verbosityLevel > 2:
print( " fileCompare: Detected Unicode Byte Order Marker (BOM) in file2" )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines
lines2.append( line )

Expand Down Expand Up @@ -600,7 +600,7 @@ def fileCompareUSFM( filename1, filename2, folder1=None, folder2=None, printFlag
if printFlag and verbosityLevel > 2:
print( " fileCompare: Detected Unicode Byte Order Marker (BOM) in file1" )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines
lines1.append( line )
lineCount, lines2 = 0, []
Expand All @@ -611,7 +611,7 @@ def fileCompareUSFM( filename1, filename2, folder1=None, folder2=None, printFlag
if printFlag and verbosityLevel > 2:
print( " fileCompare: Detected Unicode Byte Order Marker (BOM) in file2" )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines
lines2.append( line )

Expand Down
6 changes: 3 additions & 3 deletions CSVBible.py
Expand Up @@ -38,10 +38,10 @@

from gettext import gettext as _

LastModifiedDate = '2017-10-04' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "CSVBible"
ProgName = "CSV Bible format handler"
ProgVersion = '0.31'
ProgVersion = '0.32'
ProgNameVersion = '{} v{}'.format( ShortProgName, ProgVersion )
ProgNameVersionDate = '{} {} {}'.format( ProgNameVersion, _("last modified"), LastModifiedDate )

Expand Down Expand Up @@ -220,7 +220,7 @@ def load( self ):
#if lineCount==1 and self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
#logging.info( " CSVBible.load: Detected Unicode Byte Order Marker (BOM)" )
#line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines
if line==' ': continue # Handle special case which has blanks on every second line -- HACK
lastLine = line
Expand Down
31 changes: 24 additions & 7 deletions CompareBibles.py
Expand Up @@ -68,10 +68,10 @@

from gettext import gettext as _

LastModifiedDate = '2017-10-04' # by RJH
LastModifiedDate = '2017-10-20' # by RJH
ShortProgName = "CompareBibles"
ProgName = "Bible compare analyzer"
ProgVersion = '0.20'
ProgVersion = '0.22'
ProgNameVersion = '{} v{}'.format( ShortProgName, ProgVersion )
ProgNameVersionDate = '{} {} {}'.format( ProgNameVersion, _("last modified"), LastModifiedDate )

Expand Down Expand Up @@ -188,7 +188,7 @@ def loadWordCompares( folder, filename ):
if lineCount==1 and line[0]==chr(65279): #U+FEFF or \ufeff
logging.info( "loadWordCompares: Detected Unicode Byte Order Marker (BOM) in {}".format( filepath ) )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines
#print ( 'SFM file line is "' + line + '"' )
if line[0]=='#': continue # Just discard comment lines
Expand Down Expand Up @@ -299,6 +299,7 @@ def compareBooksPedantic( book1, book2,
if marker1 == marker2: # ok, formats of both books match
numMismatchedMarkers = 0
if line1 or line2:
line1len, line2len = len(line1), len(line2)
for quoteChar in compareQuotes:
c1, c2 = line1.count( quoteChar ), line2.count( quoteChar )
if c1 != c2:
Expand Down Expand Up @@ -326,31 +327,47 @@ def compareBooksPedantic( book1, book2,
if ixl == -1: break
ixr = line1.find( right, ixl+2 )
if ixr == -1:
bcResults.append( (reference,"Missing second part of pair in Bible1: {!r} after {!r}".format( right, line1[max(0,ixl-4):ixl+6] )) )
contextStart, contextEnd = max(0,ixl-5), ixl+7
context = line1[contextStart:contextEnd]
if contextStart > 0 and context[0]!=' ': context = '…' + context
if contextEnd < line1len and context[-1]!=' ': context = context + '…'
bcResults.append( (reference,"Missing second part of pair in Bible1: {!r} after {!r}".format( right, context )) )
hadMatchingError1 = True
ixl = -1
while True:
ixl = line2.find( left, ixl+1 )
if ixl == -1: break
ixr = line2.find( right, ixl+2 )
if ixr == -1:
bcResults.append( (reference,"Missing second part of pair in Bible2: {!r} after {!r}".format( right, line2[max(0,ixl-4):ixl+6] )) )
contextStart, contextEnd = max(0,ixl-5), ixl+7
context = line2[contextStart:contextEnd]
if contextStart > 0 and context[0]!=' ': context = '…' + context
if contextEnd < line2len and context[-1]!=' ': context = context + '…'
bcResults.append( (reference,"Missing second part of pair in Bible2: {!r} after {!r}".format( right, context )) )
hadMatchingError2 = True
ixr = 9999
while True:
ixr = line1.rfind( right, 0, ixr )
if ixr == -1: break
ixl = line1.rfind( left, 0, ixr )
if ixl == -1:
bcResults.append( (reference,"Missing first part of pair in Bible1: {!r} before {!r}".format( left, line1[max(0,ixr-5):ixr+5] )) )
contextStart, contextEnd = max(0,ixr-6), ixr+6
context = line1[contextStart:contextEnd]
if contextStart > 0 and context[0]!=' ': context = '…' + context
if contextEnd < line1len and context[-1]!=' ': context = context + '…'
bcResults.append( (reference,"Missing first part of pair in Bible1: {!r} before {!r}".format( left, context )) )
hadMatchingError1 = True
ixr = 9999
while True:
ixr = line2.rfind( right, 0, ixr )
if ixr == -1: break
ixl = line2.rfind( left, 0, ixr )
if ixl == -1:
bcResults.append( (reference,"Missing first part of pair in Bible2: {!r} before {!r}".format( left, line2[max(0,ixr-5):ixr+5] )) )
contextStart, contextEnd = max(0,ixr-6), ixr+6
context = line2[contextStart:contextEnd]
if contextStart > 0 and context[0]!=' ': context = '…' + context
if contextEnd < line2len and context[-1]!=' ': context = context + '…'
bcResults.append( (reference,"Missing first part of pair in Bible2: {!r} before {!r}".format( left, context )) )
hadMatchingError2 = True
# The above doesn't detect ( ) ) so we do it here
if not hadMatchingError1: # already
Expand Down
6 changes: 3 additions & 3 deletions DBLBible.py
Expand Up @@ -35,7 +35,7 @@

from gettext import gettext as _

LastModifiedDate = '2017-10-04' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "DigitalBibleLibrary"
ProgName = "Digital Bible Library (DBL) XML Bible handler"
ProgVersion = '0.25'
Expand Down Expand Up @@ -1341,7 +1341,7 @@ def getStyle( element, location ):
#if lineCount==1 and line[0]==chr(65279): #U+FEFF
#logging.info( "SFMLines: Detected Unicode Byte Order Marker (BOM) in {}".format( versificationFilename ) )
#line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
#if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
#if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
#if not line: continue # Just discard blank lines
#lastLine = line
#if line[0]=='#' and not line.startswith('#!'): continue # Just discard comment lines
Expand Down Expand Up @@ -1422,7 +1422,7 @@ def getStyle( element, location ):
#if lineCount==1 and line[0]==chr(65279): #U+FEFF
#logging.info( "SFMLines: Detected Unicode Byte Order Marker (BOM) in {}".format( languageFilename ) )
#line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
#if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
#if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
#if not line: continue # Just discard blank lines
#lastLine = line
#if line[0]=='#': continue # Just discard comment lines
Expand Down
6 changes: 3 additions & 3 deletions DrupalBible.py
Expand Up @@ -74,10 +74,10 @@

from gettext import gettext as _

LastModifiedDate = '2017-10-04' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "DrupalBible"
ProgName = "DrupalBible Bible format handler"
ProgVersion = '0.12'
ProgVersion = '0.13'
ProgNameVersion = '{} v{}'.format( ShortProgName, ProgVersion )
ProgNameVersionDate = '{} {} {}'.format( ProgNameVersion, _("last modified"), LastModifiedDate )

Expand Down Expand Up @@ -249,7 +249,7 @@ def load( self ):
elif line[:3] == 'ï»¿': # 0xEF,0xBB,0xBF
logging.info( "DrupalBible.load2: Detected Unicode Byte Order Marker (BOM) in {}".format( self.sourceFilepath ) )
line = line[3:] # Remove the UTF-8 Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines

#print ( 'DB file line is "' + line + '"' )
Expand Down
6 changes: 3 additions & 3 deletions ESFMBible.py
Expand Up @@ -28,7 +28,7 @@

from gettext import gettext as _

LastModifiedDate = '2017-10-04' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "ESFMBible"
ProgName = "ESFM Bible handler"
ProgVersion = '0.60'
Expand Down Expand Up @@ -324,7 +324,7 @@ def preload( self ):
#if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
#logging.info( "ESFMBible.loadMetadata: Detected Unicode Byte Order Marker (BOM) in {}".format( ssfFilepath ) )
#line = line[1:] # Remove the Byte Order Marker (BOM)
#if line[-1]=='\n': line = line[:-1] # Remove trailing newline character
#if line and line[-1]=='\n': line = line[:-1] # Remove trailing newline character
#line = line.strip() # Remove leading and trailing whitespace
#if not line: continue # Just discard blank lines
#lastLine = line
Expand Down Expand Up @@ -595,7 +595,7 @@ def findInfo( somepath ):
if lineCount==1 and line and line[0]==chr(65279): #U+FEFF
logging.info( "ESFMBible: Detected Unicode Byte Order Marker (BOM) in copyright.htm file" )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line = line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines
lastLine = line
if line.startswith("<title>"): title = line.replace("<title>","").replace("</title>","").strip()
Expand Down
8 changes: 4 additions & 4 deletions ESFMFile.py
Expand Up @@ -5,7 +5,7 @@
#
# ESFM (Enhanced Standard Format Marker) data file reader
#
# Copyright (C) 2010-2016 Robert Hunt
# Copyright (C) 2010-2017 Robert Hunt
# Author: Robert Hunt <Freely.Given.org@gmail.com>
# License: See gpl-3.0.txt
#
Expand Down Expand Up @@ -35,7 +35,7 @@

from gettext import gettext as _

LastModifiedDate = '2016-03-10' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "ESFMFile"
ProgName = "ESFM File loader"
ProgVersion = '0.86'
Expand Down Expand Up @@ -144,7 +144,7 @@ def read( self, esfm_filename, ignoreSFMs=None ):
if lineCount==1 and line[0]==chr(65279): #U+FEFF or \ufeff
logging.info( "ESFMFile: Detected Unicode Byte Order Marker (BOM) in {}".format( esfm_filename ) )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if not line: continue # Just discard blank lines
lastLine = line
#print ( 'ESFM file line is "' + line + '"' )
Expand Down Expand Up @@ -235,4 +235,4 @@ def demo():
demo()

BibleOrgSysGlobals.closedown( ProgName, ProgVersion )
# end of ESFMFile.py
# end of ESFMFile.py
6 changes: 3 additions & 3 deletions GreekNT.py
Expand Up @@ -5,7 +5,7 @@
#
# Module handling GreekNT.xml
#
# Copyright (C) 2012-2016 Robert Hunt
# Copyright (C) 2012-2017 Robert Hunt
# Author: Robert Hunt <Freely.Given.org@gmail.com>
# License: See gpl-3.0.txt
#
Expand Down Expand Up @@ -44,7 +44,7 @@

from gettext import gettext as _

LastModifiedDate = '2016-12-06' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "GreekNTHandler"
ProgName = "Greek NT format handler"
ProgVersion = '0.07'
Expand Down Expand Up @@ -221,7 +221,7 @@ def unpackLine( line ):
if lineCount==1 and encoding.lower()=='utf-8' and line and line[0]==chr(65279): #U+FEFF
logging.info( "GreekNT: Detected Unicode Byte Order Marker (BOM) in {}".format( filename ) )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line = line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line = line[:-1] # Removing trailing newline character
#if not line: continue # Just discard blank lines
lastLine = line
#print ( 'gNT file line is "' + line + '"' )
Expand Down
6 changes: 3 additions & 3 deletions OnlineBible.py
Expand Up @@ -34,10 +34,10 @@

from gettext import gettext as _

LastModifiedDate = '2017-10-04' # by RJH
LastModifiedDate = '2017-10-19' # by RJH
ShortProgName = "OnlineBible"
ProgName = "Online Bible format handler"
ProgVersion = '0.19'
ProgVersion = '0.20'
ProgNameVersion = '{} v{}'.format( ShortProgName, ProgVersion )
ProgNameVersionDate = '{} {} {}'.format( ProgNameVersion, _("last modified"), LastModifiedDate )

Expand Down Expand Up @@ -200,7 +200,7 @@ def loadOnlineBibleMetadata():
if lineCount==1 and encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF
logging.info( "loadOnlineBibleMetadata: Detected Unicode Byte Order Marker (BOM) in {}".format( filepath ) )
line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
if line and line[-1]=='\n': line=line[:-1] # Removing trailing newline character
#if not line: continue # Just discard blank lines
lines.append( line )
lastLine = line
Expand Down

0 comments on commit 23948bd

Please sign in to comment.