Skip to content

Commit

Permalink
Whitespace cleanup.
Browse files Browse the repository at this point in the history
  • Loading branch information
shoe committed Sep 22, 2011
1 parent 2d188be commit 4a93d95
Show file tree
Hide file tree
Showing 6 changed files with 239 additions and 240 deletions.
20 changes: 10 additions & 10 deletions EPFConfig.json
@@ -1,16 +1,16 @@
{
"fieldSep": "\u0001",
"recordSep": "\u0002\n",
"dbHost": "localhost",
"dbName": "epf",
"dbUser": "epfimporter",
"dbPassword": "epf123",
"tablePrefix": "epf",
"allowExtensions": false,
"fieldSep": "\u0001",
"recordSep": "\u0002\n",
"dbHost": "localhost",
"dbName": "epf",
"dbUser": "epfimporter",
"dbPassword": "epf123",
"tablePrefix": "epf",
"allowExtensions": false,
"blackList": [
"^\\."
],
],
"whiteList": [
".*?"
]
}
}
20 changes: 10 additions & 10 deletions EPFFlatConfig.json
@@ -1,16 +1,16 @@
{
"fieldSep": "\t",
"recordSep": "\n",
"dbHost": "localhost",
"dbName": "epf",
"dbUser": "epfimporter",
"dbPassword": "epf123",
"tablePrefix": "epfflat",
"allowExtensions": true,
"fieldSep": "\t",
"recordSep": "\n",
"dbHost": "localhost",
"dbName": "epf",
"dbUser": "epfimporter",
"dbPassword": "epf123",
"tablePrefix": "epfflat",
"allowExtensions": true,
"blackList": [
"^\\."
],
],
"whiteList": [
".*?"
]
}
}
153 changes: 76 additions & 77 deletions EPFImporter.py
Expand Up @@ -2,39 +2,39 @@

# Copyright (c) 2010 Apple Inc. All rights reserved.

# IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") in
# consideration of your agreement to the following terms, and your use,
# installation, modification or redistribution of this Apple software
# constitutes acceptance of these terms. If you do not agree with these terms,
# IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") in
# consideration of your agreement to the following terms, and your use,
# installation, modification or redistribution of this Apple software
# constitutes acceptance of these terms. If you do not agree with these terms,
# please do not use, install, modify or redistribute this Apple software.

# In consideration of your agreement to abide by the following terms, and subject
# to these terms, Apple grants you a personal, non-exclusive license, under Apple's
# copyrights in this original Apple software (the "Apple Software"), to use,
# reproduce, modify and redistribute the Apple Software, with or without
# modifications, in source and/or binary forms; provided that if you redistribute
# the Apple Software in its entirety and without modifications, you must retain
# this notice and the following text and disclaimers in all such redistributions
# of the Apple Software. Neither the name, trademarks, service marks or logos of
# Apple Inc. may be used to endorse or promote products derived from the Apple
# Software without specific prior written permission from Apple. Except as
# expressly stated in this notice, no other rights or licenses, express or implied,
# are granted by Apple herein, including but not limited to any patent rights that
# may be infringed by your derivative works or by other works in which the Apple
# In consideration of your agreement to abide by the following terms, and subject
# to these terms, Apple grants you a personal, non-exclusive license, under Apple's
# copyrights in this original Apple software (the "Apple Software"), to use,
# reproduce, modify and redistribute the Apple Software, with or without
# modifications, in source and/or binary forms; provided that if you redistribute
# the Apple Software in its entirety and without modifications, you must retain
# this notice and the following text and disclaimers in all such redistributions
# of the Apple Software. Neither the name, trademarks, service marks or logos of
# Apple Inc. may be used to endorse or promote products derived from the Apple
# Software without specific prior written permission from Apple. Except as
# expressly stated in this notice, no other rights or licenses, express or implied,
# are granted by Apple herein, including but not limited to any patent rights that
# may be infringed by your derivative works or by other works in which the Apple
# Software may be incorporated.

# The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
# WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
# WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE OR IN
# COMBINATION WITH YOUR PRODUCTS.

# IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION
# OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT
# (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN
# The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO
# WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
# WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION ALONE OR IN
# COMBINATION WITH YOUR PRODUCTS.

# IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION
# OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, TORT
# (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import EPFIngester
Expand Down Expand Up @@ -67,10 +67,10 @@
# INCREMENTAL_STATUS_PATH = "./EPFStatusIncremental.json"
# FULL_STATUS_DICT = {"tablePrefix":None, "dirsToImport":[], "dirsLeft":[], "currentDict":{}}
# INCREMENTAL_STATUS_DICT = {"tablePrefix":None, "dirsToImport":[], "dirsLeft":[], "currentDict":{}}
#
#
# STATUS_MAP = {"full":(FULL_STATUS_DICT, FULL_STATUS_PATH),
# "incremental":(INCREMENTAL_STATUS_DICT, INCREMENTAL_STATUS_PATH)}


#Create a directory for rotating logs
try:
Expand Down Expand Up @@ -130,34 +130,34 @@ def doImport(directoryPath,
fieldDelim='\x01'):
"""
Perform a full import of the EPF files in the directory specified by directoryPath.
importMode can be 'full' or 'incremental'
whiteList is a sequence of regular expressions. Only files whose basenames (i.e., the last
element in the path) match one or more of the regexes in whiteList will be imported. For
example, whiteList=[".*song.*", ".*video.*"] would result in all files containing "song" or
whiteList is a sequence of regular expressions. Only files whose basenames (i.e., the last
element in the path) match one or more of the regexes in whiteList will be imported. For
example, whiteList=[".*song.*", ".*video.*"] would result in all files containing "song" or
"video" anywhere in the filename being imported, and the rest being ignored. To import only
exact matches, precede the name with a caret (^) and follow it with a dollar sign ($), e.g.
"^video$".
The default is for all files to be whitelisted.
blackList works similarly; any filenames matching any of the items in blackList will be
excluded from the import, even if they are matched in whiteList. By default, any filename
with a dot (".") in it will be excluded. Since EPF filenames never include a dot, this permits
blackList works similarly; any filenames matching any of the items in blackList will be
excluded from the import, even if they are matched in whiteList. By default, any filename
with a dot (".") in it will be excluded. Since EPF filenames never include a dot, this permits
placing any file with an extension (e.g., .txt) in the directory without disrupting the import.
Returns a list of any files for which the import failed (empty if all succeeded)
"""
"""
#Exclude files with a dot (for example, the invisible .DS_Store files that Mac OS X creates in directories)
if not allowExtensions:
blackList.append(r'.*\..*?')
blackList.append(r'.*\..*?')

wListRe = (r"|".join(whiteList) if whiteList else r"$a^") #The latter can never match anything
bListRe = (r"|".join(blackList) if blackList else r"$a^") #The latter can never match anything
wMatcher = re.compile(wListRe)
bMatcher = re.compile(bListRe)

dirPath = os.path.abspath(directoryPath)
fileList = os.listdir(dirPath)
#filter the list down to the entries matching our whitelist/blacklist
Expand All @@ -166,7 +166,7 @@ def doImport(directoryPath,
filesLeft = copy.copy(fileList)
filesImported = []
failedFiles = []

SNAPSHOT_DICT['tablePrefix'] = tablePrefix
SNAPSHOT_DICT['wList'] = whiteList
SNAPSHOT_DICT['bList'] = blackList
Expand All @@ -175,7 +175,7 @@ def doImport(directoryPath,
SNAPSHOT_DICT['dirsLeft'].remove(dirPath)
except ValueError:
pass

currentDict = SNAPSHOT_DICT['currentDict']
currentDict['recordSep'] = recordDelim
currentDict['fieldSep'] = fieldDelim
Expand All @@ -184,11 +184,11 @@ def doImport(directoryPath,
currentDict['filesLeft'] = filesLeft
currentDict['filesImported'] = filesImported
currentDict['failedFiles'] = failedFiles


_dumpDict(SNAPSHOT_DICT, SNAPSHOT_PATH)
pathList = [os.path.join(dirPath, fileName) for fileName in fileList]

startTime = datetime.datetime.now()
LOGGER.info("Starting import of %s...", dirPath)
for aPath in pathList:
Expand All @@ -199,8 +199,8 @@ def doImport(directoryPath,
EPFIngester.__warningregistry__.clear()
except AttributeError:
pass
try:

try:
ing = EPFIngester.Ingester(aPath,
tablePrefix=tablePrefix,
dbHost=dbHost,
Expand All @@ -215,7 +215,7 @@ def doImport(directoryPath,
failedFiles.append(fName)
_dumpDict(SNAPSHOT_DICT, SNAPSHOT_PATH)
continue

try:
ing.ingest(skipKeyViolators=skipKeyViolators)
filesLeft.remove(fName)
Expand All @@ -225,11 +225,11 @@ def doImport(directoryPath,
failedFiles.append(fName)
_dumpDict(SNAPSHOT_DICT, SNAPSHOT_PATH)
continue

endTime = datetime.datetime.now()
ts = str(endTime - startTime)
dirName = os.path.basename(dirPath)
LOGGER.info("Import of %s completed at: %s", dirName,
LOGGER.info("Import of %s completed at: %s", dirName,
endTime.strftime(EPFIngester.DATETIME_FORMAT))
LOGGER.info("Total import time for %s: %s" , dirName, ts[:len(ts)-4])
if (failedFiles):
Expand Down Expand Up @@ -258,7 +258,7 @@ def resumeImport(currentDict,
wList = ["^%s$" % aFile for aFile in filesLeft] #anchor the regexes for exact matches
filesImported = currentDict['filesImported']
bList = ["^%s$" % aFile for aFile in filesImported] #anchor the regexes for exact matches

failedFiles = doImport(dirPath,
tablePrefix=tablePrefix,
dbHost=dbHost,
Expand All @@ -270,7 +270,7 @@ def resumeImport(currentDict,
recordDelim=recordDelim,
fieldDelim=fieldDelim)
return failedFiles


def _dumpDict(aDict, filePath):
"""
Expand All @@ -283,7 +283,7 @@ def _dumpDict(aDict, filePath):
with open(filePath, mode='w+') as f:
json.dump(aDict, f, indent=4)


def main():
"""
Entry point for command-line execution
Expand Down Expand Up @@ -314,12 +314,12 @@ def main():
recordSep='\n',
fieldSep='\t')
_dumpDict(flatOptions, FLAT_CONFIG_PATH)

#Command-line parsing
usage = """usage: %prog [-fxrak] [-d db_host] [-u db_user] [-p db_password] [-n db_name]
[-s record_separator] [-t field_separator] [-w regex [-w regex2 [...]]]
[-s record_separator] [-t field_separator] [-w regex [-w regex2 [...]]]
[-b regex [-b regex2 [...]]] source_directory [source_directory2 ...]"""

op = optparse.OptionParser(version="%prog " + VERSION, description=DESCRIPTION, usage=usage)
op.add_option('-f', '--flat', action='store_true', dest='isFlat', default=False,
help="""Import EPF Flat files, using values from EPFFlat.config if not overridden""")
Expand Down Expand Up @@ -347,33 +347,33 @@ def main():
help="""A regular expression to add to the whiteList; repeated -b arguments will append""")
op.add_option('-k', '--skipkeyviolators', action='store_true', dest='skipKeyViolators', default=False,
help="""Ignore inserts which would violate a primary key constraint; only applies to full imports""")

(options, args) = op.parse_args() #parse command-line options

if not args and not options.isResume: #no directory args were given, and we're not in resume mode
op.print_usage()
sys.exit()

#roll over the log file, so each import has its own log
for aHandler in LOGGER.handlers:
try:
aHandler.doRollover()
except AttributeError:
pass #only the file handler has a doRollover() method

configPath = (FLAT_CONFIG_PATH if options.isFlat else CONFIG_PATH)
with open(configPath) as configFile:
configDict = json.load(configFile)

#iterate through the options dict.
#For each entry whose value is unset or otherwise falsy (None, False, empty), replace it with the value from the config file
optDict = options.__dict__
for aKey in optDict.keys():
if (not optDict[aKey]) and (configDict.has_key(aKey)):
optDict[aKey] = configDict[aKey]

failedFilesDict = {}

#bind these to locals here; they will be rebound later if this is a resume
dirsToImport = args
tablePrefix = options.tablePrefix
Expand All @@ -382,11 +382,11 @@ def main():
recordSep = options.recordSep
fieldSep = options.fieldSep
allowExtensions = options.allowExtensions

global SNAPSHOT_DICT, SNAPSHOT_PATH
SNAPSHOT_DICT['dirsToImport'] = copy.copy(dirsToImport)
SNAPSHOT_DICT['dirsLeft'] = copy.copy(dirsToImport)

startTime = datetime.datetime.now()

#call the appropriate import function
Expand All @@ -396,7 +396,7 @@ def main():
tablePrefix = SNAPSHOT_DICT['tablePrefix']
currentDict = SNAPSHOT_DICT['currentDict']
LOGGER.info("Resuming import for %s", currentDict['dirPath'])

failedFiles = resumeImport(currentDict,
tablePrefix=tablePrefix,
dbHost=options.dbHost,
Expand All @@ -414,7 +414,7 @@ def main():
dirsToImport = SNAPSHOT_DICT['dirsLeft']
wList = SNAPSHOT_DICT['wList']
bList = SNAPSHOT_DICT['bList']

#non-resume
if dirsToImport:
LOGGER.info("Beginning import for the following directories:\n %s", "\n ".join(dirsToImport))
Expand All @@ -436,20 +436,19 @@ def main():

if failedFiles:
failedFilesDict[dirName] = failedFiles

endTime = datetime.datetime.now()
ts = str(endTime - startTime)

if failedFilesDict:
failedList = [" %s/%s" % (str(aKey), str(failedFilesDict[aKey])) for aKey in failedFilesDict.keys()]
failedString = "\n".join(failedList)
LOGGER.warning("The following files encountered errors and were not imported:\n %s", failedString)

LOGGER.info("Total import time for all directories: %s", ts[:len(ts)-4])

#Execute
if __name__ == "__main__":
main()



0 comments on commit 4a93d95

Please sign in to comment.