Skip to content

Commit

Permalink
add --compress-type everywhere and make it all behave.
Browse files Browse the repository at this point in the history
Can't default to xz yet for all metadata (md) because of yum-metadata-parser (y-m-p) silliness.
  • Loading branch information
skvidal committed Sep 15, 2011
1 parent 242b042 commit dafea8c
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 49 deletions.
83 changes: 44 additions & 39 deletions createrepo/__init__.py
Expand Up @@ -34,7 +34,7 @@
from yum.packages import YumAvailablePackage

import rpmUtils.transaction
from utils import _, errorprint, MDError, lzma
from utils import _, errorprint, MDError, lzma, _available_compression
import readMetadata
try:
import sqlite3 as sqlite
Expand All @@ -46,7 +46,7 @@
except ImportError:
pass

from utils import _gzipOpen, bzipFile, xzFile, checkAndMakeDir, GzipFile, \
from utils import _gzipOpen, compressFile, compressOpen, checkAndMakeDir, GzipFile, \
checksum_and_rename, split_list_into_equal_chunks
import deltarpms

Expand Down Expand Up @@ -74,7 +74,7 @@ def __init__(self):
self.deltadir = None
self.delta_relative = 'drpms/'
self.oldpackage_paths = [] # where to look for the old packages -
self.deltafile = 'prestodelta.xml.gz'
self.deltafile = 'prestodelta.xml'
self.num_deltas = 1 # number of older versions to delta (max)
self.max_delta_rpm_size = 100000000
self.update_md_path = None
Expand All @@ -86,9 +86,9 @@ def __init__(self):
self.skip_symlinks = False
self.pkglist = []
self.database_only = False
self.primaryfile = 'primary.xml.gz'
self.filelistsfile = 'filelists.xml.gz'
self.otherfile = 'other.xml.gz'
self.primaryfile = 'primary.xml'
self.filelistsfile = 'filelists.xml'
self.otherfile = 'other.xml'
self.repomdfile = 'repomd.xml'
self.tempdir = '.repodata'
self.finaldir = 'repodata'
Expand All @@ -110,7 +110,8 @@ def __init__(self):
self.worker_cmd = '/usr/share/createrepo/worker.py'
#self.worker_cmd = './worker.py' # helpful when testing
self.retain_old_md = 0
self.xz = False # use xz for compression
self.compress_type = 'gz'


class SimpleMDCallBack(object):
def errorlog(self, thing):
Expand Down Expand Up @@ -146,8 +147,13 @@ def __init__(self, config_obj=None, callback=None):
if not self.conf.directory and not self.conf.directories:
raise MDError, "No directory given on which to run."

if self.conf.xz and not utils.lzma:
raise MDError, "XZ compression requested but lzma/xz module not available."
if not self.conf.compress_type:
self.conf.compress_type = 'gz'

if self.conf.compress_type not in utils._available_compression:
raise MDError, "Compression %s not available: Please choose from: %s" \
% (self.conf.compress_type, ', '.join(utils._available_compression))


if not self.conf.directories: # just makes things easier later
self.conf.directories = [self.conf.directory]
Expand Down Expand Up @@ -414,9 +420,11 @@ def openMetadataDocs(self):

def _setupPrimary(self):
# setup the primary metadata file
# FIXME - make this be conf.compress_type once y-m-p is fixed
fpz = self.conf.primaryfile + '.' + 'gz'
primaryfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
self.conf.primaryfile)
fo = _gzipOpen(primaryfilepath, 'w')
fpz)
fo = compressOpen(primaryfilepath, 'w', 'gz')
fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
fo.write('<metadata xmlns="http://linux.duke.edu/metadata/common"' \
' xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s">' %
Expand All @@ -425,19 +433,23 @@ def _setupPrimary(self):

def _setupFilelists(self):
# setup the filelist file
# FIXME - make this be conf.compress_type once y-m-p is fixed
fpz = self.conf.filelistsfile + '.' + 'gz'
filelistpath = os.path.join(self.conf.outputdir, self.conf.tempdir,
self.conf.filelistsfile)
fo = _gzipOpen(filelistpath, 'w')
fpz)
fo = compressOpen(filelistpath, 'w', 'gz')
fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
fo.write('<filelists xmlns="http://linux.duke.edu/metadata/filelists"' \
' packages="%s">' % self.pkgcount)
return fo

def _setupOther(self):
# setup the other file
# FIXME - make this be conf.compress_type once y-m-p is fixed
fpz = self.conf.otherfile + '.' + 'gz'
otherfilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
self.conf.otherfile)
fo = _gzipOpen(otherfilepath, 'w')
fpz)
fo = compressOpen(otherfilepath, 'w', 'gz')
fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
fo.write('<otherdata xmlns="http://linux.duke.edu/metadata/other"' \
' packages="%s">' %
Expand All @@ -446,9 +458,10 @@ def _setupOther(self):

def _setupDelta(self):
# setup the other file
fpz = self.conf.deltafile + '.' + self.conf.compress_type
deltafilepath = os.path.join(self.conf.outputdir, self.conf.tempdir,
self.conf.deltafile)
fo = _gzipOpen(deltafilepath, 'w')
fpz)
fo = compressOpen(deltafilepath, 'w', self.conf.compress_type)
fo.write('<?xml version="1.0" encoding="UTF-8"?>\n')
fo.write('<prestodelta>\n')
return fo
Expand Down Expand Up @@ -612,7 +625,6 @@ def writeMetadataDocs(self, pkglist=[], pkgpath=None):

for worker_num in range(self.conf.workers):
pkl = self._worker_tmp_path + '/pkglist-%s' % worker_num
print pkl
f = open(pkl, 'w')
f.write('\n'.join(worker_chunks[worker_num]))
f.close()
Expand Down Expand Up @@ -828,7 +840,7 @@ def generate_delta_xml(self):
return ' '.join(results)

def _createRepoDataObject(self, mdfile, mdtype, compress=True,
compress_type='gzip', attribs={}):
compress_type=None, attribs={}):
"""return random metadata as RepoData object to be added to RepoMD
mdfile = complete path to file
mdtype = the metadata type to use
Expand All @@ -838,19 +850,12 @@ def _createRepoDataObject(self, mdfile, mdtype, compress=True,
sfile = os.path.basename(mdfile)
fo = open(mdfile, 'r')
outdir = os.path.join(self.conf.outputdir, self.conf.tempdir)
if not compress_type:
compress_type = self.conf.compress_type
if compress:
if compress_type == 'gzip':
sfile = '%s.gz' % sfile
outfn = os.path.join(outdir, sfile)
output = GzipFile(filename = outfn, mode='wb')
elif compress_type == 'bzip2':
sfile = '%s.bz2' % sfile
outfn = os.path.join(outdir, sfile)
output = BZ2File(filename = outfn, mode='wb')
elif compress_type == 'xz':
sfile = '%s.xz' % sfile
outfn = os.path.join(outdir, sfile)
output = utils.lzma.LZMAFile(outfn, mode='wb')
sfile = '%s.%s' % (sfile, compress_type)
outfn = os.path.join(outdir, sfile)
output = compressOpen(outfn, mode='wb', compress_type=compress_type)

else:
outfn = os.path.join(outdir, sfile)
Expand Down Expand Up @@ -924,9 +929,13 @@ def doRepoMetadata(self):
rp = sqlitecachec.RepodataParserSqlite(repopath, repomd.repoid, None)

for (rpm_file, ftype) in workfiles:
# when we fix y-m-p and non-gzipped xml files - then we can make this just add
# self.conf.compress_type
if ftype in ('other', 'filelists', 'primary'):
rpm_file = rpm_file + '.' + 'gz'
complete_path = os.path.join(repopath, rpm_file)

zfo = _gzipOpen(complete_path)
zfo = compressOpen(complete_path)
# This is misc.checksum() done locally so we can get the size too.
data = misc.Checksums([sumtype])
while data.read(zfo, 2**16):
Expand Down Expand Up @@ -967,17 +976,13 @@ def doRepoMetadata(self):

# rename from silly name to not silly name
os.rename(tmp_result_path, resultpath)
ext = 'bz2'
compress_func = bzipFile
if self.conf.xz:
ext = 'xz'
compress_func = xzFile
ext = self.conf.compress_type
compressed_name = '%s.%s' % (good_name, ext)
result_compressed = os.path.join(repopath, compressed_name)
db_csums[ftype] = misc.checksum(sumtype, resultpath)

# compress the files
compress_func(resultpath, result_compressed)
compressFile(resultpath, result_compressed, self.conf.compress_type)
# csum the compressed file
db_compressed_sums[ftype] = misc.checksum(sumtype,
result_compressed)
Expand Down Expand Up @@ -1051,7 +1056,7 @@ def doRepoMetadata(self):

if self.conf.additional_metadata:
for md_type, md_file in self.conf.additional_metadata.items():
mdcontent = self._createRepoDataObject(md_file, md_type, compress_type='xz')
mdcontent = self._createRepoDataObject(md_file, md_type)
repomd.repoData[mdcontent.type] = mdcontent


Expand Down
43 changes: 43 additions & 0 deletions createrepo/utils.py
Expand Up @@ -91,6 +91,49 @@ def xzFile(source, dest):
destination.close()
s_fn.close()

def gzFile(source, dest):
    """Gzip-compress the file at `source` into a new file at `dest`.

    The source is opened in binary mode and streamed through GzipFile in
    chunks of 1024000 bytes, so the whole input is never held in memory.
    Both file objects are closed even when a read or write raises, so a
    failed compression does not leak open handles.
    """
    s_fn = open(source, 'rb')
    try:
        destination = GzipFile(dest, 'w')
        try:
            while True:
                data = s_fn.read(1024000)
                if not data:
                    # empty read means end of file
                    break
                destination.write(data)
        finally:
            # closing is what flushes the gzip trailer to disk
            destination.close()
    finally:
        s_fn.close()



def compressFile(source, dest, compress_type):
    """Compress the existing file `source` into `dest`.

    compress_type selects the helper that does the work: 'xz', 'bz2' or
    'gz'. Any other value raises MDError.
    """
    handlers = (
        ('xz', xzFile),
        ('bz2', bzipFile),
        ('gz', gzFile),
    )
    for type_name, handler in handlers:
        if compress_type == type_name:
            handler(source, dest)
            return
    raise MDError("Unknown compression type %s" % compress_type)

def compressOpen(fn, mode='rb', compress_type=None):
    """Open `fn` with the file class matching `compress_type`.

    fn            -- path of the compressed file
    mode          -- mode string handed unchanged to the underlying opener
    compress_type -- 'xz', 'bz2' or 'gz'; when empty or None it is guessed
                     from the text after the last '.' in fn

    Returns whatever lzma.LZMAFile, bz2.BZ2File or _gzipOpen returns.
    Raises MDError for an unrecognised type, or when 'xz' is requested
    but the lzma module is not usable.
    """
    if not compress_type:
        # no explicit type (the usual case when reading an existing file):
        # guess it from the file extension
        compress_type = fn.split('.')[-1]

    if compress_type == 'xz':
        # NOTE(review): presumably lzma is bound to a false value when the
        # xz module could not be imported - confirm against the import
        # fallback at the top of this module.
        if not lzma:
            raise MDError("XZ compression requested but lzma/xz module not available.")
        return lzma.LZMAFile(fn, mode)
    elif compress_type == 'bz2':
        return bz2.BZ2File(fn, mode)
    elif compress_type == 'gz':
        return _gzipOpen(fn, mode)
    else:
        raise MDError("Unknown compression type %s" % compress_type)

def returnFD(filename):
try:
fdno = os.open(filename, os.O_RDONLY)
Expand Down
8 changes: 8 additions & 0 deletions genpkgmetadata.py
Expand Up @@ -127,6 +127,9 @@ def parse_args(args, conf):
parser.add_option("--xz", default=False,
action="store_true",
help="use xz for repodata compression")
parser.add_option("--compress-type", default=None, dest="compress_type",
help="which compression type to use")


(opts, argsleft) = parser.parse_args(args)
if len(argsleft) > 1 and not opts.split:
Expand Down Expand Up @@ -159,6 +162,11 @@ def parse_args(args, conf):

if opts.nodatabase:
opts.database = False

# xz is just a shorthand for compress_type
if opts.xz and not opts.compress_type:
opts.compress_type='xz'


# let's switch over to using the conf object - put all the opts into it
for opt in parser.option_list:
Expand Down
8 changes: 4 additions & 4 deletions mergerepo.py
Expand Up @@ -48,8 +48,8 @@ def parse_args(args):
help="Do not merge group(comps) metadata")
parser.add_option("", "--noupdateinfo", default=False, action="store_true",
help="Do not merge updateinfo metadata")
parser.add_option("", "--xz", default=False, action="store_true",
help="Use xz for repodata compression")
parser.add_option("--compress-type", default=None, dest="compress_type",
help="which compression type to use")

(opts, argsleft) = parser.parse_args(args)

Expand Down Expand Up @@ -81,8 +81,8 @@ def main(args):
rmbase.groups = False
if opts.noupdateinfo:
rmbase.updateinfo = False
if opts.xz:
rmbase.mdconf.xz = True
if opts.compress_type:
rmbase.mdconf.compress_type = opts.compress_type
try:
rmbase.merge_repos()
rmbase.write_metadata()
Expand Down
29 changes: 23 additions & 6 deletions modifyrepo.py
Expand Up @@ -29,7 +29,7 @@
import os
import sys
from createrepo import __version__
from createrepo.utils import checksum_and_rename, GzipFile, MDError
from createrepo.utils import checksum_and_rename, compressOpen, MDError
from yum.misc import checksum

from yum.repoMDObject import RepoMD, RepoMDError, RepoData
Expand All @@ -44,6 +44,8 @@ def __init__(self, repo):
self.repodir = os.path.abspath(repo)
self.repomdxml = os.path.join(self.repodir, 'repomd.xml')
self.checksum_type = 'sha256'
self.compress = False
self.compress_type='xz'

if not os.path.exists(self.repomdxml):
raise MDError, '%s not found' % self.repomdxml
Expand Down Expand Up @@ -97,8 +99,8 @@ def add(self, metadata, mdtype=None):
mdname = 'updateinfo.xml'
elif isinstance(metadata, str):
if os.path.exists(metadata):
if metadata.endswith('.gz'):
oldmd = GzipFile(filename=metadata, mode='rb')
if metadata.split('.')[-1] in ('gz', 'bz2', 'xz'):
oldmd = compressOpen(metadata, mode='rb')
else:
oldmd = file(metadata, 'r')
md = oldmd.read()
Expand All @@ -109,13 +111,19 @@ def add(self, metadata, mdtype=None):
else:
raise MDError, 'invalid metadata type'

do_compress = False
## Compress the metadata and move it into the repodata
if not mdname.endswith('.gz'):
mdname += '.gz'
if self.compress or not mdname.split('.')[-1] in ('gz', 'bz2', 'xz'):
do_compress = True
mdname += '.' + self.compress_type
mdtype = self._get_mdtype(mdname, mdtype)

destmd = os.path.join(self.repodir, mdname)
newmd = GzipFile(filename=destmd, mode='wb')
if do_compress:
newmd = compressOpen(destmd, mode='wb', compress_type=self.compress_type)
else:
newmd = open(destmd, 'wb')

newmd.write(md)
newmd.close()
print "Wrote:", destmd
Expand Down Expand Up @@ -166,6 +174,10 @@ def main(args):
help="specific datatype of the metadata, will be derived from the filename if not specified")
parser.add_option("--remove", action="store_true",
help="remove specified file from repodata")
parser.add_option("--compress", action="store_true", default=False,
help="compress the new repodata before adding it to the repo")
parser.add_option("--compress-type", dest='compress_type', default='xz',
help="compression format to use")
parser.usage = "modifyrepo [options] [--remove] <input_metadata> <output repodata>"

(opts, argsleft) = parser.parse_args(args)
Expand All @@ -180,6 +192,10 @@ def main(args):
print "Could not access repository: %s" % str(e)
return 1


repomd.compress = opts.compress
repomd.compress_type = opts.compress_type

# remove
if opts.remove:
try:
Expand All @@ -195,6 +211,7 @@ def main(args):
except MDError, e:
print "Could not add metadata from file %s: %s" % (metadata, str(e))
return 1


if __name__ == '__main__':
ret = main(sys.argv[1:])
Expand Down

0 comments on commit dafea8c

Please sign in to comment.