Navigation Menu

Skip to content

Commit

Permalink
Rework repacking logic to make it more clever
Browse files Browse the repository at this point in the history
- Rework the repack code to be more clever -- instead of repacking
  based purely on dates, we now track the number of loose objects
  and the number of generated packs. Many of the settings are
  hardcoded for the moment while testing, but will probably end up
  settable via global and per-repository config settings.
- The following fsck.conf settings have no further effect:

    - repack_flags (replaced with extra_repack_flags)
    - full_repack_flags (replaced with extra_repack_flags_full)
    - full_repack_every (we now figure it out ourselves)

- Move git command invocation routines into a central function to
  reduce the amount of code duplication. You can also set the path
  to the git binary using the GITBIN env variable or by simply
  adding it to your path.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
  • Loading branch information
mricon committed Aug 14, 2018
1 parent 7e32c1d commit 299b0d0
Show file tree
Hide file tree
Showing 8 changed files with 259 additions and 223 deletions.
19 changes: 17 additions & 2 deletions CHANGELOG.rst
@@ -1,9 +1,24 @@
master
------
v1.2-master
-----------
- Make sure to set gc.auto=0 on repositories to avoid pruning repos
that are acting as alternates to others. We run our own prune
during fsck, so there is no need to auto-gc, ever (unless you
didn't set up grok-fsck, in which case you're not doing it right).
- Rework the repack code to be more clever -- instead of repacking
based purely on dates, we now track the number of loose objects
and the number of generated packs. Many of the settings are
hardcoded for the moment while testing, but will probably end up
settable via global and per-repository config settings.
- The following fsck.conf settings have no further effect:

- repack_flags (replaced with extra_repack_flags)
- full_repack_flags (replaced with extra_repack_flags_full)
- full_repack_every (we now figure it out ourselves)

- Move git command invocation routines into a central function to
reduce the amount of code duplication. You can also set the path
to the git binary using the GITBIN env variable or by simply
adding it to your path.


v1.1.1 (2018-07-25)
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Expand Up @@ -8,7 +8,7 @@ Framework to smartly mirror git repositories
:Date: 2018-04-24
:Copyright: The Linux Foundation and contributors
:License: GPLv3+
:Version: 1.1.1
:Version: 1.2-pre

DESCRIPTION
-----------
Expand Down
10 changes: 10 additions & 0 deletions fsck.conf
Expand Up @@ -44,6 +44,16 @@ ignore_errors = notice: HEAD points to an unborn branch
# Should we repack the repositories? Hint: you almost always want this on.
repack = yes
#
# We set proper flags for repacking depending on whether the repo is using
# alternates and on whether this is a full repack. We will also always
# build bitmaps (when it makes sense), to make cloning faster.
# You can add other flags (e.g. --threads and --window-memory) via
# the following parameter:
extra_repack_flags =
#
# These flags are added *in addition* to extra_repack_flags
extra_repack_flags_full = --window=250 --depth=50
#
# Run git-prune to remove obsolete old objects if no other repositories are
# using the repo in their objects/info/alternates. If other repositories
# are relying on this repo via alternates, it will not be pruned to avoid
Expand Down
32 changes: 31 additions & 1 deletion grokmirror/__init__.py
Expand Up @@ -19,6 +19,7 @@
import time
import anyjson
import fnmatch
import subprocess

import logging

Expand All @@ -28,16 +29,45 @@

from git import Repo

VERSION = '1.1.1'
VERSION = '1.2-pre'
MANIFEST_LOCKH = None
REPO_LOCKH = {}
GITBIN = '/usr/bin/git'

# default logger. Will probably be overridden.
logger = logging.getLogger(__name__)

_alt_repo_cache = None


def run_git_command(fullpath, args):
    """Run a git command and return its exit code and captured output.

    The git binary is taken from the GITBIN environment variable when it
    is set, otherwise from the module-level GITBIN default. If that path
    is not an executable regular file, plain 'git' is used instead so the
    binary is looked up via PATH.

    :param fullpath: repository path passed to git via --git-dir, or None
        to run the command without --git-dir
    :param args: list of git arguments (subcommand followed by its options)
    :return: tuple of (returncode, output, error), where output and error
        are the decoded stdout and stderr with surrounding whitespace
        stripped
    """
    _git = os.environ.get('GITBIN', GITBIN)

    # Both checks must pass for the configured path to be usable. The
    # negation has to cover the whole conjunction: negating only the
    # isfile() test would leave a missing binary in place, because
    # os.access() is False for a path that does not exist.
    if not (os.path.isfile(_git) and os.access(_git, os.X_OK)):
        # we hope for the best by using 'git' without full path
        _git = 'git'

    if fullpath is not None:
        cmdargs = [_git, '--git-dir', fullpath] + args
    else:
        cmdargs = [_git] + args

    logger.debug('Running: %s', ' '.join(cmdargs))

    child = subprocess.Popen(cmdargs,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for the child to exit, so
    # returncode is populated by the time we read it below.
    output, error = child.communicate()

    output = output.decode().strip()
    error = error.decode().strip()

    return child.returncode, output, error


def _lockname(fullpath):
lockpath = os.path.dirname(fullpath)
lockname = '.%s.lock' % os.path.basename(fullpath)
Expand Down
33 changes: 9 additions & 24 deletions grokmirror/dumb_pull.py
Expand Up @@ -28,16 +28,8 @@


def git_rev_parse_all(gitdir):
logger.debug('Running: GIT_DIR=%s git rev-parse --all', gitdir)

env = {'GIT_DIR': gitdir}
args = ['git', 'rev-parse', '--all']

(output, error) = subprocess.Popen(args, stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=env).communicate()

error = error.decode().strip()
args = ['rev-parse', '--all']
retcode, output, error = grokmirror.run_git_command(gitdir, args)

if error:
# Put things we recognize into debug
Expand All @@ -53,14 +45,8 @@ def git_rev_parse_all(gitdir):
return output


def git_remote_update(args, env):
logger.debug('Running: GIT_DIR=%s %s', env['GIT_DIR'], ' '.join(args))

(output, error) = subprocess.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
env=env).communicate()

error = error.decode().strip()
def git_remote_update(args, fullpath):
retcode, output, error = grokmirror.run_git_command(fullpath, args)

if error:
# Put things we recognize into debug
Expand Down Expand Up @@ -97,8 +83,6 @@ def dumb_pull_repo(gitdir, remotes, svn=False):
logger.info('\tAssuming another process is running.')
return False

env = {'GIT_DIR': gitdir}

old_revs = git_rev_parse_all(gitdir)

if svn:
Expand All @@ -110,8 +94,8 @@ def dumb_pull_repo(gitdir, remotes, svn=False):
remote = '--all'

logger.info('Running git-svn fetch %s in %s', remote, gitdir)
args = ['/usr/bin/git', 'svn', 'fetch', remote]
git_remote_update(args, env)
args = ['svn', 'fetch', remote]
git_remote_update(args, gitdir)

else:
# Not an svn remote
Expand All @@ -128,10 +112,10 @@ def dumb_pull_repo(gitdir, remotes, svn=False):
remotefound = True
logger.debug('existing remote %s matches %s',
hasremote, remote)
args = ['/usr/bin/git', 'remote', 'update', hasremote]
args = ['remote', 'update', hasremote]
logger.info('Updating remote %s in %s', hasremote, gitdir)

git_remote_update(args, env)
git_remote_update(args, gitdir)

if not remotefound:
logger.info('Could not find any remotes matching %s in %s',
Expand Down Expand Up @@ -263,5 +247,6 @@ def command():
args, verbose=opts.verbose, svn=opts.svn, remotes=opts.remotes,
posthook=opts.posthook, logfile=opts.logfile)


if __name__ == '__main__':
command()

0 comments on commit 299b0d0

Please sign in to comment.