Skip to content

Commit

Permalink
tools: add rebase-release-notes script
Browse files Browse the repository at this point in the history
The goal of the script is to assist with removing changelog entries that
are already included into a release of another development branch.

For example, if a bug fix is released as part of 2.11.3, there is no
reason to highlight that it is fixed in 3.0.0. It is better to describe
changes comparing to the latest present 2.11 release.

The script was initially written for personal use and there are obvious
points for improvements. However, it seems that it already simplifies
some tasks, so I'm going to share it as is and improve later. I'll leave
the relevant issue open.

Part of tarantool#9135

NO_DOC=The commit adds a development tool and doesn't touch anything in
       tarantool itself.
NO_CHANGELOG=see NO_DOC
NO_TEST=see NO_DOC
  • Loading branch information
Totktonada authored and kyukhin committed Sep 22, 2023
1 parent 3b1de78 commit 3240201
Showing 1 changed file with 370 additions and 0 deletions.
370 changes: 370 additions & 0 deletions tools/rebase-release-notes
Original file line number Diff line number Diff line change
@@ -0,0 +1,370 @@
#!/usr/bin/env python

from __future__ import print_function
import os
import sys
import glob
import re
import subprocess


# {{{ Helpers

def popen(cmdline):
""" Wrapper around Popen.subprocess() that redirects the output to a pipe,
correctly handles encoding and raises a RuntimeError if the executable
was not found. Works on both Python 2 and 3.
"""
popen_kwargs = {
'stdout': subprocess.PIPE,
}
if sys.version_info[0] == 3:
popen_kwargs['encoding'] = 'utf-8'

if sys.version_info[0] == 2:
global FileNotFoundError
FileNotFoundError = OSError

try:
return subprocess.Popen(cmdline, **popen_kwargs)
except FileNotFoundError as e:
raise RuntimeError("Unable to find '{}' executable: {}".format(
cmdline[0], str(e)))


def log_debug(s):
if os.environ.get('DEBUG'):
print(s)


# }}} Helpers


def repository_root_dir():
""" Return a repository root directory.
"""
# Sic: Use abspath rather than realpath because the script
# may be called via a symlink from another repository.
script_file = os.path.abspath(__file__)
script_dir = os.path.dirname(script_file)
return os.path.dirname(script_dir)


class Commit:
""" Represents a result of parsing a git commit from git log.
"""

COMMIT_LINE_RE = re.compile(r'^commit ([0-9a-f]{40})')
AUTHOR_RE = re.compile(r'^Author: (.*)$')
DATE_RE = re.compile(r'^Date: (.*)$')
MESSAGE_LINE_RE = re.compile(r'^[ ]{4}(.*)$')
CHERRY_PICKED_FROM_RE = re.compile(
r'^[ ]{4}\(cherry picked from commit ([0-9a-f]{40})\)$')
FILE_ADDED_RE = re.compile(r'^A\t(.*)$')
FILE_MODIFIED_RE = re.compile(r'^M\t(.*)$')
FILE_DELETED_RE = re.compile(r'^D\t(.*)$')
FILE_RENAMED_RE = re.compile(r'^R[0-9]+')

def __init__(self):
self._commit_id = None
self._author = None
self._date = None
self._subject = None
self._body = None
self._cherry_picked_from = None
self._added_files = []
self._modified_files = []
self._deleted_files = []

def commit_id(self):
return self._commit_id

def subject(self):
return self._subject

def cherry_picked_from(self):
return self._cherry_picked_from

def changelog_entries(self):
res = []
for f in self._added_files:
if f.startswith('changelogs/unreleased/'):
res.append(os.path.basename(f))
return res

def __str__(self):
return 'commit {} ("{}")'.format(self._commit_id[:12], self._subject)

def __repr__(self):
return str(self)

def add_line(self, line):
m = self.COMMIT_LINE_RE.match(line)
if m:
assert self._commit_id is None
self._commit_id = m.group(1)
return

m = self.AUTHOR_RE.match(line)
if m:
assert self._author is None
self._author = m.group(1)
return

m = self.DATE_RE.match(line)
if m:
assert self._date is None
self._date = m.group(1)
return

# The empty line after the headers and before the message.
if line == '':
return

m = self.MESSAGE_LINE_RE.match(line)
if m:
xline = m.group(1)
if self._subject is None:
self._subject = xline
return
if self._body is None and xline == '':
return
if self._body is None:
self._body = ''
self._body += xline + '\n'

m = self.CHERRY_PICKED_FROM_RE.match(line)
if m:
assert self._cherry_picked_from is None
self._cherry_picked_from = m.group(1)
return

return

m = self.FILE_ADDED_RE.match(line)
if m:
self._added_files.append(m.group(1))
return

m = self.FILE_MODIFIED_RE.match(line)
if m:
self._modified_files.append(m.group(1))
return

m = self.FILE_DELETED_RE.match(line)
if m:
self._deleted_files.append(m.group(1))
return

m = self.FILE_RENAMED_RE.match(line)
if m:
# Ignore for now.
return

raise RuntimeError('Unexpected line: {}'.format(line))


if __name__ == '__main__':
# The story: there are two branches: say, release/2.11 and
# master (future 3.0.0). The latter was forked from the former
# at some point, let's call this point a 'merge base'.
#
# There were one or several 2.11 releases and now we're about
# to release 3.0.0.
#
# We want to publish release notes for 3.0.0 as if its
# development was derived from latest 2.11 commit (or latest
# 2.11 release, it doesn't matter much). IOW, the changes that
# are present in 2.11 should be excluded from 3.0.0 release
# notes.
#
# TODO: Accept these refspecs as CLI arguments.
base_refspec = 'origin/release/2.11'
target_refspec = 'origin/master'

# Setup paths.
repo_dir = repository_root_dir()
entries_dir = os.path.join(repo_dir, 'changelogs', 'unreleased')

# Determine the merge base.
process = popen(['git', '-C', repo_dir, 'merge-base', base_refspec,
target_refspec])
merge_base = process.stdout.read().rstrip()
process.wait()
log_debug('merge_base: ' + merge_base)

# Assume that there are no changelog entries at the merge base
# point or they were deleted manually after the branching.
# IOW, we don't look at git log before the merge base.
#
# Example: commit c99bbdd69ee9 ("changelogs: delete released
# changelogs").

# Prepare a 'database' of base refspec commits. It will be
# used to determine, whether given commit is present in the
# base branch or not.
base_commits = {
'by_cherry_picked_from': {},
'by_subject': {},
'by_changelog_entry': {},
}
process = popen(['git', '-C', repo_dir, 'log',
'--reverse',
'--name-status',
'{}..{}'.format(merge_base, base_refspec)])

log_debug(['git', '-C', repo_dir, 'log',
'--reverse',
'--name-status',
'{}..{}'.format(merge_base, base_refspec)])

# We assume that those file names are unique. While it is,
# of course, not so in the general case, it is enough for
# our fuzzy matching.
#
# Exclude entries that are present several times by the
# manually crafted list.
ignored_entries = [
'gh-6548-luajit-fixes.md',
'gh-7230-luajit-fixes.md',
'gh-8069-luajit-fixes.md',
'gh-8516-luajit-fixes.md',
'gh-8825-luajit-fixes.md',
]

ignored_subjects = ['luajit: bump new version']

cur = Commit()
for line in process.stdout:
line = line.rstrip()
if line.startswith('commit'):
cherry_picked_from = cur.cherry_picked_from()
if cherry_picked_from:
assert cherry_picked_from not in \
base_commits['by_cherry_picked_from']
base_commits['by_cherry_picked_from'][cherry_picked_from] = cur

subject = cur.subject()
if subject:
if subject not in base_commits['by_subject']:
base_commits['by_subject'][subject] = []
base_commits['by_subject'][subject].append(cur)

for entry in cur.changelog_entries():
if entry not in ignored_entries and \
entry in base_commits['by_changelog_entry']:
found_commit_id = \
base_commits['by_changelog_entry'][entry].commit_id()
raise RuntimeError(('Found duplicate changelog entry: {}' +
' (commit {} and commit {}').format(
entry, found_commit_id,
cur.commit_id()))
base_commits['by_changelog_entry'][entry] = cur

cur = Commit()
cur.add_line(line)
process.wait()

entries_glob = os.path.join(entries_dir, '*.md')
entries = glob.glob(entries_glob)

# TODO: Perform all the actions in a semi-automatic way, like
# `git rebase -i` does, instead of writing *.txt to let a
# human perform the file manipulations.

def_fh = open('def.txt', 'w')
print('Writing definitely found entries into def.txt')

ign_fh = open('ign.txt', 'w')
print('Writing ignored entries into ign.txt')

fuz_fh = open('fuz.txt', 'w')
print('Writing fuzzy matched entries into fuz.txt')

nfd_fh = open('nfd.txt', 'w')
print('Writing not found entries into nfd.txt')

# We can run `git log` once, parse it and match with file
# names, but that's tiresome.
for entry in entries:
entry = os.path.basename(entry)

# See comment for ignored_entries.
if entry in ignored_entries:
print('changelogs/unreleased/{}'.format(entry), file=ign_fh)
print(' ignored by changelog entry name', file=ign_fh)
continue

# Find all commits that touch the changelog entry.
commits = []
process = popen(['git', '-C', repo_dir, 'log',
'--reverse',
'--name-status',
'changelogs/unreleased/{}'.format(entry)])
for line in process.stdout:
line = line.rstrip()
if line.startswith('commit'):
commits.append(Commit())
commits[-1].add_line(line)
process.wait()

# Find a commit that added the entry.
res = []
for commit in commits:
if entry in commit.changelog_entries():
res.append(commit)
if len(res) != 1:
print('changelogs/unreleased/{}'.format(entry), file=ign_fh)
print((' ignored because the changelog entry was added ' +
'several times: {}').format(res), file=ign_fh)
continue

commit = res[0]

# If the commit was cherry picked to the base branch using
# `git cherry-pich -x <refspec>`?
commit_id = commit.commit_id()
if commit_id in base_commits['by_cherry_picked_from']:
base_commit = base_commits['by_cherry_picked_from'][commit_id]
print('changelogs/unreleased/{}'.format(entry), file=def_fh)
print(' {} cherry picked from {}'.format(base_commit, commit),
file=def_fh)
continue

fuzzy_matched_by = []
ignored_by = None

# If there is a base branch commit with the same subject?
subject = commit.subject()
if subject in ignored_subjects:
ignored_by = 'subject {}'.format(subject)
elif subject in base_commits['by_subject']:
fuzzy_matched_by.append('subject (see commits {})'.format(
base_commits['by_subject'][subject]))

# If there is a base branch commit with the same changelog
# entry file name?
base_commit = base_commits['by_changelog_entry'].get(entry)
if base_commit:
fuzzy_matched_by.append(('changelog entry name ' +
'(see commit {})').format(base_commit))

if fuzzy_matched_by:
print('changelogs/unreleased/{}'.format(entry), file=fuz_fh)
for by in fuzzy_matched_by:
print(' fuzzy matched by {}'.format(by), file=fuz_fh)
continue
elif ignored_by:
print('changelogs/unreleased/{}'.format(entry), file=ign_fh)
print(' ignored by {}'.format(ignored_by), file=ign_fh)
continue

print('changelogs/unreleased/{}'.format(entry), file=nfd_fh)
url = 'https://github.com/tarantool/tarantool/commit/{}'.format(
commit.commit_id())
print(' see {}'.format(url), file=nfd_fh)

def_fh.close()
ign_fh.close()
fuz_fh.close()
nfd_fh.close()

0 comments on commit 3240201

Please sign in to comment.