Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: 563a868228
Fetching contributors…

Cannot retrieve contributors at this time

executable file 578 lines (492 sloc) 19.387 kb
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
##
## Author: Steve Purcell, http://www.sanityinc.com/
##
## Further info:
## http://www.sanityinc.com/articles/converting-darcs-repositories-to-git
##
## Obtain the latest version of this software here:
##
## http://github.com/purcell/darcs-to-git
## http://git.sanityinc.com/
##
## Please submit bug reports and patches via Github (preferred), or by
## email to the author.
##
##
# XXX: make backwards compatible
# TODO: import parallel darcs repos as git branches, identifying branch points
# TODO: use default repo if none was supplied
# TODO: handle *-darcs-backupN files?
require 'fileutils'
require 'open-uri'
require 'optparse'
require 'ostruct'
require 'pathname'
require 'rexml/document'
require 'shellwords'
require 'stringio'
require 'yaml'
# Iconv left the standard library in Ruby 2.0; load it only where it still exists.
begin
  require 'iconv'
rescue LoadError
  nil
end
# Explicitly setting a time zone would cause darcs to only output in
# that timezone hence we couldn't get the actual patch TZ
# ENV['TZ'] = 'GMT0'
# Bookkeeping files kept inside the git metadata directory: the YAML map of
# darcs patch identifiers to git commit ids, and the default author map.
GIT_PATCHES = File.join(".git", "darcs_patches")
DEFAULT_AUTHOR_MAP_FILE = File.join(".git", "darcs_author_substitutions")
# -------------------------------------------------------------------------------
# Usage info and argument parsing
# -------------------------------------------------------------------------------
# Run-time settings. The hash is deliberately left mutable: the command-line
# switches overwrite these defaults in place.
OPTIONS = {
  :default_author        => 'none',
  :default_email         => 'none',
  :list_authors          => false,
  :author_map            => nil,
  :verbose               => false,
  :quiet                 => false,
  :do_checks             => true,
  :clean_commit_messages => false,
  :num_patches           => nil
}
# Command-line definition. Every switch stores its value in the OPTIONS hash;
# --version and --help print their text and exit immediately. After parsing,
# the first remaining argument is taken as the source darcs repository
# (SRCREPO); the usage text is printed and the program aborts when it is
# missing.
opts = OptionParser.new do |parser|
  parser.banner = <<-end_usage
Creates git repositories from darcs repositories
usage: darcs-to-git DARCSREPO [options]
1. Create an *empty* directory that will become the new git repository
2. From inside that directory, run this program, passing the location
of the source darcs repo as a parameter
The program will git-init the empty directory, and migrate all patches
in the source darcs repo into commits in that repository.
Thereafter, incremental patch conversion from the same source repo is
possible by repeating step 2.
If DARCSREPO is local, its contents will be compared with those of the
git repo after conversion finishes, as a sanity check for the
conversion process.
NOTE: In case of duplicate tags, the latest will take precedence.
If you really need to, you can manually identify the patch and use
\"git tag -f <tagname> <sha1-of-commit-before-tagging>\".
OPTIONS
  end_usage

  parser.on('--default-author NAME',
            "Set the author name used when darcs patch has no explicit author") do |name|
    OPTIONS[:default_author] = name
  end

  parser.on('--default-email ADDRESS',
            "Set the email address used when no explicit address is given") do |address|
    OPTIONS[:default_email] = address
  end

  parser.on('--list-authors',
            "List all unique authors in source repo and quit.") do |flag|
    OPTIONS[:list_authors] = flag
  end

  parser.on('--author-map FILE',
            "Supply a YAML file that maps committer names to canonical author names") do |file|
    OPTIONS[:author_map] = file
  end

  parser.on('--patches [N]', OptionParser::DecimalInteger,
            "Only pull N patches.") do |count|
    # N is declared optional, so a bare `--patches` arrives here as nil;
    # treat that like a negative count and show the usage text.
    abort parser.to_s unless count && count >= 0
    OPTIONS[:num_patches] = count
  end

  parser.on('--verbose',
            "Show executed commands and other internal information") do
    OPTIONS[:verbose] = true
  end

  parser.on('--no-verbose',
            "Don't show status update after every imported patch (bit faster) ") do
    OPTIONS[:quiet] = true
  end

  parser.on('--no-checks',
            "Don't check repository consistency after every imported patch, only at start and finish of pull (faster) ") do
    OPTIONS[:do_checks] = false
  end

  parser.on('--clean-commit-messages',
            "Don't note darcs hashes in git commit messages (not recommended)") do
    OPTIONS[:clean_commit_messages] = true
  end

  parser.on('--version', "Output version information and exit") do
    puts <<-EOF
darcs-to-git 0.1
Copyright (c) 2009 Steve Purcell, http://www.sanityinc.com/
License MIT: <http://www.opensource.org/licenses/mit-license.php>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
    EOF
    exit
  end

  parser.on('-h', '--help', "Show this message") do
    puts parser.to_s
    exit
  end
end
# parse! removes the recognised switches from ARGV, leaving the positionals.
opts.parse!
SRCREPO = ARGV[0]
abort opts.to_s if SRCREPO.nil?
# -------------------------------------------------------------------------------
# Utilities
# -------------------------------------------------------------------------------
# Execute an external command, echoing it first when --verbose is active.
# The arguments are handed to Kernel#system unchanged. Returns true on
# success; raises a RuntimeError when the command cannot be run or exits
# with a non-zero status.
def run(*args)
  puts "Running: #{args.inspect}" if OPTIONS[:verbose]
  succeeded = system(*args)
  raise("Failed to run: #{args.inspect}") unless succeeded
  succeeded
end
# cf. Paul Battley, http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/
#
# Returns a copy of +s+, interpreted as UTF-8, with every byte sequence that
# is not valid UTF-8 removed. The argument itself is never modified.
#
# Iconv is no longer part of the standard library from Ruby 2.0 on, so the
# String API is used wherever it exists and Iconv only serves as the fallback
# for interpreters whose strings know nothing about encodings.
def validate_utf8(s)
  if s.respond_to?(:scrub)
    s.dup.force_encoding('UTF-8').scrub('')
  elsif s.respond_to?(:encode)
    # No String#scrub yet: a round trip through UTF-16 drops the invalid bytes.
    s.dup.force_encoding('UTF-8').
      encode('UTF-16BE', :invalid => :replace, :undef => :replace, :replace => '').
      encode('UTF-8')
  else
    # The padding character lets iconv discard a truncated sequence at the
    # very end of the input; it is stripped off again afterwards.
    Iconv.iconv('UTF-8//IGNORE', 'UTF-8', s + ' ').first[0..-2]
  end
end
# Run a command given as separate arguments and return what it wrote to
# standard output, passed through validate_utf8. Each argument is
# shell-escaped and the joined command line is executed via IO.popen.
# The command is echoed first when --verbose is active. Raises a
# RuntimeError when the exit status is not zero.
def output_of(*args)
  puts "Running: #{args.inspect}" if OPTIONS[:verbose]
  command_line = args.map { |arg| Shellwords.shellescape(arg) }.join(' ')
  captured = IO.popen(command_line, 'r') { |pipe| pipe.read }
  raise "Failed to run: #{args.inspect}" unless $?.exitstatus == 0
  return validate_utf8(captured)
end
# Backport of Symbol#to_proc for interpreters that do not ship it.
# It is only installed when the method is missing: replacing the built-in
# version with this one-argument lambda would break calls that yield more
# than one value, such as inject(&:+).
unless Symbol.method_defined?(:to_proc)
  class Symbol
    def to_proc
      lambda { |o| o.send(self) }
    end
  end
end
class String
  # Returns a copy of the string with darcs' byte escapes decoded.
  # darcs writes a non-ASCII byte as '[_\hh_]', where 'hh' are two
  # hexadecimal digits. Each such group is rewritten as the quoted-printable
  # sequence '=hh' and decoded to the corresponding byte with unpack.
  def darcs_unescape
    gsub(/\[\_\\(..)\_\]/) do
      quoted_printable = "=#{Regexp.last_match(1)}"
      quoted_printable.unpack("M*").first
    end
  end
end
# -------------------------------------------------------------------------------
# Map darcs authors to git authors
# -------------------------------------------------------------------------------
# Hash from darcs author strings to replacement author strings. Lookups via
# [] never return the raw value: the replacement (or, when there is none, the
# looked-up string itself) is split into a [name, email] pair.
class AuthorMap < Hash
  # Fallbacks used when a name or an email address cannot be derived.
  attr_accessor :default_email
  attr_accessor :default_author

  # Build a map from a YAML file holding a mapping. An empty file yields an
  # empty map; any other non-mapping document raises a RuntimeError.
  def self.load(filename)
    entries = YAML.load_file(filename) || {}
    unless entries.is_a?(Hash)
      raise "yaml hash not found in #{filename}"
    end
    map = new
    map.update(entries)
    map
  end

  # gives the name and email
  def [](author)
    name_and_email(super || author)
  end

  private

  # Split an author string into [name, email]:
  #   "Name <user@host>"          => name and address as written
  #   "user@host" / "<user@host>" => part before the '@' as name
  #   ""                          => both defaults
  #   anything else               => the string as name, default email
  def name_and_email(author)
    case author
    when /^\s*(\S.*?)\s*\<(\S+@\S+?)\>\s*$/
      [$1, $2]
    when /^\s*\<?(\S+@\S+?)\>?\s*$/
      email = $1
      [email.split('@').first, email]
    when ''
      [default_author, default_email]
    else
      [author, default_email]
    end
  end
end
# -------------------------------------------------------------------------------
# Storing a history of related darcs and git commits
# -------------------------------------------------------------------------------
# Remembers which git commit each imported darcs patch ended up as. The
# mapping (darcs patch identifier => git commit id) is kept in memory and
# mirrored to a YAML file.
class CommitHistory
  # Load the mapping from +patch_file_name+ when that file exists (raising if
  # it does not contain a YAML mapping); otherwise rebuild it from the
  # "darcs-hash:" lines found in the git log.
  def initialize(patch_file_name)
    @patch_file_name = patch_file_name
    @darcs_patches_in_git = {}
    if File.exist?(patch_file_name)
      @darcs_patches_in_git = YAML.load_file(patch_file_name)
      unless @darcs_patches_in_git.is_a?(Hash)
        raise "yaml hash not found in #{patch_file_name}"
      end
    else
      # TODO: consider doing this unconditionally, since that
      # might allow merging between repositories created with darcs-to-git
      fill_from_darcs_hash_comments
    end
  end

  # Associate a darcs patch identifier with a git commit id and persist the
  # whole mapping.
  def record_git_commit(commit_id, identifier)
    # using one file per darcs patch would be an incredible waste of space
    # on my system one file takes up 4K even if only a few bytes are in it
    # hence we just use a simple YAML hash
    @darcs_patches_in_git[identifier] = commit_id
    save
  end

  # For a tag: false when no git tag named +git_tag_name+ exists, otherwise
  # the output of `git rev-list --max-count=1 tags/<name>`, stripped.
  # For an ordinary patch: the recorded commit id, or nil.
  def find_git_commit(is_tag, git_tag_name, identifier)
    if is_tag
      existing_git_tags.include?(git_tag_name) &&
        output_of("git", "rev-list", "--max-count=1", "tags/#{git_tag_name}").strip
    else
      @darcs_patches_in_git[identifier]
    end
  end

  # True when `git rev-parse --verify HEAD` does not succeed.
  def empty_repo?
    !system("git rev-parse --verify HEAD >/dev/null 2>&1")
  end

  private

  # Write the complete mapping to the YAML file.
  def save
    File.open(@patch_file_name, 'w') do |f|
      YAML.dump(@darcs_patches_in_git, f)
    end
  end

  # Tag names reported by `git tag -l`; a failing git call counts as "no tags".
  def existing_git_tags
    listing = begin
      output_of("git", "tag", "-l")
    rescue StandardError
      ""
    end
    listing.split(/\r?\n/)
  end

  # Rebuild the mapping from commits whose message carries a "darcs-hash:"
  # line. The file is written once at the end rather than once per commit.
  def fill_from_darcs_hash_comments
    return if empty_repo?
    log = output_of("git", "log", "--grep=darcs-hash:", "--no-color")
    recovered = false
    Array(log.split(/^commit /m)[1..-1]).each do |entry|
      # Only the /m flag is wanted here ('.' also matches newlines). An 's'
      # flag would select the Windows-31J encoding in Ruby and make the match
      # raise on log text containing non-ASCII characters.
      commit_id, identifier = entry.scan(/^([a-z0-9]+$).*darcs-hash:(.*?)$/m).flatten
      next unless commit_id && identifier
      @darcs_patches_in_git[identifier] = commit_id
      recovered = true
    end
    save if recovered
  end
end
# -------------------------------------------------------------------------------
# Reading darcs patches and applying them to a git repo
# -------------------------------------------------------------------------------
# One darcs patch, as described by a <patch> element of `darcs changes --xml`,
# together with the operations needed to pull it into the working directory
# and record it in git.
class DarcsPatch
  attr_accessor :source_repo, :author, :date, :inverted, :identifier,
                :name, :is_tag, :git_tag_name, :comment
  attr_reader :git_author_name, :git_author_email

  # source_repo: location of the darcs repository the patch is pulled from.
  # patch_xml:   element answering to attribute(name) and get_elements(name).
  # A missing name becomes 'Unnamed patch', a missing comment nil. Patches
  # named "TAG <something>" are flagged as tags; whitespace, ':', '.' and '*'
  # runs in the tag name are replaced by '_' to form the git tag name.
  def initialize(source_repo, patch_xml)
    self.source_repo = source_repo
    self.author = patch_xml.attribute('author').value.darcs_unescape
    utc_stamp = patch_xml.attribute('date').value
    local_stamp = patch_xml.attribute('local_date').value
    self.date = darcs_date_to_git_date(utc_stamp, local_stamp)
    self.inverted = (patch_xml.attribute('inverted').to_s == 'True')
    self.identifier = patch_xml.attribute('hash').to_s
    self.name = begin
      patch_xml.get_elements('name').first.get_text.value.darcs_unescape
    rescue StandardError
      'Unnamed patch'
    end
    self.comment = begin
      patch_xml.get_elements('comment').first.get_text.value.darcs_unescape
    rescue StandardError
      nil
    end
    self.is_tag = (name =~ /^TAG (.*)/)
    self.git_tag_name = $1.gsub(/[\s:.*]+/, '_') if is_tag
    @git_author_name, @git_author_email = AUTHOR_MAP[author]
  end

  # Patches are ordered by their darcs identifier.
  def <=>(other)
    identifier <=> other.identifier
  end

  # Text used as the git commit (or tag) message. The patch name is left out
  # when it looks like "[word @ digits]", and prefixed with "UNDO: " for
  # inverted patches. With --clean-commit-messages the "Ignore-this:" lines
  # are blanked and no darcs hash is appended.
  def git_commit_message
    patch_name = nil
    unless name =~ /^\[\w+ @ \d+\]/
      patch_name = inverted ? "UNDO: #{name}" : name
    end
    if OPTIONS[:clean_commit_messages]
      cleaned_comment = comment && comment.gsub(/^Ignore-this:.*$/, '')
      [patch_name, cleaned_comment].compact.join("\n")
    else
      [patch_name, comment, "darcs-hash:#{identifier}"].compact.join("\n\n")
    end
  end

  # All patches of +repo+, in the order `darcs changes --reverse` lists them.
  def self.read_from_repo(repo)
    changes_xml = output_of("darcs", "changes", "--reverse",
                            "--repo=#{repo}", "--xml",
                            "--no-summary")
    changelog = REXML::Document.new(changes_xml)
    changelog.get_elements('changelog/patch').map do |element|
      DarcsPatch.new(repo, element)
    end
  end

  # Return committish for corresponding patch in current git repo, or false/nil
  def id_in_git_repo
    COMMIT_HISTORY.find_git_commit(is_tag, git_tag_name, identifier)
  end

  # Print a header for the patch, pull it with darcs, show `git status`
  # (unless quiet) and commit the result to git.
  def pull_and_apply
    header = [
      "\n" + ("=" * 80),
      "PATCH : #{name}", ## "1 of 43"
      "DATE : #{date}",
      "AUTHOR: #{author} => #{git_author_name} <#{git_author_email}>",
      "-" * 80
    ]
    header.each { |line| puts line }
    pull
    puts output_of("git", "status", "--short") unless OPTIONS[:quiet]
    commit_to_git_repo
  end

  private

  # Pull exactly this patch from the source repository. With consistency
  # checks enabled the working directory must be clean beforehand; if it is
  # dirty afterwards, the changes are reverted and darcs backup files removed.
  def pull
    if OPTIONS[:do_checks] && !darcs_reports_clean_repo?
      raise "Darcs reports dirty repo before pulling #{identifier}; confused, so aborting"
    end
    run("darcs", "pull", "--all", "--quiet",
        "--match", "hash #{identifier}",
        "--set-scripts-executable", "--set-default", source_repo)
    return unless OPTIONS[:do_checks]
    return if darcs_reports_clean_repo?
    puts "Darcs reports dirty directory: assuming conflict that is fixed by a later patch... reverting"
    run("darcs revert --all")
    run("find . -name '*-darcs-backup0' -exec rm -f {} \\;") # darcs2 creates these
    return if darcs_reports_clean_repo?
    system("darcs whatsnew -sl")
    raise "Failed to clean repo, see above"
  end

  # Truthy when `darcs whatsnew -sl`, with the .git and darcs_testing_for_nfs
  # entries filtered out, prints nothing or only "No changes!".
  def darcs_reports_clean_repo?
    report = `darcs whatsnew -sl | egrep -v '^a (\./)?\.git(/|$)' | egrep -v '^a ./darcs_testing_for_nfs/$'`
    report =~ /^(No changes!)?$/
  end

  # Record the pulled patch in git using the patch's author and date for both
  # author and committer. Tags become annotated git tags. Otherwise new files
  # are added and a commit is made if anything changed; the id shown by
  # `git log -n1` is then stored for this patch in either case.
  def commit_to_git_repo
    ENV['GIT_COMMITTER_NAME'] = git_author_name
    ENV['GIT_AUTHOR_NAME'] = git_author_name
    ENV['GIT_COMMITTER_EMAIL'] = git_author_email
    ENV['GIT_AUTHOR_EMAIL'] = git_author_email
    ENV['GIT_COMMITTER_DATE'] = date
    ENV['GIT_AUTHOR_DATE'] = date
    return run("git", "tag", "-a", "-f", "-m", git_commit_message, git_tag_name) if is_tag

    new_files, changed_files = git_ls_files
    run(*(["git", "add", "-f"] + new_files)) if new_files.any?
    if changed_files.any? || new_files.any?
      output_of("git", "commit", "-a", "-m", git_commit_message)
    end
    # get full id of last commit and associate it with the patch id
    last_commit = output_of("git", "log", "-n1", "--no-color")
    commit_id = last_commit.scan(/^commit ([a-z0-9]+$)/).flatten.first
    COMMIT_HISTORY.record_git_commit(commit_id, identifier)
  end

  # Calculates a git-friendly date (e.g., timezone CET described as +0100)
  # from the two date fields darcs provides: the UTC time as a run of digits
  # and the local time in a human-readable format. Timezone names are not
  # well-defined, so the name is ignored and the offset is computed as the
  # difference between local time and UTC time.
  # Raises when the UTC field is not 14 digits.
  def darcs_date_to_git_date(utc, local)
    unless utc =~ /^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/
      raise "Wrong darcs date format"
    end
    utc_time = Time.utc($1, $2, $3, $4, $5, $6)
    # example: Mon Oct 2 14:23:28 CEST 2006
    # everything except timezone name is fixed-length, if parsing
    # fails we just use UTC
    pat = /^\w\w\w (\w\w\w) ([ 1-9]\d) ([ 0-9]\d)\:(\d\d)\:(\d\d) \w* (\d\d\d\d)/
    local_time = (pat =~ local) ? Time.utc($6, $1, $2, $3, $4, $5) : utc_time
    offs = local_time - utc_time # time offset in seconds
    sign = offs < 0 ? "-" : "+"
    # formats the above example as: 2006-10-02 14:23:28 +0200
    sprintf("%4d-%02d-%02d %02d:%02d:%02d %s%02d%02d",
            local_time.year, local_time.month, local_time.day,
            local_time.hour, local_time.min, local_time.sec,
            sign, offs.abs/3600, offs.abs.modulo(3600)/60)
  end

  # Returns [new_files, changed_files] from `git ls-files -t`: entries tagged
  # '?' count as new, entries tagged 'R' or 'C' as changed.
  def git_ls_files
    summary = output_of("git", "ls-files", "-t", "-o", "-m", "-d", "-z", "-X", ".git/info/exclude")
    entries = summary.scan(/(.?) (.*?)\0/m)
    new_files = entries.select { |code, _| %w(?).include?(code) }.map { |_, path| path }
    changed_files = entries.select { |code, _| %w(R C).include?(code) }.map { |_, path| path }
    [new_files, changed_files]
  end
end
# Print a YAML mapping of every distinct darcs author string to the
# "Name <email>" form it currently resolves to (the email part is omitted
# when the patch has none), preceded by a short explanatory header. When an
# author occurs in several patches, the last one determines the value.
def extract_authors(patches)
  unique_authors = patches.inject({}) do |seen, patch|
    email_part = patch.git_author_email.nil? ? "" : " <#{patch.git_author_email}>"
    seen[patch.author] = "#{patch.git_author_name}#{email_part}"
    seen
  end
  [
    "# You can use the following output as a starting point for an author_map",
    "# Just fill in the proper text after the colon; put email addresses in",
    "# angle brackets. You can remove any lines that look OK to you."
  ].each { |line| puts line }
  # TODO: Can we make the output sorted?
  puts YAML::dump(unique_authors)
end
# -------------------------------------------------------------------------------
# Pre-flight checks
# -------------------------------------------------------------------------------
# Version numbers found in the output of `darcs -v`, as an array of integers
# (a missing third component becomes 0).
DARCS_VERSION = output_of("darcs", "-v").
  scan(/(\d+)\.(\d+)(?:\.(\d+))?/).
  flatten.
  map { |component| component.to_i }
# Tells whether +repo+ uses the darcs-2 format.
#
# For a path that exists locally, `darcs show repo` is consulted and the
# result is the match position of "Format: ... darcs-2" (nil when absent,
# false when the command fails).
#
# Otherwise the file "<repo>/_darcs/format" is read and the result is truthy
# when it could be read and does not mention "darcs-1.".
# NOTE(review): this branch is only reached when File.exist?(repo) is false,
# so File.read presumably always fails here and the result is nil -- confirm
# whether a remote fetch was intended.
def darcs2_repo?(repo)
  unless File.exist?(repo)
    format_file = begin
      File.read(repo.scan(/^(.*?)\/?$/).flatten.first + "/_darcs/format")
    rescue StandardError
      nil
    end
    return format_file && !(format_file =~ /darcs-1\./)
  end

  # Local repo
  begin
    output_of("darcs", "show", "repo", "--repo=#{repo}") =~ /Format:.*darcs-2/
  rescue # darcs1 does not have a "show" command, so we get an exception
    false
  end
end
# Mixing Comparable into Array provides <, <=, > and >= on top of Array#<=>,
# which the version comparisons in this script rely on.
class Array
  include Comparable
end
# Warn, but carry on, when darcs is not newer than 1.0.7.
if !(DARCS_VERSION > [1, 0, 7])
  STDERR.write("WARNING: your darcs appears to be old, and may not work with this script\n")
end
# -------------------------------------------------------------------------------
# Initialise the working area
# -------------------------------------------------------------------------------
ENV['GIT_PAGER'] = ENV['PAGER'] = "cat" # so that pager of git-log doesn't halt conversion
# First run in this directory (no _darcs yet): create the darcs and git
# repositories side by side and make each ignore the other's metadata.
unless File.directory?("_darcs")
  puts "Initialising the working area."
  # Dir.entries always lists "." and "..", so two entries means empty.
  raise "Directory not empty. Aborting" unless Dir.entries(Dir.pwd).size == 2

  darcs_init = ["darcs", "init"]
  if darcs2_repo?(SRCREPO)
    darcs_init.push("--darcs-2")
  elsif DARCS_VERSION >= [2, 0, 0]
    puts "Using legacy darcs inventory format to match upstream repo"
    darcs_init.push("--old-fashioned-inventory")
  end
  run(*darcs_init)
  run("git", "init")

  FileUtils.mkdir_p(".git/info")
  File.open(".git/info/exclude", "a+") { |f| f.write("_darcs\n.DS_Store\n") }

  # Patterns to exclude
  git_borings = ['(^|/)\.git($|/)', '(^|/)\.DS_Store$']
  # Check existing global boring patterns
  global_darcs_dir = Pathname.new("#{ENV['HOME']}/.darcs")
  global_boring_file = global_darcs_dir + 'boring'
  existing_borings =
    if global_boring_file.exist?
      File.open(global_boring_file, 'r') { |f| f.readlines }.map { |l| l.chomp }
    else
      global_darcs_dir.mkdir unless global_darcs_dir.directory?
      []
    end
  # Add boring patterns to global boring file
  File.open(global_boring_file, 'a') do |f|
    (git_borings - existing_borings).each { |b| f.puts b }
  end
  # TODO: migrate darcs borings into git excludes?
end
# Conversion proper: load the commit history and the author map, read the
# patch list from the source repository, import every patch that is not in
# git yet (optionally limited by --patches), and finally compare the result
# with the source repository when that is a local path.
COMMIT_HISTORY = CommitHistory.new(GIT_PATCHES)
# An explicit --author-map wins over the default map file; without either,
# the map starts out empty.
AUTHOR_MAP = if OPTIONS[:author_map]
               AuthorMap.load(OPTIONS[:author_map])
             elsif File.exist?(DEFAULT_AUTHOR_MAP_FILE)
               AuthorMap.load(DEFAULT_AUTHOR_MAP_FILE)
             else
               AuthorMap.new
             end
AUTHOR_MAP.default_author = OPTIONS[:default_author]
AUTHOR_MAP.default_email = OPTIONS[:default_email]
patches = DarcsPatch.read_from_repo(SRCREPO)
if OPTIONS[:list_authors]
  extract_authors(patches)
  exit(0)
end
patches_available = if COMMIT_HISTORY.empty_repo?
                      patches
                    else
                      patches.reject { |patch| patch.id_in_git_repo }
                    end
patches_to_pull = if OPTIONS[:num_patches]
                    patches_available.first(OPTIONS[:num_patches])
                  else
                    patches_available
                  end
original_stdout = $stdout
# Capture all writes to stdout.
$stdout = StringIO.new if OPTIONS[:quiet]
begin
  patches_to_pull.each_with_index do |patch, index|
    original_stdout.puts "\nImporting patch #{index + 1} of #{patches_to_pull.size}:"
    patch.pull_and_apply
  end
ensure
  # Put the real stdout back even when an import raises, so that nothing
  # printed afterwards ends up in the capture buffer.
  $stdout = original_stdout
end
pulled = patches_to_pull.size
if pulled == 0
  puts "\nNothing to pull."
else
  puts "\nPulled #{pulled} patch#{"es" unless pulled == 1}."
  puts "\nDarcs import successful! You may now want to run `git gc' to\noptimize space usage of the git repo"
end
# -------------------------------------------------------------------------------
# Post-flight checks
# -------------------------------------------------------------------------------
# if we didn't pull all patches, then the consistency check would
# fail, so we simply skip it
if patches_to_pull.size == patches_available.size
  if File.exist?(SRCREPO)
    puts "Comparing final state with source repo..."
    # system returns true only when diff exits with status 0 (no differences).
    contents_match = system("diff", "-ur", "-x", "_darcs", "-x", ".git", ".", SRCREPO)
    if contents_match
      puts "Contents match."
    else
      abort <<-end_msg
!!! There were differences! See diff above for details.
!!! It may be that the source repository was dirty.
!!! Run "cd #{SRCREPO} && darcs whatsnew -sl" to check.
      end_msg
    end
  else
    puts "Warning: source repo is not local, so we can't compare final git state with latest darcs contents."
  end
end
Jump to Line
Something went wrong with that request. Please try again.