Skip to content

Commit

Permalink
Support stripping by blob Id on the command line
Browse files Browse the repository at this point in the history
Helps a little with issue #12. People can get a list of blob-ids using
"git rev-list --all --objects", then grep to list all files in
directories they want to nuke, and pass that to the BFG, as noted by
@Fryguy:

#12 (comment)
  • Loading branch information
rtyley committed Mar 19, 2013
1 parent 6f6b84b commit 12d1b00
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 9 deletions.
8 changes: 4 additions & 4 deletions src/main/scala/com/madgag/git/bfg/GitUtil.scala
Expand Up @@ -35,10 +35,6 @@ import com.madgag.git.bfg.cleaner._
import org.eclipse.jgit.api.Git
import org.eclipse.jgit.treewalk.filter.{AndTreeFilter, TreeFilter}

/** Shorthand factory for JGit object ids, so callers can write `ObjectId("...")`. */
object ObjectId {
  /** Delegates to JGit's `ObjectId.fromString` to turn `str` into an object id. */
  def apply(str: String): org.eclipse.jgit.lib.ObjectId =
    org.eclipse.jgit.lib.ObjectId.fromString(str)
}

trait CleaningMapper[V] extends Cleaner[V] {
def isDirty(v: V) = apply(v) != v

Expand Down Expand Up @@ -82,6 +78,10 @@ object GitUtil {
}
}

/** Enriches `String` with a conversion to a JGit object id. */
implicit class RichString(str: String) {
  /** The wrapped string parsed by JGit's `ObjectId.fromString`. */
  def asObjectId: org.eclipse.jgit.lib.ObjectId =
    org.eclipse.jgit.lib.ObjectId.fromString(str)
}

implicit class RichRevTree(revTree: RevTree) {
def walk(postOrderTraversal: Boolean = false)(implicit reader: ObjectReader) = {
val treeWalk = new TreeWalk(reader)
Expand Down
12 changes: 9 additions & 3 deletions src/main/scala/com/madgag/git/bfg/cli/CLIConfig.scala
Expand Up @@ -21,7 +21,6 @@
package com.madgag.git.bfg.cli

import java.io.File
import com.madgag.git.bfg.GitUtil._
import com.madgag.git.bfg.cleaner._
import com.madgag.git.bfg.cleaner.TreeBlobsCleaner.Kit
import com.madgag.git.bfg.textmatching.RegexReplacer._
Expand All @@ -34,10 +33,11 @@ import org.eclipse.jgit.storage.file.FileRepository
import protection.ObjectProtection
import scopt.immutable.OptionParser
import scala.Some
import com.madgag.git.bfg.GitUtil.SizedObject
import com.madgag.git.bfg.GitUtil._
import com.madgag.git.bfg.model.{FileName, TreeBlobEntry}
import com.madgag.git.bfg.textmatching.{Glob, Literal, TextMatcher}
import com.madgag.inclusion._
import org.eclipse.jgit.lib.ObjectId


object CLIConfig {
Expand All @@ -49,6 +49,9 @@ object CLIConfig {
intOpt("B", "strip-biggest-blobs", "NUM", "strip the top NUM biggest blobs") {
(v: Int, c: CLIConfig) => c.copy(stripBiggestBlobs = Some(v))
},
opt("bi", "strip-blobs-with-ids", "<blob-ids-file>", "strip blobs with the specified Git object ids") {
  // v is the path of a file holding one blob id per line; surrounding whitespace
  // is trimmed and blank lines are ignored.
  (v: String, c: CLIConfig) => {
    val blobIdsSource = Source.fromFile(v)
    try {
      // getLines() is lazy: toSet forces the whole file to be read while the
      // source is still open, so nothing is consumed after close().
      val blobIds = blobIdsSource.getLines().map(_.trim).filterNot(_.isEmpty).map(_.asObjectId).toSet
      c.copy(stripBlobsWithIds = Some(blobIds))
    } finally {
      // Release the file handle even when a line fails to parse as an object id.
      blobIdsSource.close()
    }
  }
},
opt("D", "delete-files", "<glob>", "delete files with the specified names (eg '*.class', '*.{txt,log}' - matches on file name, not path within repo)") {
(v: String, c: CLIConfig) => c.copy(deleteFiles = Some(FileMatcher(v)))
},
Expand Down Expand Up @@ -99,6 +102,7 @@ case class CLIConfig(stripBiggestBlobs: Option[Int] = None,
filenameFilters: Seq[Filter[String]] = Nil,
filterSizeThreshold: Int = BlobTextModifier.DefaultSizeThreshold,
replaceBannedStrings: Traversable[String] = List.empty,
stripBlobsWithIds: Option[Set[ObjectId]] = None,
blobCharsetDetector: BlobCharsetDetector = QuickBlobCharsetDetector,
strictObjectChecking: Boolean = false,
sensitiveData: Option[Boolean] = None,
Expand Down Expand Up @@ -138,6 +142,8 @@ case class CLIConfig(stripBiggestBlobs: Option[Int] = None,
}
}

// Defined only when stripBlobsWithIds is set; wraps the requested ids in a BlobRemover.
lazy val blobsByIdRemover = stripBlobsWithIds.map(blobIds => new BlobRemover(blobIds))

lazy val blobRemover = {
implicit val progressMonitor = new TextProgressMonitor()

Expand Down Expand Up @@ -171,7 +177,7 @@ case class CLIConfig(stripBiggestBlobs: Option[Int] = None,

// Cleaners for commit nodes: the commit-message object-id updater, followed by
// whatever formerCommitFooter contributes (defined outside this excerpt).
lazy val commitNodeCleaners = Seq(new CommitMessageObjectIdsUpdater(objectIdSubstitutor)) ++ formerCommitFooter

lazy val treeBlobCleaners = Seq(blobRemover, fileDeletion, blobTextModifier).flatten
// The tree-blob cleaners, listed with removal by blob id first.
// blobsByIdRemover is an Option, so flatten drops it when no ids were supplied
// (presumably the other three entries are Options as well; confirm against their definitions).
lazy val treeBlobCleaners = Seq(blobsByIdRemover, blobRemover, fileDeletion, blobTextModifier).flatten

// True when treeBlobCleaners is empty, i.e. no tree-blob cleaner is present.
lazy val definesNoWork = treeBlobCleaners.isEmpty

Expand Down
25 changes: 24 additions & 1 deletion src/test/scala/com/madgag/git/bfg/cli/MainSpec.scala
Expand Up @@ -24,9 +24,10 @@ import com.madgag.git.bfg._
import com.madgag.git.bfg.GitUtil._
import scala.collection.convert.wrapAsScala._
import org.specs2.mutable._
import org.eclipse.jgit.lib.Repository
import org.eclipse.jgit.lib.{ObjectId, Repository}
import org.eclipse.jgit.revwalk.RevCommit
import org.specs2.matcher.Matcher
import scalax.file.Path

class MainSpec extends Specification {
"CLI" should {
Expand Down Expand Up @@ -59,7 +60,29 @@ class MainSpec extends Specification {

commitHist must (not(haveFolder("secret-files"))).forall
}

"strip blobs by id" in {
  // Unpack the sample repository used as the fixture for this example.
  implicit val repo = unpackRepo("/sample-repos/example.git.zip")
  implicit val (revWalk, reader) = repo.singleThreadedReaderTuple

  // Lifts a matcher over object ids into a matcher over commits: it is applied
  // to the object ids collected by walking the commit's tree.
  def haveCommitWhereObjectIds(idsMatcher: Matcher[Traversable[ObjectId]]): Matcher[RevCommit] =
    idsMatcher ^^ { (commit: RevCommit) => commit.getTree.walk().map(_.getObjectId(0)).toList }

  val blobsToStrip = Set(abbrId("db59"), abbrId("86f9"))

  // Write the full ids, one per line, to the file handed to the CLI.
  val idsFile = Path.createTempFile()
  val idLines = blobsToStrip.map(_.name())
  idsFile.writeStrings(idLines, "\n")

  // Precondition: some commit contains all of the target blobs before the run.
  commitHist must haveCommitWhereObjectIds(containAllOf(blobsToStrip.toSeq)).atLeastOnce

  run(s"--strip-blobs-with-ids ${idsFile.path}")

  // Afterwards no commit may contain any of them.
  commitHist must (not(haveCommitWhereObjectIds(containAnyOf(blobsToStrip.toSeq)))).forall
}
}



def commitHist(implicit repo: Repository) = repo.git.log.all.call.toSeq.reverse
}
2 changes: 1 addition & 1 deletion src/test/scala/com/madgag/git/bfg/package.scala
Expand Up @@ -52,5 +52,5 @@ package object bfg {
}

def commitThatWasFormerly(id: ObjectId): RevCommit => Boolean =
_.getFooterLines.exists(f => f.getKey == FormerCommitFooter.Key && ObjectId(f.getValue) == id)
_.getFooterLines.exists(f => f.getKey == FormerCommitFooter.Key && f.getValue.asObjectId == id)
}

0 comments on commit 12d1b00

Please sign in to comment.