From 7800c8aa26ef19ebfaf41ea3147d03e70b82975f Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Mon, 11 Sep 2017 16:19:33 +0300 Subject: [PATCH 1/5] chore: remove excess constants from extractors, add detailed comment about timestamp to proto --- src/main/kotlin/app/extractors/JavaExtractor.kt | 3 +-- src/main/kotlin/app/extractors/ObjectiveCExtractor.kt | 2 +- src/main/proto/sourcerer.proto | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/app/extractors/JavaExtractor.kt b/src/main/kotlin/app/extractors/JavaExtractor.kt index 6fb4bf25..051dc68e 100644 --- a/src/main/kotlin/app/extractors/JavaExtractor.kt +++ b/src/main/kotlin/app/extractors/JavaExtractor.kt @@ -13,7 +13,6 @@ class JavaExtractor : ExtractorInterface { val LANGUAGE_NAME = "java" val FILE_EXTS = listOf("java") } - val NAME = "Java" val KEYWORDS = listOf("abstract", "continue", "for", "new", "switch", "assert", "default", "goto", "package", "synchronized", "boolean", @@ -49,7 +48,7 @@ class JavaExtractor : ExtractorInterface { numLinesAdded = totalAdded, numLinesDeleted = totalDeleted, type = Extractor.TYPE_KEYWORD, - tech = NAME + Extractor.SEPARATOR + keyword)) + tech = LANGUAGE_NAME + Extractor.SEPARATOR + keyword)) } } diff --git a/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt b/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt index eecd0c32..5f16b09e 100644 --- a/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt +++ b/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt @@ -10,7 +10,7 @@ import app.model.DiffFile class ObjectiveCExtractor : ExtractorInterface { companion object { val LANGUAGE_NAME = "objectivec" - val FILE_EXTS = listOf("h", "m", "mm") + val FILE_EXTS = listOf("m", "mm") } override fun extract(files: List): List { diff --git a/src/main/proto/sourcerer.proto b/src/main/proto/sourcerer.proto index 26a38de3..e3e42b5e 100644 --- a/src/main/proto/sourcerer.proto +++ b/src/main/proto/sourcerer.proto @@ -23,7 +23,7 @@ message Commit { string author_name = 4; string author_email = 5; - // Timestamp of a commit creation. + // Timestamp of a commit creation in seconds UTC. uint32 date = 6; // Is quality commit. From bdd3c9664f9cdef5066575469d92481f2f77e988 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Mon, 11 Sep 2017 17:41:09 +0300 Subject: [PATCH 2/5] wip: filter binary files hashing, add number of lines per commit stats --- src/main/kotlin/app/hashers/CommitHasher.kt | 27 ++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/main/kotlin/app/hashers/CommitHasher.kt b/src/main/kotlin/app/hashers/CommitHasher.kt index 71bba8c7..e88d9329 100644 --- a/src/main/kotlin/app/hashers/CommitHasher.kt +++ b/src/main/kotlin/app/hashers/CommitHasher.kt @@ -23,6 +23,7 @@ import org.eclipse.jgit.lib.Repository import org.eclipse.jgit.revwalk.RevWalk import java.nio.charset.Charset import org.eclipse.jgit.diff.DiffFormatter +import org.eclipse.jgit.diff.RawText import org.eclipse.jgit.lib.ObjectId import org.eclipse.jgit.errors.MissingObjectException import org.eclipse.jgit.revwalk.RevCommit @@ -58,13 +59,23 @@ class CommitHasher(private val localRepo: LocalRepo, || !knownCommits.contains(new) } .filter { (new, _) -> emailFilter(new) } // Email filtering. .map { (new, old) -> // Mapping and stats extraction. - new.repo = repo - val diffFiles = getDiffFiles(new, old) Logger.debug("Commit: ${new.raw?.name ?: ""}: " + new.raw?.shortMessage) + new.repo = repo + + val diffFiles = getDiffFiles(new, old) Logger.debug("Diff: ${diffFiles.size} entries") new.stats = Extractor().extract(diffFiles) Logger.debug("Stats: ${new.stats.size} entries") + + // Count lines on all non-binary files. This is additional + // statistics to CommitStats because not all file extensions + // may be supported. + new.numLinesAdded = diffFiles.fold(0) { total, file -> + total + file.getAllAdded().size } + new.numLinesDeleted = diffFiles.fold(0) { total, file -> + total + file.getAllDeleted().size } + new } .observeOn(Schedulers.io()) // Different thread for data sending. @@ -81,18 +92,28 @@ class CommitHasher(private val localRepo: LocalRepo, private fun getDiffFiles(commitNew: Commit, commitOld: Commit): List { - // TODO(anatoly): Binary files. val revCommitNew:RevCommit? = commitNew.raw val revCommitOld:RevCommit? = commitOld.raw return DiffFormatter(DisabledOutputStream.INSTANCE).use { formatter -> formatter.setRepository(gitRepo) + formatter.setDetectRenames(true) formatter.scan(revCommitOld?.tree, revCommitNew?.tree) // RENAME change type doesn't change file content. .filter { it.changeType != DiffEntry.ChangeType.RENAME } + // Skip binary files. + .filter { + val id = if (it.changeType == DiffEntry.ChangeType.DELETE) { + it.oldId.toObjectId() + } else { + it.newId.toObjectId() + } + !RawText.isBinary(gitRepo.open(id).openStream()) + } .map { diff -> val new = getContentByObjectId(diff.newId.toObjectId()) val old = getContentByObjectId(diff.oldId.toObjectId()) + val edits = formatter.toFileHeader(diff).toEditList() val path = when (diff.changeType) { DiffEntry.ChangeType.DELETE -> diff.oldPath From 08d7309dca5b917f9c06bcc71ffabc25e2036653 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Mon, 11 Sep 2017 17:42:23 +0300 Subject: [PATCH 3/5] chore: private function, remove excess imports --- src/main/kotlin/app/hashers/CommitHasher.kt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/kotlin/app/hashers/CommitHasher.kt b/src/main/kotlin/app/hashers/CommitHasher.kt index e88d9329..323698a2 100644 --- a/src/main/kotlin/app/hashers/CommitHasher.kt +++ b/src/main/kotlin/app/hashers/CommitHasher.kt @@ -5,11 +5,9 @@ package app.hashers import app.Logger import app.api.Api -import app.config.Configurator import app.extractors.Extractor import app.model.Commit import app.model.DiffContent -import app.model.DiffEdit import app.model.DiffFile import app.model.DiffRange import app.model.LocalRepo @@ -173,7 +171,7 @@ class CommitHasher(private val localRepo: LocalRepo, repo.emails.contains(email)) } - fun Observable.pairWithNext(): Observable> { + private fun Observable.pairWithNext(): Observable> { return this.map { emit -> Pair(emit, emit) } // Accumulate emits by prev-next pair. .scan { pairAccumulated, pairNext -> From 7d6cec1946c28d89e51f081813e59d530bb6abc7 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Mon, 11 Sep 2017 21:53:16 +0300 Subject: [PATCH 4/5] fix: disable slf4j --- build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/build.gradle b/build.gradle index 62b31d43..865c0621 100644 --- a/build.gradle +++ b/build.gradle @@ -63,6 +63,7 @@ dependencies { compile 'com.github.kittinunf.fuel:fuel-rxjava:1.9.0' compile group: 'org.eclipse.jgit', name: 'org.eclipse.jgit', version: '4.8.0.201706111038-r' + compile "org.slf4j:slf4j-nop:1.7.2" testCompile 'org.jetbrains.kotlin:kotlin-test' testCompile 'org.jetbrains.spek:spek-api:1.1.4' From ee8473ac4504f439fd5cecd3bfe4edbc85f0c593 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Tue, 12 Sep 2017 00:14:45 +0300 Subject: [PATCH 5/5] chore: detailed format --- src/main/proto/sourcerer.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/proto/sourcerer.proto b/src/main/proto/sourcerer.proto index e3e42b5e..cc9c26ec 100644 --- a/src/main/proto/sourcerer.proto +++ b/src/main/proto/sourcerer.proto @@ -23,7 +23,7 @@ message Commit { string author_name = 4; string author_email = 5; - // Timestamp of a commit creation in seconds UTC. + // Timestamp of a commit creation in seconds UTC+0. uint32 date = 6; // Is quality commit.