Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ dependencies {
compile 'com.github.kittinunf.fuel:fuel-rxjava:1.9.0'
compile group: 'org.eclipse.jgit', name: 'org.eclipse.jgit',
version: '4.8.0.201706111038-r'
compile "org.slf4j:slf4j-nop:1.7.2"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure exactly how this works, but I trust you on this one.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JGit uses the SLF4J API; slf4j-nop is an empty implementation of this API ("nop" stands for "no operation").


testCompile 'org.jetbrains.kotlin:kotlin-test'
testCompile 'org.jetbrains.spek:spek-api:1.1.4'
Expand Down
3 changes: 1 addition & 2 deletions src/main/kotlin/app/extractors/JavaExtractor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ class JavaExtractor : ExtractorInterface {
val LANGUAGE_NAME = "java"
val FILE_EXTS = listOf("java")
}
val NAME = "Java"

val KEYWORDS = listOf("abstract", "continue", "for", "new", "switch",
"assert", "default", "goto", "package", "synchronized", "boolean",
Expand Down Expand Up @@ -49,7 +48,7 @@ class JavaExtractor : ExtractorInterface {
numLinesAdded = totalAdded,
numLinesDeleted = totalDeleted,
type = Extractor.TYPE_KEYWORD,
tech = NAME + Extractor.SEPARATOR + keyword))
tech = LANGUAGE_NAME + Extractor.SEPARATOR + keyword))
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/kotlin/app/extractors/ObjectiveCExtractor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import app.model.DiffFile
class ObjectiveCExtractor : ExtractorInterface {
companion object {
val LANGUAGE_NAME = "objectivec"
val FILE_EXTS = listOf("h", "m", "mm")
val FILE_EXTS = listOf("m", "mm")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I know, .h is a valid file extension for Objective-C; at least it's in use in Gecko.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but for now we can't tell whether a .h file belongs to C/C++ or Objective-C. This extension will be introduced together with ANTLR parsing of files.

}

override fun extract(files: List<DiffFile>): List<CommitStats> {
Expand Down
31 changes: 25 additions & 6 deletions src/main/kotlin/app/hashers/CommitHasher.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@ package app.hashers

import app.Logger
import app.api.Api
import app.config.Configurator
import app.extractors.Extractor
import app.model.Commit
import app.model.DiffContent
import app.model.DiffEdit
import app.model.DiffFile
import app.model.DiffRange
import app.model.LocalRepo
Expand All @@ -23,6 +21,7 @@ import org.eclipse.jgit.lib.Repository
import org.eclipse.jgit.revwalk.RevWalk
import java.nio.charset.Charset
import org.eclipse.jgit.diff.DiffFormatter
import org.eclipse.jgit.diff.RawText
import org.eclipse.jgit.lib.ObjectId
import org.eclipse.jgit.errors.MissingObjectException
import org.eclipse.jgit.revwalk.RevCommit
Expand Down Expand Up @@ -58,13 +57,23 @@ class CommitHasher(private val localRepo: LocalRepo,
|| !knownCommits.contains(new) }
.filter { (new, _) -> emailFilter(new) } // Email filtering.
.map { (new, old) -> // Mapping and stats extraction.
new.repo = repo
val diffFiles = getDiffFiles(new, old)
Logger.debug("Commit: ${new.raw?.name ?: ""}: "
+ new.raw?.shortMessage)
new.repo = repo

val diffFiles = getDiffFiles(new, old)
Logger.debug("Diff: ${diffFiles.size} entries")
new.stats = Extractor().extract(diffFiles)
Logger.debug("Stats: ${new.stats.size} entries")

// Count lines on all non-binary files. This is additional
// statistics to CommitStats because not all file extensions
// may be supported.
new.numLinesAdded = diffFiles.fold(0) { total, file ->
total + file.getAllAdded().size }
new.numLinesDeleted = diffFiles.fold(0) { total, file ->
total + file.getAllDeleted().size }

new
}
.observeOn(Schedulers.io()) // Different thread for data sending.
Expand All @@ -81,18 +90,28 @@ class CommitHasher(private val localRepo: LocalRepo,

private fun getDiffFiles(commitNew: Commit,
commitOld: Commit): List<DiffFile> {
// TODO(anatoly): Binary files.
val revCommitNew:RevCommit? = commitNew.raw
val revCommitOld:RevCommit? = commitOld.raw

return DiffFormatter(DisabledOutputStream.INSTANCE).use { formatter ->
formatter.setRepository(gitRepo)
formatter.setDetectRenames(true)
formatter.scan(revCommitOld?.tree, revCommitNew?.tree)
// RENAME change type doesn't change file content.
.filter { it.changeType != DiffEntry.ChangeType.RENAME }
// Skip binary files.
.filter {
val id = if (it.changeType == DiffEntry.ChangeType.DELETE) {
it.oldId.toObjectId()
} else {
it.newId.toObjectId()
}
!RawText.isBinary(gitRepo.open(id).openStream())
}
.map { diff ->
val new = getContentByObjectId(diff.newId.toObjectId())
val old = getContentByObjectId(diff.oldId.toObjectId())

val edits = formatter.toFileHeader(diff).toEditList()
val path = when (diff.changeType) {
DiffEntry.ChangeType.DELETE -> diff.oldPath
Expand Down Expand Up @@ -152,7 +171,7 @@ class CommitHasher(private val localRepo: LocalRepo,
repo.emails.contains(email))
}

fun <T> Observable<T>.pairWithNext(): Observable<Pair<T, T>> {
private fun <T> Observable<T>.pairWithNext(): Observable<Pair<T, T>> {
return this.map { emit -> Pair(emit, emit) }
// Accumulate emits by prev-next pair.
.scan { pairAccumulated, pairNext ->
Expand Down
2 changes: 1 addition & 1 deletion src/main/proto/sourcerer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ message Commit {
string author_name = 4;
string author_email = 5;

// Timestamp of a commit creation.
// Timestamp of a commit creation in seconds UTC+0.
uint32 date = 6;

// Is quality commit.
Expand Down