diff --git a/.gitignore b/.gitignore index 51c61766..3cfd2c6f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .DS_Store .gradle/ .idea/ +.sourcerer build/ sourcerer-app.iml sourcerer-app.ipr @@ -20,4 +21,3 @@ app.iws /ultimate/ideaSDK /ultimate/out /ultimate/tmp -src/main/resources/data/models/ diff --git a/src/main/kotlin/app/FactCodes.kt b/src/main/kotlin/app/FactCodes.kt index ccc5834a..572e1c0e 100644 --- a/src/main/kotlin/app/FactCodes.kt +++ b/src/main/kotlin/app/FactCodes.kt @@ -1,11 +1,20 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. +// Author: Anatoly Kislov (anatoly@sourcerer.io) + package app object FactCodes { - val COMMITS_DAY_WEEK = 1 - val COMMITS_DAY_TIME = 2 - val LINE_LONGEVITY = 3 - val LINE_LONGEVITY_REPO = 4 - val REPO_DATE_START = 5 - val REPO_DATE_END = 6 - val REPO_TEAM_SIZE = 7 + val COMMIT_DAY_WEEK = 1 // Day of week fun fact and graph. + val COMMIT_DAY_TIME = 2 // Day time fun fact. + val COMMIT_LINE_NUM_AVG = 8 // Average number of lines per commit fun fact. + val COMMIT_NUM = 9 // Used for averaging COMMIT_LINE_NUM_AVG between repos. + // A map of line numbers to commits number. Used in a commit histogram. + val COMMIT_NUM_TO_LINE_NUM = 12 + val LINE_LONGEVITY = 3 // Used for longevity graph. + val LINE_LONGEVITY_REPO = 4 // Used for longevity graph. + val LINE_LEN_AVG = 10 // Average length of line fun fact. + val LINE_NUM = 11 // Used for averaging LINE_LEN_AVG between repos. + val REPO_DATE_START = 5 // Repo summary info. Date of first contribution. + val REPO_DATE_END = 6 // Repo summary info. Date of last contribution. + val REPO_TEAM_SIZE = 7 // Repo summary info. Number of contributors. } diff --git a/src/main/kotlin/app/hashers/CommitCrawler.kt b/src/main/kotlin/app/hashers/CommitCrawler.kt index 6b0bb1d3..861cf728 100644 --- a/src/main/kotlin/app/hashers/CommitCrawler.kt +++ b/src/main/kotlin/app/hashers/CommitCrawler.kt @@ -48,6 +48,15 @@ object CommitCrawler { } new.diffs = getDiffFiles(git, new, old) Logger.debug { "Diff: ${new.diffs.size} entries" } + // Count lines on all non-binary files. This is additional + // statistics to CommitStats because not all file extensions + // may be supported. + new.numLinesAdded = new.diffs.fold(0) { total, file -> + total + file.getAllAdded().size + } + new.numLinesDeleted = new.diffs.fold(0) { total, file -> + total + file.getAllDeleted().size + } new.repo = repo new } diff --git a/src/main/kotlin/app/hashers/CommitHasher.kt b/src/main/kotlin/app/hashers/CommitHasher.kt index 4776b9dd..dd40535b 100644 --- a/src/main/kotlin/app/hashers/CommitHasher.kt +++ b/src/main/kotlin/app/hashers/CommitHasher.kt @@ -51,15 +51,6 @@ class CommitHasher(private val serverRepo: Repo = Repo(), Logger.info { "Stats: ${commit.stats.size} entries" } Logger.debug { commit.stats.toString() } - // Count lines on all non-binary files. This is additional - // statistics to CommitStats because not all file extensions - // may be supported. - commit.numLinesAdded = commit.diffs.fold(0) { total, file -> - total + file.getAllAdded().size - } - commit.numLinesDeleted = commit.diffs.fold(0) { total, file -> - total + file.getAllDeleted().size - } commit } .buffer(20, TimeUnit.SECONDS) // Group ready commits by time. diff --git a/src/main/kotlin/app/hashers/FactHasher.kt b/src/main/kotlin/app/hashers/FactHasher.kt index 19f44243..4d9a3c58 100644 --- a/src/main/kotlin/app/hashers/FactHasher.kt +++ b/src/main/kotlin/app/hashers/FactHasher.kt @@ -19,12 +19,17 @@ import java.time.ZoneOffset */ class FactHasher(private val serverRepo: Repo = Repo(), private val api: Api, + private val rehashes: List, private val emails: HashSet) { private val fsDayWeek = hashMapOf>() private val fsDayTime = hashMapOf>() private val fsRepoDateStart = hashMapOf() private val fsRepoDateEnd = hashMapOf() - private val fsRepoTeamSize = hashSetOf() + private val fsCommitLineNumAvg = hashMapOf() + private val fsCommitNum = hashMapOf() + private val fsLineLenAvg = hashMapOf() + private val fsLineNum = hashMapOf() + private val fsLinesPerCommits = hashMapOf>() init { for (author in emails) { @@ -32,6 +37,12 @@ class FactHasher(private val serverRepo: Repo = Repo(), fsDayTime.put(author, Array(24) { 0 }) fsRepoDateStart.put(author, -1) fsRepoDateEnd.put(author, -1) + fsCommitLineNumAvg.put(author, 0.0) + fsCommitNum.put(author, 0) + fsLineLenAvg.put(author, 0.0) + fsLineNum.put(author, 0) + // TODO(anatoly): Do the bin computations on the go. + fsLinesPerCommits.put(author, Array(rehashes.size) {0}) } } @@ -39,36 +50,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), onError: (Throwable) -> Unit) { observable .filter { commit -> emails.contains(commit.author.email) } - .subscribe({ commit -> // OnNext. - // Calculate facts. - val email = commit.author.email - val timestamp = commit.dateTimestamp - val dateTime = LocalDateTime.ofEpochSecond(timestamp, 0, - ZoneOffset.ofTotalSeconds(commit.dateTimeZoneOffset * 60)) - - // DayWeek. - val factDayWeek = fsDayWeek[email] ?: Array(7) { 0 } - // The value is numbered from 1 (Monday) to 7 (Sunday). - factDayWeek[dateTime.dayOfWeek.value - 1] += 1 - fsDayWeek[email] = factDayWeek - - // DayTime. - val factDayTime = fsDayTime[email] ?: Array(24) { 0 } - // Hour from 0 to 23. - factDayTime[dateTime.hour] += 1 - fsDayTime[email] = factDayTime - - // RepoDateStart. - fsRepoDateStart[email] = timestamp - - // RepoDateEnd. - if ((fsRepoDateEnd[email] ?: -1) == -1L) { - fsRepoDateEnd[email] = timestamp - } - - // RepoTeamSize. - fsRepoTeamSize.add(email) - }, onError, { // OnComplete. + .subscribe(onNext, onError, { // OnComplete. try { postFactsToServer(createFacts()) } catch (e: Throwable) { @@ -77,25 +59,79 @@ class FactHasher(private val serverRepo: Repo = Repo(), }) } + private val onNext: (Commit) -> Unit = { commit -> + // Calculate facts. + val email = commit.author.email + val timestamp = commit.dateTimestamp + val dateTime = LocalDateTime.ofEpochSecond(timestamp, 0, + ZoneOffset.ofTotalSeconds(commit.dateTimeZoneOffset * 60)) + + // DayWeek. + val factDayWeek = fsDayWeek[email] ?: Array(7) { 0 } + // The value is numbered from 1 (Monday) to 7 (Sunday). + factDayWeek[dateTime.dayOfWeek.value - 1] += 1 + fsDayWeek[email] = factDayWeek + + // DayTime. + val factDayTime = fsDayTime[email] ?: Array(24) { 0 } + // Hour from 0 to 23. + factDayTime[dateTime.hour] += 1 + fsDayTime[email] = factDayTime + + // RepoDateStart. + fsRepoDateStart[email] = timestamp + + // RepoDateEnd. + if ((fsRepoDateEnd[email] ?: -1) == -1L) { + fsRepoDateEnd[email] = timestamp + } + + // Commits. + val numCommits = fsCommitNum[email]!! + 1 + val numLinesCurrent = commit.numLinesAdded + commit.numLinesDeleted + + fsCommitNum[email] = numCommits + fsCommitLineNumAvg[email] = calcIncAvg(fsCommitLineNumAvg[email]!!, + numLinesCurrent.toDouble(), numCommits.toLong()) + + val lines = commit.getAllAdded() + commit.getAllDeleted() + lines.forEachIndexed { index, line -> + fsLineLenAvg[email] = calcIncAvg(fsLineLenAvg[email]!!, + line.length.toDouble(), fsLineNum[email]!! + index + 1) + } + fsLineNum[email] = fsLineNum[email]!! + lines.size + + fsLinesPerCommits[email]!![numCommits - 1] += lines.size + } + private fun createFacts(): List { val fs = mutableListOf() emails.forEach { email -> val author = Author(email = email) fsDayTime[email]?.forEachIndexed { hour, count -> if (count > 0) { - fs.add(Fact(serverRepo, FactCodes.COMMITS_DAY_TIME, hour, + fs.add(Fact(serverRepo, FactCodes.COMMIT_DAY_TIME, hour, count.toString(), author)) }} fsDayWeek[email]?.forEachIndexed { day, count -> if (count > 0) { - fs.add(Fact(serverRepo, FactCodes.COMMITS_DAY_WEEK, day, + fs.add(Fact(serverRepo, FactCodes.COMMIT_DAY_WEEK, day, count.toString(), author)) }} fs.add(Fact(serverRepo, FactCodes.REPO_DATE_START, 0, fsRepoDateStart[email].toString(), author)) fs.add(Fact(serverRepo, FactCodes.REPO_DATE_END, 0, fsRepoDateEnd[email].toString(), author)) + fs.add(Fact(serverRepo, FactCodes.COMMIT_NUM, 0, + fsCommitNum[email].toString(), author)) + fs.add(Fact(serverRepo, FactCodes.COMMIT_LINE_NUM_AVG, 0, + fsCommitLineNumAvg[email].toString(), author)) + fs.add(Fact(serverRepo, FactCodes.LINE_NUM, 0, + fsLineNum[email].toString(), author)) + fs.add(Fact(serverRepo, FactCodes.LINE_LEN_AVG, 0, + fsLineLenAvg[email].toString(), author)) + addCommitsPerLinesFacts(fs, fsLinesPerCommits[email]!!, author) } fs.add(Fact(serverRepo, FactCodes.REPO_TEAM_SIZE, 0, - fsRepoTeamSize.size.toString())) + emails.size.toString())) return fs } @@ -105,4 +141,55 @@ class FactHasher(private val serverRepo: Repo = Repo(), Logger.info { "Sent ${facts.size} facts to server" } } } + + /** + * Computes the average of a numerical sequence. + * Calculated numbers is never bigger than maximum element of sequence. + * No overflow due to summing of elements. + * @param prev previous value of average + * @param element new element of sequence + * @param count number of element in sequence + * @return new value of average with considering of new element + */ + private fun calcIncAvg(prev: Double, element: Double, + count: Long): Double { + return prev * (1 - 1.0 / count) + element / count + } + + private fun addCommitsPerLinesFacts(fs: MutableList, + linesPerCommits: Array, + author: Author) { + var max = linesPerCommits[0] + var min = linesPerCommits[0] + for (lines in linesPerCommits) { + if (lines > max) { + max = lines + } + if (lines < min) { + min = lines + } + } + + val numBins = Math.min(10, max - min + 1) + val binSize = (max - min + 1) / numBins.toDouble() + val bins = Array(numBins) { 0 } + for (numLines in linesPerCommits) { + if (numLines == 0) { + continue + } + + val binId = Math.floor((numLines - min) / binSize).toInt() + bins[binId]++ + } + + for ((binId, numCommits) in bins.withIndex()) { + if (numCommits == 0) { + continue + } + + val numLines = Math.floor(min + binId * binSize).toInt() + fs.add(Fact(serverRepo, FactCodes.COMMIT_NUM_TO_LINE_NUM, + numLines, numCommits.toString(), author)) + } + } } diff --git a/src/main/kotlin/app/hashers/RepoHasher.kt b/src/main/kotlin/app/hashers/RepoHasher.kt index 52712fb4..7197e297 100644 --- a/src/main/kotlin/app/hashers/RepoHasher.kt +++ b/src/main/kotlin/app/hashers/RepoHasher.kt @@ -67,7 +67,7 @@ class RepoHasher(private val localRepo: LocalRepo, private val api: Api, .publish() CommitHasher(serverRepo, api, rehashes, filteredEmails) .updateFromObservable(observable, onError) - FactHasher(serverRepo, api, filteredEmails) + FactHasher(serverRepo, api, rehashes, filteredEmails) .updateFromObservable(observable, onError) // Start and synchronously wait until all subscribers complete. diff --git a/src/main/kotlin/app/model/Commit.kt b/src/main/kotlin/app/model/Commit.kt index 9810a77a..32835788 100644 --- a/src/main/kotlin/app/model/Commit.kt +++ b/src/main/kotlin/app/model/Commit.kt @@ -86,4 +86,12 @@ data class Commit( override fun hashCode(): Int { return rehash.hashCode() } + + fun getAllAdded(): List { + return diffs.map { it.getAllAdded() }.flatten() + } + + fun getAllDeleted(): List { + return diffs.map { it.getAllDeleted() }.flatten() + } } diff --git a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt index 3a39b6c2..33f51e42 100644 --- a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt +++ b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt @@ -16,6 +16,7 @@ import org.jetbrains.spek.api.dsl.it import test.utils.TestRepo import java.util.* import kotlin.test.assertEquals +import kotlin.test.assertNotNull import kotlin.test.assertTrue class FactHasherTest : Spek({ @@ -35,6 +36,27 @@ class FactHasherTest : Spek({ return cal.time } + fun getFact(code: Int, key: Int, author: Author, facts: List): Fact { + val fact = facts.find { fact -> + fact.code == code && fact.key == key && fact.author == author + } + assertNotNull(fact) + return fact!! + } + + fun assertFactInt(code: Int, key: Int, value: Int, author: Author, + facts: List) { + val fact = getFact(code, key, author, facts) + assertEquals(value, fact.value.toInt()) + } + + fun assertFactDouble(code: Int, key: Int, value: Double, author: Author, + facts: List) { + val fact = getFact(code, key, author, facts) + assertTrue(Math.abs(value - fact.value.toDouble()) < 0.1, + "Expected approximately <$value>, actual <${fact.value}>") + } + given("commits for date facts") { val testRepo = TestRepo(repoPath + "date-facts") val emails = hashSetOf(authorEmail1, authorEmail2) @@ -54,13 +76,13 @@ class FactHasherTest : Spek({ val errors = mutableListOf() val observable = CommitCrawler.getObservable(testRepo.git, repo) - FactHasher(repo, mockApi, emails) + FactHasher(repo, mockApi, listOf("r1"), emails) .updateFromObservable(observable, { e -> errors.add(e) }) assertEquals(0, errors.size) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_TIME, 13, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_TIME, 13, "1", author1))) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_WEEK, 6, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_WEEK, 6, "1", author1))) } @@ -79,17 +101,17 @@ class FactHasherTest : Spek({ val errors = mutableListOf() val observable = CommitCrawler.getObservable(testRepo.git, repo) - FactHasher(repo, mockApi, emails) + FactHasher(repo, mockApi, listOf("r1", "r2"), emails) .updateFromObservable(observable, { e -> errors.add(e) }) assertEquals(0, errors.size) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_TIME, 18, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_TIME, 18, "1", author2))) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_WEEK, 0, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_WEEK, 0, "1", author2))) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_TIME, 13, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_TIME, 13, "2", author1))) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_WEEK, 0, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_WEEK, 0, "1", author1))) } @@ -154,9 +176,11 @@ class FactHasherTest : Spek({ val errors = mutableListOf() val observable = CommitCrawler.getObservable(testRepo.git, repo) - FactHasher(repo, mockApi, emails) + FactHasher(repo, mockApi, + listOf("r1", "r2", "r3", "r4", "r5", "r6"), emails) .updateFromObservable(observable, { e -> errors.add(e) }) + assertEquals(0, errors.size) assertTrue(facts.contains(Fact(repo, FactCodes.REPO_DATE_START, 0, (startAuthor1.time/1000).toString(), author1))) assertTrue(facts.contains(Fact(repo, FactCodes.REPO_DATE_START, 0, @@ -170,6 +194,91 @@ class FactHasherTest : Spek({ } + afterGroup { + testRepo.destroy() + } + } + + given("test of commit facts") { + val testRepo = TestRepo(repoPath + "commit-facts") + val emails = hashSetOf(authorEmail1, authorEmail2) + val mockApi = MockApi(mockRepo = repo) + val facts = mockApi.receivedFacts + val lines = listOf( + "All my rap, if shortly, is about the thing that", + "For so many years so many cities have been under hoof", + "To go uphill when gets lucky. Then downhill when feels sick", + "I'm not really a Gulliver, but still the city is under hoof", + "City under hoof, city under hoof", + "Traffic lights, state duties, charges and customs", + "I don't know whether this path is wade or to the bottom", + "You live under a thumb, I have a city under my hoof", + "All my rap, if shortly, is about the thing that", + "For so many years so many cities have been under hoof", + "To go uphill when gets lucky. Then downhill when feels sick", + "I'm not really a Gulliver, but still the city is under hoof", + "City under hoof, city under hoof", + "Traffic lights, state duties, charges and customs", + "I don't know whether this path is wade or to the bottom", + "You live under a thumb, I have a city under my hoof", + "All my rap, if shortly, is about the thing that", + "For so many years so many cities have been under hoof", + "To go uphill when gets lucky. Then downhill when feels sick", + "I'm not really a Gulliver, but still the city is under hoof", + "City under hoof, city under hoof", + "Traffic lights, state duties, charges and customs", + "I don't know whether this path is wade or to the bottom", + "You live under a thumb, I have a city under my hoof" + ) + val linesLenAvg = lines.fold (0) { acc, s -> acc + s.length } / + lines.size.toDouble() + + afterEachTest { + facts.clear() + } + + it("sends facts") { + testRepo.createFile("test1.txt", listOf()) + + testRepo.insertLines("test1.txt", 0, lines.subList(0, 3)) + testRepo.commit(message = "Commit 1", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(3, 6)) + testRepo.commit(message = "Commit 2", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(6, 9)) + testRepo.commit(message = "Commit 3", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(9, 12)) + testRepo.commit(message = "Commit 4", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(12, 16)) + testRepo.commit(message = "Commit 5", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(16, 24)) + testRepo.commit(message = "Commit 6", author = author1) + + val errors = mutableListOf() + val observable = CommitCrawler.getObservable(testRepo.git, repo) + FactHasher(repo, mockApi, + listOf("r1", "r2", "r3", "r4", "r5", "r6"), emails) + .updateFromObservable(observable, { e -> errors.add(e) }) + + assertEquals(0, errors.size) + assertFactInt(FactCodes.COMMIT_NUM, 0, 6, author1, facts) + assertFactDouble(FactCodes.COMMIT_LINE_NUM_AVG, 0, 4.0, author1, + facts) + assertFactInt(FactCodes.LINE_NUM, 0, 24, author1, facts) + assertFactDouble(FactCodes.LINE_LEN_AVG, 0, linesLenAvg, author1, + facts) + assertFactInt(FactCodes.COMMIT_NUM_TO_LINE_NUM, 3, 4, author1, + facts) + assertFactInt(FactCodes.COMMIT_NUM_TO_LINE_NUM, 4, 1, author1, + facts) + assertFactInt(FactCodes.COMMIT_NUM_TO_LINE_NUM, 8, 1, author1, + facts) + } + afterGroup { testRepo.destroy() } diff --git a/src/test/kotlin/test/utils/TestRepo.kt b/src/test/kotlin/test/utils/TestRepo.kt index 448fddcd..d5d123df 100644 --- a/src/test/kotlin/test/utils/TestRepo.kt +++ b/src/test/kotlin/test/utils/TestRepo.kt @@ -24,7 +24,7 @@ class TestRepo(val repoPath: String) { val userName = "Contributor" val userEmail = "test@sourcerer.com" - val git = Git.init().setDirectory(File(repoPath)).call() + val git = initGit() init { val config = git.repository.config @@ -33,6 +33,11 @@ class TestRepo(val repoPath: String) { config.save() } + private fun initGit(): Git { + destroy() // Remove repo directory if exists. + return Git.init().setDirectory(File(repoPath)).call() + } + fun createFile(fileName: String, content: List) { val file = File("$repoPath/$fileName") val writer = BufferedWriter(FileWriter(file))