From c5c6027b5f8055b9a484f9b838431a422f96c8a7 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Thu, 5 Oct 2017 14:26:08 +0300 Subject: [PATCH 1/7] wip: commit facts --- .gitignore | 2 +- src/main/kotlin/app/FactCodes.kt | 10 ++ src/main/kotlin/app/hashers/FactHasher.kt | 99 +++++++++++++------ src/main/kotlin/app/model/Commit.kt | 8 ++ .../test/tests/hashers/FactHasherTest.kt | 54 ++++++++++ src/test/kotlin/test/utils/TestRepo.kt | 7 +- 6 files changed, 148 insertions(+), 32 deletions(-) diff --git a/.gitignore b/.gitignore index 51c61766..3cfd2c6f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .DS_Store .gradle/ .idea/ +.sourcerer build/ sourcerer-app.iml sourcerer-app.ipr @@ -20,4 +21,3 @@ app.iws /ultimate/ideaSDK /ultimate/out /ultimate/tmp -src/main/resources/data/models/ diff --git a/src/main/kotlin/app/FactCodes.kt b/src/main/kotlin/app/FactCodes.kt index ccc5834a..5f01c63b 100644 --- a/src/main/kotlin/app/FactCodes.kt +++ b/src/main/kotlin/app/FactCodes.kt @@ -1,10 +1,20 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. +// Author: Anatoly Kislov (anatoly@sourcerer.io) + package app object FactCodes { val COMMITS_DAY_WEEK = 1 val COMMITS_DAY_TIME = 2 + val COMMITS_LINE_NUM_AVG = 8 + val COMMITS_NUM = 9 + // Used for number of commits per number of lines in a commit histogram. + // Key should be number of lines. Value number of commits. + val COMMITS_NUM_PER_LINE_NUM = 9 val LINE_LONGEVITY = 3 val LINE_LONGEVITY_REPO = 4 + val LINE_LEN_AVG = 10 + val LINE_NUM = 11 val REPO_DATE_START = 5 val REPO_DATE_END = 6 val REPO_TEAM_SIZE = 7 diff --git a/src/main/kotlin/app/hashers/FactHasher.kt b/src/main/kotlin/app/hashers/FactHasher.kt index 19f44243..4cd5a6ef 100644 --- a/src/main/kotlin/app/hashers/FactHasher.kt +++ b/src/main/kotlin/app/hashers/FactHasher.kt @@ -25,6 +25,12 @@ class FactHasher(private val serverRepo: Repo = Repo(), private val fsRepoDateStart = hashMapOf() private val fsRepoDateEnd = hashMapOf() private val fsRepoTeamSize = hashSetOf() + private val fsCommitLineNumAvg = hashMapOf() + private val fsCommitNum = hashMapOf() + private val fsLineLenAvg = hashMapOf() + private val fsLineNum = hashMapOf() + + // TODO(anatoly): COMMIT_NUM_PER_LINE_NUM. init { for (author in emails) { @@ -32,6 +38,10 @@ class FactHasher(private val serverRepo: Repo = Repo(), fsDayTime.put(author, Array(24) { 0 }) fsRepoDateStart.put(author, -1) fsRepoDateEnd.put(author, -1) + fsCommitLineNumAvg.put(author, 0.0) + fsCommitNum.put(author, 0) + fsLineLenAvg.put(author, 0.0) + fsLineNum.put(author, 0) } } @@ -39,36 +49,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), onError: (Throwable) -> Unit) { observable .filter { commit -> emails.contains(commit.author.email) } - .subscribe({ commit -> // OnNext. - // Calculate facts. - val email = commit.author.email - val timestamp = commit.dateTimestamp - val dateTime = LocalDateTime.ofEpochSecond(timestamp, 0, - ZoneOffset.ofTotalSeconds(commit.dateTimeZoneOffset * 60)) - - // DayWeek. - val factDayWeek = fsDayWeek[email] ?: Array(7) { 0 } - // The value is numbered from 1 (Monday) to 7 (Sunday). - factDayWeek[dateTime.dayOfWeek.value - 1] += 1 - fsDayWeek[email] = factDayWeek - - // DayTime. - val factDayTime = fsDayTime[email] ?: Array(24) { 0 } - // Hour from 0 to 23. - factDayTime[dateTime.hour] += 1 - fsDayTime[email] = factDayTime - - // RepoDateStart. - fsRepoDateStart[email] = timestamp - - // RepoDateEnd. - if ((fsRepoDateEnd[email] ?: -1) == -1L) { - fsRepoDateEnd[email] = timestamp - } - - // RepoTeamSize. - fsRepoTeamSize.add(email) - }, onError, { // OnComplete. + .subscribe(onNext, onError, { // OnComplete. try { postFactsToServer(createFacts()) } catch (e: Throwable) { @@ -77,6 +58,51 @@ class FactHasher(private val serverRepo: Repo = Repo(), }) } + private val onNext: (Commit) -> Unit = { commit -> + // Calculate facts. + val email = commit.author.email + val timestamp = commit.dateTimestamp + val dateTime = LocalDateTime.ofEpochSecond(timestamp, 0, + ZoneOffset.ofTotalSeconds(commit.dateTimeZoneOffset * 60)) + + // DayWeek. + val factDayWeek = fsDayWeek[email] ?: Array(7) { 0 } + // The value is numbered from 1 (Monday) to 7 (Sunday). + factDayWeek[dateTime.dayOfWeek.value - 1] += 1 + fsDayWeek[email] = factDayWeek + + // DayTime. + val factDayTime = fsDayTime[email] ?: Array(24) { 0 } + // Hour from 0 to 23. + factDayTime[dateTime.hour] += 1 + fsDayTime[email] = factDayTime + + // RepoDateStart. + fsRepoDateStart[email] = timestamp + + // RepoDateEnd. + if ((fsRepoDateEnd[email] ?: -1) == -1L) { + fsRepoDateEnd[email] = timestamp + } + + // RepoTeamSize. + fsRepoTeamSize.add(email) + + val numCommits = fsCommitNum[email]!! + 1 + val numLinesCurrent = commit.numLinesAdded - commit.numLinesDeleted + + fsCommitNum[email] = numCommits + fsCommitLineNumAvg[email] = calcIncAvg(fsCommitLineNumAvg[email]!!, + numLinesCurrent.toDouble(), numCommits) + + val addedLines = commit.getAllAdded() + addedLines.forEachIndexed { index, line -> + fsLineLenAvg[email] = calcIncAvg(fsLineLenAvg[email]!!, + line.length.toDouble(), fsLineNum[email]!! + index + 1) + } + fsLineNum[email] = fsLineNum[email]!! + addedLines.size + } + private fun createFacts(): List { val fs = mutableListOf() emails.forEach { email -> @@ -93,6 +119,14 @@ class FactHasher(private val serverRepo: Repo = Repo(), fsRepoDateStart[email].toString(), author)) fs.add(Fact(serverRepo, FactCodes.REPO_DATE_END, 0, fsRepoDateEnd[email].toString(), author)) + fs.add(Fact(serverRepo, FactCodes.COMMITS_NUM, 0, + fsCommitNum[email].toString(), author)) + fs.add(Fact(serverRepo, FactCodes.COMMITS_LINE_NUM_AVG, 0, + fsCommitLineNumAvg[email].toString(), author)) + fs.add(Fact(serverRepo, FactCodes.LINE_NUM, 0, + fsLineNum[email].toString(), author)) + fs.add(Fact(serverRepo, FactCodes.LINE_LEN_AVG, 0, + fsLineLenAvg[email].toString(), author)) } fs.add(Fact(serverRepo, FactCodes.REPO_TEAM_SIZE, 0, fsRepoTeamSize.size.toString())) @@ -105,4 +139,9 @@ class FactHasher(private val serverRepo: Repo = Repo(), Logger.info { "Sent ${facts.size} facts to server" } } } + + private fun calcIncAvg(prev: Double, element: Double, count: Long): + Double { + return prev * (1 - 1 / count) + element / count + } } diff --git a/src/main/kotlin/app/model/Commit.kt b/src/main/kotlin/app/model/Commit.kt index 9810a77a..32835788 100644 --- a/src/main/kotlin/app/model/Commit.kt +++ b/src/main/kotlin/app/model/Commit.kt @@ -86,4 +86,12 @@ data class Commit( override fun hashCode(): Int { return rehash.hashCode() } + + fun getAllAdded(): List { + return diffs.map { it.getAllAdded() }.flatten() + } + + fun getAllDeleted(): List { + return diffs.map { it.getAllDeleted() }.flatten() + } } diff --git a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt index 3a39b6c2..e80a64c5 100644 --- a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt +++ b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt @@ -170,6 +170,60 @@ class FactHasherTest : Spek({ } + afterGroup { + testRepo.destroy() + } + } + + given("test of commit facts") { + val testRepo = TestRepo(repoPath + "commit-facts") + val emails = hashSetOf(authorEmail1, authorEmail2) + val mockApi = MockApi(mockRepo = repo) + val facts = mockApi.receivedFacts + val lines = listOf( + "All my rap, if shortly, is about the thing that", + "For so many years so many cities have been under hoof", + "To go uphill when gets lucky. Then downhill when feels sick", + "I'm not really a Gulliver, but still the city is under hoof", + "City under hoof, city under hoof", + "Traffic lights, state duties, charges and customs", + "I don't know whether this path is wade or to the bottom", + "You live under a thumb, I have a city under my hoof" + ) + + afterEachTest { + facts.clear() + } + + it("sends facts") { + // TODO(anatoly): Finish tests. + testRepo.createFile("test1.txt", listOf()) + + testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) + testRepo.commit(message = "Commit 1", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(2, 5)) + testRepo.commit(message = "Commit 1", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) + testRepo.commit(message = "Commit 1", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) + testRepo.commit(message = "Commit 1", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) + testRepo.commit(message = "Commit 1", author = author1) + + testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) + testRepo.commit(message = "Commit 1", author = author1) + + val errors = mutableListOf() + val observable = CommitCrawler.getObservable(testRepo.git, repo) + FactHasher(repo, mockApi, emails) + .updateFromObservable(observable, { e -> errors.add(e) }) + } + + afterGroup { testRepo.destroy() } diff --git a/src/test/kotlin/test/utils/TestRepo.kt b/src/test/kotlin/test/utils/TestRepo.kt index 448fddcd..d5d123df 100644 --- a/src/test/kotlin/test/utils/TestRepo.kt +++ b/src/test/kotlin/test/utils/TestRepo.kt @@ -24,7 +24,7 @@ class TestRepo(val repoPath: String) { val userName = "Contributor" val userEmail = "test@sourcerer.com" - val git = Git.init().setDirectory(File(repoPath)).call() + val git = initGit() init { val config = git.repository.config @@ -33,6 +33,11 @@ class TestRepo(val repoPath: String) { config.save() } + private fun initGit(): Git { + destroy() // Remove repo directory if exists. + return Git.init().setDirectory(File(repoPath)).call() + } + fun createFile(fileName: String, content: List) { val file = File("$repoPath/$fileName") val writer = BufferedWriter(FileWriter(file)) From ab40081818bf0b5efc64ad4a7d82bc4aea4d66b9 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Fri, 3 Nov 2017 18:19:58 +0300 Subject: [PATCH 2/7] wip: lines per commit --- src/main/kotlin/app/FactCodes.kt | 2 +- src/main/kotlin/app/hashers/CommitCrawler.kt | 9 ++ src/main/kotlin/app/hashers/CommitHasher.kt | 9 -- src/main/kotlin/app/hashers/FactHasher.kt | 36 ++++++-- src/main/kotlin/app/hashers/RepoHasher.kt | 2 +- .../test/tests/hashers/FactHasherTest.kt | 83 +++++++++++++++---- 6 files changed, 108 insertions(+), 33 deletions(-) diff --git a/src/main/kotlin/app/FactCodes.kt b/src/main/kotlin/app/FactCodes.kt index 5f01c63b..8eaad34e 100644 --- a/src/main/kotlin/app/FactCodes.kt +++ b/src/main/kotlin/app/FactCodes.kt @@ -10,7 +10,7 @@ object FactCodes { val COMMITS_NUM = 9 // Used for number of commits per number of lines in a commit histogram. // Key should be number of lines. Value number of commits. - val COMMITS_NUM_PER_LINE_NUM = 9 + val COMMITS_NUM_PER_LINE_NUM = 12 val LINE_LONGEVITY = 3 val LINE_LONGEVITY_REPO = 4 val LINE_LEN_AVG = 10 diff --git a/src/main/kotlin/app/hashers/CommitCrawler.kt b/src/main/kotlin/app/hashers/CommitCrawler.kt index 6b0bb1d3..861cf728 100644 --- a/src/main/kotlin/app/hashers/CommitCrawler.kt +++ b/src/main/kotlin/app/hashers/CommitCrawler.kt @@ -48,6 +48,15 @@ object CommitCrawler { } new.diffs = getDiffFiles(git, new, old) Logger.debug { "Diff: ${new.diffs.size} entries" } + // Count lines on all non-binary files. This is additional + // statistics to CommitStats because not all file extensions + // may be supported. + new.numLinesAdded = new.diffs.fold(0) { total, file -> + total + file.getAllAdded().size + } + new.numLinesDeleted = new.diffs.fold(0) { total, file -> + total + file.getAllDeleted().size + } new.repo = repo new } diff --git a/src/main/kotlin/app/hashers/CommitHasher.kt b/src/main/kotlin/app/hashers/CommitHasher.kt index 4776b9dd..dd40535b 100644 --- a/src/main/kotlin/app/hashers/CommitHasher.kt +++ b/src/main/kotlin/app/hashers/CommitHasher.kt @@ -51,15 +51,6 @@ class CommitHasher(private val serverRepo: Repo = Repo(), Logger.info { "Stats: ${commit.stats.size} entries" } Logger.debug { commit.stats.toString() } - // Count lines on all non-binary files. This is additional - // statistics to CommitStats because not all file extensions - // may be supported. - commit.numLinesAdded = commit.diffs.fold(0) { total, file -> - total + file.getAllAdded().size - } - commit.numLinesDeleted = commit.diffs.fold(0) { total, file -> - total + file.getAllDeleted().size - } commit } .buffer(20, TimeUnit.SECONDS) // Group ready commits by time. diff --git a/src/main/kotlin/app/hashers/FactHasher.kt b/src/main/kotlin/app/hashers/FactHasher.kt index 4cd5a6ef..9183c6bf 100644 --- a/src/main/kotlin/app/hashers/FactHasher.kt +++ b/src/main/kotlin/app/hashers/FactHasher.kt @@ -19,6 +19,7 @@ import java.time.ZoneOffset */ class FactHasher(private val serverRepo: Repo = Repo(), private val api: Api, + private val rehashes: List, private val emails: HashSet) { private val fsDayWeek = hashMapOf>() private val fsDayTime = hashMapOf>() @@ -26,11 +27,10 @@ class FactHasher(private val serverRepo: Repo = Repo(), private val fsRepoDateEnd = hashMapOf() private val fsRepoTeamSize = hashSetOf() private val fsCommitLineNumAvg = hashMapOf() - private val fsCommitNum = hashMapOf() + private val fsCommitNum = hashMapOf() private val fsLineLenAvg = hashMapOf() private val fsLineNum = hashMapOf() - - // TODO(anatoly): COMMIT_NUM_PER_LINE_NUM. + private val fsLinesPerCommits = hashMapOf>() init { for (author in emails) { @@ -42,6 +42,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), fsCommitNum.put(author, 0) fsLineLenAvg.put(author, 0.0) fsLineNum.put(author, 0) + fsLinesPerCommits.put(author, Array(rehashes.size) {0}) } } @@ -88,12 +89,13 @@ class FactHasher(private val serverRepo: Repo = Repo(), // RepoTeamSize. fsRepoTeamSize.add(email) + // Commits. val numCommits = fsCommitNum[email]!! + 1 val numLinesCurrent = commit.numLinesAdded - commit.numLinesDeleted fsCommitNum[email] = numCommits fsCommitLineNumAvg[email] = calcIncAvg(fsCommitLineNumAvg[email]!!, - numLinesCurrent.toDouble(), numCommits) + numLinesCurrent.toDouble(), numCommits.toLong()) val addedLines = commit.getAllAdded() addedLines.forEachIndexed { index, line -> @@ -101,6 +103,9 @@ class FactHasher(private val serverRepo: Repo = Repo(), line.length.toDouble(), fsLineNum[email]!! + index + 1) } fsLineNum[email] = fsLineNum[email]!! + addedLines.size + + fsLinesPerCommits[email]!![numCommits - 1] = + fsLinesPerCommits[email]!![numCommits - 1] + addedLines.size } private fun createFacts(): List { @@ -142,6 +147,27 @@ class FactHasher(private val serverRepo: Repo = Repo(), private fun calcIncAvg(prev: Double, element: Double, count: Long): Double { - return prev * (1 - 1 / count) + element / count + return prev * (1 - 1.0 / count) + element / count + } + + private fun calcCommitsPerLines(linesPerCommits: Array): Array { + val numBins = 10 + var max = linesPerCommits[0] + var min = linesPerCommits[0] + for (lines in linesPerCommits) { + if (lines > max) { + max = lines + } + if (lines < min) { + min = lines + } + } + val binSize = (max - min) / numBins.toDouble() + val bins = Array(numBins) { 0 } + for (numLines in linesPerCommits) { + val binId = Math.floor((numLines - min) / binSize).toInt() + bins[binId]++ + } + return bins } } diff --git a/src/main/kotlin/app/hashers/RepoHasher.kt b/src/main/kotlin/app/hashers/RepoHasher.kt index 52712fb4..7197e297 100644 --- a/src/main/kotlin/app/hashers/RepoHasher.kt +++ b/src/main/kotlin/app/hashers/RepoHasher.kt @@ -67,7 +67,7 @@ class RepoHasher(private val localRepo: LocalRepo, private val api: Api, .publish() CommitHasher(serverRepo, api, rehashes, filteredEmails) .updateFromObservable(observable, onError) - FactHasher(serverRepo, api, filteredEmails) + FactHasher(serverRepo, api, rehashes, filteredEmails) .updateFromObservable(observable, onError) // Start and synchronously wait until all subscribers complete. diff --git a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt index e80a64c5..7c76b38a 100644 --- a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt +++ b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt @@ -16,6 +16,7 @@ import org.jetbrains.spek.api.dsl.it import test.utils.TestRepo import java.util.* import kotlin.test.assertEquals +import kotlin.test.assertNotNull import kotlin.test.assertTrue class FactHasherTest : Spek({ @@ -35,6 +36,27 @@ class FactHasherTest : Spek({ return cal.time } + fun getFact(code: Int, key: Int, author: Author, facts: List): Fact { + val fact = facts.find { fact -> + fact.code == code && fact.key == key && fact.author == author + } + assertNotNull(fact) + return fact!! + } + + fun assertFactInt(code: Int, key: Int, value: Int, author: Author, + facts: List) { + val fact = getFact(code, key, author, facts) + assertEquals(value, fact.value.toInt()) + } + + fun assertFactDouble(code: Int, key: Int, value: Double, author: Author, + facts: List) { + val fact = getFact(code, key, author, facts) + assertTrue(Math.abs(value - fact.value.toDouble()) < 0.1, + "Expected approximately <$value>, actual <${fact.value}>") + } + given("commits for date facts") { val testRepo = TestRepo(repoPath + "date-facts") val emails = hashSetOf(authorEmail1, authorEmail2) @@ -54,7 +76,7 @@ class FactHasherTest : Spek({ val errors = mutableListOf() val observable = CommitCrawler.getObservable(testRepo.git, repo) - FactHasher(repo, mockApi, emails) + FactHasher(repo, mockApi, listOf("r1"), emails) .updateFromObservable(observable, { e -> errors.add(e) }) assertEquals(0, errors.size) @@ -79,7 +101,7 @@ class FactHasherTest : Spek({ val errors = mutableListOf() val observable = CommitCrawler.getObservable(testRepo.git, repo) - FactHasher(repo, mockApi, emails) + FactHasher(repo, mockApi, listOf("r1", "r2"), emails) .updateFromObservable(observable, { e -> errors.add(e) }) assertEquals(0, errors.size) @@ -154,9 +176,11 @@ class FactHasherTest : Spek({ val errors = mutableListOf() val observable = CommitCrawler.getObservable(testRepo.git, repo) - FactHasher(repo, mockApi, emails) + FactHasher(repo, mockApi, + listOf("r1", "r2", "r3", "r4", "r5", "r6"), emails) .updateFromObservable(observable, { e -> errors.add(e) }) + assertEquals(0, errors.size) assertTrue(facts.contains(Fact(repo, FactCodes.REPO_DATE_START, 0, (startAuthor1.time/1000).toString(), author1))) assertTrue(facts.contains(Fact(repo, FactCodes.REPO_DATE_START, 0, @@ -181,6 +205,22 @@ class FactHasherTest : Spek({ val mockApi = MockApi(mockRepo = repo) val facts = mockApi.receivedFacts val lines = listOf( + "All my rap, if shortly, is about the thing that", + "For so many years so many cities have been under hoof", + "To go uphill when gets lucky. Then downhill when feels sick", + "I'm not really a Gulliver, but still the city is under hoof", + "City under hoof, city under hoof", + "Traffic lights, state duties, charges and customs", + "I don't know whether this path is wade or to the bottom", + "You live under a thumb, I have a city under my hoof", + "All my rap, if shortly, is about the thing that", + "For so many years so many cities have been under hoof", + "To go uphill when gets lucky. Then downhill when feels sick", + "I'm not really a Gulliver, but still the city is under hoof", + "City under hoof, city under hoof", + "Traffic lights, state duties, charges and customs", + "I don't know whether this path is wade or to the bottom", + "You live under a thumb, I have a city under my hoof", "All my rap, if shortly, is about the thing that", "For so many years so many cities have been under hoof", "To go uphill when gets lucky. Then downhill when feels sick", @@ -190,39 +230,48 @@ class FactHasherTest : Spek({ "I don't know whether this path is wade or to the bottom", "You live under a thumb, I have a city under my hoof" ) + val linesLenAvg = lines.fold (0) { acc, s -> acc + s.length } / + lines.size.toDouble() afterEachTest { facts.clear() } it("sends facts") { - // TODO(anatoly): Finish tests. testRepo.createFile("test1.txt", listOf()) - testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) + testRepo.insertLines("test1.txt", 0, lines.subList(0, 3)) testRepo.commit(message = "Commit 1", author = author1) - testRepo.insertLines("test1.txt", 0, lines.subList(2, 5)) - testRepo.commit(message = "Commit 1", author = author1) + testRepo.insertLines("test1.txt", 0, lines.subList(3, 6)) + testRepo.commit(message = "Commit 2", author = author1) - testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) - testRepo.commit(message = "Commit 1", author = author1) + testRepo.insertLines("test1.txt", 0, lines.subList(6, 9)) + testRepo.commit(message = "Commit 3", author = author1) - testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) - testRepo.commit(message = "Commit 1", author = author1) + testRepo.insertLines("test1.txt", 0, lines.subList(9, 12)) + testRepo.commit(message = "Commit 4", author = author1) - testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) - testRepo.commit(message = "Commit 1", author = author1) + testRepo.insertLines("test1.txt", 0, lines.subList(12, 15)) + testRepo.commit(message = "Commit 5", author = author1) - testRepo.insertLines("test1.txt", 0, lines.subList(0, 2)) - testRepo.commit(message = "Commit 1", author = author1) + testRepo.insertLines("test1.txt", 0, lines.subList(15, 18)) + testRepo.commit(message = "Commit 6", author = author1) val errors = mutableListOf() val observable = CommitCrawler.getObservable(testRepo.git, repo) - FactHasher(repo, mockApi, emails) + FactHasher(repo, mockApi, + listOf("r1", "r2", "r3", "r4", "r5", "r6"), emails) .updateFromObservable(observable, { e -> errors.add(e) }) - } + assertEquals(0, errors.size) + assertFactInt(FactCodes.COMMITS_NUM, 0, 6, author1, facts) + assertFactDouble(FactCodes.COMMITS_LINE_NUM_AVG, 0, 3.0, author1, + facts) + assertFactInt(FactCodes.LINE_NUM, 0, 18, author1, facts) + assertFactDouble(FactCodes.LINE_LEN_AVG, 0, linesLenAvg, author1, + facts) + } afterGroup { testRepo.destroy() From 3d82b313273db0d9fe900e049b126c5f3e72d10e Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Mon, 6 Nov 2017 19:30:43 +0300 Subject: [PATCH 3/7] feat: commits per lines facts --- src/main/kotlin/app/hashers/FactHasher.kt | 25 ++++++++++++++++--- .../test/tests/hashers/FactHasherTest.kt | 14 ++++++++--- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/main/kotlin/app/hashers/FactHasher.kt b/src/main/kotlin/app/hashers/FactHasher.kt index 9183c6bf..9b67a78f 100644 --- a/src/main/kotlin/app/hashers/FactHasher.kt +++ b/src/main/kotlin/app/hashers/FactHasher.kt @@ -132,6 +132,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), fsLineNum[email].toString(), author)) fs.add(Fact(serverRepo, FactCodes.LINE_LEN_AVG, 0, fsLineLenAvg[email].toString(), author)) + addCommitsPerLinesFacts(fs, fsLinesPerCommits[email]!!, author) } fs.add(Fact(serverRepo, FactCodes.REPO_TEAM_SIZE, 0, fsRepoTeamSize.size.toString())) @@ -150,8 +151,9 @@ class FactHasher(private val serverRepo: Repo = Repo(), return prev * (1 - 1.0 / count) + element / count } - private fun calcCommitsPerLines(linesPerCommits: Array): Array { - val numBins = 10 + private fun addCommitsPerLinesFacts(fs: MutableList, + linesPerCommits: Array, + author: Author) { var max = linesPerCommits[0] var min = linesPerCommits[0] for (lines in linesPerCommits) { @@ -162,12 +164,27 @@ class FactHasher(private val serverRepo: Repo = Repo(), min = lines } } - val binSize = (max - min) / numBins.toDouble() + + val numBins = Math.min(10, max - min + 1) + val binSize = (max - min + 1) / numBins.toDouble() val bins = Array(numBins) { 0 } for (numLines in linesPerCommits) { + if (numLines == 0) { + continue + } + val binId = Math.floor((numLines - min) / binSize).toInt() bins[binId]++ } - return bins + + for ((binId, numCommits) in bins.withIndex()) { + if (numCommits == 0) { + continue + } + + val numLines = Math.floor(min + binId * binSize).toInt() + fs.add(Fact(serverRepo, FactCodes.COMMITS_NUM_PER_LINE_NUM, + numLines, numCommits.toString(), author)) + } } } diff --git a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt index 7c76b38a..5a83ed28 100644 --- a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt +++ b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt @@ -252,10 +252,10 @@ class FactHasherTest : Spek({ testRepo.insertLines("test1.txt", 0, lines.subList(9, 12)) testRepo.commit(message = "Commit 4", author = author1) - testRepo.insertLines("test1.txt", 0, lines.subList(12, 15)) + testRepo.insertLines("test1.txt", 0, lines.subList(12, 16)) testRepo.commit(message = "Commit 5", author = author1) - testRepo.insertLines("test1.txt", 0, lines.subList(15, 18)) + testRepo.insertLines("test1.txt", 0, lines.subList(16, 24)) testRepo.commit(message = "Commit 6", author = author1) val errors = mutableListOf() @@ -266,11 +266,17 @@ class FactHasherTest : Spek({ assertEquals(0, errors.size) assertFactInt(FactCodes.COMMITS_NUM, 0, 6, author1, facts) - assertFactDouble(FactCodes.COMMITS_LINE_NUM_AVG, 0, 3.0, author1, + assertFactDouble(FactCodes.COMMITS_LINE_NUM_AVG, 0, 4.0, author1, facts) - assertFactInt(FactCodes.LINE_NUM, 0, 18, author1, facts) + assertFactInt(FactCodes.LINE_NUM, 0, 24, author1, facts) assertFactDouble(FactCodes.LINE_LEN_AVG, 0, linesLenAvg, author1, facts) + assertFactInt(FactCodes.COMMITS_NUM_PER_LINE_NUM, 3, 4, author1, + facts) + assertFactInt(FactCodes.COMMITS_NUM_PER_LINE_NUM, 4, 1, author1, + facts) + assertFactInt(FactCodes.COMMITS_NUM_PER_LINE_NUM, 8, 1, author1, + facts) } afterGroup { From 4ce2b5a9c967f108eaf51388a07eb40cf93ba866 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Tue, 7 Nov 2017 14:25:13 +0300 Subject: [PATCH 4/7] fix: pr comments --- src/main/kotlin/app/FactCodes.kt | 5 ++- src/main/kotlin/app/hashers/FactHasher.kt | 32 +++++++++++-------- .../test/tests/hashers/FactHasherTest.kt | 6 ++-- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/main/kotlin/app/FactCodes.kt b/src/main/kotlin/app/FactCodes.kt index 8eaad34e..0165c5b1 100644 --- a/src/main/kotlin/app/FactCodes.kt +++ b/src/main/kotlin/app/FactCodes.kt @@ -8,9 +8,8 @@ object FactCodes { val COMMITS_DAY_TIME = 2 val COMMITS_LINE_NUM_AVG = 8 val COMMITS_NUM = 9 - // Used for number of commits per number of lines in a commit histogram. - // Key should be number of lines. Value number of commits. - val COMMITS_NUM_PER_LINE_NUM = 12 + // A map of line numbers to commits number. Used in a commit histogram. + val COMMITS_NUM_TO_LINE_NUM = 12 val LINE_LONGEVITY = 3 val LINE_LONGEVITY_REPO = 4 val LINE_LEN_AVG = 10 diff --git a/src/main/kotlin/app/hashers/FactHasher.kt b/src/main/kotlin/app/hashers/FactHasher.kt index 9b67a78f..29dfd4c3 100644 --- a/src/main/kotlin/app/hashers/FactHasher.kt +++ b/src/main/kotlin/app/hashers/FactHasher.kt @@ -25,7 +25,6 @@ class FactHasher(private val serverRepo: Repo = Repo(), private val fsDayTime = hashMapOf>() private val fsRepoDateStart = hashMapOf() private val fsRepoDateEnd = hashMapOf() - private val fsRepoTeamSize = hashSetOf() private val fsCommitLineNumAvg = hashMapOf() private val fsCommitNum = hashMapOf() private val fsLineLenAvg = hashMapOf() @@ -86,26 +85,22 @@ class FactHasher(private val serverRepo: Repo = Repo(), fsRepoDateEnd[email] = timestamp } - // RepoTeamSize. - fsRepoTeamSize.add(email) - // Commits. val numCommits = fsCommitNum[email]!! + 1 - val numLinesCurrent = commit.numLinesAdded - commit.numLinesDeleted + val numLinesCurrent = commit.numLinesAdded + commit.numLinesDeleted fsCommitNum[email] = numCommits fsCommitLineNumAvg[email] = calcIncAvg(fsCommitLineNumAvg[email]!!, numLinesCurrent.toDouble(), numCommits.toLong()) - val addedLines = commit.getAllAdded() - addedLines.forEachIndexed { index, line -> + val lines = commit.getAllAdded() + commit.getAllDeleted() + lines.forEachIndexed { index, line -> fsLineLenAvg[email] = calcIncAvg(fsLineLenAvg[email]!!, line.length.toDouble(), fsLineNum[email]!! + index + 1) } - fsLineNum[email] = fsLineNum[email]!! + addedLines.size + fsLineNum[email] = fsLineNum[email]!! + lines.size - fsLinesPerCommits[email]!![numCommits - 1] = - fsLinesPerCommits[email]!![numCommits - 1] + addedLines.size + fsLinesPerCommits[email]!![numCommits - 1] += lines.size } private fun createFacts(): List { @@ -135,7 +130,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), addCommitsPerLinesFacts(fs, fsLinesPerCommits[email]!!, author) } fs.add(Fact(serverRepo, FactCodes.REPO_TEAM_SIZE, 0, - fsRepoTeamSize.size.toString())) + emails.size.toString())) return fs } @@ -146,8 +141,17 @@ class FactHasher(private val serverRepo: Repo = Repo(), } } - private fun calcIncAvg(prev: Double, element: Double, count: Long): - Double { + /** + * Used for incremental calculation of average of sequence. + * Calculated numbers is never bigger than maximum element of sequence. + * No overflow due to summing of elements. + * @param prev previous value of average + * @param element new element of sequence + * @param count number of element in sequence + * @return new value of average with considering of new element + */ + private fun calcIncAvg(prev: Double, element: Double, + count: Long): Double { return prev * (1 - 1.0 / count) + element / count } @@ -183,7 +187,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), } val numLines = Math.floor(min + binId * binSize).toInt() - fs.add(Fact(serverRepo, FactCodes.COMMITS_NUM_PER_LINE_NUM, + fs.add(Fact(serverRepo, FactCodes.COMMITS_NUM_TO_LINE_NUM, numLines, numCommits.toString(), author)) } } diff --git a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt index 5a83ed28..c77582f8 100644 --- a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt +++ b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt @@ -271,11 +271,11 @@ class FactHasherTest : Spek({ assertFactInt(FactCodes.LINE_NUM, 0, 24, author1, facts) assertFactDouble(FactCodes.LINE_LEN_AVG, 0, linesLenAvg, author1, facts) - assertFactInt(FactCodes.COMMITS_NUM_PER_LINE_NUM, 3, 4, author1, + assertFactInt(FactCodes.COMMITS_NUM_TO_LINE_NUM, 3, 4, author1, facts) - assertFactInt(FactCodes.COMMITS_NUM_PER_LINE_NUM, 4, 1, author1, + assertFactInt(FactCodes.COMMITS_NUM_TO_LINE_NUM, 4, 1, author1, facts) - assertFactInt(FactCodes.COMMITS_NUM_PER_LINE_NUM, 8, 1, author1, + assertFactInt(FactCodes.COMMITS_NUM_TO_LINE_NUM, 8, 1, author1, facts) } From c618a1720758f03d303dcd71f8c32f26768f83d1 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Tue, 7 Nov 2017 14:26:54 +0300 Subject: [PATCH 5/7] chore: add todo --- src/main/kotlin/app/hashers/FactHasher.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/kotlin/app/hashers/FactHasher.kt b/src/main/kotlin/app/hashers/FactHasher.kt index 29dfd4c3..5b7a7e98 100644 --- a/src/main/kotlin/app/hashers/FactHasher.kt +++ b/src/main/kotlin/app/hashers/FactHasher.kt @@ -41,6 +41,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), fsCommitNum.put(author, 0) fsLineLenAvg.put(author, 0.0) fsLineNum.put(author, 0) + // TODO(anatoly): Do the bin computations on the go. fsLinesPerCommits.put(author, Array(rehashes.size) {0}) } } From 23cc508812300b96cfbd955edba20454ede209ca Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Tue, 7 Nov 2017 18:17:22 +0300 Subject: [PATCH 6/7] fix: const names --- src/main/kotlin/app/FactCodes.kt | 10 ++++----- src/main/kotlin/app/hashers/FactHasher.kt | 12 +++++----- .../test/tests/hashers/FactHasherTest.kt | 22 +++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/main/kotlin/app/FactCodes.kt b/src/main/kotlin/app/FactCodes.kt index 0165c5b1..fbb77012 100644 --- a/src/main/kotlin/app/FactCodes.kt +++ b/src/main/kotlin/app/FactCodes.kt @@ -4,12 +4,12 @@ package app object FactCodes { - val COMMITS_DAY_WEEK = 1 - val COMMITS_DAY_TIME = 2 - val COMMITS_LINE_NUM_AVG = 8 - val COMMITS_NUM = 9 + val COMMIT_DAY_WEEK = 1 + val COMMIT_DAY_TIME = 2 + val COMMIT_LINE_NUM_AVG = 8 + val COMMIT_NUM = 9 // A map of line numbers to commits number. Used in a commit histogram. - val COMMITS_NUM_TO_LINE_NUM = 12 + val COMMIT_NUM_TO_LINE_NUM = 12 val LINE_LONGEVITY = 3 val LINE_LONGEVITY_REPO = 4 val LINE_LEN_AVG = 10 diff --git a/src/main/kotlin/app/hashers/FactHasher.kt b/src/main/kotlin/app/hashers/FactHasher.kt index 5b7a7e98..4d9a3c58 100644 --- a/src/main/kotlin/app/hashers/FactHasher.kt +++ b/src/main/kotlin/app/hashers/FactHasher.kt @@ -109,20 +109,20 @@ class FactHasher(private val serverRepo: Repo = Repo(), emails.forEach { email -> val author = Author(email = email) fsDayTime[email]?.forEachIndexed { hour, count -> if (count > 0) { - fs.add(Fact(serverRepo, FactCodes.COMMITS_DAY_TIME, hour, + fs.add(Fact(serverRepo, FactCodes.COMMIT_DAY_TIME, hour, count.toString(), author)) }} fsDayWeek[email]?.forEachIndexed { day, count -> if (count > 0) { - fs.add(Fact(serverRepo, FactCodes.COMMITS_DAY_WEEK, day, + fs.add(Fact(serverRepo, FactCodes.COMMIT_DAY_WEEK, day, count.toString(), author)) }} fs.add(Fact(serverRepo, FactCodes.REPO_DATE_START, 0, fsRepoDateStart[email].toString(), author)) fs.add(Fact(serverRepo, FactCodes.REPO_DATE_END, 0, fsRepoDateEnd[email].toString(), author)) - fs.add(Fact(serverRepo, FactCodes.COMMITS_NUM, 0, + fs.add(Fact(serverRepo, FactCodes.COMMIT_NUM, 0, fsCommitNum[email].toString(), author)) - fs.add(Fact(serverRepo, FactCodes.COMMITS_LINE_NUM_AVG, 0, + fs.add(Fact(serverRepo, FactCodes.COMMIT_LINE_NUM_AVG, 0, fsCommitLineNumAvg[email].toString(), author)) fs.add(Fact(serverRepo, FactCodes.LINE_NUM, 0, fsLineNum[email].toString(), author)) @@ -143,7 +143,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), } /** - * Used for incremental calculation of average of sequence. + * Computes the average of a numerical sequence. * Calculated numbers is never bigger than maximum element of sequence. * No overflow due to summing of elements. * @param prev previous value of average @@ -188,7 +188,7 @@ class FactHasher(private val serverRepo: Repo = Repo(), } val numLines = Math.floor(min + binId * binSize).toInt() - fs.add(Fact(serverRepo, FactCodes.COMMITS_NUM_TO_LINE_NUM, + fs.add(Fact(serverRepo, FactCodes.COMMIT_NUM_TO_LINE_NUM, numLines, numCommits.toString(), author)) } } diff --git a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt index c77582f8..33f51e42 100644 --- a/src/test/kotlin/test/tests/hashers/FactHasherTest.kt +++ b/src/test/kotlin/test/tests/hashers/FactHasherTest.kt @@ -80,9 +80,9 @@ class FactHasherTest : Spek({ .updateFromObservable(observable, { e -> errors.add(e) }) assertEquals(0, errors.size) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_TIME, 13, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_TIME, 13, "1", author1))) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_WEEK, 6, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_WEEK, 6, "1", author1))) } @@ -105,13 +105,13 @@ class FactHasherTest : Spek({ .updateFromObservable(observable, { e -> errors.add(e) }) assertEquals(0, errors.size) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_TIME, 18, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_TIME, 18, "1", author2))) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_WEEK, 0, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_WEEK, 0, "1", author2))) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_TIME, 13, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_TIME, 13, "2", author1))) - assertTrue(facts.contains(Fact(repo, FactCodes.COMMITS_DAY_WEEK, 0, + assertTrue(facts.contains(Fact(repo, FactCodes.COMMIT_DAY_WEEK, 0, "1", author1))) } @@ -265,17 +265,17 @@ class FactHasherTest : Spek({ .updateFromObservable(observable, { e -> errors.add(e) }) assertEquals(0, errors.size) - assertFactInt(FactCodes.COMMITS_NUM, 0, 6, author1, facts) - assertFactDouble(FactCodes.COMMITS_LINE_NUM_AVG, 0, 4.0, author1, + assertFactInt(FactCodes.COMMIT_NUM, 0, 6, author1, facts) + assertFactDouble(FactCodes.COMMIT_LINE_NUM_AVG, 0, 4.0, author1, facts) assertFactInt(FactCodes.LINE_NUM, 0, 24, author1, facts) assertFactDouble(FactCodes.LINE_LEN_AVG, 0, linesLenAvg, author1, facts) - assertFactInt(FactCodes.COMMITS_NUM_TO_LINE_NUM, 3, 4, author1, + assertFactInt(FactCodes.COMMIT_NUM_TO_LINE_NUM, 3, 4, author1, facts) - assertFactInt(FactCodes.COMMITS_NUM_TO_LINE_NUM, 4, 1, author1, + assertFactInt(FactCodes.COMMIT_NUM_TO_LINE_NUM, 4, 1, author1, facts) - assertFactInt(FactCodes.COMMITS_NUM_TO_LINE_NUM, 8, 1, author1, + assertFactInt(FactCodes.COMMIT_NUM_TO_LINE_NUM, 8, 1, author1, facts) } From d8214dd717764899a88da72497df6a560c3755d7 Mon Sep 17 00:00:00 2001 From: Anatoly Kislov Date: Tue, 7 Nov 2017 18:50:06 +0300 Subject: [PATCH 7/7] chore: add comments for codes --- src/main/kotlin/app/FactCodes.kt | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/main/kotlin/app/FactCodes.kt b/src/main/kotlin/app/FactCodes.kt index fbb77012..572e1c0e 100644 --- a/src/main/kotlin/app/FactCodes.kt +++ b/src/main/kotlin/app/FactCodes.kt @@ -4,17 +4,17 @@ package app object FactCodes { - val COMMIT_DAY_WEEK = 1 - val COMMIT_DAY_TIME = 2 - val COMMIT_LINE_NUM_AVG = 8 - val COMMIT_NUM = 9 + val COMMIT_DAY_WEEK = 1 // Day of week fun fact and graph. + val COMMIT_DAY_TIME = 2 // Day time fun fact. + val COMMIT_LINE_NUM_AVG = 8 // Average number of lines per commit fun fact. + val COMMIT_NUM = 9 // Used for averaging COMMIT_LINE_NUM_AVG between repos. // A map of line numbers to commits number. Used in a commit histogram. val COMMIT_NUM_TO_LINE_NUM = 12 - val LINE_LONGEVITY = 3 - val LINE_LONGEVITY_REPO = 4 - val LINE_LEN_AVG = 10 - val LINE_NUM = 11 - val REPO_DATE_START = 5 - val REPO_DATE_END = 6 - val REPO_TEAM_SIZE = 7 + val LINE_LONGEVITY = 3 // Used for longevity graph. + val LINE_LONGEVITY_REPO = 4 // Used for longevity graph. + val LINE_LEN_AVG = 10 // Average length of line fun fact. + val LINE_NUM = 11 // Used for averaging LINE_LEN_AVG between repos. + val REPO_DATE_START = 5 // Repo summary info. Date of first contribution. + val REPO_DATE_END = 6 // Repo summary info. Date of last contribution. + val REPO_TEAM_SIZE = 7 // Repo summary info. Number of contributors. }