Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.DS_Store
.gradle/
.idea/
.sourcerer
build/
sourcerer-app.iml
sourcerer-app.ipr
Expand All @@ -20,4 +21,3 @@ app.iws
/ultimate/ideaSDK
/ultimate/out
/ultimate/tmp
src/main/resources/data/models/
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change seems unrelated. Why is it included here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Models got deleted from resources, but we forgot to delete them from gitignore.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it

23 changes: 16 additions & 7 deletions src/main/kotlin/app/FactCodes.kt
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
// Copyright 2017 Sourcerer Inc. All Rights Reserved.
// Author: Anatoly Kislov (anatoly@sourcerer.io)

package app

/**
 * Integer codes that identify the kind of a fact. FactHasher passes one of
 * these codes as the second argument of every Fact it creates.
 *
 * NOTE(review): this listing appears to interleave the removed constants
 * (COMMITS_DAY_WEEK .. REPO_TEAM_SIZE without comments) with the added ones
 * from the same diff hunk - confirm against the merged file before relying
 * on it.
 */
object FactCodes {
val COMMITS_DAY_WEEK = 1
val COMMITS_DAY_TIME = 2
val LINE_LONGEVITY = 3
val LINE_LONGEVITY_REPO = 4
val REPO_DATE_START = 5
val REPO_DATE_END = 6
val REPO_TEAM_SIZE = 7
val COMMIT_DAY_WEEK = 1 // Day of week fun fact and graph.
val COMMIT_DAY_TIME = 2 // Day time fun fact.
val COMMIT_LINE_NUM_AVG = 8 // Average number of lines per commit fun fact.
val COMMIT_NUM = 9 // Used for averaging COMMIT_LINE_NUM_AVG between repos.
// A map of line numbers to commits number. Used in a commit histogram.
val COMMIT_NUM_TO_LINE_NUM = 12
val LINE_LONGEVITY = 3 // Used for longevity graph.
val LINE_LONGEVITY_REPO = 4 // Used for longevity graph.
val LINE_LEN_AVG = 10 // Average length of line fun fact.
val LINE_NUM = 11 // Used for averaging LINE_LEN_AVG between repos.
val REPO_DATE_START = 5 // Repo summary info. Date of first contribution.
val REPO_DATE_END = 6 // Repo summary info. Date of last contribution.
val REPO_TEAM_SIZE = 7 // Repo summary info. Number of contributors.
}
9 changes: 9 additions & 0 deletions src/main/kotlin/app/hashers/CommitCrawler.kt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ object CommitCrawler {
}
new.diffs = getDiffFiles(git, new, old)
Logger.debug { "Diff: ${new.diffs.size} entries" }
// Count lines on all non-binary files. This is additional
// statistics to CommitStats because not all file extensions
// may be supported.
new.numLinesAdded = new.diffs.fold(0) { total, file ->
total + file.getAllAdded().size
}
new.numLinesDeleted = new.diffs.fold(0) { total, file ->
total + file.getAllDeleted().size
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you move this code? I find it reasonable to keep it in CommitHasher, which is all about commit analysis and processing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because these are general commit stats that could be used in multiple places, e.g. in FactHasher.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the point

new.repo = repo
new
}
Expand Down
9 changes: 0 additions & 9 deletions src/main/kotlin/app/hashers/CommitHasher.kt
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,6 @@ class CommitHasher(private val serverRepo: Repo = Repo(),
Logger.info { "Stats: ${commit.stats.size} entries" }
Logger.debug { commit.stats.toString() }

// Count lines on all non-binary files. This is additional
// statistics to CommitStats because not all file extensions
// may be supported.
commit.numLinesAdded = commit.diffs.fold(0) { total, file ->
total + file.getAllAdded().size
}
commit.numLinesDeleted = commit.diffs.fold(0) { total, file ->
total + file.getAllDeleted().size
}
commit
}
.buffer(20, TimeUnit.SECONDS) // Group ready commits by time.
Expand Down
155 changes: 121 additions & 34 deletions src/main/kotlin/app/hashers/FactHasher.kt
Original file line number Diff line number Diff line change
Expand Up @@ -19,56 +19,38 @@ import java.time.ZoneOffset
*/
class FactHasher(private val serverRepo: Repo = Repo(),
private val api: Api,
private val rehashes: List<String>,
private val emails: HashSet<String>) {
private val fsDayWeek = hashMapOf<String, Array<Int>>()
private val fsDayTime = hashMapOf<String, Array<Int>>()
private val fsRepoDateStart = hashMapOf<String, Long>()
private val fsRepoDateEnd = hashMapOf<String, Long>()
private val fsRepoTeamSize = hashSetOf<String>()
private val fsCommitLineNumAvg = hashMapOf<String, Double>()
private val fsCommitNum = hashMapOf<String, Int>()
private val fsLineLenAvg = hashMapOf<String, Double>()
private val fsLineNum = hashMapOf<String, Long>()
private val fsLinesPerCommits = hashMapOf<String, Array<Int>>()

// Seeds every per-author accumulator so that later lookups by a tracked
// email always find an entry.
init {
    emails.forEach { author ->
        // One counter per day of week / hour of day.
        fsDayWeek[author] = Array(7) { 0 }
        fsDayTime[author] = Array(24) { 0 }
        // -1 marks "no commit seen yet" for the repo date range.
        fsRepoDateStart[author] = -1L
        fsRepoDateEnd[author] = -1L
        fsCommitLineNumAvg[author] = 0.0
        fsCommitNum[author] = 0
        fsLineLenAvg[author] = 0.0
        fsLineNum[author] = 0L
        // TODO(anatoly): Do the bin computations on the go.
        fsLinesPerCommits[author] = Array(rehashes.size) { 0 }
    }
}

fun updateFromObservable(observable: Observable<Commit>,
onError: (Throwable) -> Unit) {
observable
.filter { commit -> emails.contains(commit.author.email) }
.subscribe({ commit -> // OnNext.
// Calculate facts.
val email = commit.author.email
val timestamp = commit.dateTimestamp
val dateTime = LocalDateTime.ofEpochSecond(timestamp, 0,
ZoneOffset.ofTotalSeconds(commit.dateTimeZoneOffset * 60))

// DayWeek.
val factDayWeek = fsDayWeek[email] ?: Array(7) { 0 }
// The value is numbered from 1 (Monday) to 7 (Sunday).
factDayWeek[dateTime.dayOfWeek.value - 1] += 1
fsDayWeek[email] = factDayWeek

// DayTime.
val factDayTime = fsDayTime[email] ?: Array(24) { 0 }
// Hour from 0 to 23.
factDayTime[dateTime.hour] += 1
fsDayTime[email] = factDayTime

// RepoDateStart.
fsRepoDateStart[email] = timestamp

// RepoDateEnd.
if ((fsRepoDateEnd[email] ?: -1) == -1L) {
fsRepoDateEnd[email] = timestamp
}

// RepoTeamSize.
fsRepoTeamSize.add(email)
}, onError, { // OnComplete.
.subscribe(onNext, onError, { // OnComplete.
try {
postFactsToServer(createFacts())
} catch (e: Throwable) {
Expand All @@ -77,25 +59,79 @@ class FactHasher(private val serverRepo: Repo = Repo(),
})
}

/**
 * Folds a single commit into the per-author fact accumulators.
 */
private val onNext: (Commit) -> Unit = { commit ->
    val email = commit.author.email
    val timestamp = commit.dateTimestamp
    // The stored offset is multiplied by 60 to obtain seconds.
    val zone = ZoneOffset.ofTotalSeconds(commit.dateTimeZoneOffset * 60)
    val dateTime = LocalDateTime.ofEpochSecond(timestamp, 0, zone)

    // DayWeek: dayOfWeek.value runs from 1 (Monday) to 7 (Sunday).
    val weekCounts = fsDayWeek.getOrPut(email) { Array(7) { 0 } }
    weekCounts[dateTime.dayOfWeek.value - 1] += 1

    // DayTime: hour runs from 0 to 23.
    val hourCounts = fsDayTime.getOrPut(email) { Array(24) { 0 } }
    hourCounts[dateTime.hour] += 1

    // RepoDateStart: overwritten by every commit, so the last one wins.
    fsRepoDateStart[email] = timestamp

    // RepoDateEnd: written only while the -1 placeholder is still there.
    val knownEnd = fsRepoDateEnd[email] ?: -1L
    if (knownEnd == -1L) {
        fsRepoDateEnd[email] = timestamp
    }

    // Commits: bump the counter and refresh the average lines per commit.
    val numCommits = fsCommitNum[email]!! + 1
    val changedLines = commit.numLinesAdded + commit.numLinesDeleted
    fsCommitNum[email] = numCommits
    fsCommitLineNumAvg[email] = calcIncAvg(fsCommitLineNumAvg[email]!!,
        changedLines.toDouble(), numCommits.toLong())

    // Lines: feed every added and deleted line into the average length.
    val lines = commit.getAllAdded() + commit.getAllDeleted()
    val linesBefore = fsLineNum[email]!!
    var lenAvg = fsLineLenAvg[email]!!
    lines.forEachIndexed { index, line ->
        lenAvg = calcIncAvg(lenAvg, line.length.toDouble(),
            linesBefore + index + 1)
    }
    fsLineLenAvg[email] = lenAvg
    fsLineNum[email] = linesBefore + lines.size

    // Histogram input: slot index is the author's commit ordinal.
    fsLinesPerCommits[email]!![numCommits - 1] += lines.size
}

/**
 * Builds the list of facts from the accumulated per-author statistics.
 *
 * For every email an Author is created and the following facts are added:
 * one fact per hour and per day of week with a non-zero commit count, the
 * repo start and end dates, the commit count and average lines per commit,
 * the line count and average line length, and the commit histogram facts
 * produced by addCommitsPerLinesFacts. A single REPO_TEAM_SIZE fact without
 * an author is appended at the end.
 *
 * NOTE(review): this listing appears to contain both the removed and the
 * added variant of some statements from the diff (the COMMITS_* / COMMIT_*
 * lines and the two team-size arguments) - confirm against the merged file.
 *
 * @return the facts collected for all tracked emails
 */
private fun createFacts(): List<Fact> {
val fs = mutableListOf<Fact>()
emails.forEach { email ->
val author = Author(email = email)
// Hours without commits produce no fact.
fsDayTime[email]?.forEachIndexed { hour, count -> if (count > 0) {
fs.add(Fact(serverRepo, FactCodes.COMMITS_DAY_TIME, hour,
fs.add(Fact(serverRepo, FactCodes.COMMIT_DAY_TIME, hour,
count.toString(), author))
}}
// Days without commits produce no fact.
fsDayWeek[email]?.forEachIndexed { day, count -> if (count > 0) {
fs.add(Fact(serverRepo, FactCodes.COMMITS_DAY_WEEK, day,
fs.add(Fact(serverRepo, FactCodes.COMMIT_DAY_WEEK, day,
count.toString(), author))
}}
fs.add(Fact(serverRepo, FactCodes.REPO_DATE_START, 0,
fsRepoDateStart[email].toString(), author))
fs.add(Fact(serverRepo, FactCodes.REPO_DATE_END, 0,
fsRepoDateEnd[email].toString(), author))
fs.add(Fact(serverRepo, FactCodes.COMMIT_NUM, 0,
fsCommitNum[email].toString(), author))
fs.add(Fact(serverRepo, FactCodes.COMMIT_LINE_NUM_AVG, 0,
fsCommitLineNumAvg[email].toString(), author))
fs.add(Fact(serverRepo, FactCodes.LINE_NUM, 0,
fsLineNum[email].toString(), author))
fs.add(Fact(serverRepo, FactCodes.LINE_LEN_AVG, 0,
fsLineLenAvg[email].toString(), author))
addCommitsPerLinesFacts(fs, fsLinesPerCommits[email]!!, author)
}
// Repo-wide fact, not attributed to a single author.
fs.add(Fact(serverRepo, FactCodes.REPO_TEAM_SIZE, 0,
fsRepoTeamSize.size.toString()))
emails.size.toString()))
return fs
}

Expand All @@ -105,4 +141,55 @@ class FactHasher(private val serverRepo: Repo = Repo(),
Logger.info { "Sent ${facts.size} facts to server" }
}
}

/**
 * Incrementally updates the average of a numerical sequence.
 *
 * The previous average is re-weighted and the new element's share is added,
 * so no running sum of the elements is kept.
 *
 * @param prev average before [element] was added
 * @param element new element of the sequence
 * @param count number of elements in the sequence, including [element]
 * @return the average after [element] has been taken into account
 */
private fun calcIncAvg(prev: Double, element: Double, count: Long): Double {
    // Share of the result that the previous average keeps.
    val keptShare = 1 - 1.0 / count
    // Contribution of the newly added element.
    val addedPart = element / count
    return prev * keptShare + addedPart
}

/**
 * Appends the commit histogram facts of one author to [fs].
 *
 * The line counts are distributed over at most 10 equally sized bins that
 * span the range from the smallest to the largest entry. For every bin that
 * received at least one commit, a [FactCodes.COMMIT_NUM_TO_LINE_NUM] fact is
 * added carrying the bin's lower bound (in lines) and the number of commits
 * in that bin. Nothing is added when [linesPerCommits] is empty.
 *
 * @param fs list the facts are appended to
 * @param linesPerCommits number of changed lines, one entry per commit slot
 * @param author author the facts are attributed to
 */
private fun addCommitsPerLinesFacts(fs: MutableList<Fact>,
                                    linesPerCommits: Array<Int>,
                                    author: Author) {
    // Without any slot there is nothing to bin, and reading the first
    // element below would throw.
    if (linesPerCommits.isEmpty()) {
        return
    }

    val first = linesPerCommits[0]
    val max = linesPerCommits.fold(first) { acc, n -> Math.max(acc, n) }
    val min = linesPerCommits.fold(first) { acc, n -> Math.min(acc, n) }

    // Never use more bins than there are distinct possible values.
    val span = max - min + 1
    val numBins = Math.min(10, span)
    val binSize = span / numBins.toDouble()
    val bins = IntArray(numBins)
    for (numLines in linesPerCommits) {
        // Zero entries are left out of the histogram, although they do
        // take part in the min/max computation above.
        if (numLines == 0) {
            continue
        }

        val binId = Math.floor((numLines - min) / binSize).toInt()
        bins[binId]++
    }

    for ((binId, numCommits) in bins.withIndex()) {
        if (numCommits == 0) {
            continue
        }

        // Lower bound of the bin, rounded down to whole lines.
        val numLines = Math.floor(min + binId * binSize).toInt()
        fs.add(Fact(serverRepo, FactCodes.COMMIT_NUM_TO_LINE_NUM,
            numLines, numCommits.toString(), author))
    }
}
}
2 changes: 1 addition & 1 deletion src/main/kotlin/app/hashers/RepoHasher.kt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class RepoHasher(private val localRepo: LocalRepo, private val api: Api,
.publish()
CommitHasher(serverRepo, api, rehashes, filteredEmails)
.updateFromObservable(observable, onError)
FactHasher(serverRepo, api, filteredEmails)
FactHasher(serverRepo, api, rehashes, filteredEmails)
.updateFromObservable(observable, onError)

// Start and synchronously wait until all subscribers complete.
Expand Down
8 changes: 8 additions & 0 deletions src/main/kotlin/app/model/Commit.kt
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,12 @@ data class Commit(
override fun hashCode(): Int {
return rehash.hashCode()
}

/**
 * Collects the added lines of every diff of this commit into one list,
 * keeping the order of [diffs].
 *
 * @return the concatenation of `getAllAdded()` over all diffs
 */
fun getAllAdded(): List<String> =
    // flatMap avoids materialising an intermediate list of lists.
    diffs.flatMap { it.getAllAdded() }

/**
 * Collects the deleted lines of every diff of this commit into one list,
 * keeping the order of [diffs].
 *
 * @return the concatenation of `getAllDeleted()` over all diffs
 */
fun getAllDeleted(): List<String> =
    // flatMap avoids materialising an intermediate list of lists.
    diffs.flatMap { it.getAllDeleted() }
}
Loading