Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,7 @@ dependencies {
compile 'com.github.kittinunf.fuel:fuel-rxjava:1.11.0'
compile 'org.eclipse.jgit:org.eclipse.jgit:4.9.0.201710071750-r'
compile 'org.slf4j:slf4j-nop:1.7.2'
compile 'org.jpmml:pmml-evaluator:1.3.9'
compile 'io.sentry:sentry:1.6.0'
compile 'net.razorvine:pyrolite:4.19'
compile 'org.jpmml:jpmml-converter:1.2.6'

compile name: 'jpmml-sklearn-1.3-SNAPSHOT'

testCompile 'org.jetbrains.kotlin:kotlin-test'
testCompile 'org.jetbrains.spek:spek-api:1.1.5'
Expand Down
Binary file removed libs/jpmml-sklearn-1.3-SNAPSHOT.jar
Binary file not shown.
9 changes: 7 additions & 2 deletions src/main/kotlin/app/extractors/Classifier.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
package app.extractors

import app.ModelsProtos
import com.google.common.collect.Lists
import com.google.protobuf.InvalidProtocolBufferException
import java.security.InvalidParameterException

Expand All @@ -20,7 +19,7 @@ class Classifier {
tokens = proto.tokensList
libraries = proto.librariesList
idf = tokens.zip(proto.idfList).toMap()
weights = libraries.zip(Lists.partition(proto.weightsList, tokens.size)
weights = libraries.zip(proto.weightsList.partition(tokens.size)
.map {it: List<Float> -> tokens.zip(it).toMap()}).toMap()
biases = libraries.zip(proto.biasesList).toMap()
}
Expand Down Expand Up @@ -51,4 +50,10 @@ class Classifier {
/**
 * Returns the category names this classifier holds, i.e. its `libraries` list.
 */
fun getCategories(): List<String> = libraries

/**
 * Splits this list into consecutive sub-lists of at most [size] elements,
 * preserving element order. The last sub-list may be shorter than [size].
 */
private fun <T> List<T>.partition(size: Int): List<List<T>> =
    // Bucket every position by its chunk number; groupBy keeps buckets in
    // first-seen order, so the chunks come out in list order.
    indices
        .groupBy({ position -> position / size }, { position -> this[position] })
        .values
        .toList()
}
5 changes: 4 additions & 1 deletion src/main/kotlin/app/extractors/CppExtractor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ class CppExtractor : ExtractorInterface {
val evaluator by lazy {
ExtractorInterface.getLibraryClassifier(LANGUAGE_NAME)
}
val MULTI_IMPORT_TO_LIB =
ExtractorInterface.getMultipleImportsToLibraryMap(LANGUAGE_NAME)
}

override fun extract(files: List<DiffFile>): List<CommitStats> {
Expand All @@ -33,7 +35,8 @@ class CppExtractor : ExtractorInterface {
}
}

return imports.toList()
val libraries = imports.map { MULTI_IMPORT_TO_LIB.getOrDefault(it, it) }
return libraries
}

override fun tokenize(line: String): List<String> {
Expand Down
23 changes: 10 additions & 13 deletions src/main/kotlin/app/extractors/ExtractorInterface.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,7 @@ import app.BuildConfig
import app.Logger
import app.model.DiffFile
import app.model.CommitStats
import org.dmg.pmml.FieldName
import org.dmg.pmml.PMML
import org.jpmml.evaluator.Evaluator
import org.jpmml.evaluator.FieldValue
import org.jpmml.evaluator.ModelEvaluatorFactory
import org.jpmml.evaluator.ProbabilityDistribution
import org.jpmml.model.PMMLUtil
import org.jpmml.sklearn.PickleUtil
import sklearn.pipeline.Pipeline
import sklearn2pmml.PMMLPipeline
import java.io.InputStream
import org.jpmml.sklearn.CompressedInputStreamStorage
import java.io.FileInputStream
import java.io.FileOutputStream
import java.nio.file.Files
import java.nio.file.Paths
Expand All @@ -31,7 +19,6 @@ import java.io.File
interface ExtractorInterface {
companion object {
private val librariesCache = hashMapOf<String, Set<String>>()
private val evaluatorsCache = hashMapOf<String, Evaluator>()
private val classifiersCache = hashMapOf<String, Classifier>()

private fun getResource(path: String): InputStream {
Expand All @@ -49,6 +36,15 @@ interface ExtractorInterface {
return libraries
}

/**
 * Loads the import-to-library mapping for the language [name].
 *
 * Reads the resource `data/imports/<name>.txt`, where every non-blank line
 * has the form `import:library`, and returns a map from the import name to
 * the library name. Blank lines are ignored.
 *
 * @param name language name used to build the resource file name.
 * @return map of import name to library name.
 */
fun getMultipleImportsToLibraryMap(name: String): Map<String, String> {
    // `use` closes the reader (and the underlying resource stream) after
    // the lines have been read, so the stream is not leaked.
    val lines = getResource("data/imports/$name.txt")
        .bufferedReader()
        .use { it.readLines() }

    return lines
        // A blank line has no ":" separator and would otherwise fail on
        // the mapping[1] access below.
        .filter { it.isNotBlank() }
        .map {
            val mapping = it.split(":")
            Pair(mapping[0], mapping[1])
        }.toMap()
}

private fun downloadModel(name: String, outputDir: String) {
val url = BuildConfig.LIBRARY_MODELS_URL + "$name.pb"
val outputPath = "$outputDir/$name.pb"
Expand Down Expand Up @@ -227,4 +223,5 @@ interface ExtractorInterface {
val lineLibraries = fileLibraries.filter { it in selectedCategories }
return lineLibraries
}

}
4 changes: 4 additions & 0 deletions src/main/resources/data/imports/cpp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
opencv:opencv
opencv2:opencv
protobuf:protobuf
pb:protobuf
26 changes: 25 additions & 1 deletion src/test/kotlin/test/tests/extractors/ExtractorTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package test.tests.extractors

import app.extractors.*
import junit.framework.TestCase.assertTrue
import org.jetbrains.spek.api.Spek
import org.jetbrains.spek.api.dsl.given
import org.jetbrains.spek.api.dsl.it
Expand All @@ -13,7 +14,7 @@ fun assertExtractsLineLibraries(expectedLibrary: String, actualLine: String,
extractor: ExtractorInterface) {
val actualLineLibraries =
extractor.getLineLibraries(actualLine, listOf(expectedLibrary))
assert(expectedLibrary in actualLineLibraries)
assertTrue(expectedLibrary in actualLineLibraries)
}

fun assertExtractsNoLibraries(actualLine: String,
Expand All @@ -23,7 +24,14 @@ fun assertExtractsNoLibraries(actualLine: String,
assertEquals(listOf(), actualLineLibraries)
}

/**
 * Asserts that [extractor] reports [expectedImport] among the imports it
 * extracts from the single source line [actualLine].
 */
fun assertExtractsImport(expectedImport: String, actualLine: String,
                         extractor: ExtractorInterface) {
    val sourceLines = listOf(actualLine)
    val extractedImports = extractor.extractImports(sourceLines)
    assertTrue(extractedImports.contains(expectedImport))
}

class ExtractorTest : Spek({

given(" code line contains library code" ) {
it("python extractor extracts the library") {
val line = "with tf.Session() as sess"
Expand Down Expand Up @@ -154,4 +162,20 @@ class ExtractorTest : Spek({
assertExtractsNoLibraries(line, CExtractor())
}
}

given("import name.h") {
    it("imports name") {
        // A quoted include of "<name>.h" should be reported as "<name>".
        val includeLine = "#include \"protobuf.h\""
        val expectedImport = "protobuf"
        assertExtractsImport(expectedImport, includeLine, CppExtractor())
    }
}

given("import library with multiple ways to import") {
    it("imports in both cases") {
        // Both include prefixes are expected to resolve to the same
        // library name.
        val lib = "opencv"
        val line1 = "#include \"opencv/module/header.h\""
        assertExtractsImport(lib, line1, CppExtractor())
        val line2 = "#include \"opencv2/module/header.h\""
        assertExtractsImport(lib, line2, CppExtractor())
    }
}
})