diff --git a/build.gradle b/build.gradle index 84d929b2..463927bd 100644 --- a/build.gradle +++ b/build.gradle @@ -94,12 +94,7 @@ dependencies { compile 'com.github.kittinunf.fuel:fuel-rxjava:1.11.0' compile 'org.eclipse.jgit:org.eclipse.jgit:4.9.0.201710071750-r' compile 'org.slf4j:slf4j-nop:1.7.2' - compile 'org.jpmml:pmml-evaluator:1.3.9' compile 'io.sentry:sentry:1.6.0' - compile 'net.razorvine:pyrolite:4.19' - compile 'org.jpmml:jpmml-converter:1.2.6' - - compile name: 'jpmml-sklearn-1.3-SNAPSHOT' testCompile 'org.jetbrains.kotlin:kotlin-test' testCompile 'org.jetbrains.spek:spek-api:1.1.5' diff --git a/libs/jpmml-sklearn-1.3-SNAPSHOT.jar b/libs/jpmml-sklearn-1.3-SNAPSHOT.jar deleted file mode 100644 index c3c36381..00000000 Binary files a/libs/jpmml-sklearn-1.3-SNAPSHOT.jar and /dev/null differ diff --git a/src/main/kotlin/app/extractors/Classifier.kt b/src/main/kotlin/app/extractors/Classifier.kt index c93d919c..7c261ced 100644 --- a/src/main/kotlin/app/extractors/Classifier.kt +++ b/src/main/kotlin/app/extractors/Classifier.kt @@ -4,7 +4,6 @@ package app.extractors import app.ModelsProtos -import com.google.common.collect.Lists import com.google.protobuf.InvalidProtocolBufferException import java.security.InvalidParameterException @@ -20,7 +19,7 @@ class Classifier { tokens = proto.tokensList libraries = proto.librariesList idf = tokens.zip(proto.idfList).toMap() - weights = libraries.zip(Lists.partition(proto.weightsList, tokens.size) + weights = libraries.zip(proto.weightsList.partition(tokens.size) .map {it: List -> tokens.zip(it).toMap()}).toMap() biases = libraries.zip(proto.biasesList).toMap() } @@ -51,4 +50,10 @@ class Classifier { fun getCategories(): List { return libraries } + + private fun List.partition(size: Int): List> { + return this.withIndex() + .groupBy { it.index / size } + .map { group -> group.value.map { it.value } } + } } diff --git a/src/main/kotlin/app/extractors/CppExtractor.kt b/src/main/kotlin/app/extractors/CppExtractor.kt index 5739eeb8..2e40cb6f 100644 --- a/src/main/kotlin/app/extractors/CppExtractor.kt +++ b/src/main/kotlin/app/extractors/CppExtractor.kt @@ -14,6 +14,8 @@ class CppExtractor : ExtractorInterface { val evaluator by lazy { ExtractorInterface.getLibraryClassifier(LANGUAGE_NAME) } + val MULTI_IMPORT_TO_LIB = + ExtractorInterface.getMultipleImportsToLibraryMap(LANGUAGE_NAME) } override fun extract(files: List): List { @@ -33,7 +35,8 @@ class CppExtractor : ExtractorInterface { } } - return imports.toList() + val libraries = imports.map { MULTI_IMPORT_TO_LIB.getOrDefault(it, it) } + return libraries } override fun tokenize(line: String): List { diff --git a/src/main/kotlin/app/extractors/ExtractorInterface.kt b/src/main/kotlin/app/extractors/ExtractorInterface.kt index f0b6ae31..bb67495f 100644 --- a/src/main/kotlin/app/extractors/ExtractorInterface.kt +++ b/src/main/kotlin/app/extractors/ExtractorInterface.kt @@ -8,19 +8,7 @@ import app.BuildConfig import app.Logger import app.model.DiffFile import app.model.CommitStats -import org.dmg.pmml.FieldName -import org.dmg.pmml.PMML -import org.jpmml.evaluator.Evaluator -import org.jpmml.evaluator.FieldValue -import org.jpmml.evaluator.ModelEvaluatorFactory -import org.jpmml.evaluator.ProbabilityDistribution -import org.jpmml.model.PMMLUtil -import org.jpmml.sklearn.PickleUtil -import sklearn.pipeline.Pipeline -import sklearn2pmml.PMMLPipeline import java.io.InputStream -import org.jpmml.sklearn.CompressedInputStreamStorage -import java.io.FileInputStream import java.io.FileOutputStream import java.nio.file.Files import java.nio.file.Paths @@ -31,7 +19,6 @@ import java.io.File interface ExtractorInterface { companion object { private val librariesCache = hashMapOf>() - private val evaluatorsCache = hashMapOf() private val classifiersCache = hashMapOf() private fun getResource(path: String): InputStream { @@ -49,6 +36,15 @@ interface ExtractorInterface { return libraries } + fun getMultipleImportsToLibraryMap(name: String): Map { + val importToLibrary = getResource("data/imports/$name.txt") + .bufferedReader().readLines().map { + val mapping = it.split(":") + Pair(mapping[0], mapping[1]) + }.toMap() + return importToLibrary + } + private fun downloadModel(name: String, outputDir: String) { val url = BuildConfig.LIBRARY_MODELS_URL + "$name.pb" val outputPath = "$outputDir/$name.pb" @@ -227,4 +223,5 @@ interface ExtractorInterface { val lineLibraries = fileLibraries.filter { it in selectedCategories } return lineLibraries } + } diff --git a/src/main/resources/data/imports/cpp.txt b/src/main/resources/data/imports/cpp.txt new file mode 100644 index 00000000..f8b23502 --- /dev/null +++ b/src/main/resources/data/imports/cpp.txt @@ -0,0 +1,4 @@ +opencv:opencv +opencv2:opencv +protobuf:protobuf +pb:protobuf \ No newline at end of file diff --git a/src/test/kotlin/test/tests/extractors/ExtractorTest.kt b/src/test/kotlin/test/tests/extractors/ExtractorTest.kt index 30397edc..ea61a440 100644 --- a/src/test/kotlin/test/tests/extractors/ExtractorTest.kt +++ b/src/test/kotlin/test/tests/extractors/ExtractorTest.kt @@ -4,6 +4,7 @@ package test.tests.extractors import app.extractors.* +import junit.framework.TestCase.assertTrue import org.jetbrains.spek.api.Spek import org.jetbrains.spek.api.dsl.given import org.jetbrains.spek.api.dsl.it @@ -13,7 +14,7 @@ fun assertExtractsLineLibraries(expectedLibrary: String, actualLine: String, extractor: ExtractorInterface) { val actualLineLibraries = extractor.getLineLibraries(actualLine, listOf(expectedLibrary)) - assert(expectedLibrary in actualLineLibraries) + assertTrue(expectedLibrary in actualLineLibraries) } fun assertExtractsNoLibraries(actualLine: String, @@ -23,7 +24,14 @@ fun assertExtractsNoLibraries(actualLine: String, assertEquals(listOf(), actualLineLibraries) } +fun assertExtractsImport(expectedImport: String, actualLine: String, + extractor: ExtractorInterface) { + val actualLineImport = extractor.extractImports(listOf(actualLine)) + assertTrue(expectedImport in actualLineImport) +} + class ExtractorTest : Spek({ + given(" code line contains library code" ) { it("python extractor extracts the library") { val line = "with tf.Session() as sess" @@ -154,4 +162,20 @@ class ExtractorTest : Spek({ assertExtractsNoLibraries(line, CExtractor()) } } + + given("import name.h") { + it("imports name") { + assertExtractsImport("protobuf", "#include \"protobuf.h\"", CppExtractor()) + } + } + + given("import library with multiple ways to import") { + it("imports in both cases") { + var lib = "opencv" + val line1 = "#include \"opencv/module/header.h\"" + assertExtractsImport(lib, line1, CppExtractor()) + val line2 = "#include \"opencv2/module/header.h\"" + assertExtractsImport(lib, line2, CppExtractor()) + } + } })