diff --git a/data/libraries/cs_libraries.txt b/data/libraries/cs_libraries.txt new file mode 100644 index 00000000..65781a89 --- /dev/null +++ b/data/libraries/cs_libraries.txt @@ -0,0 +1,45 @@ +Dapper +Microsoft.AspNet.SignalR +Nancy +Microsoft.EntityFrameworkCore +AutoMapper +Newtonsoft.Json +Microsoft.AspNetCore.Mvc +RestSharp +Microsoft.Xna.Framework +OpenRA +Microsoft.Bot +ServiceStack +PushSharp +CefSharp +MaterialDesign* +ReactiveUI +Abp +Orleans +Hangfire +Humanizer +Polly +NLog +FluentValidation +Avalonia +Akka +StackExchange.Redis +Senparc +BenchmarkDotNet +MvvmCross +EventStore +System.Reactive +Accord +Quartz +IdentityServer3 +Moq +Swashbuckle +Ninject +Refit +Stateless +ImageProcessor +Prism +MongoDB.Driver +LanguageExt +Massive +LiteDB \ No newline at end of file diff --git a/data/libraries/java_libraries.txt b/data/libraries/java_libraries.txt new file mode 100644 index 00000000..a9cd6510 --- /dev/null +++ b/data/libraries/java_libraries.txt @@ -0,0 +1,255 @@ +io.reactivex +retrofit2 +okhttp3 +com.google.common +butterknife +com.github.mikephil.charting +com.bumptech.glide +com.squareup.leakcanary +org.springframework +org.greenrobot.eventbus +com.nostra13.universalimageloader +org.springframework.boot +com.airbnb.lottie +com.google.zxing +com.squareup.picasso +io.reactivex +com.facebook.fresco +com.blankj.utilcode +com.badlogic.gdx +com.github.chrisbanes.photoview +io.netty +com.afollestad.material-dialogs +com.netflix.hystrix +com.jeremyfeinstein.slidingmenu +com.alibaba.dubbo +com.alibaba.fastjson +com.loopj.android.http +com.tencent.tinker +org.androidannotations +com.viewpagerindicator +com.chad.library +com.daimajia.swipelayout +org.greenrobot.greendao +com.gc.materialdesign +com.handmark.pulltorefresh +com.facebook.stetho +io.realm +com.alibaba.druid +com.orhanobut.logger +org.deeplearning4j +com.google.android.agera +spark +io.vertx +com.airbnb.android.react.lottie +zipkin +com.facebook.presto +dagger +junit +com.lmax.disruptor 
+com.jakewharton.rxbinding2 +org.physical_web +org.apache.kafka +com.google.inject +com.google.auto +io.druid +org.mockito +com.zaxxer.hikari +com.codahale.metrics +us.codecraft.webmagic +com.squareup.otto +okio +org.apache.hadoop +opennlp +org.apache.velocity +freemarker +com.google.gwt +org.hamcrest +org.hornetq +org.eclipse.hudson +org.quartz +gnu.trove +org.dozer +com.googlecode.jmapper +org.mapstruct +org.modelmapper +ma.glasnost.orika +fr.xebia.extras.selma +net.sf.cglib +com.github.benmanes.caffeine +com.github.sviperll.adt4j +com.google.auto.common +org.inferred.freebuilder +com.squareup.javapoet +io.airlift.airline +com.typesafe.config +info.macias.kaconf +org.aeonbits.owner +org.jacop +org.apache.commons.csv +com.fasterxml.jackson.dataformat.csv +com.univocity.parsers +net.openhft.chronicle +org.exist +com.vladmihalcea.flexypool +com.zaxxer.hikari +redis.clients.jedis +io.searchbox +com.justinsb.etcd +org.jinq +ch.vorburger.mariadb4j +com.facebook.presto +io.realm +org.redisson +io.requery +com.speedment +org.apache.avro +com.leansoft.bigqueue +com.google.protobuf +uk.co.real_logic.sbe +com.squareup.tape2 +com.squareup.wire +com.hypotemoose.cal +net.fortuna.ical4j +org.joda.time +org.threeten.bp +net.time4j +org.codejargon.feather +com.netflix.governator +com.google.inject +org.zalando.fauxpas +org.hotswap.agent +com.github.javaparser +com.github.javaparser.symbolsolver +org.springsource +com.github.mtakaki.dropwizard.circuitbreaker +net.jodah.failsafe +com.netflix.hystrix +io.github.resilience4j +com.netflix.zuul +com.badlogicgames.packr +org.skife.waffles +janala +com.aol.cyclops +org.derive4j +org.jooq.lambda +com.codepoetics.protonpack +one.util.streamex +com.github.davidmoten.geo +com.graphhopper +org.mapsforge +org.locationtech.spatial4j +org.agrona +org.eclipse.collections +org.jctools +com.koloboke +org.asynchttpclient +feign +restql.core +com.netflix.ribbon +org.zalando.riptide +com.github.jsonldjava +com.google.code.siren4j +org.imgscalr 
+net.sourceforge.tess4j +net.coobird.thumbnailator +com.twelvemonkeys +com.google.zxing +com.google.gson +com.zaxxer.hikari.json +org.zalando.jackson.datatype.money +com.cedarsoftware.util.io +com.bluelinelabs.logansquare +com.squareup.moshi +com.alibaba.fastjson +com.bazaarvoice.jolt +com.jayway.jsonpath +org.jsfr.json +org.zalando.logbook +org.zalando.tracer +jsat +com.cloudera.oryx +io.aeron +zmq +org.zalando.nakadi +org.jivesoftware.smack +com.googlecode.cqengine +net.jodah.failsafe +io.github.resilience4j.circuitbreaker +me.ramswaroop.jbot +com.google.common.jimfs +com.googlecode.lanterna +net.jodah.typetools +com.ecwid.consul.v1 +com.netflix.eureka +org.automon +net.bull.javamelody +com.googlecode.jmxtrans +nudge4j +com.navercorp.pinpoint +org.stagemonitor +com.palantir.opensource.sysmon +org.bytedeco.javacpp +com.sun.jna +jnr.ffi +edu.illinois.cs.cogcomp +comsat +com.twitter.finagle +io.grpc +com.facebook.nifty +de.slub.urn +org.simpleflatmapper +org.xhtmlrenderer +com.networknt +org.orienteer +org.reactivestreams +io.reactivex +com.linkedin.restli +org.restexpress +org.restlet +com.hendrix.erdos +org.graphstream +org.jgrapht +com.mxgraph +edu.mines.jtk +tech.tablesaw +org.hdiv +io.jsonwebtoken +org.keyczar +me.gosimple.nbvcxz +org.pac4j +com.google.flatbuffers +org.nustaq.serialization +com.esotericsoftware.kryo +org.msgpack +org.nanohttpd +com.tngtech.archunit +org.awaitility +com.squareup.burst +net.jodah.concurrentunit +cucumber +lv.ctco.cukes +br.com.six2six.fixturefactory +com.galenframework +j8spec +io.codearte.jfairy +com.tngtech.java.junit.dataprovider +com.insightfullogic.lambdabehave +org.mockito +com.github.dreamhead.moco +org.mutabilitydetector +org.powermock +guru.nidi.ramltester +com.carrotsearch.randomizedtesting +io.restassured +org.spockframework +org.testcontainers +com.google.common.truth +com.dexvis.dex +org.gephi +com.google.common +ru.lanwen.verbalregex +io.minio +com.github.underscore +edu.uci.ics.crawler4j +com.blade \ No newline at 
end of file diff --git a/data/libraries/js_libraries.txt b/data/libraries/js_libraries.txt new file mode 100644 index 00000000..c179587d --- /dev/null +++ b/data/libraries/js_libraries.txt @@ -0,0 +1,64 @@ +react +d3 +vue +angular +jquery +meteor +socketio +three +express +redux +moment +material-ui +jquery-file-upload +lodash +axios +react-router +async +underscore +fullpage +immutable +pdf +ember +koa +request +postcss +angular +material +fetch +pjax +pug +less +bootstrap +flux +q +mongoose +mocha +handlebars +riot +tesseract +passport +browserify +mustache +sequelize +react-bootstrap +jquery-ui +jquery-mobile +mysql +datepicker +smartcrop +normalizr +vuex +react-redux +knockout +wtf +jquery-cookie +redis +winston +validate +gql +graphql +uglify +http-proxy +purify +ng-file-upload \ No newline at end of file diff --git a/src/main/kotlin/app/extractors/CExtractor.kt b/src/main/kotlin/app/extractors/CExtractor.kt new file mode 100644 index 00000000..10d1cc47 --- /dev/null +++ b/src/main/kotlin/app/extractors/CExtractor.kt @@ -0,0 +1,35 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. 
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile + +class CExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "c" + val FILE_EXTS = listOf("c") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + val regex = Regex("""#include\s+["<](\w+)[/\w+]*\.\w+[">]""") + fileContent.forEach { + val res = regex.find(it) + if (res != null) { + val lineLib = res.groupValues.last() + libraries.add(lineLib) + } + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/CSharpExtractor.kt b/src/main/kotlin/app/extractors/CSharpExtractor.kt new file mode 100644 index 00000000..5ff978c9 --- /dev/null +++ b/src/main/kotlin/app/extractors/CSharpExtractor.kt @@ -0,0 +1,46 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile +import java.io.File + +class CSharpExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "cs" + val FILE_EXTS = listOf("cs") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + // TODO(anatoly): Load file statically. 
+ val csLibraries = File("data/libraries/cs_libraries.txt") + .inputStream().bufferedReader() + .readLines() + .toSet() + + val regex = Regex("""using\s+(?:static\s+|\w+\s*=\s*)?(\w+[.\w+]*)""") + fileContent.forEach { + val res = regex.find(it) + if (res != null) { + val importedName = res.groupValues[1] + csLibraries.forEach { library -> + if (importedName.startsWith(library)) { + libraries.add(library) + } + } + } + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/CommonExtractor.kt b/src/main/kotlin/app/extractors/CommonExtractor.kt deleted file mode 100644 index 6a02b2a5..00000000 --- a/src/main/kotlin/app/extractors/CommonExtractor.kt +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2017 Sourcerer Inc. All Rights Reserved. -// Author: Anatoly Kislov (anatoly@sourcerer.io) - -package app.extractors - -import app.model.DiffFile -import app.model.CommitStats - -/** - * Common extractor that get basic stats and assigns it to specified language. - */ -class CommonExtractor(val language: String) : ExtractorInterface { - - override fun extract(files: List): List { - val stats = mutableListOf() - - stats.add(CommitStats( - numLinesAdded = files.fold(0) { total, file -> - total + file.getAllAdded().size }, - numLinesDeleted = files.fold(0) { total, file -> - total + file.getAllDeleted().size }, - type = Extractor.TYPE_LANGUAGE, - tech = language)) - - return stats - } -} diff --git a/src/main/kotlin/app/extractors/CppExtractor.kt b/src/main/kotlin/app/extractors/CppExtractor.kt new file mode 100644 index 00000000..60686777 --- /dev/null +++ b/src/main/kotlin/app/extractors/CppExtractor.kt @@ -0,0 +1,35 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. 
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile + +class CppExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "cpp" + val FILE_EXTS = listOf("cc", "cpp", "cxx", "c++") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + val regex = Regex("""#include\s+["<](\w+)[/\w+]*(?:\.\w+)?[">]""") + fileContent.forEach { + val res = regex.find(it) + if (res != null) { + val lineLib = res.groupValues.last() + libraries.add(lineLib) + } + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/EmptyExtractor.kt b/src/main/kotlin/app/extractors/EmptyExtractor.kt index 2634d712..eeb4cb11 100644 --- a/src/main/kotlin/app/extractors/EmptyExtractor.kt +++ b/src/main/kotlin/app/extractors/EmptyExtractor.kt @@ -1,5 +1,6 @@ // Copyright 2017 Sourcerer Inc. All Rights Reserved. // Author: Anatoly Kislov (anatoly@sourcerer.io) +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) package app.extractors @@ -10,4 +11,8 @@ class EmptyExtractor : ExtractorInterface { override fun extract(files: List): List { return listOf() } + + override fun extractImports(fileContent: List): List { + return listOf() + } } diff --git a/src/main/kotlin/app/extractors/Extractor.kt b/src/main/kotlin/app/extractors/Extractor.kt index 5a9ec2da..d90b537f 100644 --- a/src/main/kotlin/app/extractors/Extractor.kt +++ b/src/main/kotlin/app/extractors/Extractor.kt @@ -1,5 +1,6 @@ // Copyright 2017 Sourcerer Inc. All Rights Reserved. 
// Author: Anatoly Kislov (anatoly@sourcerer.io) +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) package app.extractors @@ -7,39 +8,25 @@ import app.model.CommitStats import app.model.DiffFile class Extractor : ExtractorInterface { - companion object Constants { + companion object { val TYPE_LANGUAGE = 1 val TYPE_KEYWORD = 2 - val SEPARATOR = ">" - - val JAVASCRIPT_FILE_EXTS = listOf("js") - val JAVA_FILE_EXTS = listOf("java") - val PYTHON_FILE_EXTS = listOf("py", "py3") - val RUBY_FILE_EXTS = listOf("rb", "rbw") - val PHP_FILE_EXTS = listOf("php", "phtml", "php4", "php3", "php5", - "phps") - val C_FILE_EXTS = listOf("c") - val CPP_FILE_EXTS = listOf("cc", "cpp", "cxx", "c++") - val CS_FILE_EXTS = listOf("cs") - val GO_FILE_EXTS = listOf("go") - val OC_FILE_EXTS = listOf("h", "m", "mm") - val SWIFT_FILE_EXTS = listOf("swift") } fun create(extension: String): ExtractorInterface { return when (extension) { - in JAVASCRIPT_FILE_EXTS -> CommonExtractor("js") - in JAVA_FILE_EXTS -> JavaExtractor() - in PYTHON_FILE_EXTS -> CommonExtractor("python") - in RUBY_FILE_EXTS -> CommonExtractor("ruby") - in PHP_FILE_EXTS -> CommonExtractor("php") - in C_FILE_EXTS -> CommonExtractor("c") - in CPP_FILE_EXTS -> CommonExtractor("cpp") - in CS_FILE_EXTS -> CommonExtractor("cs") - in GO_FILE_EXTS -> CommonExtractor("go") - in OC_FILE_EXTS -> CommonExtractor("oc") - in SWIFT_FILE_EXTS -> CommonExtractor("swift") + in JavascriptExtractor.FILE_EXTS -> JavascriptExtractor() + in JavaExtractor.FILE_EXTS -> JavaExtractor() + in PythonExtractor.FILE_EXTS -> PythonExtractor() + in RubyExtractor.FILE_EXTS -> RubyExtractor() + in PhpExtractor.FILE_EXTS -> PhpExtractor() + in CExtractor.FILE_EXTS -> CExtractor() + in CppExtractor.FILE_EXTS -> CppExtractor() + in CSharpExtractor.FILE_EXTS -> CSharpExtractor() + in GoExtractor.FILE_EXTS -> GoExtractor() + in ObjectiveCExtractor.FILE_EXTS -> ObjectiveCExtractor() + in SwiftExtractor.FILE_EXTS -> SwiftExtractor() else -> EmptyExtractor() } 
} diff --git a/src/main/kotlin/app/extractors/ExtractorInterface.kt b/src/main/kotlin/app/extractors/ExtractorInterface.kt index 75ad93b3..f2b8b9c7 100644 --- a/src/main/kotlin/app/extractors/ExtractorInterface.kt +++ b/src/main/kotlin/app/extractors/ExtractorInterface.kt @@ -1,5 +1,6 @@ // Copyright 2017 Sourcerer Inc. All Rights Reserved. // Author: Anatoly Kislov (anatoly@sourcerer.io) +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) package app.extractors @@ -7,5 +8,25 @@ import app.model.DiffFile import app.model.CommitStats interface ExtractorInterface { - fun extract(files: List): List + fun extract(files: List): List { + files.forEach { file -> + file.old.imports = extractImports(file.old.content) + file.new.imports = extractImports(file.new.content) + // Mutates each file in place; no mapped result is needed. + } + + return files.filter { file -> file.language.isNotBlank() } + .groupBy { file -> file.language } + .map { (language, files) -> CommitStats( + numLinesAdded = files.fold(0) { total, file -> + total + file.getAllAdded().size }, + numLinesDeleted = files.fold(0) { total, file -> + total + file.getAllDeleted().size }, + type = Extractor.TYPE_LANGUAGE, + tech = language)} + } + + fun extractImports(fileContent: List): List { + return listOf() + } } diff --git a/src/main/kotlin/app/extractors/GoExtractor.kt b/src/main/kotlin/app/extractors/GoExtractor.kt new file mode 100644 index 00000000..ec98cfee --- /dev/null +++ b/src/main/kotlin/app/extractors/GoExtractor.kt @@ -0,0 +1,44 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. 
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile + +class GoExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "go" + val FILE_EXTS = listOf("go") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + val singleImportRegex = Regex("""import\s+(?:[\w.]+\s+)?"([^"]+)"""") + fileContent.forEach { + val res = singleImportRegex.find(it) + if (res != null) { + val lineLib = res.groupValues.last() + libraries.add(lineLib) + } + } + val multipleImportRegex = Regex("""import[\s\t\n]+\((.+?)\)""", + RegexOption.DOT_MATCHES_ALL) + val contentJoined = fileContent.joinToString(separator = "\n") + multipleImportRegex.findAll(contentJoined).forEach { matchResult -> + libraries.addAll(matchResult.groupValues.last() + .split(Regex("""(\t+|\n+|\s+)""")) + .filter { it.isNotEmpty() } + .map { it -> it.replace("\"", "") }) + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/JavaExtractor.kt b/src/main/kotlin/app/extractors/JavaExtractor.kt index 82283145..6fb4bf25 100644 --- a/src/main/kotlin/app/extractors/JavaExtractor.kt +++ b/src/main/kotlin/app/extractors/JavaExtractor.kt @@ -1,12 +1,18 @@ // Copyright 2017 Sourcerer Inc. All Rights Reserved. 
// Author: Anatoly Kislov (anatoly@sourcerer.io) +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) package app.extractors import app.model.CommitStats import app.model.DiffFile +import java.io.File class JavaExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "java" + val FILE_EXTS = listOf("java") + } val NAME = "Java" val KEYWORDS = listOf("abstract", "continue", "for", "new", "switch", @@ -19,7 +25,9 @@ class JavaExtractor : ExtractorInterface { "volatile", "const", "float", "native", "super", "while") override fun extract(files: List): List { - val stats = mutableListOf() + files.forEach { file -> file.language = LANGUAGE_NAME } + + val stats = super.extract(files).toMutableList() val added = files.fold(mutableListOf(), { total, file -> total.addAll(file.getAllAdded()) @@ -31,13 +39,6 @@ class JavaExtractor : ExtractorInterface { total }) - // Language stats. - stats.add(CommitStats( - numLinesAdded = added.size, - numLinesDeleted = deleted.size, - type = Extractor.TYPE_LANGUAGE, - tech = NAME)) - // Keywords stats. // TODO(anatoly): ANTLR parsing. KEYWORDS.forEach { keyword -> @@ -54,4 +55,29 @@ class JavaExtractor : ExtractorInterface { return stats } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + // TODO(anatoly): Load file statically. 
+ val javaLibraries = File("data/libraries/java_libraries.txt") + .inputStream().bufferedReader() + .readLines() + .toSet() + + val regex = Regex("""import\s+(?:static\s+)?(\w+[.\w+]*)""") + fileContent.forEach { + val res = regex.find(it) + if (res != null) { + val importedName = res.groupValues[1] + javaLibraries.forEach { library -> + if (importedName.startsWith(library)) { + libraries.add(library) + } + } + } + } + + return libraries.toList() + } } diff --git a/src/main/kotlin/app/extractors/JavascriptExtractor.kt b/src/main/kotlin/app/extractors/JavascriptExtractor.kt new file mode 100644 index 00000000..914533da --- /dev/null +++ b/src/main/kotlin/app/extractors/JavascriptExtractor.kt @@ -0,0 +1,39 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile +import java.io.File + +class JavascriptExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "js" + val FILE_EXTS = listOf("js") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + // TODO(anatoly): Load file statically. 
+ val jsLibraries = File("data/libraries/js_libraries.txt") + .inputStream().bufferedReader() + .readLines() + .toSet() + + val splitRegex = + Regex("""\s+|,|;|:|\*|\n|\(|\)|\[|]|\{|}|\+|=|\.|>|<|#|@|'|"|\$""") + val fileTokens = fileContent.joinToString(separator = " ") + .split(splitRegex) + libraries.addAll(fileTokens.filter { token -> token in jsLibraries }) + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt b/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt new file mode 100644 index 00000000..eecd0c32 --- /dev/null +++ b/src/main/kotlin/app/extractors/ObjectiveCExtractor.kt @@ -0,0 +1,39 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile + +class ObjectiveCExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "objectivec" + val FILE_EXTS = listOf("h", "m", "mm") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + val sharpImportIncludeRegex = + Regex("""#(import|include)\s+["<](\w+)[/\w+]*\.\w+[">]""") + val atImportRegex = Regex("""@import\s+(\w+)""") + + fileContent.forEach { + val res = sharpImportIncludeRegex.findAll(it) + + atImportRegex.findAll(it) + if (res.toList().isNotEmpty()) { + val lineLib = res.toList().map { it.groupValues }.last().last() + libraries.add(lineLib) + } + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/PhpExtractor.kt b/src/main/kotlin/app/extractors/PhpExtractor.kt new file mode 100644 index 00000000..60b91215 --- /dev/null +++ b/src/main/kotlin/app/extractors/PhpExtractor.kt @@ -0,0 +1,37 @@ +// Copyright 2017 Sourcerer Inc. 
All Rights Reserved. +// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile + +class PhpExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "php" + val FILE_EXTS = listOf("php", "phtml", "php4", "php3", "php5", "phps") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + val useRegex = Regex("""use\s+(\w+)[\\\w+]*""") + val requireIncludeRegex = Regex("""(require|require_once|include|""" + + """include_once)\s*[(]?'(\w+)[.\w+]*'[)]?""") + fileContent.forEach { + val res = useRegex.findAll(it) + requireIncludeRegex.findAll(it) + if (res.toList().isNotEmpty()) { + val lineLib = res.toList().map { it.groupValues }.last().last() + libraries.add(lineLib) + } + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/PythonExtractor.kt b/src/main/kotlin/app/extractors/PythonExtractor.kt new file mode 100644 index 00000000..604027f3 --- /dev/null +++ b/src/main/kotlin/app/extractors/PythonExtractor.kt @@ -0,0 +1,37 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. 
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile + +class PythonExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "python" + val FILE_EXTS = listOf("py", "py3") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + val regex = + Regex("""(from\s+(\w+)[.\w+]*\s+import|import\s+(\w+[,\s*\w+]*))""") + fileContent.forEach { + val res = regex.find(it) + if (res != null) { + val lineLibs = res.groupValues.last { it != "" } + .split(Regex(""",\s*""")).map { it.trim().split(Regex("""\s+""")).first() } + libraries.addAll(lineLibs) + } + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/RubyExtractor.kt b/src/main/kotlin/app/extractors/RubyExtractor.kt new file mode 100644 index 00000000..98adaf27 --- /dev/null +++ b/src/main/kotlin/app/extractors/RubyExtractor.kt @@ -0,0 +1,35 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. 
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile + +class RubyExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "ruby" + val FILE_EXTS = listOf("rb", "rbw") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + val regex = Regex("""(require\s+['"](\w+)['"]|load\s+['"](\w+)\.\w+['"])""") + fileContent.forEach { + val res = regex.find(it) + if (res != null) { + val lineLib = res.groupValues.last { it -> it != "" } + libraries.add(lineLib) + } + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/extractors/SwiftExtractor.kt b/src/main/kotlin/app/extractors/SwiftExtractor.kt new file mode 100644 index 00000000..95d21848 --- /dev/null +++ b/src/main/kotlin/app/extractors/SwiftExtractor.kt @@ -0,0 +1,35 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. 
+// Author: Liubov Yaronskaya (lyaronskaya@sourcerer.io) +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.extractors + +import app.model.CommitStats +import app.model.DiffFile + +class SwiftExtractor : ExtractorInterface { + companion object { + val LANGUAGE_NAME = "swift" + val FILE_EXTS = listOf("swift") + } + + override fun extract(files: List): List { + files.forEach { file -> file.language = LANGUAGE_NAME } + return super.extract(files) + } + + override fun extractImports(fileContent: List): List { + val libraries = mutableSetOf() + + val regex = Regex("""import\s+(\w+)""") + fileContent.forEach { + val res = regex.find(it) + if (res != null) { + val lineLib = res.groupValues[1] + libraries.add(lineLib) + } + } + + return libraries.toList() + } +} diff --git a/src/main/kotlin/app/hashers/CommitHasher.kt b/src/main/kotlin/app/hashers/CommitHasher.kt index 38d8bc74..71bba8c7 100644 --- a/src/main/kotlin/app/hashers/CommitHasher.kt +++ b/src/main/kotlin/app/hashers/CommitHasher.kt @@ -8,8 +8,10 @@ import app.api.Api import app.config.Configurator import app.extractors.Extractor import app.model.Commit +import app.model.DiffContent import app.model.DiffEdit import app.model.DiffFile +import app.model.DiffRange import app.model.LocalRepo import app.model.Repo import app.utils.RepoHelper @@ -73,6 +75,7 @@ class CommitHasher(private val localRepo: LocalRepo, // OnNext }, { t -> // OnError Logger.error("Error while hashing: $t") + t.printStackTrace() }) } @@ -90,18 +93,16 @@ class CommitHasher(private val localRepo: LocalRepo, .map { diff -> val new = getContentByObjectId(diff.newId.toObjectId()) val old = getContentByObjectId(diff.oldId.toObjectId()) - val edits = formatter.toFileHeader(diff).toEditList() - val path = when (diff.changeType) { DiffEntry.ChangeType.DELETE -> diff.oldPath else -> diff.newPath } - DiffFile(path = path, - contentOld = old, - contentNew = new, - edits = edits.map { DiffEdit(it) }) + old = DiffContent(old, edits.map { edit -> 
DiffRange(edit.beginA, edit.endA) }), + new = DiffContent(new, edits.map { edit -> + DiffRange(edit.beginB, edit.endB) })) } } } diff --git a/src/main/kotlin/app/model/DiffContent.kt b/src/main/kotlin/app/model/DiffContent.kt new file mode 100644 index 00000000..82cb7d1e --- /dev/null +++ b/src/main/kotlin/app/model/DiffContent.kt @@ -0,0 +1,14 @@ +package app.model + +class DiffContent( + val content: List = listOf(), + val ranges: List = listOf(), + var imports: List = listOf() +) { + fun getAllDiffs(): List { + return ranges.fold(mutableListOf()) { total, range -> + total.addAll(content.subList(range.start, range.end)) + total + } + } +} diff --git a/src/main/kotlin/app/model/DiffEdit.kt b/src/main/kotlin/app/model/DiffEdit.kt index 022cabd2..33bab111 100644 --- a/src/main/kotlin/app/model/DiffEdit.kt +++ b/src/main/kotlin/app/model/DiffEdit.kt @@ -6,13 +6,11 @@ package app.model import org.eclipse.jgit.diff.Edit /** - * Edit is partial change of file. [delStart] (inclusive) and [delEnd] - * (exclusive) specifies range of deleted lines in old content, [addStart] - * (inclusive) and [addEnd] (exlusive) specifies range of added lines instead - * of deleted lines. Made to decouple statistics classes from JGit. + * Edit is partial change of file. [del] specifies range of deleted lines in old + * content, [add] specifies range of added lines instead of deleted lines. + * Made to decouple statistics classes from JGit. 
*/ -data class DiffEdit(val delStart: Int, val delEnd: Int, - val addStart: Int, val addEnd: Int) { - constructor(edit: Edit) : this(edit.beginA, edit.endA, - edit.beginB, edit.endB) +data class DiffEdit(val del: DiffRange, val add: DiffRange) { + constructor(edit: Edit) : this(DiffRange(edit.beginA, edit.endA), + DiffRange(edit.beginB, edit.endB)) } diff --git a/src/main/kotlin/app/model/DiffFile.kt b/src/main/kotlin/app/model/DiffFile.kt index 12df1a10..66a06a17 100644 --- a/src/main/kotlin/app/model/DiffFile.kt +++ b/src/main/kotlin/app/model/DiffFile.kt @@ -7,25 +7,17 @@ import app.utils.FileHelper class DiffFile( val path: String = "", - val contentOld: List = listOf(), - val contentNew: List = listOf(), - val imports: List = listOf(), - val language: String = "", - val edits: List = listOf() + val old: DiffContent = DiffContent(), + val new: DiffContent = DiffContent(), + var language: String = "" ) { val extension: String = FileHelper.getFileExtension(path) fun getAllAdded(): List { - return edits.fold(mutableListOf()) { total, edit -> - total.addAll(contentNew.subList(edit.addStart, edit.addEnd)) - total - } + return new.getAllDiffs() } fun getAllDeleted(): List { - return edits.fold(mutableListOf()) { total, edit -> - total.addAll(contentOld.subList(edit.delStart, edit.delEnd)) - total - } + return old.getAllDiffs() } } diff --git a/src/main/kotlin/app/model/DiffRange.kt b/src/main/kotlin/app/model/DiffRange.kt new file mode 100644 index 00000000..d78680cc --- /dev/null +++ b/src/main/kotlin/app/model/DiffRange.kt @@ -0,0 +1,9 @@ +// Copyright 2017 Sourcerer Inc. All Rights Reserved. +// Author: Anatoly Kislov (anatoly@sourcerer.io) + +package app.model + +/** + * [start] (inclusive) and [end] (exclusive) specifies range of changed lines. + */ +data class DiffRange(val start: Int, val end: Int)