Skip to content

Commit

Permalink
Text Analytics - Add Opinion mining (#1192)
Browse files Browse the repository at this point in the history
* fix: fix setLinkedService issues in Synapse (#1177)

* doc: add predictive maintenance notebook

squash

* fix: fix cog service test flakes

* feat: add NERPii

* fix: fix scala style error

* fix: rename NERPii to PII

* fix: fix anomaly detector test cases

* fix: fix cognitive service errors (#1176)

fix Left & Right errors
Enhancement for text translator

* chore: Add script to clean and back up ACR

* fix: fix setLinkedService in Synapse

* initial commit

* resolving comments

* updated opinions test

Co-authored-by: wenqing xu <80103478+xuwq1993@users.noreply.github.com>
Co-authored-by: Mark <mhamilton723@gmail.com>
Co-authored-by: xuwq1993 <wenqx@microsoft.com>
Co-authored-by: serena-ruan <82044803+serena-ruan@users.noreply.github.com>
  • Loading branch information
5 people committed Sep 23, 2021
1 parent f11724f commit 8a3bcec
Show file tree
Hide file tree
Showing 24 changed files with 1,156 additions and 201 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,25 @@

package com.microsoft.ml.spark.cognitive

import java.net.URL
import com.microsoft.ml.spark.core.utils.AsyncUtils
import com.microsoft.ml.spark.logging.BasicLogging
import com.microsoft.ml.spark.stages.Lambda
import org.apache.commons.io.IOUtils
import org.apache.http.client.methods.{HttpGet, HttpRequestBase}
import org.apache.http.entity.AbstractHttpEntity
import org.apache.spark.binary.ConfUtils
import org.apache.spark.injections.UDFUtils
import org.apache.spark.ml.ComplexParamsReadable
import org.apache.spark.ml.param.ServiceParam
import org.apache.spark.ml.util._
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.functions.{col, explode, udf}
import org.apache.spark.sql.functions.{col, explode}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row}
import spray.json.DefaultJsonProtocol._

import java.net.URL
import scala.concurrent.duration.Duration
import scala.concurrent.{ExecutionContext, Future}
import spray.json.DefaultJsonProtocol._

object BingImageSearch extends ComplexParamsReadable[BingImageSearch] with Serializable {

Expand Down Expand Up @@ -68,13 +66,15 @@ object BingImageSearch extends ComplexParamsReadable[BingImageSearch] with Seria

class BingImageSearch(override val uid: String)
extends CognitiveServicesBase(uid)
with HasCognitiveServiceInput with HasInternalJsonOutputParser with BasicLogging {
with HasCognitiveServiceInput with HasInternalJsonOutputParser with BasicLogging with HasSetLinkedService {
logClass()

override protected lazy val pyInternalWrapper = true

def this() = this(Identifiable.randomUID("BingImageSearch"))

def urlPath: String = "/v7.0/images/search"

setDefault(url -> "https://api.bing.microsoft.com/v7.0/images/search")

override def prepareMethod(): HttpRequestBase = new HttpGet()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,19 @@ trait HasSetLinkedService extends Wrappable with HasURL with HasSubscriptionKey
}
}

/** Variant of [[HasSetLinkedService]] for services configured by region rather than by
  * service name: resolves a Synapse linked service into a (location, subscription key)
  * pair via reflection on `mssparkutils.cognitiveService`, which is only present on the
  * Synapse runtime classpath.
  */
trait HasSetLinkedServiceUsingLocation extends HasSetLinkedService with HasSetLocation {
  /** Configures location and subscription key from the named Synapse linked service.
    *
    * @param v the name of the linked service to resolve
    * @throws NoSuchElementException if `mssparkutils.cognitiveService` cannot be loaded
    *                                (i.e. not running on a Synapse cluster)
    */
  override def setLinkedService(v: String): this.type = {
    val classPath = "mssparkutils.cognitiveService"
    // Fail with a message that names the missing class instead of a bare Option.get error.
    val linkedServiceClass = ScalaClassLoader(getClass.getClassLoader)
      .tryToLoadClass(classPath)
      .getOrElse(throw new NoSuchElementException(
        s"Could not load $classPath; setLinkedService is only supported inside Synapse."))
    // getLocation/getKey are looked up with a String parameter; the Class object is
    // passed as the receiver (ignored for static methods) — presumably these are
    // static members of the Synapse utility, matching the original call shape.
    val locationMethod = linkedServiceClass.getMethod("getLocation", v.getClass)
    val keyMethod = linkedServiceClass.getMethod("getKey", v.getClass)
    val location = locationMethod.invoke(linkedServiceClass, v).toString
    val key = keyMethod.invoke(linkedServiceClass, v).toString
    setLocation(location)
    setSubscriptionKey(key)
  }
}

trait HasSetLocation extends Wrappable with HasURL with HasUrlPath {
override def pyAdditionalMethods: String = super.pyAdditionalMethods + {
"""
Expand Down Expand Up @@ -277,6 +290,12 @@ abstract class CognitiveServicesBaseNoHandler(val uid: String) extends Transform
assert(badColumns.isEmpty,
s"Could not find dynamic columns: $badColumns in columns: ${schema.fieldNames.toSet}")

val missingRequiredParams = this.getRequiredParams.filter {
p => this.get(p).isEmpty && this.getDefault(p).isEmpty
}
assert(missingRequiredParams.isEmpty,
s"Missing required params: ${missingRequiredParams.map(s => s.name).mkString("(", ", ", ")")}")

val dynamicParamCols = getVectorParamMap.values.toList.map(col) match {
case Nil => Seq(lit(false).alias("placeholder"))
case l => l
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ class RecognizeText(override val uid: String)
"printed text recognition is performed. If 'Handwritten' is specified," +
" handwriting recognition is performed",
{
case Left(_) => true
case Right(s) => Set("Printed", "Handwritten")(s)
case Left(s) => Set("Printed", "Handwritten")(s)
case Right(_) => true
}, isURLParam = true)

def getMode: String = getScalarParam(mode)
Expand Down Expand Up @@ -361,8 +361,8 @@ class ReadImage(override val uid: String)
" so only provide a language code if you would like to force the documented" +
" to be processed as that specific language.",
{
case Left(_) => true
case Right(s) => Set("en", "nl", "fr", "de", "it", "pt", "es")(s)
case Left(s) => Set("en", "nl", "fr", "de", "it", "pt", "es")(s)
case Right(_) => true
}, isURLParam = true)

def setLanguage(v: String): this.type = setScalarParam(language, v)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row}
import spray.json._

import java.net.URI
import scala.reflect.internal.util.ScalaClassLoader

trait DocumentTranslatorAsyncReply extends BasicAsyncReply {

Expand Down Expand Up @@ -137,6 +138,17 @@ class DocumentTranslator(override val uid: String) extends CognitiveServicesBase
))).toJson.compactPrint, ContentType.APPLICATION_JSON))
}

/** Configures this translator from the named Synapse linked service, resolving the
  * service name and subscription key via reflection on `mssparkutils.cognitiveService`
  * (only available on the Synapse runtime classpath).
  *
  * @param v the name of the linked service to resolve
  * @throws NoSuchElementException if `mssparkutils.cognitiveService` cannot be loaded
  *                                (i.e. not running on a Synapse cluster)
  */
override def setLinkedService(v: String): this.type = {
  val classPath = "mssparkutils.cognitiveService"
  // Surface a descriptive error instead of a bare Option.get failure when the
  // Synapse utility class is absent.
  val linkedServiceClass = ScalaClassLoader(getClass.getClassLoader)
    .tryToLoadClass(classPath)
    .getOrElse(throw new NoSuchElementException(
      s"Could not load $classPath; setLinkedService is only supported inside Synapse."))
  // getName/getKey are looked up with a String parameter; the Class object is passed
  // as the receiver (ignored for static methods), matching the original call shape.
  val nameMethod = linkedServiceClass.getMethod("getName", v.getClass)
  val keyMethod = linkedServiceClass.getMethod("getKey", v.getClass)
  val name = nameMethod.invoke(linkedServiceClass, v).toString
  val key = keyMethod.invoke(linkedServiceClass, v).toString
  setServiceName(name)
  setSubscriptionKey(key)
}

override def setServiceName(v: String): this.type = {
super.setServiceName(v)
setUrl(s"https://$getServiceName.cognitiveservices.azure.com/" + urlPath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ trait HasModelID extends HasServiceParams {
trait HasLocale extends HasServiceParams {
val locale = new ServiceParam[String](this, "locale", "Locale of the receipt. Supported" +
" locales: en-AU, en-CA, en-GB, en-IN, en-US.", {
case Left(_) => true
case Right(s) => Set("en-AU", "en-CA", "en-GB", "en-IN", "en-US")(s)
case Left(s) => Set("en-AU", "en-CA", "en-GB", "en-IN", "en-US")(s)
case Right(_) => true
}, isURLParam = true)

def setLocale(v: String): this.type = setScalarParam(locale, v)
Expand Down Expand Up @@ -258,7 +258,7 @@ object ListCustomModels extends ComplexParamsReadable[ListCustomModels]

class ListCustomModels(override val uid: String) extends CognitiveServicesBase(uid)
with HasCognitiveServiceInput with HasInternalJsonOutputParser
with HasSetLocation with BasicLogging {
with HasSetLocation with HasSetLinkedService with BasicLogging {
logClass()

def this() = this(Identifiable.randomUID("ListCustomModels"))
Expand All @@ -283,7 +283,7 @@ object GetCustomModel extends ComplexParamsReadable[GetCustomModel]

class GetCustomModel(override val uid: String) extends CognitiveServicesBase(uid)
with HasCognitiveServiceInput with HasInternalJsonOutputParser
with HasSetLocation with BasicLogging with HasModelID {
with HasSetLocation with HasSetLinkedService with BasicLogging with HasModelID {
logClass()

def this() = this(Identifiable.randomUID("GetCustomModel"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ object SpeechToText extends ComplexParamsReadable[SpeechToText] with Serializabl

class SpeechToText(override val uid: String) extends CognitiveServicesBase(uid)
with HasCognitiveServiceInput with HasInternalJsonOutputParser with HasSetLocation with BasicLogging
with HasSetLinkedService {
with HasSetLinkedServiceUsingLocation {
logClass()

def this() = this(Identifiable.randomUID("SpeechToText"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private[ml] class BlockingQueueIterator[T](lbq: LinkedBlockingQueue[Option[T]],
abstract class SpeechSDKBase extends Transformer
with HasSetLocation with HasServiceParams
with HasOutputCol with HasURL with HasSubscriptionKey with ComplexParamsWritable with BasicLogging
with HasSetLinkedService {
with HasSetLinkedServiceUsingLocation {

type ResponseType <: SharedSpeechFields

Expand Down Expand Up @@ -198,16 +198,6 @@ abstract class SpeechSDKBase extends Transformer

def urlPath: String = "/sts/v1.0/issuetoken"

override def setLinkedService(v: String): this.type = {
val classPath = "mssparkutils.cognitiveService"
val linkedServiceClass = ScalaClassLoader(getClass.getClassLoader).tryToLoadClass(classPath)
val locationMethod = linkedServiceClass.get.getMethod("getLocation", v.getClass)
val keyMethod = linkedServiceClass.get.getMethod("getKey", v.getClass)
val location = locationMethod.invoke(linkedServiceClass.get, v).toString
val key = keyMethod.invoke(linkedServiceClass.get, v).toString
setLocation(location)
setSubscriptionKey(key)
}

setDefault(language -> Left("en-us"))
setDefault(profanity -> Left("Masked"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ abstract class TextAnalyticsBase(override val uid: String) extends CognitiveServ
override protected def getInternalTransformer(schema: StructType): PipelineModel = {
val dynamicParamColName = DatasetExtensions.findUnusedColumnName("dynamic", schema)

val missingRequiredParams = this.getRequiredParams.filter {
p => this.get(p).isEmpty && this.getDefault(p).isEmpty
}
assert(missingRequiredParams.isEmpty,
s"Missing required params: ${missingRequiredParams.map(s => s.name).mkString("(", ", ", ")")}")

def reshapeToArray(parameterName: String): Option[(Transformer, String, String)] = {
val reshapedColName = DatasetExtensions.findUnusedColumnName(parameterName, schema)
getVectorParamMap.get(parameterName).flatMap {
Expand Down Expand Up @@ -296,6 +302,18 @@ class NER(override val uid: String) extends TextAnalyticsBase(uid) with BasicLog
def urlPath: String = "/text/analytics/v3.0/entities/recognition/general"
}

/** Companion object providing `ComplexParamsReadable` (load/read) support for [[PII]]. */
object PII extends ComplexParamsReadable[PII]

/** Transformer backed by the Text Analytics v3.1 PII recognition endpoint
  * (`/text/analytics/v3.1/entities/recognition/pii`); responses are decoded with the
  * [[PIIResponseV3]] schema.
  */
class PII(override val uid: String) extends TextAnalyticsBase(uid) with BasicLogging {
  logClass()

  /** Creates the transformer with a randomly generated uid. */
  def this() = this(Identifiable.randomUID("PII"))

  /** Path of the PII recognition endpoint, appended to the service URL. */
  def urlPath: String = "/text/analytics/v3.1/entities/recognition/pii"

  /** Spark schema used to parse the service's JSON response. */
  override def responseDataType: StructType = PIIResponseV3.schema
}

object LanguageDetector extends ComplexParamsReadable[LanguageDetector]

class LanguageDetector(override val uid: String)
Expand Down
Loading

0 comments on commit 8a3bcec

Please sign in to comment.