Skip to content

Commit

Permalink
feat: Add TextAnalyze transformer to add support for Text Analytics `/analyze` endpoint (#1267)
Browse files Browse the repository at this point in the history

* Initial implementation work

* Avoid using fixed task result to determine document count

* Add options for TextAnalyze tasks to run

* Add example notebook for TextAnalyze

* fix style

* Fix typo in build helpers

* chore: some TA spring cleaning

* minor rename of notebook
  • Loading branch information
stuartleeks committed Dec 2, 2021
1 parent 3898ad9 commit 6ea8a9a
Show file tree
Hide file tree
Showing 10 changed files with 710 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -332,10 +332,4 @@ abstract class CognitiveServicesBaseNoHandler(val uid: String) extends Transform
}

abstract class CognitiveServicesBase(uid: String) extends
CognitiveServicesBaseNoHandler(uid) with HasHandler {
setDefault(handler -> HandlingUtils.advancedUDF(100)) //scalastyle:ignore magic.number

override def handlingFunc(client: CloseableHttpClient,
request: HTTPRequestData): HTTPResponseData =
getHandler(client, request)
}
CognitiveServicesBaseNoHandler(uid) with HasHandler
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ trait BasicAsyncReply extends HasAsyncReply {
request: HTTPRequestData): HTTPResponseData = {
val response = HandlingUtils.advanced(getBackoffs: _*)(client, request)
if (response.statusLine.statusCode == 202) {
val location = new URI(response.headers.filter(_.name == "Operation-Location").head.value)
val location = new URI(response.headers.filter(_.name.toLowerCase() == "operation-location").head.value)
val maxTries = getMaxPollingRetries
val key = request.headers.find(_.name == "Ocp-Apim-Subscription-Key").map(_.value)
val it = (0 to maxTries).toIterator.flatMap { _ =>
Expand All @@ -255,7 +255,6 @@ trait BasicAsyncReply extends HasAsyncReply {
}
}


trait HasAsyncReply extends Params {
val backoffs: IntArrayParam = new IntArrayParam(
this, "backoffs", "array of backoffs to use in the handler")
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.azure.synapse.ml.cognitive

import com.microsoft.azure.synapse.ml.core.schema.SparkBindings

// Text Analytics /analyze endpoint schemas

// Request body fragment for /analyze: the batch of documents to be analyzed.
case class TAAnalyzeAnalysisInput(documents: Seq[TADocument])

// Companion supplies Spark SQL schema/encoder bindings for the case class.
object TAAnalyzeAnalysisInput extends SparkBindings[TAAnalyzeAnalysisInput]

// A single analysis task in the /analyze request; `parameters` carries the
// task's options as free-form key/value pairs (e.g. model-version settings).
case class TAAnalyzeTask(parameters: Map[String, String])

// Companion supplies Spark SQL schema/encoder bindings for the case class.
object TAAnalyzeTask extends SparkBindings[TAAnalyzeTask]

// The full set of task groups that can be requested from the /analyze
// endpoint, one Seq per supported Text Analytics task type. Empty Seqs mean
// the task type is not requested.
case class TAAnalyzeTasks(entityRecognitionTasks: Seq[TAAnalyzeTask],
                          entityLinkingTasks: Seq[TAAnalyzeTask],
                          entityRecognitionPiiTasks: Seq[TAAnalyzeTask],
                          keyPhraseExtractionTasks: Seq[TAAnalyzeTask],
                          sentimentAnalysisTasks: Seq[TAAnalyzeTask])

// Companion supplies Spark SQL schema/encoder bindings for the case class.
object TAAnalyzeTasks extends SparkBindings[TAAnalyzeTasks]

// Top-level /analyze request: a caller-chosen display name, the documents to
// analyze, and the tasks to run against them.
case class TAAnalyzeRequest(displayName: String,
                            analysisInput: TAAnalyzeAnalysisInput,
                            tasks: TAAnalyzeTasks)

// Companion supplies Spark SQL schema/encoder bindings for the case class.
object TAAnalyzeRequest extends SparkBindings[TAAnalyzeRequest]


// Per-task results in the /analyze response: one `documents` entry per
// successfully processed document (typed per task, e.g. NERDocV3), plus any
// per-document errors and the service model version used.
// NOTE: generic classes cannot have SparkBindings companions, so none is
// declared here (or for TAAnalyzeResponseTask below).
case class TAAnalyzeResponseTaskResults[T](documents: Seq[T],
                                           errors: Seq[TAError],
                                           modelVersion: String)

// One task entry in the /analyze response: its state string plus the typed
// results payload for that task.
case class TAAnalyzeResponseTask[T](state: String,
                                    results: TAAnalyzeResponseTaskResults[T])

// The `tasks` section of the /analyze response: progress counters (presumably
// counts of tasks by state — confirm against the service spec) plus one
// Option[Seq] per task type, None when that task type was not requested.
case class TAAnalyzeResponseTasks(completed: Int,
                                  failed: Int,
                                  inProgress: Int,
                                  total: Int,
                                  entityRecognitionTasks: Option[Seq[TAAnalyzeResponseTask[NERDocV3]]],
                                  entityLinkingTasks: Option[Seq[TAAnalyzeResponseTask[DetectEntitiesScoreV3]]],
                                  entityRecognitionPiiTasks: Option[Seq[TAAnalyzeResponseTask[PIIDocV3]]],
                                  keyPhraseExtractionTasks: Option[Seq[TAAnalyzeResponseTask[KeyPhraseScoreV3]]],
                                  sentimentAnalysisTasks: Option[Seq[TAAnalyzeResponseTask[SentimentScoredDocumentV3]]]
                                 )

// Top-level /analyze API call response: overall operation status, optional
// operation-level errors, the request's display name, and the task results.
case class TAAnalyzeResponse(status: String,
                             errors: Option[Seq[TAError]],
                             displayName: String,
                             tasks: TAAnalyzeResponseTasks)

// Companion supplies Spark SQL schema/encoder bindings for the case class.
object TAAnalyzeResponse extends SparkBindings[TAAnalyzeResponse]

// Per-document outcome in the flattened output column: exactly one of
// `result` or `error` is expected to be defined (success vs. failure).
// NOTE(review): that exclusivity is implied by the shape, not enforced here.
case class TAAnalyzeResultTaskResults[T](result: Option[T],
                                         error: Option[TAError])

// Flattened per-row result written by the transformer: one Option[Seq] per
// task type (None when the task was not requested), each Seq holding one
// entry per task run of that type.
case class TAAnalyzeResult(entityRecognition: Option[Seq[TAAnalyzeResultTaskResults[NERDocV3]]],
                           entityLinking: Option[Seq[TAAnalyzeResultTaskResults[DetectEntitiesScoreV3]]],
                           entityRecognitionPii: Option[Seq[TAAnalyzeResultTaskResults[PIIDocV3]]],
                           keyPhraseExtraction: Option[Seq[TAAnalyzeResultTaskResults[KeyPhraseScoreV3]]],
                           sentimentAnalysis: Option[Seq[TAAnalyzeResultTaskResults[SentimentScoredDocumentV3]]])

// NOTE(review): object name is plural ("Results") while the bound class is
// singular ("Result") — inconsistent with the other companions above, but
// renaming would break any existing callers of TAAnalyzeResults.schema.
object TAAnalyzeResults extends SparkBindings[TAAnalyzeResult]
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package com.microsoft.azure.synapse.ml.cognitive

import com.microsoft.azure.synapse.ml.core.schema.SparkBindings
import spray.json.RootJsonFormat
import org.apache.spark.ml.param.Params
import org.apache.spark.ml.param.ParamValidators

// General Text Analytics Schemas

Expand Down Expand Up @@ -36,7 +38,10 @@ object TAJSONFormat {

implicit val DocumentFormat: RootJsonFormat[TADocument] = jsonFormat3(TADocument.apply)
implicit val RequestFormat: RootJsonFormat[TARequest] = jsonFormat1(TARequest.apply)

implicit val AnalysisInputsFormat: RootJsonFormat[TAAnalyzeAnalysisInput] = jsonFormat1(TAAnalyzeAnalysisInput.apply)
implicit val AnalysisTaskFormat: RootJsonFormat[TAAnalyzeTask] = jsonFormat1(TAAnalyzeTask.apply)
implicit val AnalysisTasksFormat: RootJsonFormat[TAAnalyzeTasks] = jsonFormat5(TAAnalyzeTasks.apply)
implicit val AnalyzeRequestFormat: RootJsonFormat[TAAnalyzeRequest] = jsonFormat3(TAAnalyzeRequest.apply)
}

// SentimentV3 Schemas
Expand Down
Loading

0 comments on commit 6ea8a9a

Please sign in to comment.