Skip to content

Commit

Permalink
Merge branch 'master' into jupyter
Browse files Browse the repository at this point in the history
  • Loading branch information
tovbinm committed Mar 7, 2019
2 parents c84758f + 4da94f8 commit 41bd221
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ private[op] trait TransmogrifierDefaults {
val MaxCategoricalCardinality = 30
val CircularDateRepresentations: Seq[TimePeriod] = Seq(TimePeriod.HourOfDay, TimePeriod.DayOfWeek,
TimePeriod.DayOfMonth, TimePeriod.DayOfYear)

// Defaults re-exposed from PhoneNumberParser and TextTokenizer under local names.
// Each val simply reads the same-named member of the object on its right-hand side.
// Default region, taken from PhoneNumberParser.DefaultRegion.
val DefaultRegion: String = PhoneNumberParser.DefaultRegion
// Language auto-detection flag, taken from TextTokenizer.AutoDetectLanguage.
val AutoDetectLanguage: Boolean = TextTokenizer.AutoDetectLanguage
// Minimum token length, taken from TextTokenizer.MinTokenLength.
val MinTokenLength: Int = TextTokenizer.MinTokenLength
// Lowercasing flag, taken from TextTokenizer.ToLowercase.
val ToLowercase: Boolean = TextTokenizer.ToLowercase
}

/**
 * Singleton that extends the TransmogrifierDefaults trait without adding any members of its own,
 * so the trait's values can be referenced through this object. Visibility is restricted to the
 * enclosing `op` package by the `private[op]` modifier.
 */
private[op] object TransmogrifierDefaults extends TransmogrifierDefaults
Expand Down Expand Up @@ -178,7 +183,7 @@ private[op] case object Transmogrifier {
trackNulls = TrackNulls, trackInvalid = TrackInvalid, minInfoGain = MinInfoGain, label = label)
case t if t =:= weakTypeOf[PhoneMap] =>
val (f, other) = castAs[PhoneMap](g) // TODO make better default
f.vectorize(defaultRegion = PhoneNumberParser.DefaultRegion, others = other, trackNulls = TrackNulls)
f.vectorize(defaultRegion = DefaultRegion, others = other, trackNulls = TrackNulls)
case t if t =:= weakTypeOf[PickListMap] =>
val (f, other) = castAs[PickListMap](g)
f.vectorize(topK = TopK, minSupport = MinSupport, cleanText = CleanText, cleanKeys = CleanKeys,
Expand All @@ -190,15 +195,15 @@ private[op] case object Transmogrifier {
case t if t =:= weakTypeOf[TextAreaMap] =>
val (f, other) = castAs[TextAreaMap](g)
f.smartVectorize(maxCategoricalCardinality = MaxCategoricalCardinality,
numHashes = DefaultNumOfFeatures, autoDetectLanguage = TextTokenizer.AutoDetectLanguage,
minTokenLength = TextTokenizer.MinTokenLength, toLowercase = TextTokenizer.ToLowercase,
numHashes = DefaultNumOfFeatures, autoDetectLanguage = AutoDetectLanguage,
minTokenLength = MinTokenLength, toLowercase = ToLowercase,
prependFeatureName = PrependFeatureName, cleanText = CleanText, cleanKeys = CleanKeys,
others = other, trackNulls = TrackNulls)
case t if t =:= weakTypeOf[TextMap] =>
val (f, other) = castAs[TextMap](g)
f.smartVectorize(maxCategoricalCardinality = MaxCategoricalCardinality,
numHashes = DefaultNumOfFeatures, autoDetectLanguage = TextTokenizer.AutoDetectLanguage,
minTokenLength = TextTokenizer.MinTokenLength, toLowercase = TextTokenizer.ToLowercase,
numHashes = DefaultNumOfFeatures, autoDetectLanguage = AutoDetectLanguage,
minTokenLength = MinTokenLength, toLowercase = ToLowercase,
prependFeatureName = PrependFeatureName, cleanText = CleanText, cleanKeys = CleanKeys,
others = other, trackNulls = TrackNulls)
case t if t =:= weakTypeOf[URLMap] =>
Expand Down Expand Up @@ -285,7 +290,7 @@ private[op] case object Transmogrifier {
others = other)
case t if t =:= weakTypeOf[Phone] =>
val (f, other) = castAs[Phone](g)
f.vectorize(defaultRegion = PhoneNumberParser.DefaultRegion, others = other)
f.vectorize(defaultRegion = DefaultRegion, others = other)
case t if t =:= weakTypeOf[PickList] =>
val (f, other) = castAs[PickList](g)
f.vectorize(topK = TopK, minSupport = MinSupport, cleanText = CleanText, trackNulls = TrackNulls,
Expand All @@ -294,15 +299,15 @@ private[op] case object Transmogrifier {
val (f, other) = castAs[Text](g)
f.smartVectorize(maxCategoricalCardinality = MaxCategoricalCardinality,
trackNulls = TrackNulls, numHashes = DefaultNumOfFeatures,
hashSpaceStrategy = defaults.HashSpaceStrategy, autoDetectLanguage = TextTokenizer.AutoDetectLanguage,
minTokenLength = TextTokenizer.MinTokenLength, toLowercase = TextTokenizer.ToLowercase,
hashSpaceStrategy = defaults.HashSpaceStrategy, autoDetectLanguage = AutoDetectLanguage,
minTokenLength = MinTokenLength, toLowercase = ToLowercase,
prependFeatureName = PrependFeatureName, others = other)
case t if t =:= weakTypeOf[TextArea] =>
val (f, other) = castAs[TextArea](g)
f.smartVectorize(maxCategoricalCardinality = MaxCategoricalCardinality,
trackNulls = TrackNulls, numHashes = DefaultNumOfFeatures,
hashSpaceStrategy = defaults.HashSpaceStrategy, autoDetectLanguage = TextTokenizer.AutoDetectLanguage,
minTokenLength = TextTokenizer.MinTokenLength, toLowercase = TextTokenizer.ToLowercase,
hashSpaceStrategy = defaults.HashSpaceStrategy, autoDetectLanguage = AutoDetectLanguage,
minTokenLength = MinTokenLength, toLowercase = ToLowercase,
prependFeatureName = PrependFeatureName, others = other)
case t if t =:= weakTypeOf[URL] =>
val (f, other) = castAs[URL](g)
Expand Down
4 changes: 4 additions & 0 deletions docs/talks/index.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Talks

**2019**
* [Automated Machine Learning with TransmogrifAI](https://vaultanalytics.com/podcast/automated-machine-learning-with-transmogrifai/), Till Bergmann, Data Crunch Podcast
* [Automated ML Pipelines For Unseen Customer Data](https://www.youtube.com/watch?v=IZyceNOSitI), Kevin Moore, PAPIs.io, [Slides](https://drive.google.com/file/d/1MStBS4tR1yuklCuDCrZHrejNAFk1j_k9/view)

**2018**
* [AutoML: The Assembly Line of Machine Learning](http://www.dataengconf.com/automl-the-assembly-line-of-machine-learning), Mayukh Bhaowal, DataEngConf
* [The Black Swan of Perfectly Interpretable Models](https://www.infoq.com/presentations/salesforce-einstein-ml), Leah McGuire and Mayukh Bhaowal, QCon.ai
Expand Down

0 comments on commit 41bd221

Please sign in to comment.