/
MultiFeatureSpec.scala
135 lines (119 loc) · 4.34 KB
/
MultiFeatureSpec.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*
* Copyright 2017 Spotify AB.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.spotify.featran
import com.spotify.featran.transformers.Settings
/** Companion object for [[MultiFeatureSpec]]. */
object MultiFeatureSpec {

  /**
   * Combine several [[FeatureSpec]]s into a single [[MultiFeatureSpec]].
   *
   * Each feature's transformer name is mapped to the position, within `specs`, of the spec it was
   * declared in. If the same transformer name occurs in more than one spec, the position of the
   * last such spec is the one that is kept.
   *
   * @param specs
   *   specs to combine; at least one is required
   * @tparam T
   *   input record type shared by all specs
   * @throws java.lang.IllegalArgumentException
   *   if `specs` is empty
   */
  def apply[T](specs: FeatureSpec[T]*): MultiFeatureSpec[T] = {
    // Reducing an empty sequence would fail with an opaque "empty.reduceLeft" error, so reject
    // the empty case up front with a message that names the actual problem.
    require(specs.nonEmpty, "MultiFeatureSpec requires at least one FeatureSpec")

    // Transformer name -> index of the spec that declared the feature.
    val nameToSpec: Map[String, Int] = specs.iterator.zipWithIndex.flatMap { case (spec, index) =>
      spec.features.map(_.transformer.name -> index)
    }.toMap

    new MultiFeatureSpec(
      nameToSpec,
      specs.map(_.features).reduce(_ ++ _),
      specs.map(_.crossings).reduce(_ ++ _)
    )
  }
}
/**
 * Wrapper for [[FeatureSpec]] that allows for combination and separation of different specs.
 *
 * @param mapping
 *   index associated with each transformer name; when built through the companion `apply`, this is
 *   the position of the spec the feature came from
 * @param features
 *   all features of the combined specs
 * @param crossings
 *   feature crossings of the combined specs
 * @tparam T
 *   input record type
 */
class MultiFeatureSpec[T](
  private[featran] val mapping: Map[String, Int],
  private[featran] val features: Array[Feature[T, _, _, _]],
  private[featran] val crossings: Crossings
) {
  // A `def`, not a `val`: every access builds a new MultiFeatureSet from the current fields.
  private def multiFeatureSet: MultiFeatureSet[T] =
    new MultiFeatureSet[T](features, crossings, mapping)

  /**
   * Extract features from an input collection.
   *
   * This is done in two steps, a `reduce` step over the collection to aggregate feature summary,
   * and a `map` step to transform values using the summary.
   *
   * @param input
   *   input collection
   * @tparam M
   *   input collection type, e.g. `Array`, `List`
   */
  def extract[M[_]: CollectionType](input: M[T]): MultiFeatureExtractor[M, T] = {
    import CollectionType.ops._
    val fs = input.pure(multiFeatureSet)
    new MultiFeatureExtractor[M, T](fs, input, None)
  }

  /**
   * Creates a new MultiFeatureSpec with only the features that respect the given predicate.
   *
   * The name-to-index mapping and the crossings are filtered by the transformer names of the
   * features that were kept.
   *
   * @param predicate
   *   Function determining whether or not to include the feature
   */
  def filter(predicate: Feature[T, _, _, _] => Boolean): MultiFeatureSpec[T] = {
    val filteredFeatures = features.filter(predicate)
    // Only the names are needed for the membership checks below.
    val keptNames: Set[String] = filteredFeatures.iterator.map(_.transformer.name).toSet
    val filteredMapping = mapping.filter { case (name, _) => keptNames.contains(name) }
    val filteredCrossings = crossings.filter(keptNames.contains)
    new MultiFeatureSpec[T](filteredMapping, filteredFeatures, filteredCrossings)
  }

  /**
   * Extract features from an input collection using settings from a previous session.
   *
   * This bypasses the `reduce` step in [[extract]] and uses feature summary from settings exported
   * in a previous session.
   * @param input
   *   input collection
   * @param settings
   *   JSON settings from a previous session
   * @tparam M
   *   input collection type, e.g. `Array`, `List`
   */
  def extractWithSettings[M[_]: CollectionType](
    input: M[T],
    settings: M[String]
  ): MultiFeatureExtractor[M, T] = {
    import CollectionType.ops._
    val fs = input.pure(multiFeatureSet)
    new MultiFeatureExtractor[M, T](fs, input, Some(settings))
  }

  /**
   * Extract features from an input collection using partial settings from a previous session.
   *
   * This bypasses the `reduce` step in [[extract]] and uses feature summary from settings exported
   * in a previous session. Only the features whose transformer name appears in the decoded
   * settings are kept; all other features of this spec are dropped (see [[filter]]).
   * @param input
   *   input collection
   * @param settings
   *   JSON settings from a previous session
   * @tparam M
   *   input collection type, e.g. `Array`, `List`
   * @throws java.util.NoSuchElementException
   *   if a settings string cannot be decoded; raised when the function mapped over `settings` runs
   */
  def extractWithSubsetSettings[M[_]: CollectionType](
    input: M[T],
    settings: M[String]
  ): MultiFeatureExtractor[M, T] = {
    import json._
    import CollectionType.ops._
    val featureSet = settings.map { s =>
      val settingsJson = decode[Seq[Settings]](s) match {
        case Right(decoded) => decoded
        case Left(err) =>
          // Same exception type as the former `.right.get`, but the decode failure is reported
          // instead of being discarded.
          throw new NoSuchElementException(s"Failed to decode feature settings: $err")
      }
      // Collect the names once so each feature is checked with a set lookup.
      val settingNames = settingsJson.iterator.map(_.name).toSet
      filter(f => settingNames.contains(f.transformer.name)).multiFeatureSet
    }
    new MultiFeatureExtractor[M, T](featureSet, input, Some(settings))
  }
}