Skip to content
This repository has been archived by the owner on Oct 8, 2019. It is now read-only.

Commit

Permalink
Added extract_weight(string featureVectors)::weights UDF
Browse files Browse the repository at this point in the history
  • Loading branch information
myui committed Jul 4, 2014
1 parent c3d722d commit 88af7b4
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 0 deletions.
3 changes: 3 additions & 0 deletions scripts/ddl/define-all.hive
Expand Up @@ -176,6 +176,9 @@ create temporary function sortByFeature as 'hivemall.ftvec.SortByFeatureUDF';
drop temporary function extract_feature;
create temporary function extract_feature as 'hivemall.ftvec.ExtractFeatureUDF';

drop temporary function extract_weight;
create temporary function extract_weight as 'hivemall.ftvec.ExtractWeightUDF';

--------------------------
-- Regression functions --
--------------------------
Expand Down
2 changes: 2 additions & 0 deletions scripts/ddl/define-ftvec-udf.hive
Expand Up @@ -57,3 +57,5 @@ create temporary function sortByFeature as 'hivemall.ftvec.SortByFeatureUDF';
drop temporary function extract_feature;
create temporary function extract_feature as 'hivemall.ftvec.ExtractFeatureUDF';

drop temporary function extract_weight;
create temporary function extract_weight as 'hivemall.ftvec.ExtractWeightUDF';
65 changes: 65 additions & 0 deletions src/main/hivemall/ftvec/ExtractWeightUDF.java
@@ -0,0 +1,65 @@
/*
* Hivemall: Hive scalable Machine Learning Library
*
* Copyright (C) 2013
* National Institute of Advanced Industrial Science and Technology (AIST)
* Registration Number: H25PRO-1520
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package hivemall.ftvec;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.FloatWritable;

/**
 * Hive UDF that extracts the weight part of features written as {@code "name:weight"}.
 *
 * <p>Two overloads are exposed: one for a single feature string and one for a list of
 * feature strings. A {@code null} feature yields a {@code null} weight; a {@code null}
 * list yields {@code null}.
 *
 * <p>Every malformed feature, whether it has the wrong number of {@code ':'}-separated
 * parts or a weight that is not a parsable float, is reported as a
 * {@link UDFArgumentException} that includes the offending feature string.
 */
@Description(name = "extract_weight", value = "_FUNC_(feature_vector in array<string>) - Returns the weights of features in array<string>")
@UDFType(deterministic = true, stateful = false)
public class ExtractWeightUDF extends UDF {

    /**
     * Extracts the weight of a single feature.
     *
     * @param featureVector a feature in {@code "name:weight"} form, may be {@code null}
     * @return the parsed weight, or {@code null} when {@code featureVector} is {@code null}
     * @throws UDFArgumentException if the feature is not exactly {@code "name:weight"} or
     *         the weight is not a valid float
     */
    public FloatWritable evaluate(String featureVector) throws UDFArgumentException {
        return extractWeights(featureVector);
    }

    /**
     * Extracts the weight of each feature in the given list, preserving order.
     *
     * @param featureVectors features in {@code "name:weight"} form, may be {@code null};
     *        {@code null} elements produce {@code null} entries in the result
     * @return a fixed-size list of weights with the same length as the input, or
     *         {@code null} when {@code featureVectors} is {@code null}
     * @throws UDFArgumentException if any non-null element is malformed
     */
    public List<FloatWritable> evaluate(List<String> featureVectors) throws UDFArgumentException {
        if(featureVectors == null) {
            return null;
        }
        final int size = featureVectors.size();
        final FloatWritable[] output = new FloatWritable[size];
        for(int i = 0; i < size; i++) {
            output[i] = extractWeights(featureVectors.get(i));
        }
        return Arrays.asList(output);
    }

    /**
     * Parses the weight out of one {@code "name:weight"} string.
     *
     * @param ftvec the feature string, may be {@code null}
     * @return the weight wrapped in a {@link FloatWritable}, or {@code null} for {@code null} input
     * @throws UDFArgumentException if splitting on {@code ':'} does not yield exactly two
     *         parts, or if the second part cannot be parsed as a float
     */
    private static FloatWritable extractWeights(String ftvec) throws UDFArgumentException {
        if(ftvec == null) {
            return null;
        }
        final String[] splits = ftvec.split(":");
        if(splits.length != 2) {
            throw new UDFArgumentException("Unexpected feature vector representation: " + ftvec);
        }
        final float weight;
        try {
            // parseFloat avoids the needless Float boxing of Float.valueOf.
            weight = Float.parseFloat(splits[1]);
        } catch (NumberFormatException e) {
            // Report a bad weight through the declared exception type, with the offending
            // input, instead of leaking an unchecked NumberFormatException without context.
            UDFArgumentException ex = new UDFArgumentException("Unexpected feature vector representation: "
                    + ftvec);
            ex.initCause(e);
            throw ex;
        }
        return new FloatWritable(weight);
    }

}

0 comments on commit 88af7b4

Please sign in to comment.