From f9b6bf2f83d9dad273aa36d65d0560d35b941cc2 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Mon, 29 Jun 2015 18:50:23 -0700 Subject: [PATCH] [SPARK-7667] [MLLIB] MLlib Python API consistency check MLlib Python API consistency check Author: Yanbo Liang Closes #6856 from yanboliang/spark-7667 and squashes the following commits: 21bae35 [Yanbo Liang] remove duplicate code eb12f95 [Yanbo Liang] fix doc inherit problem 9e7ec3c [Yanbo Liang] address comments e763d32 [Yanbo Liang] MLlib Python API consistency check --- python/pyspark/mllib/feature.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index f00bb93b7bf40..b5138773fd61b 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -111,6 +111,15 @@ class JavaVectorTransformer(JavaModelWrapper, VectorTransformer): """ def transform(self, vector): + """ + Applies transformation on a vector or an RDD[Vector]. + + Note: In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. + + :param vector: Vector or RDD of Vector to be transformed. + """ if isinstance(vector, RDD): vector = vector.map(_convert_to_vector) else: @@ -191,7 +200,7 @@ def fit(self, dataset): Computes the mean and variance and stores as a model to be used for later scaling. - :param data: The data used to compute the mean and variance + :param dataset: The data used to compute the mean and variance to build the transformation model. :return: a StandardScalarModel """ @@ -346,10 +355,6 @@ def transform(self, x): vector :return: an RDD of TF-IDF vectors or a TF-IDF vector """ - if isinstance(x, RDD): - return JavaVectorTransformer.transform(self, x) - - x = _convert_to_vector(x) return JavaVectorTransformer.transform(self, x) def idf(self):