In [1]:
from pyspark.sql import SparkSession

# Get (or create) the Spark session used by every later cell, pointed at the
# "local" master and registered under the app name "logistic-regression".
spark = (
    SparkSession.builder
    .master("local")
    .appName("logistic-regression")
    .getOrCreate()
)

In [2]:
from pyspark.ml.linalg import Vectors
from pyspark.ml.classification import LogisticRegression

In [3]:
# Tiny hand-written training set: each row is (label, dense 3-element feature vector).
training = spark.createDataFrame(
    [
        (1.0, Vectors.dense([0.0, 1.1, 0.1])),
        (0.0, Vectors.dense([2.0, 1.0, -1.0])),
        (0.0, Vectors.dense([2.0, 1.3, 1.0])),
        (1.0, Vectors.dense([0.0, 1.2, -0.5])),
    ],
    schema=["label", "features"],
)

In [4]:
# Print the training DataFrame as a text table to sanity-check its rows and columns.
training.show()

+-----+--------------+
|label|      features|
+-----+--------------+
|  1.0| [0.0,1.1,0.1]|
|  0.0|[2.0,1.0,-1.0]|
|  0.0| [2.0,1.3,1.0]|
|  1.0|[0.0,1.2,-0.5]|
+-----+--------------+



In [5]:
# Configure the (not yet fitted) logistic-regression estimator.
lr = LogisticRegression(
    regParam=0.01,  # regularization parameter
    maxIter=30,     # iteration limit for the optimizer
)

In [6]:
# Fit the estimator on the training DataFrame; the fitted model is used for scoring below.
model = lr.fit(dataset=training)

In [7]:
# Hold-out rows in the same (label, features) layout as the training data.
test = spark.createDataFrame(
    [
        (1.0, Vectors.dense([-1.0, 1.5, 1.3])),
        (0.0, Vectors.dense([3.0, 2.0, -0.1])),
        (1.0, Vectors.dense([0.0, 2.2, -1.5])),
    ],
    schema=["label", "features"],
)

In [8]:
# Score the test rows with the fitted model; the result keeps the input columns
# and adds rawPrediction, probability and prediction.
prediction = model.transform(dataset=test)

In [9]:
# Print the scored test rows. Long vector cells (rawPrediction, probability)
# are cut off with "..." in the text table.
prediction.show()

+-----+--------------+--------------------+--------------------+----------+
|label|      features|       rawPrediction|         probability|prediction|
+-----+--------------+--------------------+--------------------+----------+
|  1.0|[-1.0,1.5,1.3]|[-6.2435550918400...|[0.00193916823498...|       1.0|
|  0.0|[3.0,2.0,-0.1]|[5.45228608726759...|[0.99573180142693...|       0.0|
|  1.0|[0.0,2.2,-1.5]|[-4.4104172202339...|[0.01200425500655...|       1.0|
+-----+--------------+--------------------+--------------------+----------+

