The following example demonstrates training a multinomial Naive Bayes classifier with Spark ML on the sample LIBSVM dataset and measuring its accuracy on a held-out test split.

## Imports

In [None]:
# `environ` is used just below to tell findspark where Spark is installed.
from os import getlogin, path, environ

# Point SPARK_HOME at the local Spark installation. This is set before
# findspark.init() so that findspark can locate Spark through this variable.
environ["SPARK_HOME"] = "/home/students/spark-2.2.0"

# findspark makes the pyspark package importable from a plain Python/Jupyter
# session, so it has to run before the `pyspark` imports that follow.
import findspark
findspark.init()

from pyspark import SparkContext
from pyspark.sql import SQLContext

from pyspark.ml.classification import NaiveBayes
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

## Get Some Context

In [None]:
# Obtain the SparkContext and wrap it in a SQLContext for DataFrame access.
#
# getOrCreate() is used instead of calling the SparkContext constructor
# directly so that re-running this cell in a live kernel reuses the active
# context rather than failing with "Cannot run multiple SparkContexts at once".
from pyspark import SparkConf

conf = SparkConf().setAppName("Naive Bayes Classification with Spark")
sc = SparkContext.getOrCreate(conf)
sqlContext = SQLContext(sc)

## Load the Data

In [None]:
# Path to the sample dataset file (LIBSVM text format)
DATA_FILE = "/home/students/data/mllib/sample_libsvm_data.txt"

In [None]:
# Read the full dataset as a DataFrame using the "libsvm" data source;
# it is split into train and test sets further down.
data = sqlContext.read.load(DATA_FILE, format="libsvm")

# Print a preview of the loaded rows
data.show()

In [None]:
# Pull the first record back to the driver to inspect one raw row
data.take(1)

## Train a Model

In [None]:
# Randomly partition the data 60% / 40% into training and test sets.
# The fixed seed keeps the partition reproducible across runs.
splits = data.randomSplit([0.6, 0.4], seed=1234)
train, test = splits

# Preview the first five training rows
train.show(5)

In [None]:
# Echo the second element of `splits` (the held-out test split)
splits[1]

In [None]:
# Configure the Naive Bayes estimator: multinomial model type with a
# smoothing parameter of 1.0
nb = NaiveBayes(
    modelType="multinomial",
    smoothing=1.0,
)
print(nb)

In [None]:
# Fit the Naive Bayes estimator on the training split; `nb_model` is the
# resulting fitted model used for prediction below
nb_model = nb.fit(train)
print(nb_model)

In [None]:
# Apply the fitted model to the held-out test split to obtain predictions,
# then print a sample of the resulting rows
predictions = nb_model.transform(test)
predictions.show()

In [None]:
# Score the test-set predictions: compare the "prediction" column against
# the "label" column using the accuracy metric
evaluator = MulticlassClassificationEvaluator(
    labelCol="label",
    predictionCol="prediction",
    metricName="accuracy",
)
accuracy = evaluator.evaluate(predictions)

print("Test set accuracy = {}".format(accuracy))

## Close it down

In [None]:
# Shut down the SparkContext now that the example is finished
sc.stop()