The following example demonstrates training an elastic-net-regularized linear regression model, extracting model summary statistics, and saving the model to disk.

## Imports

In [None]:
import os
# NOTE(review): getlogin and path are not used in the cells shown here.
from os import getlogin, path, environ

# SPARK_HOME is assigned before findspark.init() runs; this unconditionally
# overrides any value already present in the environment.
environ["SPARK_HOME"] = "/home/students/spark-2.2.0"

import findspark
# Presumably makes the pyspark package from SPARK_HOME importable -- the
# pyspark imports below are deliberately placed after this call.
findspark.init()


# NOTE(review): pickle is not used in the cells shown here.
import pickle

from pyspark import SparkContext
from pyspark.sql import SQLContext

from pyspark.ml.regression import LinearRegression, LinearRegressionModel

## Get Some Context

In [None]:
# Start the Spark entry points used by the rest of the notebook:
# a SparkContext for the application and a SQLContext built on top of it.
sc = SparkContext(appName="Linear Regression with Spark")
sqlContext = SQLContext(sparkContext=sc)

## Load the Data

In [None]:
# Locations used below: the input data file, and the directory (under the
# current working directory) where the fitted model is written.
DATA_FILE = "/home/students/data/mllib/sample_linear_regression_data.txt"
SAVE_DIR = os.getcwd() + '/model'

In [None]:
# Read DATA_FILE with the "libsvm" reader to obtain the training dataframe,
# then display its type as the cell output.
libsvm_reader = sqlContext.read.format("libsvm")
training = libsvm_reader.load(DATA_FILE)
type(training)

## Train a Model

In [None]:
# Configure the (not yet fitted) LinearRegression estimator.
# Per the Spark ML docs, regParam is the regularization strength and
# elasticNetParam is the L1/L2 mixing ratio of the elastic net penalty.
hyperparams = {"maxIter": 10, "regParam": 0.3, "elasticNetParam": 0.8}
lr = LinearRegression(**hyperparams)
lr

In [None]:
# Fit (train) the estimator on the training dataframe; fit() returns the
# fitted model, which is kept separately from the estimator `lr`.
lr_model = lr.fit(training)
# Display the fitted model as the cell output
lr_model

In [None]:
# Show the fitted parameters and the training summary statistics.
# The training summary is read from the model returned by fit().
training_summary = lr_model.summary
print("Intercept: {}".format(lr_model.intercept))
print("Iterations: {}".format(training_summary.totalIterations))
print("RMSE: {}".format(training_summary.rootMeanSquaredError))
print("r2: {}".format(training_summary.r2))
# Keep the coefficient vector as the cell's displayed value
lr_model.coefficients

## Serialize the Model

In [None]:
# Persist the fitted model to SAVE_DIR, overwriting anything already saved there
model_writer = lr_model.write()
model_writer.overwrite().save(SAVE_DIR)

## Deserialize the Model

In [None]:
# Load the model back from the directory it was saved to
new_lr_model = LinearRegressionModel.load(SAVE_DIR)
print(new_lr_model)

In [None]:
# Show the coefficients of the reloaded model; they are expected to match
# the coefficients of lr_model, which was saved to the same directory.
new_lr_model.coefficients

## Clean Up

In [None]:
# Stop the SparkContext created at the start of the notebook
sc.stop()