In [13]:
import findspark
findspark.init()

import pyspark

from pyspark.sql import SparkSession
from pyspark.ml.feature import RegexTokenizer, VectorAssembler, Normalizer, StandardScaler, MinMaxScaler
from pyspark.sql.functions import udf, concat, lit, col, avg
from pyspark.sql.types import IntegerType

from pyspark.ml.feature import CountVectorizer, IDF, StringIndexer

from pyspark.ml.regression import LinearRegression
from pyspark.ml.classification import LogisticRegression

import re

In [14]:
# Create (or reuse) the SparkSession for this notebook; this step was left out of the screencast.
# NOTE(review): the Py4JError recorded under this cell ("getEncryptionEnabled does not
# exist in the JVM") presumably comes from a version mismatch between the pyspark
# package and the Spark installation — confirm the environment before re-running.
spark = (
    SparkSession.builder
    .master("local")
    .appName("Word Count")
    .getOrCreate()
)

Py4JError: org.apache.spark.api.python.PythonUtils.getEncryptionEnabled does not exist in the JVM

## Read in the dataset

In [8]:
# Relative path of the JSON dataset loaded in the next cell.
stack_overflow_data = "data/Train_onetag_small.json"

In [9]:
# Load the JSON file into a DataFrame using the generic reader interface.
df = spark.read.format("json").load(stack_overflow_data)

In [10]:
df.head()

Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php')

### Tokenization

Tokenization splits strings into separate words. Spark has a Tokenizer class as well as RegexTokenizer, which allows for more control over the tokenization process.

In [11]:
# Tokenize the question body: split "Body" on non-word characters (pattern "\\W")
# and store the resulting tokens in a new "words" column. As the output below
# shows, the tokens come out lowercased with punctuation removed.
regexTokenizer = RegexTokenizer(pattern="\\W", inputCol="Body", outputCol="words")
df = regexTokenizer.transform(df)
df.head()

Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', words=['p', 'i', 'd', 'like', 'to', 'check', 'if', 'an', 'uploaded', 'file', 'is', 'an', 'image', 'file', 'e', 'g', 'png', 'jpg', 'jpeg', 'gif', 'bmp', 'or', 'another', 'file', 'the', 'problem', 'is', 'that', 'i', 'm', 'using', 'uploadify', 'to', 'upload', 'the', 'files', 'which', 'changes', 'the', 'mime', 'type', 'and', 'gives', 'a', 'text', 'octal', 'or', 'something', 'as', 'the', 'mime', 'type', 'no', 'matter', 'which

In [23]:
# Add a "BodyLength" column: the number of tokens in each row's "words" array.

body_length = udf(lambda tokens: len(tokens), IntegerType())
df = df.withColumn("BodyLength", body_length(col("words")))

In [24]:
# UDFs that count closing paragraph (</p>) and closing anchor (</a>) tags in a string.

def _closing_tag_counter(tag_pattern):
    """Return an integer UDF counting the matches of ``tag_pattern`` (a regex) in a string."""
    return udf(lambda text: len(re.findall(tag_pattern, text)), IntegerType())


number_of_paragraphs = _closing_tag_counter("</p>")
number_of_links = _closing_tag_counter("</a>")

In [25]:
# Attach the paragraph and link counts, both computed from the raw HTML in "Body".
df = (
    df.withColumn("NumParagraphs", number_of_paragraphs(col("Body")))
      .withColumn("NumLinks", number_of_links(col("Body")))
)

In [26]:
df.head(2)

[Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', words=['p', 'i', 'd', 'like', 'to', 'check', 'if', 'an', 'uploaded', 'file', 'is', 'an', 'image', 'file', 'e', 'g', 'png', 'jpg', 'jpeg', 'gif', 'bmp', 'or', 'another', 'file', 'the', 'problem', 'is', 'that', 'i', 'm', 'using', 'uploadify', 'to', 'upload', 'the', 'files', 'which', 'changes', 'the', 'mime', 'type', 'and', 'gives', 'a', 'text', 'octal', 'or', 'something', 'as', 'the', 'mime', 'type', 'no', 'matter', 'whic

### VectorAssembler
Combine the body length, number of paragraphs, and number of links columns into a vector

In [27]:
# Pack the three numeric columns into a single vector column, "NumFeatures".
numeric_cols = ["BodyLength", "NumParagraphs", "NumLinks"]
assembler = VectorAssembler(inputCols=numeric_cols, outputCol="NumFeatures")
df = assembler.transform(df)

In [28]:
df.head()

Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', words=['p', 'i', 'd', 'like', 'to', 'check', 'if', 'an', 'uploaded', 'file', 'is', 'an', 'image', 'file', 'e', 'g', 'png', 'jpg', 'jpeg', 'gif', 'bmp', 'or', 'another', 'file', 'the', 'problem', 'is', 'that', 'i', 'm', 'using', 'uploadify', 'to', 'upload', 'the', 'files', 'which', 'changes', 'the', 'mime', 'type', 'and', 'gives', 'a', 'text', 'octal', 'or', 'something', 'as', 'the', 'mime', 'type', 'no', 'matter', 'which

### Normalize the Vectors


In [29]:
# Row-wise normalization of "NumFeatures" into "ScaledNumFeatures".
# No norm order `p` is given, so Normalizer's default applies.
scaler = Normalizer().setInputCol("NumFeatures").setOutputCol("ScaledNumFeatures")
df = scaler.transform(df)

In [30]:
df.head(2)

[Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', words=['p', 'i', 'd', 'like', 'to', 'check', 'if', 'an', 'uploaded', 'file', 'is', 'an', 'image', 'file', 'e', 'g', 'png', 'jpg', 'jpeg', 'gif', 'bmp', 'or', 'another', 'file', 'the', 'problem', 'is', 'that', 'i', 'm', 'using', 'uploadify', 'to', 'upload', 'the', 'files', 'which', 'changes', 'the', 'mime', 'type', 'and', 'gives', 'a', 'text', 'octal', 'or', 'something', 'as', 'the', 'mime', 'type', 'no', 'matter', 'whic

### Scale the Vectors

In [31]:
# Fit a StandardScaler on "NumFeatures" (withStd=True; withMean is left at its default)
# and append the scaled vectors as "ScaledNumFeatures2".
scaler2 = StandardScaler(withStd=True, inputCol="NumFeatures", outputCol="ScaledNumFeatures2")
scalerModel = scaler2.fit(df)
df = scalerModel.transform(df)

In [33]:
df.head(2)

[Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', words=['p', 'i', 'd', 'like', 'to', 'check', 'if', 'an', 'uploaded', 'file', 'is', 'an', 'image', 'file', 'e', 'g', 'png', 'jpg', 'jpeg', 'gif', 'bmp', 'or', 'another', 'file', 'the', 'problem', 'is', 'that', 'i', 'm', 'using', 'uploadify', 'to', 'upload', 'the', 'files', 'which', 'changes', 'the', 'mime', 'type', 'and', 'gives', 'a', 'text', 'octal', 'or', 'something', 'as', 'the', 'mime', 'type', 'no', 'matter', 'whic

In [35]:

# Term-count vectorizer: reads the "words" token arrays and writes count vectors to "TF".
# vocabSize=1000 is the vocabulary size requested from CountVectorizer.
cv = CountVectorizer(inputCol="words", outputCol="TF", vocabSize=1000)

In [36]:
# Fit the vectorizer on df; the fitted model's vocabulary is inspected further down.
cvmodel = cv.fit(df)

In [38]:
# Apply the fitted vectorizer, adding the "TF" column configured on `cv`.
df = cvmodel.transform(df)

In [39]:
df.take(1)

[Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', words=['p', 'i', 'd', 'like', 'to', 'check', 'if', 'an', 'uploaded', 'file', 'is', 'an', 'image', 'file', 'e', 'g', 'png', 'jpg', 'jpeg', 'gif', 'bmp', 'or', 'another', 'file', 'the', 'problem', 'is', 'that', 'i', 'm', 'using', 'uploadify', 'to', 'upload', 'the', 'files', 'which', 'changes', 'the', 'mime', 'type', 'and', 'gives', 'a', 'text', 'octal', 'or', 'something', 'as', 'the', 'mime', 'type', 'no', 'matter', 'whic

In [45]:
cvmodel.vocabulary

['p',
 'the',
 'i',
 'to',
 'code',
 'a',
 'gt',
 'lt',
 'is',
 'and',
 'pre',
 'in',
 'this',
 'of',
 'it',
 'that',
 'for',
 '0',
 '1',
 'have',
 'my',
 'if',
 'on',
 'but',
 'with',
 'can',
 'not',
 'be',
 'as',
 't',
 'li',
 'from',
 '2',
 's',
 'http',
 'an',
 'm',
 'strong',
 'new',
 'how',
 'do',
 'com',
 'so',
 'or',
 'at',
 'using',
 'when',
 'am',
 'like',
 'class',
 'id',
 'there',
 'get',
 'are',
 'name',
 'what',
 'any',
 'file',
 'string',
 'data',
 'all',
 'which',
 'want',
 'would',
 'amp',
 'use',
 'java',
 'function',
 'public',
 'some',
 '3',
 'text',
 'error',
 'android',
 'value',
 'c',
 'x',
 'href',
 'you',
 'one',
 'by',
 'user',
 'me',
 'server',
 'type',
 'here',
 'way',
 'return',
 'int',
 'will',
 'div',
 'need',
 'then',
 'set',
 'e',
 'system',
 'has',
 'problem',
 'out',
 'php',
 'no',
 'just',
 '4',
 'org',
 'know',
 'html',
 'only',
 'where',
 'page',
 'application',
 '5',
 'thanks',
 'var',
 'br',
 'we',
 'd',
 'should',
 'does',
 'add',
 'n',
 'true',

In [48]:
# IDF estimator: reads the "TF" vectors and writes the re-weighted vectors to "TFIDF".
idf = IDF(inputCol="TF", outputCol="TFIDF")


In [49]:
# Fit the IDF estimator on df (reads the "TF" column configured on `idf`).
idfmodel = idf.fit(df)

In [50]:
# Apply the fitted IDF model, adding the "TFIDF" column configured on `idf`.
df = idfmodel.transform(df)

In [51]:
df.take(1)

[Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', words=['p', 'i', 'd', 'like', 'to', 'check', 'if', 'an', 'uploaded', 'file', 'is', 'an', 'image', 'file', 'e', 'g', 'png', 'jpg', 'jpeg', 'gif', 'bmp', 'or', 'another', 'file', 'the', 'problem', 'is', 'that', 'i', 'm', 'using', 'uploadify', 'to', 'upload', 'the', 'files', 'which', 'changes', 'the', 'mime', 'type', 'and', 'gives', 'a', 'text', 'octal', 'or', 'something', 'as', 'the', 'mime', 'type', 'no', 'matter', 'whic

In [61]:

# Encode the string column "oneTag" as a numeric "label" column.
indexer = StringIndexer(inputCol="oneTag", outputCol="label")
indexer_model = indexer.fit(df)
df = indexer_model.transform(df)

In [62]:
df.head()

Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', words=['p', 'i', 'd', 'like', 'to', 'check', 'if', 'an', 'uploaded', 'file', 'is', 'an', 'image', 'file', 'e', 'g', 'png', 'jpg', 'jpeg', 'gif', 'bmp', 'or', 'another', 'file', 'the', 'problem', 'is', 'that', 'i', 'm', 'using', 'uploadify', 'to', 'upload', 'the', 'files', 'which', 'changes', 'the', 'mime', 'type', 'and', 'gives', 'a', 'text', 'octal', 'or', 'something', 'as', 'the', 'mime', 'type', 'no', 'matter', 'which

In [71]:
df.filter(df.Id == 1112).select("BodyLength").show()

+----------+
|BodyLength|
+----------+
|        63|
+----------+



### Create a new column that concatenates the question title and body. Apply the same functions we used before to compute the number of words in this combined column. What's the value in this new column for Id = 5123?

In [5]:
# Build the combined text column "CCQT": question title, a single space, then the body.
# withColumn replaces an existing "CCQT" rather than appending a second column with the
# same name (which select("*", ...) would do), so this cell can be re-run safely.
df = df.withColumn("CCQT", concat(col("Title"), lit(" "), col("Body")))


In [6]:
# Tokenizer for the combined Title + Body text: splits "CCQT" on non-word characters into "Nwords".
regexTokenizer = RegexTokenizer(pattern="\\W", inputCol="CCQT", outputCol="Nwords")

In [7]:
# Tokenize with the current regexTokenizer. transform raises if the tokenizer's output
# column already exists (e.g. when this cell is re-run), so drop any stale copy first;
# DataFrame.drop is a no-op when the column is absent.
df = regexTokenizer.transform(df.drop(regexTokenizer.getOutputCol()))
df.head()

Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', CCQT="How to check if an uploaded file is an image without mime type? <p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an imag

In [8]:
# Count the tokens of the combined Title + Body text (the "Nwords" arrays).
# NOTE: the count is written to "BodyLength"; withColumn replaces any existing column
# of that name, so from here on "BodyLength" holds the Title + Body word count.

NWords_length = udf(lambda tokens: len(tokens), IntegerType())
df = df.withColumn("BodyLength", NWords_length(col("Nwords")))

In [9]:
df.head()

Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', CCQT="How to check if an uploaded file is an image without mime type? <p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an imag

In [35]:
df.filter(df.Id == 5123).select("BodyLength").show()

+----------+
|BodyLength|
+----------+
|       135|
+----------+



### Create a vector from the combined Title + Body length column. In the next few questions, you'll try different normalizer/scaler methods on this new column.


In [10]:
# QUESTION 3 OF 5
# Using the Normalizer method what's the normalized value for question Id = 512?

# Count the closing paragraph (</p>) and anchor (</a>) tags in the combined
# Title + Body text ("CCQT"). withColumn replaces any existing column of the same name.

number_of_paragraphs = udf(lambda x: len(re.findall("</p>", x)), IntegerType())
df = df.withColumn("NumParagraphs", number_of_paragraphs(df.CCQT))

number_of_links = udf(lambda x: len(re.findall("</a>", x)), IntegerType())
df = df.withColumn("NumLinks", number_of_links(df.CCQT))

# VectorAssembler raises "Output column ... already exists" if "NumFeatures" is already
# present (it is when the notebook runs top to bottom, or when this cell is re-run).
# Drop any stale copy first; DataFrame.drop is a no-op when the column is absent.
assembler = VectorAssembler(inputCols=["BodyLength", "NumParagraphs", "NumLinks"], outputCol="NumFeatures")
df = assembler.transform(df.drop(assembler.getOutputCol()))

In [11]:
# Normalize "NumFeatures" into "ScaledNumFeatures" (no `p` given, so the default norm applies).
# transform raises if the output column already exists (top-to-bottom run or cell re-run),
# so drop any stale copy first; DataFrame.drop is a no-op when the column is absent.
scaler = Normalizer(inputCol="NumFeatures", outputCol="ScaledNumFeatures")
df = scaler.transform(df.drop(scaler.getOutputCol()))


In [47]:
df.filter(df.Id == 512).select("ScaledNumFeatures").show()

+--------------------+
|   ScaledNumFeatures|
+--------------------+
|[0.99938499379176...|
+--------------------+



In [12]:
# Standardize "NumFeatures" with both centering (withMean=True) and scaling (withStd=True),
# writing the result to "ScaledNumFeatures2".
scaler2 = StandardScaler(inputCol="NumFeatures", outputCol="ScaledNumFeatures2", withMean=True, withStd=True)
# Both fit and transform validate the schema and raise if the output column already
# exists (top-to-bottom run or cell re-run), so remove any stale copy beforehand;
# DataFrame.drop is a no-op when the column is absent.
df = df.drop(scaler2.getOutputCol())
scalerModel = scaler2.fit(df)
df = scalerModel.transform(df)

In [13]:
df.filter(df.Id == 512).select("ScaledNumFeatures2").show()

+--------------------+
|  ScaledNumFeatures2|
+--------------------+
|[-0.6417314460998...|
+--------------------+



In [22]:
# Using the MinMaxScaler method what's the normalized value for question Id = 512?

# Fit on "NumFeatures" and append the rescaled vectors as "ScaledNumFeatures4".
scalermM = MinMaxScaler(outputCol="ScaledNumFeatures4", inputCol="NumFeatures")
scalerModel = scalermM.fit(df)
df = scalerModel.transform(df)

In [23]:
df.filter(df.Id == 512).select("ScaledNumFeatures4").show()

+--------------------+
|  ScaledNumFeatures4|
+--------------------+
|[0.00624833820792...|
+--------------------+



### Linear Regression

In [None]:
# Build a linear regression model using the length of the combined Title + Body fields.
# What is the value of r^2 when fitting a model with maxIter=5, regParam=0.0, 
# fitIntercept=False, solver="normal"?



In [27]:
# Count the tags of each question; "Tags" is a space-separated string (see the sample row above).

# str.split() with no argument splits on runs of whitespace and ignores leading and
# trailing whitespace, so stray or doubled spaces are not counted as extra (empty)
# tags the way split(" ") would count them.
nTags = udf(lambda tags: len(tags.split()), IntegerType())
df = df.withColumn("NumTags", nTags(df.Tags))

In [32]:
df.groupby("NumTags").count().orderBy("NumTags").show()

+-------+-----+
|NumTags|count|
+-------+-----+
|      1|13858|
|      2|26540|
|      3|28769|
|      4|19108|
|      5|11725|
+-------+-----+



In [36]:
df.groupby("NumTags").agg(avg(col("BodyLength"))).orderBy("NumTags").show()

+-------+------------------+
|NumTags|   avg(BodyLength)|
+-------+------------------+
|      1|143.68776158175783|
|      2| 162.1539186134137|
|      3|181.26021064340088|
|      4|201.46530249110322|
|      5|227.64375266524522|
+-------+------------------+



In [39]:
# Wrap the scalar "BodyLength" column in a one-element vector column, "lengthFeature".
assembler = VectorAssembler(outputCol="lengthFeature", inputCols=["BodyLength"])
df = assembler.transform(df)

In [46]:
# Linear regression configured as the exercise specifies: maxIter=5, no regularization,
# no intercept term, "normal" solver.
lp = LinearRegression(
    maxIter=5,
    regParam=0.0,
    fitIntercept=False,
    solver="normal",
)


In [62]:
# Training frame for the regression: the tag count becomes "label" and the assembled
# length vector becomes "features". The column is referenced as "lengthFeature",
# exactly as the VectorAssembler named it, so the lookup does not rely on
# case-insensitive column resolution.
data = df.select(col("NumTags").alias("label"), col("lengthFeature").alias("features"))



In [69]:
data.head(3)

[Row(label=5, features=DenseVector([96.0])),
 Row(label=1, features=DenseVector([83.0])),
 Row(label=3, features=DenseVector([3168.0]))]

In [48]:
# Fit the configured linear regression on the (label, features) frame built above.
lrmodel = lp.fit(data)

In [49]:
lrmodel.coefficients

DenseVector([0.0079])

In [50]:
lrmodel.intercept

0.0

In [51]:
lrmodelSummary = lrmodel.summary

In [54]:
lrmodelSummary.r2

0.4455149596308462

In [71]:
df.columns

['Body',
 'Id',
 'Tags',
 'Title',
 'oneTag',
 'CCQT',
 'Nwords',
 'BodyLength',
 'NumParagraphs',
 'NumLinks',
 'NumFeatures',
 'ScaledNumFeatures',
 'ScaledNumFeatures2',
 'ScaledNumFeatures3',
 'ScaledNumFeatures4',
 'NumTags',
 'lengthFeature']

### Logistic Regression

In [75]:
# Term-count vectors for the combined Title + Body tokens ("Nwords"), written to "TF".
cv = CountVectorizer(inputCol="Nwords", outputCol="TF", vocabSize=1000)
# fit/transform raise if "TF" already exists (it does when the notebook runs top to
# bottom, or when this cell is re-run), so drop any stale copy first;
# DataFrame.drop is a no-op when the column is absent.
df = df.drop(cv.getOutputCol())
cvmodel = cv.fit(df)
df = cvmodel.transform(df)


In [None]:
df.head()

In [77]:
# Re-weight the "TF" vectors with inverse document frequency, writing "TFIDF".
idf = IDF(inputCol="TF", outputCol="TFIDF")
# fit/transform raise if "TFIDF" already exists (top-to-bottom run or cell re-run),
# so drop any stale copy first; DataFrame.drop is a no-op when the column is absent.
df = df.drop(idf.getOutputCol())
idfmodel = idf.fit(df)
df = idfmodel.transform(df)

In [93]:
# Encode the string column "oneTag" as the numeric "label" column used for classification.
indexer = StringIndexer(inputCol="oneTag", outputCol="label")
# fit/transform raise if "label" already exists (top-to-bottom run or cell re-run),
# so drop any stale copy first; DataFrame.drop is a no-op when the column is absent.
df = df.drop(indexer.getOutputCol())
df = indexer.fit(df).transform(df)


In [94]:
df.head()

Row(Body="<p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an image apart from checking the file extension using PHP?</p>\n", Id=1, Tags='php image-processing file-upload upload mime-types', Title='How to check if an uploaded file is an image without mime type?', oneTag='php', CCQT="How to check if an uploaded file is an image without mime type? <p>I'd like to check if an uploaded file is an image file (e.g png, jpg, jpeg, gif, bmp) or another file. The problem is that I'm using Uploadify to upload the files, which changes the mime type and gives a 'text/octal' or something as the mime type, no matter which file type you upload.</p>\n\n<p>Is there a way to check if the uploaded file is an imag

In [95]:
# Classification frame: keep "label" as is and expose the "TFIDF" vectors as "features".
data2 = df.select("label", col("TFIDF").alias("features"))


In [96]:
# Logistic regression with maxIter=10 and regularization disabled (regParam=0.0).
lr2 = LogisticRegression(regParam=0.0, maxIter=10)

In [97]:
# Fit the logistic regression on the (label, features) frame built from "label" and "TFIDF".
lrModel2 = lr2.fit(data2)

In [98]:
lrModel2.coefficientMatrix

DenseMatrix(301, 1000, [11.2339, 0.0477, 0.0243, 0.1272, 0.0219, -0.0368, 0.0018, 0.0008, ..., -0.0012, -0.0001, 0.0024, -0.0004, -0.0005, -0.001, -0.0003, -0.0003], 1)

In [99]:
lrModel2.interceptVector

DenseVector([5.0482, 4.2935, 4.2032, 4.0664, 3.9603, 3.8377, 3.4016, 3.4384, 3.3173, 3.226, 2.924, 2.8149, 2.7794, 2.752, 2.6304, 2.5881, 2.4528, 2.4364, 2.3989, 2.3596, 2.1037, 2.0606, 2.0563, 1.7941, 1.7813, 1.7797, 1.7132, 1.537, 1.516, 1.4134, 1.3652, 1.3645, 1.3461, 1.3323, 1.3387, 1.2471, 1.1942, 1.1996, 1.203, 1.2009, 1.1802, 1.1352, 1.1005, 1.0419, 0.9546, 0.9411, 0.9234, 0.9185, 0.8868, 0.859, 0.848, 0.8341, 0.8305, 0.808, 0.7977, 0.7915, 0.7821, 0.7774, 0.7605, 0.7617, 0.7353, 0.7298, 0.6965, 0.657, 0.6588, 0.6571, 0.6443, 0.6452, 0.6077, 0.6097, 0.5205, 0.507, 0.5048, 0.501, 0.4667, 0.4619, 0.452, 0.4329, 0.4236, 0.41, 0.4065, 0.3856, 0.355, 0.348, 0.345, 0.3236, 0.2893, 0.2843, 0.2751, 0.2604, 0.245, 0.2192, 0.1951, 0.1859, 0.1861, 0.1711, 0.1693, 0.1624, 0.1453, 0.1241, 0.1162, 0.1068, 0.0995, 0.0712, 0.0679, 0.0216, 0.0137, 0.0014, 0.0023, -0.0088, -0.0196, -0.0285, -0.0292, -0.0399, -0.0407, -0.0503, -0.0715, -0.0722, -0.0732, -0.0728, -0.0838, -0.0848, -0.1074, -0.1063,

In [100]:
lrModel2.summary.accuracy

0.39868