In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.feature import VectorAssembler
import os

In [2]:
# Obtain the SparkSession for this notebook (application name "ICP 7"),
# reusing an existing session if one is already active.
session_builder = SparkSession.builder.appName("ICP 7")
spark = session_builder.getOrCreate()

# Limit Spark's console logging to the ERROR level.
spark.sparkContext.setLogLevel("ERROR")

In [3]:
# Load the car dataset and keep only the label and feature columns.
# The CSV location can be overridden with the CAR_CSV_PATH environment
# variable; when it is unset, the original hard-coded location is used.
car_csv_path = os.environ.get(
    "CAR_CSV_PATH",
    "/Users/neerajpadarthi/Neeraj/BDP/Spark ICP 7/car.csv",
)

# Read the CSV with a header row, letting Spark infer the column types.
cars_raw = (
    spark.read.format("csv")
    .option("header", True)
    .option("inferSchema", True)
    .option("delimiter", ",")
    .load(car_csv_path)
)

# Binary label: 1 when "num-of-doors" equals "four", 0 for anything else.
# Only the label and the three dimension columns are kept for modelling.
data = cars_raw.withColumn(
    "label", when(col("num-of-doors") == "four", 1).otherwise(0)
).select("label", "length", "width", "height")

In [4]:
# Assemble the feature columns into a single "features" vector column.
# The input list is every column except the label and any previously
# assembled "features" column, and that column is dropped before the
# transform, so re-running this cell does not fail on an output column
# that already exists (drop is a no-op when the column is absent).
feature_cols = [c for c in data.columns if c not in ("label", "features")]
assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
data = assembler.transform(data.drop("features"))

# Elastic-net regularised logistic regression: regParam is the overall
# regularisation strength, elasticNetParam the L1/L2 mixing ratio.
lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)

# Fit the model
model = lr.fit(data)

# Print the coefficients and intercept for logistic regression
print("Coefficients: " + str(model.coefficients))
print("Intercept: " + str(model.intercept))



Coefficients: [0.0,0.0,0.000100509510875788]
Intercept: 0.22531532410664368


In [5]:
# The same binary problem can also be trained with the multinomial family.
mlr_params = dict(
    maxIter=10,
    regParam=0.3,
    elasticNetParam=0.8,
    family="multinomial",
)
mlr = LogisticRegression(**mlr_params)

# Train on the assembled data
mlr_model = mlr.fit(data)

# Report the per-class coefficient matrix and intercept vector
coefficient_text = str(mlr_model.coefficientMatrix)
intercept_text = str(mlr_model.interceptVector)
print("Multinomial coefficients: " + coefficient_text)
print("Multinomial intercepts: " + intercept_text)

Multinomial coefficients: DenseMatrix([[ 0.00000000e+00,  0.00000000e+00, -7.35292649e-05],
             [ 0.00000000e+00,  0.00000000e+00,  7.35292649e-05]])
Multinomial intercepts: [-0.11156262444620539,0.11156262444620539]


In [11]:
# Keep a handle on the summary object exposed by the fitted multinomial-family
# model; the ROC curve and areaUnderROC are read from it below.
summary = mlr_model.summary

In [12]:
# Materialise the ROC curve from the model summary as a pandas DataFrame
roc = summary.roc.toPandas()

# Report the area under the ROC curve
area_under_roc = summary.areaUnderROC
print('areaUnderROC: ' + str(area_under_roc))

areaUnderROC: 0.8154038943512628
