In [None]:
%%bash
# Bring up the Hadoop services the Spark job below depends on.
# HDFS first (namenode, datanodes, secondary namenode) ...
$HADOOP_HOME/sbin/start-dfs.sh
# ... then YARN (resourcemanager and nodemanagers) ...
$HADOOP_HOME/sbin/start-yarn.sh
# ... and finally the MapReduce history server, run as a background daemon.
$HADOOP_HOME/bin/mapred --daemon start historyserver

Starting namenodes on [localhost]
Starting datanodes
Starting secondary namenodes [p-e9af0478-2590-480d-aeec-5261adc1b393]
Starting resourcemanager
Starting nodemanagers


In [None]:
# Upload the local iris.csv to HDFS. No destination is given, so the file is
# placed under the current user's HDFS home directory.
# -f overwrites an existing copy so this cell can be re-run without failing
# with "File exists".
! $HADOOP_HOME/bin/hdfs dfs -put -f iris.csv

In [None]:
%%file iris_classification.py
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.ml.feature import RFormula
from pyspark.ml.classification import LogisticRegression

# Train and evaluate a logistic-regression classifier on the Iris data set.
#
# Steps: read iris.csv with an explicit schema, build feature/label columns
# with RFormula, split 70/30 into train/test, fit LogisticRegression on the
# training part, and print a small sample of the test predictions.

# Fixed seed so the train/test split and the displayed sample are the same
# on every run.
SEED = 42

spark = SparkSession.builder.appName("Iris classification").getOrCreate()

try:
    # Four float measurements followed by the species name.
    mySchema = StructType([
        StructField("sepal_length", FloatType(), False),
        StructField("sepal_width", FloatType(), False),
        StructField("petal_length", FloatType(), False),
        StructField("petal_width", FloatType(), False),
        StructField("class", StringType(), False),
    ])

    # NOTE(review): no "header" option is set, so the first line of iris.csv
    # is treated as data. If the file has a header row, add
    # .option("header", True) -- confirm against the actual file.
    df = spark.read.format("csv").schema(mySchema).load("iris.csv")

    # RFormula assembles the four measurements into a feature vector and
    # indexes the string "class" column into a numeric label; the resulting
    # columns are the 'features' and 'label' consumed by the classifier below.
    formula = RFormula(
        formula="class ~ sepal_length + sepal_width + petal_length + petal_width"
    )
    preparedDF = formula.fit(df).transform(df)

    # 70 % of the rows for training, 30 % held out for evaluation.
    train, test = preparedDF.randomSplit([0.7, 0.3], seed=SEED)

    lr = LogisticRegression(featuresCol='features', labelCol='label')
    lrModel = lr.fit(train)

    predictedDF = lrModel.transform(test)

    # Show roughly 10 % of the test rows with the true and predicted labels
    # side by side.
    predictedDF.selectExpr(
        "sepal_length",
        "sepal_width",
        "petal_length",
        "petal_width",
        "label as true_label",
        "prediction",
    ).sample(fraction=0.1, seed=SEED).show()
finally:
    # Always release the application's cluster resources, even if the job
    # fails part-way through.
    spark.stop()


In [None]:
# Submit the iris_classification.py script to the YARN cluster.
! $SPARK_HOME/bin/spark-submit --master yarn iris_classification.py

In [None]:
%%bash
# Shut the Hadoop services down: history server first, then YARN, then HDFS.
$HADOOP_HOME/bin/mapred --daemon stop historyserver
$HADOOP_HOME/sbin/stop-yarn.sh
$HADOOP_HOME/sbin/stop-dfs.sh


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=2e2dd3fa-79b3-4d63-ad74-397ef775c511' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>