### Using MLRun with a remote Spark service

### Build a simple function that reads a CSV file using Spark

In [None]:
#!/conda/bin/python

import mlrun
from mlrun.datastore import DataItem
from mlrun.execution import MLClientCtx

from pyspark.sql import SparkSession


def describe_spark(context: MLClientCtx, dataset: DataItem, artifact_path):
    """Read a CSV dataset with Spark and log a random sample of it to MLRun.

    The CSV is loaded with a header row and inferred column types, its first
    five rows are printed, and a sample (fraction 0.1, without replacement)
    is converted to pandas and logged as the ``df_sample`` dataset artifact.

    Args:
        context: MLRun execution context used to log the sampled dataset.
        dataset: Input data item; ``dataset.local()`` supplies the path that
            Spark reads from.
        artifact_path: Accepted for interface compatibility but not used by
            this handler; the sample is stored under
            ``context.artifact_subpath("data")``.
    """
    # get file location
    location = dataset.local()

    # build spark session
    spark = SparkSession.builder.appName("Spark job").getOrCreate()

    # Everything that uses the session runs inside try/finally so the session
    # is stopped even when reading, sampling or logging raises.
    try:
        # read csv
        df = spark.read.csv(location, header=True, inferSchema=True)

        # show
        df.show(5)

        # sample for logging
        df_to_log = df.sample(False, 0.1).toPandas()

        # log final report
        context.log_dataset(
            "df_sample",
            df=df_to_log,
            format="csv",
            index=False,
            artifact_path=context.artifact_subpath("data"),
        )
    finally:
        spark.stop()

In [None]:
# mlrun: end-code

### Create a remote-spark MLRun function

In [None]:
# Wrap the "describe_spark" handler as an MLRun function of kind "remote-spark".
fn = mlrun.code_to_function(handler="describe_spark", kind="remote-spark")

In [None]:
# Attach the function to a Spark service, then deploy it.
# NOTE(review): "iguazio-spark-service-name" looks like a placeholder —
# replace it with the name of the Spark service in your environment.
fn.with_spark_service(spark_service="iguazio-spark-service-name")
fn.deploy()

In [None]:
# Run the function, passing the CSV as the handler's "dataset" input and
# "/User" as the run's artifact path.
fn.run(inputs={"dataset": "iris_dataset.csv"}, artifact_path="/User")