In [69]:
# Run this cell to import pyspark and to define start_spark() and stop_spark()

import findspark

findspark.init()

import getpass
import pandas
import pyspark
import random
import re

from IPython.display import display, HTML
from pyspark import SparkContext
from pyspark.sql import SparkSession
import pyspark.sql.functions as F


# Constants used to interact with Azure Blob Storage using the hdfs command or Spark
#
# These are plain module-level assignments: a `global` statement at module level is a
# no-op, so none is needed for start_spark() and friends to see these names.

import os

# Login name of the current user with any "@domain" suffix removed
username = re.sub('@.*', '', getpass.getuser())

azure_account_name = "madsstorage002"
azure_data_container_name = "campus-data"
azure_user_container_name = "campus-user"

# SECURITY: a SAS token is a credential and should not live in a notebook. Set the
# AZURE_USER_TOKEN environment variable to supply it; the literal below is kept only as a
# fallback so existing setups keep working and should be rotated and removed.
azure_user_token = os.environ.get("AZURE_USER_TOKEN") or (
    r"sp=racwdl&st=2025-08-01T09:41:33Z&se=2026-12-30T16:56:33Z&spr=https&sv=2024-11-04&sr=c&sig=GzR1hq7EJ0lRHj92oDO1MBNjkc602nrpfB5H8Cl7FFY%3D"
)


# Functions used below

def dict_to_html(d):
    """Convert a Python dictionary into a two column HTML table for display.

    Keys and values are converted with ``str`` and HTML-escaped, so characters such as
    ``&``, ``<`` and ``>`` (which occur in config values like query-string style tokens
    or JVM options) are shown literally instead of being parsed as markup.

    Args:
        d (dict): mapping to render; one table row is emitted per item, in iteration order.

    Returns:
        str: the HTML for the table (a table without rows when ``d`` is empty).
    """

    # Imported locally because this file rebinds the module-level name `html` to a list.
    from html import escape

    rows = [
        f'<tr><td style="text-align:left;">{escape(str(k))}</td><td>{escape(str(v))}</td></tr>'
        for k, v in d.items()
    ]

    return ''.join([
        '<table width="100%" style="width:100%; font-family: monospace;">',
        *rows,
        '</table>',
    ])


def show_as_html(df, n=20):
    """Preview a spark dataframe using the pandas rich display in jupyter.

    Args:
        df: spark dataframe to preview
        n (int): number of rows to show (default: 20)
    """

    # Limit on the spark side first so only the previewed rows are converted to pandas
    preview = df.limit(n)
    display(preview.toPandas())

    
def display_spark():
    """Show an HTML status panel for the notebook's Spark session.

    If the globals `spark` and `sc` both exist the session is reported as active, together
    with the application name, a link to the application UI, the context configuration and
    usage notes. Otherwise the session is reported as stopped with a link to the Spark UI.
    """

    if not ('spark' in globals() and 'sc' in globals()):

        stopped_markup = ''.join((
            '<p><b>Spark</b></p>',
            f'<p>The spark session is <b><span style="color:red">stopped</span></b>, confirm that <code>{username} (notebook)</code> is under the completed applications section in the Spark UI.</p>',
            '<ul>',
            '<li><a href="http://mathmadslinux2p.canterbury.ac.nz:8080/" target="_blank">Spark UI</a></li>',
            '</ul>',
        ))
        display(HTML(stopped_markup))
        return

    app_name = sc.getConf().get("spark.app.name")

    # Only the port of the UI url is kept; the link itself points at localhost
    ui_port = sc.uiWebUrl.split(":")[-1]
    config_table = dict_to_html(dict(sc.getConf().getAll()))

    active_markup = ''.join((
        '<p><b>Spark</b></p>',
        f'<p>The spark session is <b><span style="color:green">active</span></b>, look for <code>{app_name}</code> under the running applications section in the Spark UI.</p>',
        '<ul>',
        f'<li><a href="http://localhost:{ui_port}" target="_blank">Spark Application UI</a></li>',
        '</ul>',
        '<p><b>Config</b></p>',
        config_table,
        '<p><b>Notes</b></p>',
        '<ul>',
        '<li>The spark session <code>spark</code> and spark context <code>sc</code> global variables have been defined by <code>start_spark()</code>.</li>',
        f'<li>Please run <code>stop_spark()</code> before closing the notebook or restarting the kernel or kill <code>{app_name}</code> by hand using the link in the Spark UI.</li>',
        '</ul>',
    ))
    display(HTML(active_markup))


# Functions to start and stop spark

def _memory_string(gigabytes):
    """Format a memory size given in gigabytes as a Spark memory string (e.g. '8g', '1536m')."""

    # A float formatted directly gives e.g. '1.5g' or '8.0g', which Spark is expected to
    # reject as a fractional size. Integral floats become whole gigabytes and fractional
    # ones whole megabytes. Non-float values are formatted exactly as before.
    if isinstance(gigabytes, float):
        if gigabytes.is_integer():
            return f'{int(gigabytes)}g'
        return f'{round(gigabytes * 1024)}m'

    return f'{gigabytes}g'


def start_spark(executor_instances=2, executor_cores=1, worker_memory=1, master_memory=1):
    """Start a new Spark session and define globals for SparkSession (spark) and SparkContext (sc).

    The session is built with `getOrCreate()`, so an already running session is reused.
    After the globals are set the session status is shown with `display_spark()`.

    Args:
        executor_instances (int): number of executors (default: 2)
        executor_cores (int): number of cores per executor (default: 1)
        worker_memory (int or float): memory per executor in gigabytes (default: 1)
        master_memory (int or float): driver memory in gigabytes (default: 1)
    """

    global spark
    global sc

    # Total cores across all executors; shuffle partitions are set to four per core
    cores = executor_instances * executor_cores
    partitions = cores * 4

    spark = (
        SparkSession.builder
        .config("spark.driver.extraJavaOptions", f"-Dderby.system.home=/tmp/{username}/spark/")
        .config("spark.dynamicAllocation.enabled", "false")
        .config("spark.executor.instances", str(executor_instances))
        .config("spark.executor.cores", str(executor_cores))
        .config("spark.cores.max", str(cores))
        .config("spark.driver.memory", _memory_string(master_memory))
        .config("spark.executor.memory", _memory_string(worker_memory))
        .config("spark.driver.maxResultSize", "0")
        .config("spark.sql.shuffle.partitions", str(partitions))
        .config("spark.kubernetes.container.image", "madsregistry001.azurecr.io/hadoop-spark:v3.3.5-openjdk-8")
        .config("spark.kubernetes.container.image.pullPolicy", "IfNotPresent")
        .config("spark.kubernetes.memoryOverheadFactor", "0.3")
        .config("spark.memory.fraction", "0.1")
        .config(f"fs.azure.sas.{azure_user_container_name}.{azure_account_name}.blob.core.windows.net", azure_user_token)
        .config("spark.app.name", f"{username} (notebook)")
        .getOrCreate()
    )
    sc = SparkContext.getOrCreate()

    display_spark()

    
def stop_spark():
    """Stop the running Spark session, drop the `spark` and `sc` globals, then show the status.

    Nothing is stopped or deleted unless both globals are currently defined; the status
    panel is displayed in either case.
    """

    global spark, sc

    session_defined = 'spark' in globals()
    context_defined = 'sc' in globals()

    if session_defined and context_defined:
        spark.stop()
        del spark, sc

    display_spark()


# Inject notebook-wide css so wide spark output stays readable: keep <pre> text and dataframe
# cells on one line, and hide the index column of rendered dataframes

html = ['<style>']
html += [
    'pre { white-space: pre !important; }',
    'table.dataframe td { white-space: nowrap !important; }',
    'table.dataframe thead th:first-child, table.dataframe tbody th { display: none; }',
]
html.append('</style>')
display(HTML(''.join(html)))

In [70]:
# Run this cell to start a spark session in this notebook
# (4 executors with 4 cores each; worker_memory and master_memory are passed in gigabytes)

start_spark(executor_instances=4, executor_cores=4, worker_memory=8, master_memory=8)

0,1
spark.dynamicAllocation.enabled,false
spark.fs.azure.sas.campus-user.madsstorage002.blob.core.windows.net,"""sp=racwdl&st=2025-08-01T09:41:33Z&se=2026-12-30T16:56:33Z&spr=https&sv=2024-11-04&sr=c&sig=GzR1hq7EJ0lRHj92oDO1MBNjkc602nrpfB5H8Cl7FFY%3D"""
spark.kubernetes.driver.pod.name,spark-master-driver
spark.executor.instances,4
spark.app.name,rsh224 (notebook)
spark.cores.max,16
spark.sql.warehouse.dir,file:/home/rsh224/data-420-spark/Assignment%202/Processing/spark-warehouse
spark.app.startTime,1759617025075
spark.kubernetes.container.image.pullPolicy,IfNotPresent
spark.kubernetes.namespace,rsh224


In [71]:
# Write your imports here or insert cells below

from pyspark.sql import functions as F
from pyspark.sql.types import *

In [72]:
# Root of the msd dataset: a wasbs URI built from the data container and storage account names
directory_path = f'wasbs://{azure_data_container_name}@{azure_account_name}.blob.core.windows.net/msd'

In [73]:
# List the top-level contents of the dataset directory
!hdfs dfs -ls {directory_path}

Found 4 items
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/genre
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/main
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/tasteprofile


In [74]:
# List the files under main/
!hdfs dfs -ls {directory_path}/main

Found 2 items
-rwxrwxrwx   1   58658141 2025-02-18 15:22 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/main/analysis.csv.gz
-rwxrwxrwx   1  124211304 2025-02-18 15:22 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/main/metadata.csv.gz


In [75]:
# List the files under genre/
!hdfs dfs -ls {directory_path}/genre

Found 3 items
-rwxrwxrwx   1   11625230 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/genre/msd-MAGD-genreAssignment.tsv
-rwxrwxrwx   1    8820054 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/genre/msd-MASD-styleAssignment.tsv
-rwxrwxrwx   1   11140605 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/genre/msd-topMAGD-genreAssignment.tsv


In [76]:
# List the contents of audio/
!hdfs dfs -ls {directory_path}/audio

Found 2 items
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/attributes
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/features


In [77]:
# List the attribute files under audio/attributes/
!hdfs dfs -ls {directory_path}/audio/attributes

Found 13 items
-rwxrwxrwx   1       1051 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/attributes/msd-jmir-area-of-moments-all-v1.0.attributes.csv
-rwxrwxrwx   1        671 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/attributes/msd-jmir-lpc-all-v1.0.attributes.csv
-rwxrwxrwx   1        484 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/attributes/msd-jmir-methods-of-moments-all-v1.0.attributes.csv
-rwxrwxrwx   1        898 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/attributes/msd-jmir-mfcc-all-v1.0.attributes.csv
-rwxrwxrwx   1        777 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/attributes/msd-jmir-spectral-all-all-v1.0.attributes.csv
-rwxrwxrwx   1        777 2024-09-13 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/attributes/msd-jmir-spectral-derivatives-all-all

In [78]:
# List the entries under audio/features/
!hdfs dfs -ls {directory_path}/audio/features

Found 13 items
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/features/msd-jmir-area-of-moments-all-v1.0.csv
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/features/msd-jmir-lpc-all-v1.0.csv
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/features/msd-jmir-methods-of-moments-all-v1.0.csv
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/features/msd-jmir-mfcc-all-v1.0.csv
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/features/msd-jmir-spectral-all-all-v1.0.csv
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/features/msd-jmir-spectral-derivatives-all-all-v1.0.csv
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus

In [79]:
# List the contents of tasteprofile/
!hdfs dfs -ls {directory_path}/tasteprofile

Found 2 items
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/tasteprofile/mismatches
drwxrwxrwx   -          0 1970-01-01 12:00 wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/tasteprofile/triplets.tsv


In [80]:
!hdfs dfs -ls {directory_path}/msd/tasteprofile/mismatches

ls: `wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/msd/tasteprofile/mismatches': No such file or directory


In [81]:
# Total size of the whole dataset directory (-s gives a single summary, -h human-readable units)
!hdfs dfs -du -s -h {directory_path}

12.9 G  12.9 G  wasbs://campus-data@madsstorage002.blob.core.windows.net/msd


In [82]:
# Size of each file under main/
!hdfs dfs -du -h {directory_path}/main/

55.9 M   55.9 M   wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/main/analysis.csv.gz
118.5 M  118.5 M  wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/main/metadata.csv.gz


In [83]:
# Size of each file under genre/
!hdfs dfs -du -h {directory_path}/genre

8.4 M   8.4 M   wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/genre/msd-MASD-styleAssignment.tsv
10.6 M  10.6 M  wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/genre/msd-topMAGD-genreAssignment.tsv
11.1 M  11.1 M  wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/genre/msd-MAGD-genreAssignment.tsv


In [84]:
# Size of each entry under audio/
!hdfs dfs -du -h {directory_path}/audio

103.0 K  103.0 K  wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/attributes
12.2 G   12.2 G   wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/audio/features


In [85]:
# Size of each entry under tasteprofile/
!hdfs dfs -du -h {directory_path}/tasteprofile

488.4 M  488.4 M  wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/tasteprofile/triplets.tsv
2.0 M    2.0 M    wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/tasteprofile/mismatches


In [86]:
# Size of each file under tasteprofile/mismatches/
!hdfs dfs -du -h {directory_path}/tasteprofile/mismatches

1.9 M   1.9 M   wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/tasteprofile/mismatches/sid_mismatches.txt
89.2 K  89.2 K  wasbs://campus-data@madsstorage002.blob.core.windows.net/msd/tasteprofile/mismatches/sid_matches_manually_accepted.txt


In [87]:
# Load main/metadata.csv.gz (csv with a header row), letting Spark infer the column types
metadata = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv(f'{directory_path}/main/metadata.csv.gz')
)

                                                                                

In [88]:
# Show the column names and inferred types of metadata
metadata.printSchema()

root
 |-- analyzer_version: string (nullable = true)
 |-- artist_7digitalid: integer (nullable = true)
 |-- artist_familiarity: double (nullable = true)
 |-- artist_hotttnesss: double (nullable = true)
 |-- artist_id: string (nullable = true)
 |-- artist_latitude: double (nullable = true)
 |-- artist_location: string (nullable = true)
 |-- artist_longitude: string (nullable = true)
 |-- artist_mbid: string (nullable = true)
 |-- artist_name: string (nullable = true)
 |-- artist_playmeid: string (nullable = true)
 |-- genre: string (nullable = true)
 |-- idx_artist_terms: integer (nullable = true)
 |-- idx_similar_artists: integer (nullable = true)
 |-- release: string (nullable = true)
 |-- release_7digitalid: integer (nullable = true)
 |-- song_hotttnesss: string (nullable = true)
 |-- song_id: string (nullable = true)
 |-- title: string (nullable = true)
 |-- track_7digitalid: string (nullable = true)



In [89]:
# Count the rows in metadata
metadata.count()

                                                                                

1000000

In [90]:
# Load main/analysis.csv.gz (csv with a header row), letting Spark infer the column types
analysis = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv(f'{directory_path}/main/analysis.csv.gz')
)

                                                                                

In [91]:
# Show the column names and inferred types of analysis
analysis.printSchema()

root
 |-- analysis_sample_rate: integer (nullable = true)
 |-- audio_md5: string (nullable = true)
 |-- danceability: double (nullable = true)
 |-- duration: double (nullable = true)
 |-- end_of_fade_in: double (nullable = true)
 |-- energy: double (nullable = true)
 |-- idx_bars_confidence: integer (nullable = true)
 |-- idx_bars_start: integer (nullable = true)
 |-- idx_beats_confidence: integer (nullable = true)
 |-- idx_beats_start: integer (nullable = true)
 |-- idx_sections_confidence: integer (nullable = true)
 |-- idx_sections_start: integer (nullable = true)
 |-- idx_segments_confidence: integer (nullable = true)
 |-- idx_segments_loudness_max: integer (nullable = true)
 |-- idx_segments_loudness_max_time: integer (nullable = true)
 |-- idx_segments_loudness_start: integer (nullable = true)
 |-- idx_segments_pitches: integer (nullable = true)
 |-- idx_segments_start: integer (nullable = true)
 |-- idx_segments_timbre: integer (nullable = true)
 |-- idx_tatums_confidence: integ

In [92]:
# Count the rows in analysis
analysis.count()

                                                                                

1000000

In [93]:
# Load the MAGD genre assignments (tab-separated, read without a header row)
magd = (
    spark.read
    .option("sep", "\t")
    .option("inferSchema", True)
    .csv(f'{directory_path}/genre/msd-MAGD-genreAssignment.tsv')
)

In [126]:
# Preview the first 20 MAGD rows without truncating cell values
magd.show(n=20, truncate=False)

+------------------+--------------+
|_c0               |_c1           |
+------------------+--------------+
|TRAAAAK128F9318786|Pop_Rock      |
|TRAAAAV128F421A322|Pop_Rock      |
|TRAAAAW128F429D538|Rap           |
|TRAAABD128F429CF47|Pop_Rock      |
|TRAAACV128F423E09E|Pop_Rock      |
|TRAAADT12903CCC339|Easy_Listening|
|TRAAAED128E0783FAB|Vocal         |
|TRAAAEF128F4273421|Pop_Rock      |
|TRAAAEM128F93347B9|Electronic    |
|TRAAAFD128F92F423A|Pop_Rock      |
|TRAAAFP128F931B4E3|Rap           |
|TRAAAGR128F425B14B|Pop_Rock      |
|TRAAAGW12903CC1049|Blues         |
|TRAAAHD128F42635A5|Pop_Rock      |
|TRAAAHE12903C9669C|Pop_Rock      |
|TRAAAHJ128F931194C|Pop_Rock      |
|TRAAAHZ128E0799171|Rap           |
|TRAAAIR128F1480971|RnB           |
|TRAAAJG128F9308A25|Folk          |
|TRAAAMO128F1481E7F|Religious     |
+------------------+--------------+
only showing top 20 rows



In [119]:
# Show the column names and inferred types of magd
magd.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)



In [94]:
# Count the rows in magd
magd.count()

422714

In [95]:
# Load the MASD style assignments (tab-separated, read without a header row)
masd = (
    spark.read
    .option("sep", "\t")
    .option("inferSchema", True)
    .csv(f'{directory_path}/genre/msd-MASD-styleAssignment.tsv')
)

In [120]:
# Show the column names and inferred types of masd
masd.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)



In [96]:
# Count the rows in masd
masd.count()

273936

In [97]:
# Load the top-MAGD genre assignments (tab-separated, read without a header row)
topMagd = (
    spark.read
    .option("sep", "\t")
    .option("inferSchema", True)
    .csv(f'{directory_path}/genre/msd-topMAGD-genreAssignment.tsv')
)

In [121]:
# Show the column names and inferred types of topMagd
topMagd.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)



In [98]:
# Count the rows in topMagd
topMagd.count()

406427

In [99]:
# Load the csv files under audio/attributes/ into a single dataframe (read without a header row)
attributes = (
    spark.read
    .option("inferSchema", True)
    .csv(f'{directory_path}/audio/attributes/')
)

In [123]:
# Show the column names and inferred types of attributes
attributes.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)



In [130]:
# Preview the first 20 attribute rows without truncating cell values
attributes.show(n=20, truncate=False)

+------------+-------+
|_c0         |_c1    |
+------------+-------+
|component_1 |NUMERIC|
|component_2 |NUMERIC|
|component_3 |NUMERIC|
|component_4 |NUMERIC|
|component_5 |NUMERIC|
|component_6 |NUMERIC|
|component_7 |NUMERIC|
|component_8 |NUMERIC|
|component_9 |NUMERIC|
|component_10|NUMERIC|
|component_11|NUMERIC|
|component_12|NUMERIC|
|component_13|NUMERIC|
|component_14|NUMERIC|
|component_15|NUMERIC|
|component_16|NUMERIC|
|component_17|NUMERIC|
|component_18|NUMERIC|
|component_19|NUMERIC|
|component_20|NUMERIC|
+------------+-------+
only showing top 20 rows



In [100]:
# Count the rows in attributes (all attribute files combined)
attributes.count()

3929

In [None]:
# Load the taste profile triplets (tab-separated, read without a header row)
triplets = (
    spark.read
    .option("sep", "\t")
    .option("inferSchema", True)
    .csv(f'{directory_path}/tasteprofile/triplets.tsv')
)

In [127]:
# Preview the first 20 triplets without truncating cell values
triplets.show(n=20, truncate=False)

+----------------------------------------+------------------+---+
|_c0                                     |_c1               |_c2|
+----------------------------------------+------------------+---+
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOQEFDN12AB017C52B|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOQOIUJ12A6701DAA7|2  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOQOKKD12A6701F92E|4  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOSDVHO12AB01882C7|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOSKICX12A6701F932|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOSNUPV12A8C13939B|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOSVMII12A6701F92D|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOTUNHI12B0B80AFE2|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOTXLTZ12AB017C535|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOTZDDX12A6701F935|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOTZTVF12A58A79B9F|1  |
|f1bfc2a4597a3642f232e7a4e5d5ab2a99cf80e5|SOUGTZZ12A8C13B8CC|1  |
|f1bfc2a45

In [124]:
# Show the column names and inferred types of triplets
triplets.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: integer (nullable = true)



In [None]:
# Count the rows in triplets
triplets.count()

In [None]:
# Read sid_mismatches.txt as plain text (the lines are not parsed into columns here)
mismatches = spark.read.text(f'{directory_path}/tasteprofile/mismatches/sid_mismatches.txt')

In [None]:
# Count the rows in mismatches
mismatches.count()

In [None]:
# Read sid_matches_manually_accepted.txt as plain text (the lines are not parsed into columns here)
manually_accepted = spark.read.text(f'{directory_path}/tasteprofile/mismatches/sid_matches_manually_accepted.txt')

In [68]:
# Count the rows in manually_accepted
manually_accepted.count()

938

In [115]:
# Load the msd-marsyas-timbral-v1.0 audio features (read without a header row)
features = (
    spark.read
    .option("inferSchema", True)
    .csv(f'{directory_path}/audio/features/msd-marsyas-timbral-v1.0.csv')
)

                                                                                

In [116]:
# Preview the first 20 feature rows without truncating cell values
features.show(n=20, truncate=False)

+--------+--------+--------+--------+----------+--------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+---------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+---------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+-

In [117]:
# Count the rows in features
features.count()

                                                                                

995001

In [118]:
# Show the column names and inferred types of features
features.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: double (nullable = true)
 |-- _c2: double (nullable = true)
 |-- _c3: double (nullable = true)
 |-- _c4: double (nullable = true)
 |-- _c5: double (nullable = true)
 |-- _c6: double (nullable = true)
 |-- _c7: double (nullable = true)
 |-- _c8: double (nullable = true)
 |-- _c9: double (nullable = true)
 |-- _c10: double (nullable = true)
 |-- _c11: double (nullable = true)
 |-- _c12: double (nullable = true)
 |-- _c13: double (nullable = true)
 |-- _c14: double (nullable = true)
 |-- _c15: double (nullable = true)
 |-- _c16: double (nullable = true)
 |-- _c17: double (nullable = true)
 |-- _c18: double (nullable = true)
 |-- _c19: double (nullable = true)
 |-- _c20: double (nullable = true)
 |-- _c21: double (nullable = true)
 |-- _c22: double (nullable = true)
 |-- _c23: double (nullable = true)
 |-- _c24: double (nullable = true)
 |-- _c25: double (nullable = true)
 |-- _c26: double (nullable = true)
 |-- _c27: double (nullable = tru

In [131]:
# Stop the spark session and remove the spark / sc globals before closing the notebook
stop_spark()

25/10/05 14:00:30 WARN ExecutorPodsWatchSnapshotSource: Kubernetes client has been closed.
