In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, isnan, when, isnull, sum as spark_sum, countDistinct
from pyspark.sql.types import NumericType, TimestampType, DateType
from pyspark.sql.functions import col
from pyspark.sql.functions import col, min, max, count, lag, datediff
from pyspark.sql.window import Window
import pyspark.sql.functions as F

In [3]:
# Initialize Spark Session
spark = SparkSession.builder.appName("Predictive Maintenance System").getOrCreate()

# Motor maintenance


In [4]:
from pyspark.shell import spark

# Load data from CSV
df = spark.read.csv(r"C:\Users\Son Phan\Scalable\Predictive-Maintenance-System-using-Apache-Spark\Maintenance recommendation\dataset_maintenance_recommendation.csv", header=True, inferSchema=True)

# 1. Phương pháp 1: Tính ngưỡng dựa trên Phân phối Thống kê
stats_df = df.select(
    F.mean("vibration").alias("mean_vibration"),
    F.stddev("vibration").alias("std_vibration"),
    F.mean("noise_level").alias("mean_noise"),
    F.stddev("noise_level").alias("std_noise"),
    F.mean("temperature").alias("mean_temperature"),
    F.stddev("temperature").alias("std_temperature"),
    F.mean("current").alias("mean_current"),
    F.stddev("current").alias("std_current"),
    F.mean("power_output").alias("mean_power_output"),
    F.stddev("power_output").alias("std_power_output")
).collect()[0]

# Xác định ngưỡng cho mỗi chỉ số bằng cách sử dụng 3 lần độ lệch chuẩn
VIBRATION_THRESHOLD_STAT = stats_df['mean_vibration'] + 3 * stats_df['std_vibration']
NOISE_THRESHOLD_STAT = stats_df['mean_noise'] + 3 * stats_df['std_noise']
TEMPERATURE_THRESHOLD_STAT = stats_df['mean_temperature'] + 3 * stats_df['std_temperature']
CURRENT_THRESHOLD_STAT = stats_df['mean_current'] + 3 * stats_df['std_current']
POWER_OUTPUT_THRESHOLD_STAT = stats_df['mean_power_output'] - 3 * stats_df['std_power_output']

# 2. Phương pháp 2: Tính ngưỡng dựa trên Dữ liệu Bảo Trì Lịch Sử (Motor Failure)
failure_stats_df = df.filter(F.col("parts_replaced") == "Motor").select(
    F.mean("vibration").alias("motor_failure_vibration"),
    F.mean("noise_level").alias("motor_failure_noise"),
    F.mean("temperature").alias("motor_failure_temperature"),
    F.mean("current").alias("motor_failure_current"),
    F.mean("power_output").alias("motor_failure_power_output")
).collect()[0]

VIBRATION_THRESHOLD_HIST = failure_stats_df['motor_failure_vibration']
NOISE_THRESHOLD_HIST = failure_stats_df['motor_failure_noise']
TEMPERATURE_THRESHOLD_HIST = failure_stats_df['motor_failure_temperature']
CURRENT_THRESHOLD_HIST = failure_stats_df['motor_failure_current']
POWER_OUTPUT_THRESHOLD_HIST = failure_stats_df['motor_failure_power_output']

# 3. Chọn ngưỡng cuối cùng cho mỗi chỉ số bằng cách lấy giá trị nhỏ hơn từ hai phương pháp
VIBRATION_THRESHOLD = VIBRATION_THRESHOLD_HIST if VIBRATION_THRESHOLD_HIST < VIBRATION_THRESHOLD_STAT else VIBRATION_THRESHOLD_STAT
NOISE_THRESHOLD = NOISE_THRESHOLD_HIST if NOISE_THRESHOLD_HIST < NOISE_THRESHOLD_STAT else NOISE_THRESHOLD_STAT
TEMPERATURE_THRESHOLD = TEMPERATURE_THRESHOLD_HIST if TEMPERATURE_THRESHOLD_HIST < TEMPERATURE_THRESHOLD_STAT else TEMPERATURE_THRESHOLD_STAT
CURRENT_THRESHOLD = CURRENT_THRESHOLD_HIST if CURRENT_THRESHOLD_HIST < CURRENT_THRESHOLD_STAT else CURRENT_THRESHOLD_STAT
POWER_OUTPUT_THRESHOLD = POWER_OUTPUT_THRESHOLD_HIST if POWER_OUTPUT_THRESHOLD_HIST < POWER_OUTPUT_THRESHOLD_STAT else POWER_OUTPUT_THRESHOLD_STAT

# 4. Xác định Motor Failure cho các thiết bị có `maintenance_needed == "Maintenance required"`
df = df.withColumn(
    "maintenance_item",
    F.when(
        (F.col("maintenance_needed") == "Maintenance required") &
        #(F.col("maintenance_type").isin("Repair", "Replacement")) &
        (F.col("parts_replaced") == "Motor") &
        (
            (F.col("vibration") > VIBRATION_THRESHOLD) |
            (F.col("noise_level") > NOISE_THRESHOLD) |
            (F.col("temperature") > TEMPERATURE_THRESHOLD) |
            (F.col("current") > CURRENT_THRESHOLD) |
            (F.col("power_output") < POWER_OUTPUT_THRESHOLD)
        ),
        "Motor"  # Set 'Motor' for rows meeting the condition
    )
)

# Display rows to confirm the new 'maintenance_item' column
# Filter and display rows where maintenance_needed is "Maintenance required"
df.filter(F.col("maintenance_needed") == "Maintenance required") \
  .select('equipment_id', 'vibration', 'noise_level', 'temperature', 'current', 'power_output', 'parts_replaced', 'maintenance_type', 'maintenance_needed', 'maintenance_item') \
  .show()

Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /__ / .__/\_,_/_/ /_/\_\   version 3.5.3
      /_/

Using Python version 3.9.0 (tags/v3.9.0:9cf6752, Oct  5 2020 15:34:40)
Spark context Web UI available at http://192.168.1.11:4040
Spark context available as 'sc' (master = local[*], app id = local-1748074066816).
SparkSession available as 'spark'.
+------------+----------+-----------+-----------+----------+------------+--------------+----------------+--------------------+----------------+
|equipment_id| vibration|noise_level|temperature|   current|power_output|parts_replaced|maintenance_type|  maintenance_needed|maintenance_item|
+------------+----------+-----------+-----------+----------+------------+--------------+----------------+--------------------+----------------+
|           1|0.38600662|   69.76519|   62.29391| 105.57472|    470.0774|         Seals|         Routine|Maintenance required|            NULL|
|           1|  0.528641|

# Bearing Maintenance

In [5]:
stats_df = df.select(
    F.mean("vibration").alias("mean_vibration"),
    F.stddev("vibration").alias("std_vibration"),
    F.mean("noise_level").alias("mean_noise"),
    F.stddev("noise_level").alias("std_noise"),
    F.mean("temperature").alias("mean_temperature"),
    F.stddev("temperature").alias("std_temperature"),
    F.mean("rotational_speed").alias("mean_rotational_speed"),
    F.stddev("rotational_speed").alias("std_rotational_speed")
).collect()[0]

VIBRATION_THRESHOLD_STAT = stats_df['mean_vibration'] + 3 * stats_df['std_vibration']
NOISE_THRESHOLD_STAT = stats_df['mean_noise'] + 3 * stats_df['std_noise']
TEMPERATURE_THRESHOLD_STAT = stats_df['mean_temperature'] + 3 * stats_df['std_temperature']
ROTATIONAL_SPEED_THRESHOLD_STAT = stats_df['mean_rotational_speed'] - 3 * stats_df['std_rotational_speed']

# 2. Historical Thresholds
failure_stats_df = df.filter(F.col("parts_replaced") == "Bearings").select(
    F.mean("vibration").alias("bearing_failure_vibration"),
    F.mean("noise_level").alias("bearing_failure_noise"),
    F.mean("temperature").alias("bearing_failure_temperature"),
    F.mean("rotational_speed").alias("bearing_failure_rotational_speed")
).collect()[0]

VIBRATION_THRESHOLD_HIST = failure_stats_df['bearing_failure_vibration']
NOISE_THRESHOLD_HIST = failure_stats_df['bearing_failure_noise']
TEMPERATURE_THRESHOLD_HIST = failure_stats_df['bearing_failure_temperature']
ROTATIONAL_SPEED_THRESHOLD_HIST = failure_stats_df['bearing_failure_rotational_speed']

# 3. Final Thresholds (Choose the lower of the two)
VIBRATION_THRESHOLD = VIBRATION_THRESHOLD_HIST if VIBRATION_THRESHOLD_HIST < VIBRATION_THRESHOLD_STAT else VIBRATION_THRESHOLD_STAT
NOISE_THRESHOLD = NOISE_THRESHOLD_HIST if NOISE_THRESHOLD_HIST < NOISE_THRESHOLD_STAT else NOISE_THRESHOLD_STAT
TEMPERATURE_THRESHOLD = TEMPERATURE_THRESHOLD_HIST if TEMPERATURE_THRESHOLD_HIST < TEMPERATURE_THRESHOLD_STAT else TEMPERATURE_THRESHOLD_STAT
ROTATIONAL_SPEED_THRESHOLD = ROTATIONAL_SPEED_THRESHOLD_HIST if ROTATIONAL_SPEED_THRESHOLD_HIST < ROTATIONAL_SPEED_THRESHOLD_STAT else ROTATIONAL_SPEED_THRESHOLD_STAT

# 4. Add Maintenance Types for Bearing Failures
df = df.withColumn(
    "maintenance_item",
    F.when(
        (F.col("maintenance_item").isNull()) &  
        (F.col("maintenance_needed") == "Maintenance required") &
        #(F.col("maintenance_type").isin("Repair", "Replacement")) &
        (F.col("parts_replaced") == "Bearings") &
        (
            (F.col("vibration") > VIBRATION_THRESHOLD) |
            (F.col("noise_level") > NOISE_THRESHOLD) |
            (F.col("temperature") > TEMPERATURE_THRESHOLD) |
            (F.col("rotational_speed") < ROTATIONAL_SPEED_THRESHOLD)
        ),
        "Bearings"  # Set 'Bearing' for matching rows
    ).otherwise(F.col("maintenance_item"))  # Retain existing values
)

# Display rows to verify
df.filter(F.col("maintenance_item") == "Bearings").select(
    'equipment_id', 'maintenance_item', 'vibration', 'noise_level', 'temperature', 'rotational_speed',
    'parts_replaced', 'maintenance_type', 'maintenance_needed'
).show()

+------------+----------------+----------+-----------+-----------+----------------+--------------+----------------+--------------------+
|equipment_id|maintenance_item| vibration|noise_level|temperature|rotational_speed|parts_replaced|maintenance_type|  maintenance_needed|
+------------+----------------+----------+-----------+-----------+----------------+--------------+----------------+--------------------+
|          10|        Bearings|0.52938277|   71.73415|   74.98893|       1001.4659|      Bearings|      Inspection|Maintenance required|
|          10|        Bearings|0.60423136|   75.11588|   74.98735|       997.34814|      Bearings|      Inspection|Maintenance required|
|          10|        Bearings| 0.3501465|   73.20602|     86.713|        972.7553|      Bearings|      Inspection|Maintenance required|
|          10|        Bearings| 0.5525364|   66.93875|   75.83093|        995.6734|      Bearings|      Inspection|Maintenance required|
|          10|        Bearings| 0.6039635

# Seals Maintenance


In [6]:
# 1. Statistical Thresholds
stats_df = df.select(
    F.mean("pressure").alias("mean_pressure"),
    F.stddev("pressure").alias("std_pressure"),
    F.mean("oil_viscosity").alias("mean_oil_viscosity"),
    F.stddev("oil_viscosity").alias("std_oil_viscosity"),
    F.mean("temperature").alias("mean_temperature"),
    F.stddev("temperature").alias("std_temperature")
).collect()[0]

PRESSURE_THRESHOLD_STAT = stats_df['mean_pressure'] - 3 * stats_df['std_pressure']
OIL_VISCOSITY_THRESHOLD_STAT = stats_df['mean_oil_viscosity'] - 3 * stats_df['std_oil_viscosity']
TEMPERATURE_THRESHOLD_STAT = stats_df['mean_temperature'] + 3 * stats_df['std_temperature']

# 2. Historical Thresholds
failure_stats_df = df.filter(F.col("parts_replaced") == "Seals").select(
    F.mean("pressure").alias("seal_failure_pressure"),
    F.mean("oil_viscosity").alias("seal_failure_oil_viscosity"),
    F.mean("temperature").alias("seal_failure_temperature")
).collect()[0]

PRESSURE_THRESHOLD_HIST = failure_stats_df['seal_failure_pressure']
OIL_VISCOSITY_THRESHOLD_HIST = failure_stats_df['seal_failure_oil_viscosity']
TEMPERATURE_THRESHOLD_HIST = failure_stats_df['seal_failure_temperature']

# 3. Final Thresholds
PRESSURE_THRESHOLD = PRESSURE_THRESHOLD_HIST if PRESSURE_THRESHOLD_HIST < PRESSURE_THRESHOLD_STAT else PRESSURE_THRESHOLD_STAT
OIL_VISCOSITY_THRESHOLD = OIL_VISCOSITY_THRESHOLD_HIST if OIL_VISCOSITY_THRESHOLD_HIST < OIL_VISCOSITY_THRESHOLD_STAT else OIL_VISCOSITY_THRESHOLD_STAT
TEMPERATURE_THRESHOLD = TEMPERATURE_THRESHOLD_HIST if TEMPERATURE_THRESHOLD_HIST < TEMPERATURE_THRESHOLD_STAT else TEMPERATURE_THRESHOLD_STAT

# 4. Add Maintenance Types for Seal Failures
df = df.withColumn(
    "maintenance_item",
    F.when(
        (F.col("maintenance_item").isNull()) & 
        (F.col("maintenance_needed") == "Maintenance required") &
        #(F.col("maintenance_type").isin("Repair", "Replacement")) &
        (F.col("parts_replaced") == "Seals") &
        (
            (F.col("pressure") < PRESSURE_THRESHOLD) |
            (F.col("oil_viscosity") < OIL_VISCOSITY_THRESHOLD) |
            (F.col("temperature") > TEMPERATURE_THRESHOLD)
        ),
        "Seals"  # Set 'Seal' for matching rows
    ).otherwise(F.col("maintenance_item"))  # Retain existing values
)

# Display rows to verify
df.filter(F.col("maintenance_item") == "Seals").select(
    'equipment_id', 'maintenance_item', 'pressure', 'oil_viscosity', 'temperature',
    'parts_replaced', 'maintenance_type', 'maintenance_needed'
).show()

+------------+----------------+----------+-------------+-----------+--------------+----------------+--------------------+
|equipment_id|maintenance_item|  pressure|oil_viscosity|temperature|parts_replaced|maintenance_type|  maintenance_needed|
+------------+----------------+----------+-------------+-----------+--------------+----------------+--------------------+
|           1|           Seals| 102.67639|     44.44392|   62.29391|         Seals|         Routine|Maintenance required|
|           1|           Seals|  96.23182|     52.77085|   79.68224|         Seals|         Routine|Maintenance required|
|           1|           Seals|  87.56094|    52.151855|   77.06982|         Seals|         Routine|Maintenance required|
|           1|           Seals| 78.314735|      54.0121|  62.224873|         Seals|         Routine|Maintenance required|
|           1|           Seals|  90.48011|    53.516655|  61.853565|         Seals|         Routine|Maintenance required|
|           1|          

# Filter maintenance

In [7]:
# 1. Statistical Thresholds
stats_df = df.select(
    F.mean("pressure").alias("mean_pressure"),
    F.stddev("pressure").alias("std_pressure"),
    F.mean("oil_viscosity").alias("mean_oil_viscosity"),
    F.stddev("oil_viscosity").alias("std_oil_viscosity"),
    F.mean("temperature").alias("mean_temperature"),
    F.stddev("temperature").alias("std_temperature")
).collect()[0]

PRESSURE_THRESHOLD_STAT = stats_df['mean_pressure'] - 3 * stats_df['std_pressure']
OIL_VISCOSITY_THRESHOLD_STAT = stats_df['mean_oil_viscosity'] - 3 * stats_df['std_oil_viscosity']
TEMPERATURE_THRESHOLD_STAT = stats_df['mean_temperature'] + 3 * stats_df['std_temperature']

# 2. Historical Thresholds
failure_stats_df = df.filter(F.col("parts_replaced") == "Seals").select(
    F.mean("pressure").alias("seal_failure_pressure"),
    F.mean("oil_viscosity").alias("seal_failure_oil_viscosity"),
    F.mean("temperature").alias("seal_failure_temperature")
).collect()[0]

PRESSURE_THRESHOLD_HIST = failure_stats_df['seal_failure_pressure']
OIL_VISCOSITY_THRESHOLD_HIST = failure_stats_df['seal_failure_oil_viscosity']
TEMPERATURE_THRESHOLD_HIST = failure_stats_df['seal_failure_temperature']

# 3. Final Thresholds
PRESSURE_THRESHOLD = PRESSURE_THRESHOLD_HIST if PRESSURE_THRESHOLD_HIST < PRESSURE_THRESHOLD_STAT else PRESSURE_THRESHOLD_STAT
OIL_VISCOSITY_THRESHOLD = OIL_VISCOSITY_THRESHOLD_HIST if OIL_VISCOSITY_THRESHOLD_HIST < OIL_VISCOSITY_THRESHOLD_STAT else OIL_VISCOSITY_THRESHOLD_STAT
TEMPERATURE_THRESHOLD = TEMPERATURE_THRESHOLD_HIST if TEMPERATURE_THRESHOLD_HIST < TEMPERATURE_THRESHOLD_STAT else TEMPERATURE_THRESHOLD_STAT

# 4. Add Maintenance Types for Seal Failures
df = df.withColumn(
    "maintenance_item",
    F.when(
        (F.col("maintenance_item").isNull()) & 
        (F.col("maintenance_needed") == "Maintenance required") &
        #(F.col("maintenance_type").isin("Repair", "Replacement")) &
        (F.col("parts_replaced") == "Filters") &
        (
            (F.col("pressure") < PRESSURE_THRESHOLD) |
            (F.col("oil_viscosity") < OIL_VISCOSITY_THRESHOLD) |
            (F.col("temperature") > TEMPERATURE_THRESHOLD)
        ),
        "Filters"  # Set 'Filters' for matching rows
    ).otherwise(F.col("maintenance_item"))  # Retain existing values
)

# Display rows to verify
df.filter(F.col("maintenance_item") == "Filters").select(
    'equipment_id', 'maintenance_item', 'pressure', 'oil_viscosity', 'temperature',
    'parts_replaced', 'maintenance_type', 'maintenance_needed'
).show()

+------------+----------------+----------+-------------+-----------+--------------+----------------+--------------------+
|equipment_id|maintenance_item|  pressure|oil_viscosity|temperature|parts_replaced|maintenance_type|  maintenance_needed|
+------------+----------------+----------+-------------+-----------+--------------+----------------+--------------------+
|           2|         Filters|  91.56752|      55.1587|  62.315475|       Filters|         Routine|Maintenance required|
|           2|         Filters|  76.90515|    51.656483|   67.92446|       Filters|         Routine|Maintenance required|
|           2|         Filters|112.165726|     55.58629|   65.73516|       Filters|         Routine|Maintenance required|
|           2|         Filters|  98.81363|    55.919113|  62.273525|       Filters|         Routine|Maintenance required|
|           2|         Filters| 126.53312|    62.594173|   71.29914|       Filters|         Routine|Maintenance required|
|           2|         F

# Coupling

In [8]:
# Update only the rows where maintenance_item is null and maintenance_needed == "Maintenance required"
df = df.withColumn(
    "maintenance_item",
    F.when(
        (F.col("maintenance_item").isNull()) & 
        (F.col("maintenance_needed") == "Maintenance required"),
        "Coupling"
    ).otherwise(F.col("maintenance_item"))  # Retain existing values
)

# Final result summary

In [9]:
# Calculate the total count of rows
total_count = df.filter(F.col("maintenance_needed") == "Maintenance required").count()

# Calculate the count and percentage for each maintenance type
maintenance_type_percentage = (
    df.filter(F.col("maintenance_needed") == "Maintenance required")
    .groupBy("maintenance_item")
    .agg(F.count("*").alias("count"))
    .withColumn("percentage", (F.col("count") / total_count) * 100)
)

# Show the results
maintenance_type_percentage.show()

+----------------+-----+------------------+
|maintenance_item|count|        percentage|
+----------------+-----+------------------+
|           Motor| 1116| 8.766692851531815|
|           Seals| 1346|10.573448546739984|
|        Coupling| 8675| 68.14611154752554|
|         Filters|  442| 3.472113118617439|
|        Bearings| 1151| 9.041633935585232|
+----------------+-----+------------------+



In [10]:
# Filter the dataset for rows where maintenance_needed == "Maintenance required"
filtered_df = df.filter(F.col("maintenance_needed") == "Maintenance required")

# Display the filtered dataset using show()
print("Filtered Dataset (first 20 rows):")
filtered_df.show(20, truncate=False)

# Convert to pandas for better display in notebook
print("\nFiltered Dataset (pandas display):")
filtered_pandas_df = filtered_df.toPandas()
display(filtered_pandas_df)

# Save the filtered dataset to a new CSV file
output_path = r"C:\Users\Son Phan\Scalable\Predictive-Maintenance-System-using-Apache-Spark\Predictive-Maintenance-System-using-Apache-Spark\maintenance_data.csv"

# Save the filtered dataset locally with overwrite mode
filtered_df.coalesce(1).write.option("header", True).csv(output_path, mode="overwrite")

print(f"\nFiltered dataset saved successfully to: {output_path}")

Py4JJavaError: An error occurred while calling o405.csv.
: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
	at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
	at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:793)
	at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1249)
	at org.apache.hadoop.fs.FileUtil.list(FileUtil.java:1454)
	at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:601)
	at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
	at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
	at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:761)
	at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
	at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
	at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.getAllCommittedTaskPaths(FileOutputCommitter.java:334)
	at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJobInternal(FileOutputCommitter.java:404)
	at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitJob(FileOutputCommitter.java:377)
	at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.commitJob(HadoopMapReduceCommitProtocol.scala:192)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$writeAndCommit$3(FileFormatWriter.scala:275)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.Utils$.timeTakenMs(Utils.scala:552)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.writeAndCommit(FileFormatWriter.scala:275)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeWrite(FileFormatWriter.scala:304)
	at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:190)
	at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:190)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111)
	at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:107)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:76)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:461)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:437)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:98)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:85)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:83)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:142)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:869)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:391)
	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:364)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:243)
	at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:860)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.base/java.lang.Thread.run(Thread.java:834)
