In [29]:
#import pandas as pd
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_timestamp, date_format, round, when, expr
#from google.colab import drive
from datetime import datetime
#drive.mount('/content/drive')

spark=SparkSession.builder.appName('Propwise').config("spark.jars", r"C:\Users\Asad\Documents\Propwise Challenge\postgresql-42.7.6.jar").getOrCreate()

#file_path = '/content/drive/MyDrive/Propwise Challenge/transactions-2025-06-07.csv'
file_path = r"C:\Users\Asad\Documents\Propwise Challenge\transactions-2025-06-07.csv"
df = spark.read.csv(file_path, header=True, inferSchema=True)

#df.printSchema()

"""Cleaning and transforming data"""

def transform(df):

  ### Remove null and duplicate records
  cleaned_df = df.na.drop()
  cleaned_df = cleaned_df.dropDuplicates()
  #df = df.dropna()
  #df = df.drop_duplicates()

  ### Standardize column names
  for oldCol in cleaned_df.columns:
    cleaned_df = cleaned_df.withColumnRenamed(oldCol, oldCol.lower().replace(' ','_'))
  #df.columns = [x.lower().replace(' ','_') for x in df.columns]

  ### Parse dates

  # Convert instance_date column to type timestamp
  cleaned_df = cleaned_df.withColumn('instance_date_timestamp', to_timestamp(col('instance_date'), 'yyyy-MM-dd HH:mm:ss'))

  # Place the date and time in their own new columns
  cleaned_df = cleaned_df.withColumn('date_col', date_format(col('instance_date_timestamp'), 'yyyy-MM-dd'))
  cleaned_df = cleaned_df.withColumn('time_col', date_format(col('instance_date_timestamp'), 'HH:mm:ss'))

  # Drop the instance_date column
  #cleaned_df = cleaned_df.drop('instance_date')

  #df = df.withColumn('date', to_timestamp(col('date'), 'MM/dd/yyyy'))
  #if df.columns.str.contains('date', case= False).any():
   # df.columns = pd.to_datetime(df.columns)

  ### Parse numeric columns
  for column in ["trans_value", "procedure_area", "actual_area"]:
    cleaned_df = cleaned_df.withColumn(column, round(col(column).cast('double'),2))

  ### New columns (price_per_sqm, price_per_room, budget_tier)
  cleaned_df = cleaned_df.withColumn('price_per_sqm', round(col('trans_value')/col('procedure_area'),2))
  #cleaned_df = cleaned_df.withColumn('property_age', 2025 - col('date_col'))
  cleaned_df = cleaned_df.withColumn('price_per_room', round(col("trans_value") / when(col("rooms_en") == "Studio", 1).otherwise(expr("try_cast(substr(rooms_en, 1, 1) as int)")), 2))
  cleaned_df = cleaned_df.withColumn('budget_tier', when(col("trans_value")<=600000, "Low Budget").when((col("trans_value")>600000) & (col("trans_value")<=2500000), "Medium Budget").otherwise("High Budget"))



  return cleaned_df




In [30]:
cleaned_df = transform(df)
cleaned_df.show()

+------------------+-------------------+--------+--------------------+-------------+---------------+-----------+--------------------+------------+---------------+-----------+--------------+-----------+--------+-------+--------------------+--------------------+--------------------+-----------+------------+--------------------+--------------------+-----------------------+----------+--------+-------------+--------------+-------------+
|transaction_number|      instance_date|group_en|        procedure_en|is_offplan_en|is_free_hold_en|   usage_en|             area_en|prop_type_en|prop_sb_type_en|trans_value|procedure_area|actual_area|rooms_en|parking|    nearest_metro_en|     nearest_mall_en| nearest_landmark_en|total_buyer|total_seller|   master_project_en|          project_en|instance_date_timestamp|  date_col|time_col|price_per_sqm|price_per_room|  budget_tier|
+------------------+-------------------+--------+--------------------+-------------+---------------+-----------+----------------

In [31]:
def load(cleaned_df, table_name):
  # PostgreSQL info
  postgresql_url = r"jdbc:postgresql://localhost:5432/UAE_Real_Estate"
  properties = {
      "user" : "postgres",
      "password" : "123",
      "driver" : "org.postgresql.Driver"
  }

  # Write DataFrame to PostgreSQL table
  cleaned_df.write.jdbc(url=postgresql_url, table=table_name, mode="overwrite", properties=properties)
  

cleaned_df = transform(df)
load(cleaned_df, "dubai_real_estate_transactions")



spark.stop()

In [32]:
output_csv_path = r"C:\Users\Asad\Documents\Propwise Challenge\cleaned_transactions_output"
cleaned_df.write.option("header", True).mode("overwrite").csv(output_csv_path)



Py4JJavaError: An error occurred while calling o2358.csv.
: java.lang.IllegalStateException: Cannot call methods on a stopped SparkContext.
This stopped SparkContext was created at:

org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:59)
java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:500)
java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:481)
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
py4j.Gateway.invoke(Gateway.java:238)
py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:184)
py4j.ClientServerConnection.run(ClientServerConnection.java:108)
java.base/java.lang.Thread.run(Thread.java:840)

And it was stopped at:

org.apache.spark.api.java.JavaSparkContext.stop(JavaSparkContext.scala:552)
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.base/java.lang.reflect.Method.invoke(Method.java:569)
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
py4j.Gateway.invoke(Gateway.java:282)
py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
py4j.commands.CallCommand.execute(CallCommand.java:79)
py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:184)
py4j.ClientServerConnection.run(ClientServerConnection.java:108)
java.base/java.lang.Thread.run(Thread.java:840)

The currently active SparkContext was created at:

(No active SparkContext.)
         
	at org.apache.spark.SparkContext.assertNotStopped(SparkContext.scala:128)
	at org.apache.spark.SparkContext.defaultParallelism(SparkContext.scala:2872)
	at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.$anonfun$apply$1(CoalesceShufflePartitions.scala:63)
	at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.scala:17)
	at scala.Option.getOrElse(Option.scala:201)
	at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.apply(CoalesceShufflePartitions.scala:60)
	at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.apply(CoalesceShufflePartitions.scala:34)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$optimizeQueryStage$2(AdaptiveSparkPlanExec.scala:174)
	at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:183)
	at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:179)
	at scala.collection.immutable.List.foldLeft(List.scala:79)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.optimizeQueryStage(AdaptiveSparkPlanExec.scala:173)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.newQueryStage(AdaptiveSparkPlanExec.scala:668)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:603)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
	at scala.collection.immutable.Vector1.map(Vector.scala:2141)
	at scala.collection.immutable.Vector1.map(Vector.scala:386)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
	at scala.collection.immutable.Vector1.map(Vector.scala:2141)
	at scala.collection.immutable.Vector1.map(Vector.scala:386)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
	at scala.collection.immutable.Vector1.map(Vector.scala:2141)
	at scala.collection.immutable.Vector1.map(Vector.scala:386)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
	at scala.collection.immutable.Vector1.map(Vector.scala:2141)
	at scala.collection.immutable.Vector1.map(Vector.scala:386)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
	at scala.collection.immutable.Vector1.map(Vector.scala:2141)
	at scala.collection.immutable.Vector1.map(Vector.scala:386)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:558)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$withFinalPlanUpdate$1(AdaptiveSparkPlanExec.scala:284)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.withFinalPlanUpdate(AdaptiveSparkPlanExec.scala:279)
	at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.executeCollect(AdaptiveSparkPlanExec.scala:402)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$2(QueryExecution.scala:155)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$8(SQLExecution.scala:162)
	at org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:268)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$7(SQLExecution.scala:124)
	at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
	at org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:112)
	at org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:106)
	at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:111)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:124)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:291)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:123)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:77)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:233)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:155)
	at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
	at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$eagerlyExecute$1(QueryExecution.scala:154)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:169)
	at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:164)
	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:470)
	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:86)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:470)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:360)
	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:356)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:446)
	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:164)
	at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyCommandExecuted$1(QueryExecution.scala:126)
	at scala.util.Try$.apply(Try.scala:217)
	at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
	at org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1439)
	at org.apache.spark.util.LazyTry.get(LazyTry.scala:58)
	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:131)
	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:192)
	at org.apache.spark.sql.classic.DataFrameWriter.runCommand(DataFrameWriter.scala:622)
	at org.apache.spark.sql.classic.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
	at org.apache.spark.sql.classic.DataFrameWriter.saveInternal(DataFrameWriter.scala:241)
	at org.apache.spark.sql.classic.DataFrameWriter.save(DataFrameWriter.scala:118)
	at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:426)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:569)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:184)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:108)
	at java.base/java.lang.Thread.run(Thread.java:840)
	Suppressed: org.apache.spark.util.Utils$OriginalTryStackTraceException: Full stacktrace of original doTryWithCallerStacktrace caller
		at org.apache.spark.SparkContext.assertNotStopped(SparkContext.scala:128)
		at org.apache.spark.SparkContext.defaultParallelism(SparkContext.scala:2872)
		at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.$anonfun$apply$1(CoalesceShufflePartitions.scala:63)
		at scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.scala:17)
		at scala.Option.getOrElse(Option.scala:201)
		at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.apply(CoalesceShufflePartitions.scala:60)
		at org.apache.spark.sql.execution.adaptive.CoalesceShufflePartitions.apply(CoalesceShufflePartitions.scala:34)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$optimizeQueryStage$2(AdaptiveSparkPlanExec.scala:174)
		at scala.collection.LinearSeqOps.foldLeft(LinearSeq.scala:183)
		at scala.collection.LinearSeqOps.foldLeft$(LinearSeq.scala:179)
		at scala.collection.immutable.List.foldLeft(List.scala:79)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.optimizeQueryStage(AdaptiveSparkPlanExec.scala:173)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.newQueryStage(AdaptiveSparkPlanExec.scala:668)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:603)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
		at scala.collection.immutable.Vector1.map(Vector.scala:2141)
		at scala.collection.immutable.Vector1.map(Vector.scala:386)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
		at scala.collection.immutable.Vector1.map(Vector.scala:2141)
		at scala.collection.immutable.Vector1.map(Vector.scala:386)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
		at scala.collection.immutable.Vector1.map(Vector.scala:2141)
		at scala.collection.immutable.Vector1.map(Vector.scala:386)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
		at scala.collection.immutable.Vector1.map(Vector.scala:2141)
		at scala.collection.immutable.Vector1.map(Vector.scala:386)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$createNonResultQueryStages$2(AdaptiveSparkPlanExec.scala:643)
		at scala.collection.immutable.Vector1.map(Vector.scala:2141)
		at scala.collection.immutable.Vector1.map(Vector.scala:386)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createNonResultQueryStages(AdaptiveSparkPlanExec.scala:643)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.createQueryStages(AdaptiveSparkPlanExec.scala:558)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$withFinalPlanUpdate$1(AdaptiveSparkPlanExec.scala:284)
		at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
		at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.withFinalPlanUpdate(AdaptiveSparkPlanExec.scala:279)
		at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.executeCollect(AdaptiveSparkPlanExec.scala:402)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$2(QueryExecution.scala:155)
		at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$8(SQLExecution.scala:162)
		at org.apache.spark.sql.execution.SQLExecution$.withSessionTagsApplied(SQLExecution.scala:268)
		at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$7(SQLExecution.scala:124)
		at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94)
		at org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:112)
		at org.apache.spark.sql.artifact.ArtifactManager.withClassLoaderIfNeeded(ArtifactManager.scala:106)
		at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:111)
		at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:124)
		at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:291)
		at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:123)
		at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:804)
		at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:77)
		at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:233)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:155)
		at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:654)
		at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$eagerlyExecute$1(QueryExecution.scala:154)
		at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:169)
		at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$3.applyOrElse(QueryExecution.scala:164)
		at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:470)
		at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(origin.scala:86)
		at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:470)
		at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:37)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:360)
		at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:356)
		at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
		at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:37)
		at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:446)
		at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:164)
		at org.apache.spark.sql.execution.QueryExecution.$anonfun$lazyCommandExecuted$1(QueryExecution.scala:126)
		at scala.util.Try$.apply(Try.scala:217)
		at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1378)
		at org.apache.spark.util.LazyTry.tryT$lzycompute(LazyTry.scala:46)
		at org.apache.spark.util.LazyTry.tryT(LazyTry.scala:46)
		... 20 more
