In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType
from pyspark.sql import Row

# Obtain the Spark session for this notebook (getOrCreate reuses a live one)
spark = SparkSession.builder.appName("Create Table").getOrCreate()

# Seed rows: the integers 1 through 5 in a single column named "n"
data = [Row(n=value) for value in range(1, 6)]
df = spark.createDataFrame(data)

# Register the seed rows under the temp-view name "numbers" so they can be
# queried with Spark SQL
df.createOrReplaceTempView("numbers")

# The additional value 9 is built as its own one-row DataFrame ...
new_data = [Row(n=9)]
new_df = spark.createDataFrame(new_data)

# ... and appended to the seed rows, giving the values 1-5 plus 9
combined_df = df.union(new_df)

In [0]:
# Re-register the temp view "numbers" so that it now points at the combined
# rows instead of the original seed rows
combined_df.createOrReplaceTempView("numbers")

# Query the refreshed view through Spark SQL and render the result
numbers_query = "SELECT * FROM numbers"
spark.sql(numbers_query).display()

n
1
2
3
4
5
9


In [0]:
from pyspark.sql.functions import col, explode_outer, least, lit, sequence

# Goal: repeat every value n from combined_df n times (1, 2, 2, 3, 3, 3, ...),
# pairing each copy with a countdown column num_count = n, n-1, ..., 1.
#
# This is written as a single projection instead of a driver-side loop that
# unions one more filtered copy of combined_df per integer up to max(n):
#   * max(n) is no longer collected to the driver, so an empty combined_df no
#     longer fails on `None + 1`;
#   * the query plan no longer grows with the magnitude of the values in n.

# Step 1: Describe the countdown n, n-1, ..., 1 for each row.
# least(n, 1) is the stop value: 1 when n >= 1, and n itself when n < 1, so a
# value below 1 yields the one-element sequence [n] and is kept exactly once.
# sequence() steps downwards on its own when start > stop.
countdown = sequence(col("n"), least(col("n"), lit(1)))

# Step 2: Emit one output row per countdown element.
# explode_outer (rather than explode) keeps a row whose n is NULL: its sequence
# is NULL, and the row is emitted once with a NULL num_count.
result_df = combined_df.select("n", explode_outer(countdown).alias("num_count"))

# Step 3: Select only the n column and order the results
final_df = result_df.select("n").orderBy("n")

# Show the result
final_df.display()

n
1
2
2
3
3
3
4
4
4
4
