In [0]:
from pyspark.sql.functions import *
from pyspark.sql.window import Window

from pyspark.sql.types import *

# Column layout for the demo frame: a stock identifier, a date kept as a
# string, and an integer price. All three columns are declared nullable.
schema = (
    StructType()
    .add("stock_id", StringType(), True)
    .add("date", StringType(), True)
    .add("price", IntegerType(), True)
)

# Literal rows: three consecutive days of prices for stocks "A" and "B".
data = [
    ("A", "2024-01-01", 100), ("A", "2024-01-02", 105), ("A", "2024-01-03", 104),
    ("B", "2024-01-01", 200), ("B", "2024-01-02", 200), ("B", "2024-01-03", 201),
]

# Materialize the rows as a Spark DataFrame with the explicit schema,
# then render it.
df = spark.createDataFrame(data, schema)
df.display()

stock_id,date,price
A,2024-01-01,100
A,2024-01-02,105
A,2024-01-03,104
B,2024-01-01,200
B,2024-01-02,200
B,2024-01-03,201


In [0]:

# Per-stock window ordered by date, so lag() reads the previous row of the
# same stock. Assumes the string-typed `date` values sort chronologically
# (e.g. zero-padded yyyy-MM-dd).
window_spec = Window.partitionBy("stock_id").orderBy("date")

# Previous row's price inside the window; null on each stock's first row.
prev_price_col = lag("price").over(window_spec)

# Classify the move relative to the previous row. The null check has to come
# first: a comparison against null is never true, so without it the first row
# of every stock would fall through to "SAME".
# NOTE(review): "NULL" is the literal four-character string, not a SQL null;
# confirm that this marker is what downstream consumers expect.
price_change_col = (
    when(prev_price_col.isNull(), "NULL")
    .when(col("price") > prev_price_col, "UP")
    .when(col("price") < prev_price_col, "DOWN")
    .otherwise("SAME")
)

# Derive a new frame instead of reassigning `df`: the input frame keeps its
# original columns, and re-running this cell always starts from the same state.
df_results = df.select(
    "stock_id",
    "date",
    "price",
    price_change_col.alias("price_change"),
)

# Show results
df_results.display()

stock_id,date,price,price_change
A,2024-01-01,100,
A,2024-01-02,105,UP
A,2024-01-03,104,DOWN
B,2024-01-01,200,
B,2024-01-02,200,SAME
B,2024-01-03,201,UP


In [0]:
# Expose the DataFrame to Spark SQL under the view name `stocks`.
df.createOrReplaceTempView("stocks")

# Spark SQL query that labels each row's price move against the previous row
# of the same stock. The LAG window expression is evaluated once in the CTE
# and referenced by name afterwards, so the partitioning/ordering is defined
# in a single place. The CTE selects its input columns explicitly, so it does
# not depend on any other columns the view may carry.
# NOTE(review): 'NULL' is a string literal here, not a SQL NULL; confirm that
# this marker is intended for each stock's first row.
query = """
WITH with_prev AS (
    SELECT
        stock_id,
        date,
        price,
        LAG(price) OVER (PARTITION BY stock_id ORDER BY date) AS prev_price
    FROM stocks
)
SELECT
    stock_id,
    date,
    price,
    CASE
        WHEN prev_price IS NULL THEN 'NULL'
        WHEN price > prev_price THEN 'UP'
        WHEN price < prev_price THEN 'DOWN'
        ELSE 'SAME'
    END AS price_change
FROM with_prev
"""

# Execute the query
df_results = spark.sql(query)

# Show results
df_results.display()

stock_id,date,price,price_change
A,2024-01-01,100,
A,2024-01-02,105,UP
A,2024-01-03,104,DOWN
B,2024-01-01,200,
B,2024-01-02,200,SAME
B,2024-01-03,201,UP
