In [0]:
from datetime import datetime

from pyspark import SparkConf
from pyspark.sql.functions import col,datediff,lag
from pyspark.sql.session import SparkSession
from pyspark.sql.types import IntegerType,DateType,StructField,StructType
from pyspark.sql.window import Window

# Create (or reuse) a Spark session with master "local[*]" and application name "app".
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("app")
    .getOrCreate()
)

In [0]:
# Explicit schema for the weather table; all three columns are declared nullable.
schema = StructType(
    [
        StructField(name="id", dataType=IntegerType(), nullable=True),
        StructField(name="recordDate", dataType=DateType(), nullable=True),
        StructField(name="temp", dataType=IntegerType(), nullable=True),
    ]
)

# Sample readings as (id, recordDate, temp) tuples.
data = [
    (1, datetime(2015, 1, 1), 10),
    (2, datetime(2015, 1, 2), 25),
    (3, datetime(2015, 1, 3), 20),
    (4, datetime(2015, 1, 4), 30),
]

# Build the DataFrame against the declared schema and print its contents.
weather = spark.createDataFrame(data=data, schema=schema)
weather.show()

+---+----------+----+
| id|recordDate|temp|
+---+----------+----+
|  1|2015-01-01|  10|
|  2|2015-01-02|  25|
|  3|2015-01-03|  20|
|  4|2015-01-04|  30|
+---+----------+----+



In [0]:
# Find the ids of all dates whose temperature is higher than that of the previous
# calendar day (yesterday). The result may be returned in any order.
#
# lag() yields the previous *row* in recordDate order, which is "yesterday" only
# when no dates are missing. The lagged date is therefore carried along as well and
# the datediff() check keeps a row only if its predecessor is exactly one day older,
# so gaps in recordDate cannot produce false matches. For the first row both lagged
# values are NULL, the predicate evaluates to NULL, and the row is dropped.
#
# Note: the window has no partitionBy, so Spark evaluates it in a single partition;
# that is acceptable for this small demo table.
window_spec = Window.orderBy("recordDate")
(
    weather
    .withColumn("last_day_temp", lag("temp").over(window_spec))
    .withColumn("last_day_date", lag("recordDate").over(window_spec))
    .where(
        (datediff(col("recordDate"), col("last_day_date")) == 1)
        & (col("temp") > col("last_day_temp"))
    )
    .select("id")
    .show()
)

+---+
| id|
+---+
|  2|
|  4|
+---+



In [0]:
# Same question answered with Spark SQL: register the DataFrame as a temp view and
# query it.
weather.createOrReplaceTempView("weather")

# lag() only looks at the previous row in recordDate order, so the previous row's
# date is selected too and the outer query requires it to be exactly one day
# earlier. Without that check a missing date would cause a comparison against a
# reading that is not from yesterday.
spark.sql(
    """
    WITH cte AS (
        SELECT
            *,
            lag(temp)       OVER (ORDER BY recordDate) AS last_day_temp,
            lag(recordDate) OVER (ORDER BY recordDate) AS last_day_date
        FROM weather
    )
    SELECT id
    FROM cte
    WHERE datediff(recordDate, last_day_date) = 1
      AND temp > last_day_temp
    """
).show()

+---+
| id|
+---+
|  2|
|  4|
+---+



In [0]:
# Stop the Spark session now that all queries have run.
spark.stop()