In [None]:
!pip install pandas
!pip install pyspark
!pip install pyarrow

In [None]:
from pyspark.sql import SparkSession
 
# Create the SparkSession (or reuse the one already running), registered
# under the application name "pandas to spark".
spark = (
    SparkSession.builder
    .appName("pandas to spark")
    .getOrCreate()
)

# Switch on Arrow for PySpark so pandas <-> Spark DataFrame conversions
# go through PyArrow.
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

In [66]:
import pandas as pd

# Sample readings, one row per day: [id, recordDate, temperature].
data = [
    [1, '2015-01-01', 10],
    [2, '2015-01-02', 25],
    [3, '2015-01-03', 20],
    [4, '2015-01-04', 30],
]

# Column name -> dtype. The key order doubles as the column order of the frame.
# 'Int64' is pandas' nullable integer dtype; recordDate strings are parsed
# into datetime64[ns] timestamps.
column_dtypes = {
    'id': 'Int64',
    'recordDate': 'datetime64[ns]',
    'temperature': 'Int64',
}

weather = pd.DataFrame(data, columns=list(column_dtypes)).astype(column_dtypes)

In [67]:
# Convert the pandas frame into a Spark DataFrame, keeping the name `weather`
# because the query cell further down reads it.
# The isinstance guard makes this cell safe to re-run: after the first run
# `weather` is already a Spark DataFrame, and createDataFrame() rejects a
# Spark DataFrame as input.
if isinstance(weather, pd.DataFrame):
    weather = spark.createDataFrame(weather)

# Print the Spark DataFrame to confirm the conversion.
weather.show()

+---+-------------------+-----------+
| id|         recordDate|temperature|
+---+-------------------+-----------+
|  1|2015-01-01 00:00:00|         10|
|  2|2015-01-02 00:00:00|         25|
|  3|2015-01-03 00:00:00|         20|
|  4|2015-01-04 00:00:00|         30|
+---+-------------------+-----------+



In [68]:
from pyspark.sql.functions import lag, lit
from pyspark.sql import Window

# Show the ids of days that were warmer than the previous calendar day.
#
# lag() returns the value from "the row before this one", so the window has to
# be ordered by a column that actually sequences the rows. Ordering by a
# constant makes every row tie, which leaves the row order undefined and lets
# Spark pair arbitrary rows. Ordering by recordDate makes the previous row
# deterministic.
# There is no partitionBy: the whole table is treated as one sequence, which
# is fine for this small sample.
date_order = Window.orderBy('recordDate')

(
    weather
    .withColumn('previous_temperature', lag('temperature', 1).over(date_order))
    .withColumn('previous_recordDate', lag('recordDate', 1).over(date_order))
    # The earliest row has no predecessor, so its lag values are NULL and the
    # predicate below filters it out. The datediff check also rejects pairs of
    # rows that are not exactly one day apart (gaps in the dates).
    .where("""temperature > previous_temperature 
          and datediff(recordDate, previous_recordDate) == 1""")
    .select('id')
    .show()
)

+---+
| id|
+---+
|  2|
|  4|
+---+

