In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType,StructField,IntegerType
from pyspark.sql.functions import col,lag
from pyspark.sql.window import Window

# Reuse an already-running session if there is one; otherwise start a local
# session named "app" with the master set to "local[3]".
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[3]")
    .getOrCreate()
)

In [0]:
# Both columns are declared as non-nullable integers.
schema = StructType(
    [
        StructField("id", IntegerType(), False),
        StructField("num", IntegerType(), False),
    ]
)

# Sample rows as (id, num) tuples; ids are assigned sequentially starting at 1,
# so only the sequence of `num` values needs to be spelled out.
data = list(enumerate([1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1], start=1))

logs = spark.createDataFrame(data, schema)
logs.show()

+---+---+
| id|num|
+---+---+
|  1|  1|
|  2|  1|
|  3|  1|
|  4|  2|
|  5|  1|
|  6|  2|
|  7|  2|
|  8|  1|
|  9|  1|
| 10|  1|
| 11|  1|
| 12|  1|
+---+---+



In [0]:
# Find all numbers that appear at least three times consecutively. Return the result table in any order.
#
# DataFrame approach: with rows ordered by id, place each row's `num` next to
# the `num` of the previous row (last_1) and of the row before that (last_2).
# A row whose three values are equal ends a run of at least three.
# Rows are compared by their position in id order, not by id arithmetic.
# NOTE(review): the window has no partitionBy, so Spark will likely evaluate it
# on a single partition - fine for this small sample, confirm before scaling up.
window_spec = Window.orderBy("id")
(
    logs
    .select(
        "num",
        lag("num", 1).over(window_spec).alias("last_1"),
        lag("num", 2).over(window_spec).alias("last_2"),
    )
    .where(
        (col("num") == col("last_1"))
        & (col("num") == col("last_2"))
    )
    .select("num")
    .distinct()
    .show()
)

+---+
|num|
+---+
|  1|
+---+



In [0]:
# Same question answered with Spark SQL: register the DataFrame as the temp
# view "logs", then compare each row's num with its neighbours on both sides
# (lag = previous row, lead = next row, both ordered by id). A row that matches
# both neighbours is the middle of a run of at least three.
logs.createOrReplaceTempView("logs")
spark.sql(
    """with cte as (select num, lead(num,1) over(order by id) next1, lag(num,1) over(order by id) last1 from logs)
          select distinct num from cte where num=last1 and num=next1"""
).show()

+---+
|num|
+---+
|  1|
+---+



In [0]:
# Shut down the Spark session now that the notebook is finished with it.
spark.stop()