In [1]:
# findspark.init() runs before the pyspark imports in the next cell;
# presumably it makes the local Spark installation importable from this
# kernel -- NOTE(review): confirm against the findspark documentation.
import findspark
findspark.init()

In [12]:
import pyspark
from datetime import datetime
from pyspark.sql import SparkSession
from pyspark.sql.window import Window
from pyspark.sql.functions import * 
from pyspark.sql.types import *

In [None]:
# Column layout of the sample table: an integer hall id plus the start and
# end dates of one interval.
_schema = StructType(
    [
        StructField("hall_id", IntegerType()),
        StructField("start_date", DateType()),
        StructField("end_date", DateType()),
    ]
)

# Sample rows as (hall_id, start, end) with the dates written as ISO strings;
# they are parsed into datetime.date objects just below.
_raw_rows = [
    (1, '2023-01-13', '2023-01-14'),
    (1, '2023-01-14', '2023-01-17'),
    (1, '2023-01-15', '2023-01-17'),
    (1, '2023-01-18', '2023-01-25'),
    (2, '2022-12-09', '2022-12-23'),
    (2, '2022-12-13', '2022-12-17'),
    (3, '2022-12-01', '2023-01-30'),
]

# Same tuples the DataFrame is built from: (int, date, date).
_data = [
    (
        hall,
        datetime.strptime(start, '%Y-%m-%d').date(),
        datetime.strptime(end, '%Y-%m-%d').date(),
    )
    for hall, start, end in _raw_rows
]

In [7]:
# Build (or reuse) the SparkSession used by every cell below. Note that `sc`
# is a SparkSession, not a SparkContext.
# The master URL is set through `.master(...)`: the earlier
# `.config("master", "local")` stored a key literally named "master", which is
# not the `spark.master` property, so the requested local master was never
# actually applied.
sc = SparkSession.builder.appName("master").master("local").getOrCreate()

In [8]:
# Turn the sample rows into a DataFrame with the explicit schema, then
# preview it.
df = sc.createDataFrame(_data, _schema)
df.show()

+-------+----------+----------+
|hall_id|start_date|  end_date|
+-------+----------+----------+
|      1|2023-01-13|2023-01-14|
|      1|2023-01-14|2023-01-17|
|      1|2023-01-15|2023-01-17|
|      1|2023-01-18|2023-01-25|
|      2|2022-12-09|2022-12-23|
|      2|2022-12-13|2022-12-17|
|      3|2022-12-01|2023-01-30|
+-------+----------+----------+



In [11]:
# Self-join: pair each interval ("a") with every interval ("b") of the same
# hall whose date range intersects it. Each row also pairs with itself, since
# an interval always satisfies both range conditions against itself.
left_side = df.alias("a")
right_side = df.alias("b")

same_hall = col("a.hall_id") == col("b.hall_id")
a_ends_after_b_starts = col("a.end_date") >= col("b.start_date")
a_starts_before_b_ends = col("a.start_date") <= col("b.end_date")

mid_df = left_side.join(
    right_side,
    on=(same_hall & a_ends_after_b_starts & a_starts_before_b_ends),
    how="left",
)
mid_df.show()

+-------+----------+----------+-------+----------+----------+
|hall_id|start_date|  end_date|hall_id|start_date|  end_date|
+-------+----------+----------+-------+----------+----------+
|      1|2023-01-13|2023-01-14|      1|2023-01-14|2023-01-17|
|      1|2023-01-13|2023-01-14|      1|2023-01-13|2023-01-14|
|      1|2023-01-14|2023-01-17|      1|2023-01-15|2023-01-17|
|      1|2023-01-14|2023-01-17|      1|2023-01-14|2023-01-17|
|      1|2023-01-14|2023-01-17|      1|2023-01-13|2023-01-14|
|      1|2023-01-15|2023-01-17|      1|2023-01-15|2023-01-17|
|      1|2023-01-15|2023-01-17|      1|2023-01-14|2023-01-17|
|      1|2023-01-18|2023-01-25|      1|2023-01-18|2023-01-25|
|      2|2022-12-09|2022-12-23|      2|2022-12-13|2022-12-17|
|      2|2022-12-09|2022-12-23|      2|2022-12-09|2022-12-23|
|      2|2022-12-13|2022-12-17|      2|2022-12-13|2022-12-17|
|      2|2022-12-13|2022-12-17|      2|2022-12-09|2022-12-23|
|      3|2022-12-01|2023-01-30|      3|2022-12-01|2023-01-30|
+-------

In [21]:
# For every row, "lagged" holds the latest end_date seen among all EARLIER
# rows of the same hall (rows ordered by start_date, then end_date).
#
# A plain lag(end_date) only looks at the immediately preceding row. When a
# short interval is nested inside an earlier, longer one (e.g. 12-09..12-23
# followed by 12-13..12-17), the next row would be compared against the short
# interval's end and could wrongly start a new group even though it still
# overlaps the longer interval. A running max over all preceding rows avoids
# that.
_hall_order = Window.partitionBy("hall_id").orderBy(
    col("start_date").asc(), col("end_date").asc()
)

# Sentinel for the first row of each hall, which has no preceding rows (the
# running max over an empty frame is null). It predates all sample data, so
# the first row always opens a new group in the next step.
_no_previous_end = datetime.strptime('1990-12-01', '%Y-%m-%d').date()

# `max`, `coalesce` and `lit` are the pyspark.sql.functions versions brought in
# by the star import at the top of the notebook.
mid_df = df.withColumn(
    "lagged",
    coalesce(
        max(col("end_date")).over(
            _hall_order.rowsBetween(Window.unboundedPreceding, -1)
        ),
        lit(_no_previous_end),
    ),
)
mid_df.show()

+-------+----------+----------+----------+
|hall_id|start_date|  end_date|    lagged|
+-------+----------+----------+----------+
|      1|2023-01-13|2023-01-14|1990-12-01|
|      1|2023-01-14|2023-01-17|2023-01-14|
|      1|2023-01-15|2023-01-17|2023-01-17|
|      1|2023-01-18|2023-01-25|2023-01-17|
|      2|2022-12-09|2022-12-23|1990-12-01|
|      2|2022-12-13|2022-12-17|2022-12-23|
|      3|2022-12-01|2023-01-30|1990-12-01|
+-------+----------+----------+----------+



In [24]:
# Assign a running group number per hall: a row contributes 1 when it starts
# after the "lagged" end date (no overlap with what came before) and 0
# otherwise, so the cumulative sum stays constant across overlapping rows.
ordered_within_hall = Window.partitionBy("hall_id").orderBy(
    col("start_date").asc(), col("end_date").asc()
)
opens_new_group = when(col("start_date") <= col("lagged"), 0).otherwise(1)

# `sum` here is pyspark.sql.functions.sum (star import), not the builtin.
mid_df1 = mid_df.withColumn("sum", sum(opens_new_group).over(ordered_within_hall))
mid_df1.show()

+-------+----------+----------+----------+---+
|hall_id|start_date|  end_date|    lagged|sum|
+-------+----------+----------+----------+---+
|      1|2023-01-13|2023-01-14|1990-12-01|  1|
|      1|2023-01-14|2023-01-17|2023-01-14|  1|
|      1|2023-01-15|2023-01-17|2023-01-17|  1|
|      1|2023-01-18|2023-01-25|2023-01-17|  2|
|      2|2022-12-09|2022-12-23|1990-12-01|  1|
|      2|2022-12-13|2022-12-17|2022-12-23|  1|
|      3|2022-12-01|2023-01-30|1990-12-01|  1|
+-------+----------+----------+----------+---+



In [25]:
# Collapse each (hall_id, group number) bucket into one merged interval:
# earliest start_date and latest end_date. The group number itself is dropped
# from the final projection.
# `min` / `max` are the pyspark.sql.functions aggregates (star import).
merged_bounds = [
    min(col("start_date")).alias("start_date"),
    max(col("end_date")).alias("end_date"),
]

res_df = (
    mid_df1
    .groupBy(col("hall_id"), col("sum"))
    .agg(*merged_bounds)
    .select(col("hall_id"), col("start_date"), col("end_date"))
)
res_df.show()

+-------+----------+----------+
|hall_id|start_date|  end_date|
+-------+----------+----------+
|      1|2023-01-13|2023-01-17|
|      1|2023-01-18|2023-01-25|
|      2|2022-12-09|2022-12-23|
|      3|2022-12-01|2023-01-30|
+-------+----------+----------+



In [None]:
from typing import List, Optional


# Input list for the demo call at the bottom of this cell. It was previously
# commented out, which left `lst` undefined and made the cell fail with a
# NameError on a fresh kernel. [1, 2] reproduces the recorded output.
# The trailing numbers are the author's original annotations; they appear to
# match 2**(len - 1) as printed by the following cell -- TODO confirm intent.
lst = [1, 2] #2
# lst = [1,2,3] #4
# lst = [1,2,3,4] #8

def generatesusbsets(res: List, idx: int, items: Optional[List] = None):
    """Print every subset of ``items[0..idx]``, one list per line.

    Works by recursion on ``idx``: first prints all subsets that exclude
    ``items[idx]``, then all subsets that include it.

    Args:
        res: Accumulator holding the elements chosen so far. It is mutated
            during the recursion (append/pop) but is restored to its original
            contents by the time the call returns.
        idx: Index of the element currently being decided; pass
            ``len(items) - 1`` to enumerate the whole list. A negative value
            means nothing is left to decide and ``res`` is printed.
        items: Source list. Defaults to the module-level ``lst`` so existing
            two-argument calls keep working.

    Returns:
        None. The subsets are written to stdout.
    """
    if items is None:
        items = lst

    if idx < 0:
        print(res)
        return

    # Branch 1: leave items[idx] out.
    generatesusbsets(res, idx - 1, items)
    # Branch 2: take items[idx], recurse, then undo so the caller's view of
    # `res` is unchanged.
    res.append(items[idx])
    generatesusbsets(res, idx - 1, items)
    res.pop()


generatesusbsets([], len(lst)-1)

[]
[1]
[2]
[2, 1]


In [11]:
# Print 2**(n-1) for n = 1..4, one value per line.
for n in range(1, 5):
    print(2 ** (n - 1))

1
2
4
8
