In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window

In [0]:
%sql
-- Preview the first five rows (by row_id) of the customer loyalty history table.
select * from sales.customer_loyalty_hist_details order by row_id limit 5

row_id,customer_id,customer_name,region,province,customer_since,total_years,total_orders_count,loyalty_points,loyalty_type,start_date,end_date
1,001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01,Nick Crebassa,Prarie,Saskachewan,2009,15,432,650,Classic,2024-05-26,9999-12-31
2,00fcf5c6646b01ce72417b854ad2ebcff2a73c4beec096c90dd9d1e60a7fd1e9,Barry Pond,Yukon,Yukon,2010,14,227,350,Classic,2024-05-26,9999-12-31
3,0105946ed08fb4ccf5f3efcec88a83121bb5804c698b14f2ad20cf7dd2541f7f,Giulietta Weimer,West,British Columbia,2009,15,492,800,Classic,2024-05-26,9999-12-31
4,018fc79fefdcb46768279300fb6e32f4e1e9f627ae99e3a672244d3052462e9b,Ashley Jarboe,Yukon,Yukon,2009,15,495,950,Classic,2024-05-26,9999-12-31
5,01a041b0a5fbe79fb524be166ed14fc9f58054716ce63e9db05a67d35f767527,Christy Brittain,Yukon,Yukon,2009,15,604,1500,Daimond,2024-05-26,9999-12-31


In [0]:
# Same five-row preview through spark.sql; truncate=False prints full column values.
spark.sql(
    "select * from sales.customer_loyalty_hist_details order by row_id limit 5"
).show(5, truncate=False)

+------+----------------------------------------------------------------+----------------+------+----------------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|customer_id                                                     |customer_name   |region|province        |customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|end_date  |
+------+----------------------------------------------------------------+----------------+------+----------------+--------------+-----------+------------------+--------------+------------+----------+----------+
|1     |001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01|Nick Crebassa   |Prarie|Saskachewan     |2009          |15         |432               |650           |Classic     |2024-05-26|9999-12-31|
|2     |00fcf5c6646b01ce72417b854ad2ebcff2a73c4beec096c90dd9d1e60a7fd1e9|Barry Pond      |Yukon |Yukon           |2010          |14         |227            

In [0]:
# Highest row_id currently stored in the history table; new rows are numbered
# upwards from this value. COALESCE only helps when it has a fallback argument:
# with the default of 0 an empty table yields 0 instead of NULL/None, so the
# later `row_number() + max_row_id` arithmetic still produces valid ids.
max_row_id = spark.sql(
    "SELECT COALESCE(MAX(row_id), 0) FROM sales.customer_loyalty_hist_details"
).collect()[0][0]
print("max row id : ",max_row_id)

max row id :  795


In [0]:
# Column layout of an incoming customer-details batch; every field is nullable.
schema = (
    StructType()
    .add("customer_id", StringType(), True)
    .add("customer_name", StringType(), True)
    .add("region", StringType(), True)
    .add("province", StringType(), True)
    .add("customer_since", IntegerType(), True)
    .add("total_years", IntegerType(), True)
    .add("total_orders_count", IntegerType(), True)
    .add("loyalty_points", IntegerType(), True)
    .add("loyalty_type", StringType(), True)
)

# First batch of customer details: two records, one tuple per customer,
# in the same field order as the schema.
new_cust_dtls_data = [
    (
        "001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01",
        "Nick Crebassa", "Texas", "Texas", 2009, 15, 450, 1000, "Gold",
    ),
    (
        "001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02",
        "Mike Jackson", "North", "Yukon", 2024, 1, 1, 50, "Classic",
    ),
]

# Build the DataFrame for the new customer details using the explicit schema.
new_cust_dtls_df = spark.createDataFrame(new_cust_dtls_data, schema)

In [0]:
# Stamp the batch with its validity window: start_date is the day after the
# current date, end_date is the literal "9999-12-31" (the same open-ended value
# the existing history rows carry).
new_cust_dtls_df = (
    new_cust_dtls_df
    .withColumn("start_date", current_date() + 1)
    .withColumn("end_date", lit("9999-12-31"))
)

In [0]:
# Assign row ids that continue after the table's current maximum: number the
# batch rows 1..n in customer_id order and shift the numbering by max_row_id.
new_cust_dtls_df = new_cust_dtls_df.withColumn(
    "row_id",
    row_number().over(Window.orderBy("customer_id")) + max_row_id,
)

In [0]:
# Inspect the prepared batch: at most two rows, column values untruncated.
new_cust_dtls_df.show(n=2, truncate=False)

+----------------------------------------------------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+------+
|customer_id                                                     |customer_name|region|province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|end_date  |row_id|
+----------------------------------------------------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+------+
|001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01|Nick Crebassa|Texas |Texas   |2009          |15         |450               |1000          |Gold        |2024-05-27|9999-12-31|796   |
|001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02|Mike Jackson |North |Yukon   |2024          |1          |1                 |50            |Classic     |2024-05-27|9999-12-31|797   |


In [0]:
# History rows of the first batch customer before any merge has been run.
spark.sql("""select * from sales.customer_loyalty_hist_details where
          customer_id='001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01'
          """).show(truncate=False)

+------+----------------------------------------------------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|customer_id                                                     |customer_name|region|province   |customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|end_date  |
+------+----------------------------------------------------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|1     |001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01|Nick Crebassa|Prarie|Saskachewan|2009          |15         |432               |650           |Classic     |2024-05-26|9999-12-31|
+------+----------------------------------------------------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+--------

In [0]:
from delta.tables import *

# Open the loyalty history Delta table by its storage path so that merge
# operations can be issued against it; print the handle's type as a sanity check.
customer_loyalty_details_hist_instance = DeltaTable.forPath(
    spark,
    "/FileStore/tables/delta-table-merge/customer_loyalty_hist_details",
)
print(type(customer_loyalty_details_hist_instance))

<class 'delta.tables.DeltaTable'>


In [0]:
# Keep only the latest history row per customer: a customer_id can have several
# rows, and only the most recent one must have its end_date updated.
# row_id is a secondary sort key so the pick is deterministic when two rows of
# the same customer share a start_date (e.g. two loads on the same day);
# ordering by start_date alone would leave that tie to chance.
cust_latest_df = (
    customer_loyalty_details_hist_instance.toDF()
    .withColumn(
        "row_number",
        row_number().over(
            Window.partitionBy("customer_id").orderBy(desc("start_date"), desc("row_id"))
        ),
    )
    .filter("row_number = 1")
    .drop("row_number")
)

In [0]:
# Persist the latest-row snapshot as its own Delta table (overwriting whatever
# was stored at that path) and reopen it as a DeltaTable to use as a merge target.
cust_latest_df.write.format("delta").mode("overwrite").save(
    "/FileStore/tables/delta-table-merge/cust_loyalty_hist_latest_dtls"
)
cust_dtls_hist_latest_instance = DeltaTable.forPath(
    spark,
    "/FileStore/tables/delta-table-merge/cust_loyalty_hist_latest_dtls",
)

In [0]:
# Step 1: close the latest record of every customer that appears in the batch.
# The closing date is taken from the incoming row's start_date rather than
# recomputed from current_date(), so the old record always ends exactly where
# the new one begins and the day offset is defined in one place only (where the
# batch's start_date is set).
# Customers with no row in the snapshot are inserted as they are.
cust_dtls_hist_latest_instance.alias("target").merge(
    new_cust_dtls_df.alias("source"),
    "target.customer_id = source.customer_id"
).whenMatchedUpdate(
    set={"end_date": "source.start_date"}
).whenNotMatchedInsertAll().execute()

In [0]:
# Snapshot rows of the first batch customer after the end_date update.
(
    cust_dtls_hist_latest_instance.toDF()
    .filter("customer_id = '001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01'")
    .show()
)

+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|   province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|     1|001492ac094ba3c98...|Nick Crebassa|Prarie|Saskachewan|          2009|         15|               432|           650|     Classic|2024-05-26|2024-05-27|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# Snapshot rows of the second batch customer after the first merge.
(
    cust_dtls_hist_latest_instance.toDF()
    .filter("customer_id = '001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02'")
    .show()
)

+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|   797|001492ac094ba3c98...| Mike Jackson| North|   Yukon|          2024|          1|                 1|            50|     Classic|2024-05-27|9999-12-31|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# Step 2: add the batch rows themselves to the snapshot. The match condition
# compares customer_id, row_id, start_date and end_date, so a batch row that is
# already present is overwritten in place and every other batch row is inserted.
(
    cust_dtls_hist_latest_instance.alias("target")
    .merge(
        new_cust_dtls_df.alias("source"),
        "target.customer_id = source.customer_id and target.row_id = source.row_id and target.start_date = source.start_date and target.end_date = source.end_date",
    )
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)

In [0]:
# Check the history table itself for the first batch customer; the merges so
# far were issued against the snapshot table only.
spark.sql("""
select * from sales.customer_loyalty_hist_details where customer_id='001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01'
""").show()

+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|   province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|     1|001492ac094ba3c98...|Nick Crebassa|Prarie|Saskachewan|          2009|         15|               432|           650|     Classic|2024-05-26|9999-12-31|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# Snapshot rows of the first batch customer after both merges.
(
    cust_dtls_hist_latest_instance.toDF()
    .filter("customer_id = '001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01'")
    .show()
)

+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|   province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|     1|001492ac094ba3c98...|Nick Crebassa|Prarie|Saskachewan|          2009|         15|               432|           650|     Classic|2024-05-26|2024-05-27|
|   796|001492ac094ba3c98...|Nick Crebassa| Texas|      Texas|          2009|         15|               450|          1000|        Gold|2024-05-27|9999-12-31|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# Snapshot rows of the second batch customer after both merges.
(
    cust_dtls_hist_latest_instance.toDF()
    .filter("customer_id = '001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02'")
    .show()
)

+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|   797|001492ac094ba3c98...| Mike Jackson| North|   Yukon|          2024|          1|                 1|            50|     Classic|2024-05-27|9999-12-31|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# Step 3: fold the updated snapshot back into the history table. Rows matched on
# (customer_id, row_id) are overwritten with the snapshot values; snapshot rows
# without a match are inserted.
(
    customer_loyalty_details_hist_instance.alias("target")
    .merge(
        cust_dtls_hist_latest_instance.toDF().alias("source"),
        "target.customer_id = source.customer_id and target.row_id = source.row_id",
    )
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)

In [0]:
# History table rows of the first batch customer after the snapshot was merged back.
spark.sql("""
select * from sales.customer_loyalty_hist_details where customer_id='001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01'
""").show()

+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|   province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|     1|001492ac094ba3c98...|Nick Crebassa|Prarie|Saskachewan|          2009|         15|               432|           650|     Classic|2024-05-26|2024-05-27|
|   796|001492ac094ba3c98...|Nick Crebassa| Texas|      Texas|          2009|         15|               450|          1000|        Gold|2024-05-27|9999-12-31|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# History table rows of the second batch customer after the snapshot was merged back.
spark.sql("""
select * from sales.customer_loyalty_hist_details where customer_id='001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02'
""").show()

+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|   797|001492ac094ba3c98...| Mike Jackson| North|   Yukon|          2024|          1|                 1|            50|     Classic|2024-05-27|9999-12-31|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# Re-read the highest row_id before preparing the second batch; new rows are
# numbered upwards from this value. COALESCE only helps when it has a fallback
# argument: with the default of 0 an empty table yields 0 instead of NULL/None,
# so the `row_number() + max_row_id` arithmetic still produces valid ids.
max_row_id = spark.sql(
    "SELECT COALESCE(MAX(row_id), 0) FROM sales.customer_loyalty_hist_details"
).collect()[0][0]
print("max row id : ",max_row_id)

max row id :  797


In [0]:
# Column layout of an incoming customer-details batch; every field is nullable.
schema = (
    StructType()
    .add("customer_id", StringType(), True)
    .add("customer_name", StringType(), True)
    .add("region", StringType(), True)
    .add("province", StringType(), True)
    .add("customer_since", IntegerType(), True)
    .add("total_years", IntegerType(), True)
    .add("total_orders_count", IntegerType(), True)
    .add("loyalty_points", IntegerType(), True)
    .add("loyalty_type", StringType(), True)
)

# Second batch of customer details: three records, one tuple per customer,
# in the same field order as the schema.
new_cust_dtls_data = [
    (
        "001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01",
        "Nick Crebassa", "Yukon", "Texas", 2009, 15, 460, 1000, "Gold",
    ),
    (
        "001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02",
        "Mike Jackson", "Andra", "Yukon", 2024, 1, 4, 100, "Classic",
    ),
    (
        "001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa03",
        "Nick Jones", "Kingdom", "Kingdom", 2024, 1, 1, 50, "Classic",
    ),
]

# Build the DataFrame for the new customer details using the explicit schema.
new_cust_dtls_df = spark.createDataFrame(new_cust_dtls_data, schema)

In [0]:
# Derive the bookkeeping columns for the second batch in one chain:
#   start_date - two days after the current date
#   end_date   - the literal "9999-12-31"
#   row_id     - 1..n in customer_id order, shifted by max_row_id
new_cust_dtls_df = (
    new_cust_dtls_df
    .withColumn("start_date", current_date() + 2)
    .withColumn("end_date", lit("9999-12-31"))
    .withColumn(
        "row_id",
        row_number().over(Window.orderBy("customer_id")) + max_row_id,
    )
)


In [0]:
# Inspect the prepared batch; note that only the first two rows are printed.
new_cust_dtls_df.show(n=2, truncate=False)

+----------------------------------------------------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+------+
|customer_id                                                     |customer_name|region|province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|end_date  |row_id|
+----------------------------------------------------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+------+
|001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01|Nick Crebassa|Yukon |Texas   |2009          |15         |460               |1000          |Gold        |2024-05-28|9999-12-31|798   |
|001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02|Mike Jackson |Andra |Yukon   |2024          |1          |4                 |100           |Classic     |2024-05-28|9999-12-31|799   |


In [0]:
# Rebuild the latest-row-per-customer snapshot from the current history table.
# row_id is a secondary sort key so the pick is deterministic when two rows of
# the same customer share a start_date; ordering by start_date alone would
# leave that tie to chance.
cust_latest_df = (
    customer_loyalty_details_hist_instance.toDF()
    .withColumn(
        "row_number",
        row_number().over(
            Window.partitionBy("customer_id").orderBy(desc("start_date"), desc("row_id"))
        ),
    )
    .filter("row_number = 1")
    .drop("row_number")
)

# Overwrite the stored snapshot and reopen it as a DeltaTable merge target.
cust_latest_df.write.format("delta").mode("overwrite").save('/FileStore/tables/delta-table-merge/cust_loyalty_hist_latest_dtls')
cust_dtls_hist_latest_instance = DeltaTable.forPath(spark, "/FileStore/tables/delta-table-merge/cust_loyalty_hist_latest_dtls")

In [0]:
# Step 1: close the latest record of every customer that appears in the batch.
# The closing date is taken from the incoming row's start_date rather than
# recomputed from current_date(), so the old record always ends exactly where
# the new one begins and the day offset is defined in one place only (where the
# batch's start_date is set).
# Customers with no row in the snapshot are inserted as they are.
cust_dtls_hist_latest_instance.alias("target").merge(
    new_cust_dtls_df.alias("source"),
    "target.customer_id = source.customer_id"
).whenMatchedUpdate(
    set={"end_date": "source.start_date"}
).whenNotMatchedInsertAll().execute()



In [0]:
# Step 2: add the batch rows themselves to the snapshot. The match condition
# compares customer_id, row_id, start_date and end_date, so a batch row that is
# already present is overwritten in place and every other batch row is inserted.
(
    cust_dtls_hist_latest_instance.alias("target")
    .merge(
        new_cust_dtls_df.alias("source"),
        "target.customer_id = source.customer_id and target.row_id = source.row_id and target.start_date = source.start_date and target.end_date = source.end_date",
    )
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)

In [0]:
# Snapshot rows of the two previously loaded customers, in row_id order.
(
    cust_dtls_hist_latest_instance.toDF()
    .filter("customer_id in ('001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01','001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02')")
    .orderBy("row_id")
    .show()
)

+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|   796|001492ac094ba3c98...|Nick Crebassa| Texas|   Texas|          2009|         15|               450|          1000|        Gold|2024-05-27|2024-05-28|
|   797|001492ac094ba3c98...| Mike Jackson| North|   Yukon|          2024|          1|                 1|            50|     Classic|2024-05-27|2024-05-28|
|   798|001492ac094ba3c98...|Nick Crebassa| Yukon|   Texas|          2009|         15|               460|          1000|        Gold|2024-05-28|9999-12-31|
|   799|001492ac094ba3c98...| Mike Jackson| Andra|   Yukon|     

In [0]:
# Step 3: fold the updated snapshot back into the history table. Rows matched on
# (customer_id, row_id) are overwritten with the snapshot values; snapshot rows
# without a match are inserted.
(
    customer_loyalty_details_hist_instance.alias("target")
    .merge(
        cust_dtls_hist_latest_instance.toDF().alias("source"),
        "target.customer_id = source.customer_id and target.row_id = source.row_id",
    )
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)

In [0]:
# Full history of the first customer after the second load, oldest row first.
spark.sql("""
select * from sales.customer_loyalty_hist_details where customer_id='001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01' order by row_id
""").show()

+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|   province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|     1|001492ac094ba3c98...|Nick Crebassa|Prarie|Saskachewan|          2009|         15|               432|           650|     Classic|2024-05-26|2024-05-27|
|   796|001492ac094ba3c98...|Nick Crebassa| Texas|      Texas|          2009|         15|               450|          1000|        Gold|2024-05-27|2024-05-28|
|   798|001492ac094ba3c98...|Nick Crebassa| Yukon|      Texas|          2009|         15|               460|          1000|        Gold|2024-05-28|9999-12-31|
+------+--------------------+-------------+---

In [0]:
# Full history of the second customer after the second load, oldest row first.
spark.sql("""
select * from sales.customer_loyalty_hist_details where customer_id='001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02' order by row_id
""").show()

+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|   797|001492ac094ba3c98...| Mike Jackson| North|   Yukon|          2024|          1|                 1|            50|     Classic|2024-05-27|2024-05-28|
|   799|001492ac094ba3c98...| Mike Jackson| Andra|   Yukon|          2024|          1|                 4|           100|     Classic|2024-05-28|9999-12-31|
+------+--------------------+-------------+------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# History rows of the third customer, who appears only in the second batch.
spark.sql("""
select * from sales.customer_loyalty_hist_details where customer_id='001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa03'
""").show()

+------+--------------------+-------------+-------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name| region|province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+-------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+
|   800|001492ac094ba3c98...|   Nick Jones|Kingdom| Kingdom|          2024|          1|                 1|            50|     Classic|2024-05-28|9999-12-31|
+------+--------------------+-------------+-------+--------+--------------+-----------+------------------+--------------+------------+----------+----------+



In [0]:
# Print the history of each of the three batch customers in turn, ordered by row_id.
for customer_id in (
    "001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa01",
    "001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa02",
    "001492ac094ba3c986a45e7799a0409db64ed3c856e39cf16854012aa468fa03",
):
    spark.sql(
        f"SELECT * FROM sales.customer_loyalty_hist_details WHERE customer_id='{customer_id}' ORDER BY row_id"
    ).show()

+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|row_id|         customer_id|customer_name|region|   province|customer_since|total_years|total_orders_count|loyalty_points|loyalty_type|start_date|  end_date|
+------+--------------------+-------------+------+-----------+--------------+-----------+------------------+--------------+------------+----------+----------+
|     1|001492ac094ba3c98...|Nick Crebassa|Prarie|Saskachewan|          2009|         15|               432|           650|     Classic|2024-05-26|2024-05-27|
|   796|001492ac094ba3c98...|Nick Crebassa| Texas|      Texas|          2009|         15|               450|          1000|        Gold|2024-05-27|2024-05-28|
|   798|001492ac094ba3c98...|Nick Crebassa| Yukon|      Texas|          2009|         15|               460|          1000|        Gold|2024-05-28|9999-12-31|
+------+--------------------+-------------+---

In [0]:
# Total number of rows in the history table after both loads.
spark.sql("""
select count(*) from sales.customer_loyalty_hist_details
""").show()

+--------+
|count(1)|
+--------+
|     800|
+--------+

