In [None]:
# IMPORTANT: This notebook rewrites the dates in the sample data (table ckm_conv_processed) to guarantee that the Power BI report includes data for the current date, the last two days, and the last seven days.
# It is OPTIONAL and is only used to ensure the Power BI report can display data for each deployment.

In [None]:
%%sql
-- Run this cell to move the sample data's dates forward to today.
-- Every row of ckm_conv_processed is updated (there is no WHERE clause).
-- StartTime, EndTime and ConversationDate are all shifted by the same number of days:
-- the DATEDIFF between NOW() and the latest ConversationDate currently in the table,
-- so the most recent conversation ends up on today's date and all other rows keep
-- their relative spacing.
-- The same scalar subquery is repeated for each of the three columns.
UPDATE ckm_conv_processed
SET StartTime = DATEADD(day, (SELECT DATEDIFF(NOW(), MAX(ConversationDate)) FROM ckm_conv_processed), StartTime),
EndTime = DATEADD(day, (SELECT DATEDIFF(NOW(), MAX(ConversationDate)) FROM ckm_conv_processed), EndTime),
ConversationDate = DATEADD(day, (SELECT DATEDIFF(NOW(), MAX(ConversationDate)) FROM ckm_conv_processed), ConversationDate)

In [None]:
# Re-date the sample conversations so that fixed percentages of them fall on
# each of the last 14 days (today back to today-13). This supports
# storytelling and demos; only the calendar date of each timestamp is changed,
# the time of day is preserved.

import pandas as pd
from datetime import date, datetime, timedelta
from pyspark.sql.functions import col

# Columns whose calendar date is rewritten.
date_columns = ["StartTime", "EndTime", "ConversationDate"]

df = spark.sql("SELECT * FROM ckm_conv_processed")

# Convert string columns to timestamp types
for column_name in date_columns:
    df = df.withColumn(column_name, col(column_name).cast("timestamp"))

dfp = df.toPandas()
# Shuffle the rows so each run gives a different set of conversations per day.
# reset_index(drop=True) is essential: sample() keeps the original index
# labels, and the assignments below select rows through the index, so without
# the reset the same original rows would always receive the same dates.
dfp = dfp.sample(frac=1).reset_index(drop=True)

# Percentage of all conversations assigned to each day, from today (position 0)
# back to today-13 (position 13), i.e. two weeks.
weights = [30, 26, 5, 5, 5, 5, 15, 2, 2, 1, 1, 1, 1, 1]

total_rows = len(dfp)  # counted once locally instead of a Spark df.count() per weight
today = date.today()   # captured once so a run that crosses midnight stays consistent

row_start = 0  # position of the first conversation not yet re-dated
for daysback, weight in enumerate(weights):
    numconvos = int((weight / 100.00) * total_rows)
    # Clamp so weights summing to more than 100 can never run past the last row.
    row_stop = min(row_start + numconvos, total_rows)
    if row_stop == row_start:
        continue  # nothing to assign to this day (tiny table or zero weight)

    day_rows = dfp.index[row_start:row_stop]
    target_midnight = pd.Timestamp(
        datetime.combine(today - timedelta(days=daysback), datetime.min.time())
    )
    for column_name in date_columns:
        current = dfp.loc[day_rows, column_name]
        # current - normalize() is the time of day as a timedelta; adding it to
        # the target midnight swaps the date and keeps the time. Null
        # timestamps stay null instead of raising.
        dfp.loc[day_rows, column_name] = target_midnight + (current - current.dt.normalize())
    row_start = row_stop

# Because each day's share is truncated to a whole number of rows, any rows
# from row_start onwards are left with the dates they already had.
df = spark.createDataFrame(dfp)

# Write to temp table, then update final results table
df.write.format('delta').mode('overwrite').saveAsTable('ckm_conv_processed_temp')
df = spark.sql("SELECT * FROM ckm_conv_processed_temp ")
df.write.format('delta').mode('overwrite').option("overwriteSchema", "false").saveAsTable('ckm_conv_processed')