In [None]:
# Notebook parameters: the catalog and database names used to qualify every
# table this notebook reads from or writes to.
ml_catalog, ml_search_db = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("ml_catalog", "ml_search_db")
)

In [None]:
# Build a time-decayed click score for every
# (client_id, click_object_id, click_details_caption) combination.
#
# Query outline:
#   1. `clicks`          - rows of ml_search_with_click with action = 'actions'
#                          and a non-null click_object_id, counted per client /
#                          object / caption / parsed timestamp.
#   2. `clicks_with_max` - attaches the newest parsed timestamp of the whole
#                          `clicks` result to every row.
#   3. final SELECT      - weights each count by
#                          1 / (1 + (max_timestamp - unix_timestamp) / (24*60*60*100))
#                          and sums the weighted counts per combination; rows are
#                          ordered by client_id, then weighted_clicks descending.
#
# NOTE(review): TO_UNIX_TIMESTAMP yields seconds, so 24 * 60 * 60 * 100 is
# 100 days in seconds - a click 100 days older than the newest one gets
# weight 0.5. Confirm the factor of 100 is intentional and not a leftover
# from a milliseconds conversion.
# NOTE(review): MAX(...) OVER () has no PARTITION BY, so Spark evaluates the
# window over all rows in a single partition; worth revisiting if the source
# table is large.
# NOTE(review): spark.sql returns a lazily evaluated DataFrame, so each later
# action on `recommended_actions` presumably re-runs this query - confirm
# whether caching is wanted.
recommended_actions = spark.sql(f"""WITH clicks AS (
    SELECT
        client_id,
        click_object_id,
        click_details_caption,
        TO_UNIX_TIMESTAMP(time_stamp, "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") AS unix_timestamp,
        COUNT(*) AS clicks
    FROM
        {ml_catalog}.{ml_search_db}.ml_search_with_click
    WHERE
        click_object_id IS NOT NULL 
        AND action = 'actions'
    GROUP BY
        client_id,
        click_object_id,
        click_details_caption,
        TO_UNIX_TIMESTAMP(time_stamp, "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'")
),

clicks_with_max AS (
    SELECT
        *,
        MAX(unix_timestamp) OVER () AS max_timestamp
    FROM clicks
)

SELECT
    client_id,
    click_object_id,
    click_details_caption,
    SUM((1.0 / (1 + ((max_timestamp - unix_timestamp) / (24 * 60 * 60 * 100)))) * clicks) AS weighted_clicks
FROM
    clicks_with_max
GROUP BY
    client_id,
    click_object_id,
    click_details_caption
ORDER BY
    client_id,
    weighted_clicks DESC;
""")

In [None]:
# Render the ranked actions (ordered by client_id, then weighted_clicks
# descending) in the notebook for visual inspection.
display(recommended_actions)

### Export to Spark Table

In [None]:
# Persist the recommendations as the Delta table
# <ml_catalog>.<ml_search_db>.cold_start_recommended_actions.
#
# The table is replaced with a single Delta overwrite rather than
# DROP TABLE + re-create: the overwrite is one transaction, so readers never
# observe a missing table, a failed write leaves the previous version in
# place, and the table's history is kept. "overwriteSchema" lets the
# overwrite also replace the schema if the query's columns change, which is
# what dropping the table used to achieve.

# Session setting retained so later cells in this session see the same
# configuration as before; the overwrite below does not depend on it.
spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled","true")

(recommended_actions
.write
.format("delta")
.mode("overwrite")
.option("overwriteSchema", "true")
.saveAsTable(f"{ml_catalog}.{ml_search_db}.cold_start_recommended_actions"))

In [None]:
secret_scope = dbutils.widgets.get("secret_scope")

# The text before the first "-" in the secret scope name is compared with
# "prod"; on a match the notebook exits here, so no later cell runs.
scope_prefix = secret_scope.partition("-")[0]
if scope_prefix == "prod":
    dbutils.notebook.exit("Skip run in prod environment")

### Upload to S3

In [None]:
import boto3

# Build the S3 client on top of a Databricks service credential instead of
# static keys: the credentials provider is handed to boto3 as its botocore
# session.
credentials_provider = dbutils.credentials.getServiceCredentialsProvider(
    'service-cred-nas-lifion_ml-sdq-dit'
)
boto3_session = boto3.Session(botocore_session=credentials_provider)
s3_client = boto3_session.client('s3')

In [None]:
import pandas as pd
from datetime import datetime

bucket_name = "ml-models-bucket-appbuild-02"
ts = datetime.now()

# Write the recommendations to a local CSV named after the current timestamp.
# NOTE(review): str(datetime) puts a space and colons into the file name and
# therefore into the S3 key; left unchanged in case consumers rely on the
# existing key format.
file = f"cold_start_{ts}.csv"
recommended_actions.toPandas().to_csv(file, index=False)

# Upload file to S3
file_path = f"recommended-actions/{file}"
# Open the CSV in a context manager so the handle is closed once the upload
# call returns, including when put_object raises.
with open(file, "rb") as csv_file:
    response = s3_client.put_object(Bucket=bucket_name, Body=csv_file, Key=file_path)
status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
if status == 200:
    print(f"Successful S3 put_object response. Status - {status}")
else:
    # Fail loudly: a missing or non-200 status previously fell through
    # silently, letting the run look successful without a confirmed upload.
    raise RuntimeError(f"Unsuccessful S3 put_object response. Status - {status}")