In [None]:
# No package install is required here. The `dlt` module imported below is the
# Delta Live Tables API, which the Databricks pipeline runtime provides.
# The PyPI distribution named `dlt` is an unrelated project and does not supply
# `dlt.table` / `dlt.read`, so installing it cannot satisfy this notebook.
# NOTE(review): for local linting or type stubs, the `databricks-dlt` package is
# the likely candidate — confirm, and pin the version if it is added.

# Campaign Analytics<br/>

1. **Use case              :** Performing campaign analytics on static campaign data coming from the OneLake Bronze layer.<br/>
2. **Notebook Summary      :** This notebook is part of the campaign analytics application and performs `campaign analytics using various PySpark capabilities`.<br/>
3. **Notebook Description  :** Performs campaign analytics on the OneLake Bronze layer files.


### Feature List
1. Data Profiling
2. Total Revenue, Total Revenue Target & Profit 
3. Campaigns Run per Week
4. Total Profit by Country Per Week
5. Top Loss-Making Campaign 

The bronze data received for processing is already curated. So, we will derive gold tables from bronze tables.

### Import Libraries

In [None]:

import dlt
from pyspark.sql.functions import struct
from pyspark.sql.functions import col
from pyspark.sql.types import DateType
from pyspark.sql.types import *
import pyspark.sql.functions as f
from pyspark.sql.functions import udf
from pyspark.sql.functions import *
from delta.tables import *
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, DateType, TimestampType, DecimalType
from pyspark.sql.functions import sum as _sum
from pyspark.sql.functions import mean as _mean
from pyspark.sql.functions import max as _max
from pyspark.sql.functions import min as _min
import pyspark.sql.functions as func
import pyspark.sql.functions as F
from pyspark.sql.functions import *
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType, DateType

### Define the Schema for the input file

In [None]:
# Explicit schema for the campaign CSV, declared as (column name, Spark type)
# pairs; every field is nullable.
# NOTE(review): both "Campaign_ID" and "CampaignID" are declared — confirm the
# source file really carries two separate id columns.
campaignSchema = StructType([
    StructField(column_name, spark_type(), True)
    for column_name, spark_type in (
        ("Region", StringType),
        ("Country", StringType),
        ("ProductCategory", StringType),
        ("Campaign_ID", IntegerType),
        ("Campaign_Name", StringType),
        ("Qualification", StringType),
        ("Qualification_Number", StringType),
        ("Response_Status", StringType),
        ("Responses", FloatType),
        ("Cost", FloatType),
        ("Revenue", FloatType),
        ("ROI", FloatType),
        ("Lead_Generation", StringType),
        ("Revenue_Target", FloatType),
        ("Campaign_Tactic", StringType),
        ("Customer_Segment", StringType),
        ("Status", StringType),
        ("Profit", FloatType),
        ("Marketing_Cost", FloatType),
        ("CampaignID", IntegerType),
        ("CampDate", DateType),
        ("SORTED_ID", IntegerType),
    )
])
    

### Load the Campaign Dataset from OneLake Bronze Layer

In [None]:
# Bronze Table Setup
@dlt.table(
    comment="Raw data",
    path="/mnt/OneLake/Tables/Tables/dlt_bronze_campaign_data",
)
def bronze_campaign_data():
    """Load the campaign CSV into the bronze table.

    The file is read with its first line treated as a header, and the explicit
    ``campaignSchema`` is applied rather than inferring column types.
    """
    source_csv = "/mnt/datasource-onelake-bronze/data/CampaignData/campaign-data.csv"
    csv_reader = spark.read.format("csv").option("header", True).schema(campaignSchema)
    return csv_reader.load(source_csv)

### Total Revenue, Total Revenue Target & Profit

In [None]:
# Gold Table Setup
@dlt.table(
    comment="Gold: revenue, revenue target, profit and cost range by country and region",
    path="/mnt/OneLake_Gold/Tables/Tables/dlt_gold_country_wise_revenue",
)
def gold_country_wise_revenue():
    """Aggregate the bronze campaign data per (Country, Region).

    Returns a DataFrame with one row per country/region holding:
      * Total_Revenue, Total_Revenue_Target, Total_Profit - sums rounded to
        two decimals;
      * Max_Cost, Min_Cost - the largest and smallest ``Cost`` (not rounded).
    """
    return (
        dlt.read("bronze_campaign_data")
        .groupBy("Country", "Region")
        .agg(
            # Round inside the aggregation instead of overwriting the columns
            # afterwards; column names and order are unchanged.
            F.round(_sum("Revenue"), 2).alias("Total_Revenue"),
            F.round(_sum("Revenue_Target"), 2).alias("Total_Revenue_Target"),
            F.round(_sum("Profit"), 2).alias("Total_Profit"),
            _max("Cost").alias("Max_Cost"),
            _min("Cost").alias("Min_Cost"),
        )
    )
    

### Campaigns Run per Week

In [None]:
# Gold Table Setup
@dlt.table(
    comment="Gold: campaign record counts per campaign name and week",
    path="/mnt/OneLake_Gold/Tables/Tables/dlt_gold_campaign_per_week",
)
def gold_campaign_per_week():
    """Count bronze campaign rows per campaign name and 1-week window.

    Returns a DataFrame with ``Campaign_Name``, the ``window`` struct built
    from ``CampDate`` and ``count``, sorted by ``count`` descending.
    """
    # NOTE(review): Spark aligns tumbling windows to the Unix epoch, so 1-week
    # buckets start on Thursdays (UTC) unless a startTime offset is passed —
    # confirm this is the intended week boundary.
    weekly_window = F.window("CampDate", "1 week")
    return (
        dlt.read("bronze_campaign_data")
        .groupBy("Campaign_Name", weekly_window)
        .count()
        .orderBy(F.col("count").desc())
    )

### Total Profit by Country Per Week

In [None]:
# Gold Table Setup
@dlt.table(
    comment="Gold: total profit per country and week",
    path="/mnt/OneLake_Gold/Tables/Tables/dlt_gold_Total_Profit_by_Country_Per_Week",
)
def gold_Total_Profit_by_Country_Per_Week():
    """Sum ``Profit`` per country for each 7-day window of ``CampDate``.

    Returns a DataFrame with the ``window`` struct, ``Country`` and
    ``Total_Profit``, sorted by the window start.
    """
    # NOTE(review): Spark aligns tumbling windows to the Unix epoch, so 7-day
    # buckets start on Thursdays (UTC) unless a startTime offset is passed —
    # confirm this is the intended week boundary.
    weekly_window = F.window(F.col("CampDate"), "7 days")
    return (
        dlt.read("bronze_campaign_data")
        .groupBy(weekly_window, F.col("Country"))
        # Use the explicitly imported Spark aggregate: a bare `sum` only works
        # while the wildcard import keeps shadowing the Python builtin.
        .agg(_sum("Profit").alias("Total_Profit"))
        .orderBy(F.col("window.start"))
    )

### Top Loss-Making Campaign

In [None]:
# Gold Table Setup
@dlt.table(
    comment="Gold: loss-making campaign records (Profit < 0)",
    path="/mnt/OneLake_Gold/Tables/Tables/dlt_gold_top_loss_making_campaign",
)
def gold_Top_Loss_Making_Campaign():
    """Return the bronze campaign rows that made a loss.

    Keeps ``Campaign_Name`` and ``Profit`` for rows with ``Profit < 0`` and
    adds a ``Loss_Count`` column equal to 1 on every row.
    """
    # NOTE(review): the output is one row per loss-making record, not a ranking
    # per campaign; summing Loss_Count by Campaign_Name would give the "top"
    # view the table name suggests — confirm where that aggregation belongs.
    return (
        dlt.read("bronze_campaign_data")
        .select("Campaign_Name", "Profit")
        .filter(F.col("Profit") < 0)
        # Every surviving row already satisfies Profit < 0, so the flag is a
        # constant 1; the former `otherwise(0)` branch could never be taken.
        .withColumn("Loss_Count", F.lit(1))
    )

#  Retail Sales Data Preparation using Spark

Preparing retail data for training a regression model to predict total sales revenue of a product from a store using the following features: 
- Brand (The brand of the product)
- Quantity (Quantity of product purchased)
- Advert (Whether the product had an advertisement or not)
- Price (How much the product costs)

## Importing Libraries

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import percent_rank
from pyspark.sql import Window
from io import BytesIO
from copy import deepcopy
from datetime import datetime
from dateutil import parser
import logging
from pyspark.sql.types import *

## Defining the schema for the data

In [None]:
# Explicit schema for the store sales transactions file, declared as
# (column name, Spark type) pairs; nullability is left at the StructField default.
Dataschema = StructType([
    StructField(column_name, spark_type())
    for column_name, spark_type in (
        ("ID", StringType),
        ("WeekStarting", DateType),
        ("Store", IntegerType),
        ("Brand", StringType),
        ("Quantity", IntegerType),
        ("Advert", IntegerType),
        ("Price", FloatType),
        ("Revenue", FloatType),
    )
])


## Load the data from the source and perform the transformations

In [None]:
@dlt.table(
    comment="Raw data",
    path="/mnt/OneLake/Tables/Tables/dlt_bronze_SalesTrans",
)
def bronze_SalesTrans():
    """Load the store sales transactions file into the bronze table.

    The file is parsed as CSV with the explicit ``Dataschema``.
    """
    # NOTE(review): no header option is set, so the first line is read as data —
    # confirm the source file has no header row.
    sales_file = '/mnt/datasource-onelake-bronze/data/StoreTransactionsData/SalesTransData.txt'
    return spark.read.schema(Dataschema).csv(sales_file)

In [None]:
@dlt.table(
    comment="Silver data",
    path="/mnt/OneLake_Silver/Tables/Tables/dlt_silver_rank_data",
)
def silver_rank_data():
    """Add a ``rank`` column holding each row's percent rank by ``WeekStarting``.

    The rank is computed over the whole bronze sales table, ordered by
    ``WeekStarting``.
    """
    # NOTE(review): the window has no partition columns, which presumably makes
    # Spark evaluate it on a single partition — acceptable for small data only.
    by_week = Window.partitionBy().orderBy("WeekStarting")
    sales = dlt.read('bronze_SalesTrans')
    return sales.withColumn("rank", percent_rank().over(by_week))

In [None]:
@dlt.table(
    comment="Silver data",
    path="/mnt/OneLake_Silver/Tables/Tables/dlt_silver_train",
)
def gold_train():
    """Training split: rows of ``silver_rank_data`` with ``rank`` at most 0.8.

    The helper ``rank`` column is dropped from the result.
    """
    # NOTE(review): the function name starts with "gold_" while the comment and
    # storage path say silver — confirm which layer this table belongs to.
    ranked_sales = dlt.read('silver_rank_data')
    return ranked_sales.where("rank <= .8").drop("rank")
    
@dlt.table(
    comment="Silver data",
    path="/mnt/OneLake_Silver/Tables/Tables/dlt_silver_test",
)
def gold_test():
    """Test split: rows of ``silver_rank_data`` with ``rank`` above 0.8.

    The helper ``rank`` column is dropped from the result.
    """
    # NOTE(review): the function name starts with "gold_" while the comment and
    # storage path say silver — confirm which layer this table belongs to.
    ranked_sales = dlt.read('silver_rank_data')
    return ranked_sales.where("rank > .8").drop("rank")

#  Customer Churn Data Preparation using Spark

## Load the data from the source and perform the transformations

In [None]:
@dlt.table(
    comment="Raw data",
    path="/mnt/OneLake/Tables/Tables/dlt_bronze_CustomerChurnTrans",
)
def bronze_CustomerChurnTrans():
    """Load the customer churn CSV into the bronze table.

    The first line is treated as a header; no schema is supplied and no type
    inference is requested.
    """
    churn_csv = '/mnt/datasource-onelake-bronze/data/CustomerChurnData/CustomerChurnData.csv'
    csv_reader = spark.read.format("csv").option("header", "true")
    return csv_reader.load(churn_csv)

In [None]:
@dlt.table(
    comment="Silver data",
    path="/mnt/OneLake_Silver/Tables/Tables/dlt_silver_customerchurn_data",
)
def silver_customerchurn_data():
    """Expose the bronze customer churn table as the silver table, unchanged."""
    # NOTE(review): no casts or cleaning are applied here — confirm whether the
    # silver layer is meant to carry typed columns.
    return dlt.read('bronze_CustomerChurnTrans')

In [None]:
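# TODO: candidate casts for silver_customerchurn_data, which currently returns the bronze table unchanged:
# withColumn("TotalAmount",col("TotalAmount").cast("Double")).withColumn("UnitPrice",col("UnitPrice").cast("Double")).withColumn("tenure",col("tenure").cast("integer"))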