## Fetch Marketing Campaign data into a DataFrame and Calculate Revenue Variance


In [33]:
%%pyspark
# Load the campaign analytics CSV from the staging container into a Spark DataFrame.
# header=True makes Spark use the first row of the file as the column names.
data_path = (
    spark.read
    .format('csv')
    .option('header', True)
    .load('abfss://marketingdb-staging@#DATA_LAKE_NAME#.dfs.core.windows.net/CampaignAnalytics.csv')
)

# Preview the first 10 rows.
data_path.show(10)

+--------------------+---------+--------------------+--------------------+----------+--------------+
|              Region|  Country|    Product_Category|       Campaign_Name|   Revenue|Revenue_Target|
+--------------------+---------+--------------------+--------------------+----------+--------------+
|North & Central A...|   Canada|               Books|EnjoyTheMoment; B...|$13,873.00|    $10,617.00|
|              Europe|  Germany|Apparel and Footwear|     Fun with Colors|$14,865.00|    $15,960.00|
|       South America|   Brazil|               Books|EnjoyTheMoment; B...|$16,611.00|     $7,917.00|
|        Asia Pacific|      USA|               Books|EnjoyTheMoment; B...|$12,174.00|     $6,996.00|
|              Europe|    Italy|               Books|EnjoyTheMoment; B...| $5,867.00|    $19,049.00|
|North & Central A...|Australia|               Books|EnjoyTheMoment; B...| $9,112.00|    $11,930.00|
|       South America|   Canada|               Books|EnjoyTheMoment; B...|$16,386.00|     $

## Data Transformation - Calculate Revenue Variance


In [34]:
%%pyspark
from pyspark.sql.functions import *
from pyspark.sql.types import *

import numpy as np

# Collect the Spark DataFrame to the driver as a pandas DataFrame.
# NOTE(review): toPandas() pulls every row into driver memory — fine for a small
# campaign file, confirm before pointing this at a large dataset.
pd_df = data_path.toPandas()

# Revenue columns arrive as currency strings such as "$13,873.00"; strip the "$"
# and "," characters and convert to float. The pattern is a raw string so that
# "\$" is not treated as an (invalid) Python string escape.
for money_col in ('Revenue', 'Revenue_Target'):
    pd_df[money_col] = pd_df[money_col].replace(r'[\$,]', '', regex=True).astype(float)

# Create new column: target minus actual revenue, so a positive value means
# revenue fell short of the target and a negative value means it was exceeded.
pd_df['Revenue_Variance'] = pd_df['Revenue_Target'] - pd_df['Revenue']

# Show rows at positions 1 through 4 (the first row, position 0, is not displayed).
display(pd_df.iloc[1:5])

## Write processed data to Azure Data Lake Storage Gen2


In [43]:
%%pyspark
# Convert the pandas DataFrame (including the Revenue_Variance column) back into
# a Spark DataFrame and preview the first 5 rows.
df = spark.createDataFrame(pd_df)
df.show(5)

# Write the result as CSV with a header row, replacing any existing output at the
# target location.
# - coalesce(1) reduces the data to a single partition so one part file is written.
# - "csv" is Spark's built-in CSV source, the same one used when reading the input;
#   the older "com.databricks.spark.csv" name is only a legacy alias for it.
(
    df.coalesce(1)
    .write
    .format("csv")
    .option("header", "true")
    .mode("overwrite")
    .save('abfss://processed-campaigndata@#DATA_LAKE_NAME#.dfs.core.windows.net/campaigndata')
)

+--------------------+-------+--------------------+--------------------+-------+--------------+----------------+
|              Region|Country|    Product_Category|       Campaign_Name|Revenue|Revenue_Target|Revenue_Variance|
+--------------------+-------+--------------------+--------------------+-------+--------------+----------------+
|North & Central A...| Canada|               Books|EnjoyTheMoment; B...|13873.0|       10617.0|         -3256.0|
|              Europe|Germany|Apparel and Footwear|     Fun with Colors|14865.0|       15960.0|          1095.0|
|       South America| Brazil|               Books|EnjoyTheMoment; B...|16611.0|        7917.0|         -8694.0|
|        Asia Pacific|    USA|               Books|EnjoyTheMoment; B...|12174.0|        6996.0|         -5178.0|
|              Europe|  Italy|               Books|EnjoyTheMoment; B...| 5867.0|       19049.0|         13182.0|
+--------------------+-------+--------------------+--------------------+-------+--------------+-