# Retail Store Rollup

In [None]:
import sys
# Extend the module search path with the parent directory; presumably that is
# where the local `helpers` package lives -- confirm against the repo layout.
sys.path.append("..")
from pyspark.sql import SparkSession
# NOTE: importing `sum` here rebinds the name `sum` for the whole notebook to
# the Spark column aggregate, hiding Python's builtin `sum`.
from pyspark.sql.functions import col, grouping_id, sum
from helpers.path_translation import translate_to_file_string
from helpers.data_prep_and_print import print_df

### Create Spark session

In [None]:
# Obtain the SparkSession for this notebook (getOrCreate reuses an active one).
spark = SparkSession.builder.appName("Retail Store Rollup").getOrCreate()
# Restrict Spark's log output to the ERROR level.
spark.sparkContext.setLogLevel("ERROR")

## Load Retail Store Data

In [None]:
# Load the retail store CSV file into a DataFrame: the file is pipe-delimited,
# has a header row, and column types are inferred by Spark.
inputFile = translate_to_file_string("../data/retail_store.csv")
df = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .option("delimiter", "|")
    .csv(inputFile)
)
# Print the DataFrame; the 10 is presumably a row limit -- confirm against
# helpers.data_prep_and_print.print_df.
print_df(df, 10)

## RollUp / Cube

In [None]:

# Roll up sales over Outlet -> Location: one row per (Outlet, Location),
# subtotals per Outlet, and a grand total. "gid" holds grouping_id() so the
# aggregation level of each row can be told apart.
# `sum` is the Spark aggregate imported from pyspark.sql.functions.
rollup1 = (
    df.rollup("Outlet", "Location")
    .agg(
        sum("Sales_Amount").alias("Total"),
        grouping_id().alias("gid"),
    )
    .orderBy("Outlet", "Location")
)
print_df(rollup1)

In [None]:
# Cube sales over Outlet and Location: totals for every combination of the
# two grouping columns, including each column on its own and the grand total.
# "gid" holds grouping_id() to identify the aggregation level of each row.
cube1 = (
    df.cube('Outlet', 'Location')
    .agg(
        sum('Sales_Amount').alias('total_sales'),
        grouping_id().alias('gid'),
    )
    .orderBy('Outlet', 'Location')
)
print_df(cube1)


In [None]:
# Three-level rollup Date -> Outlet -> Location: detail rows plus subtotals
# per (Date, Outlet), per Date, and the grand total, each tagged with its
# grouping_id() in "gid".
rollup2 = (
    df.rollup('Date', 'Outlet', 'Location')
    .agg(
        sum('Sales_Amount').alias('total_sales'),
        grouping_id().alias('gid'),
    )
    .orderBy('Date', 'Outlet', 'Location')
)
print_df(rollup2)


In [None]:
# Three-column cube over Date, Outlet and Location: totals for every
# combination of the grouping columns, each row tagged with its
# grouping_id() in "gid".
cube2 = (
    df.cube('Date', 'Outlet', 'Location')
    .agg(
        sum('Sales_Amount').alias('total_sales'),
        grouping_id().alias('gid'),
    )
    .orderBy('Date', 'Outlet', 'Location')
)
print_df(cube2)

In [None]:
# Stop the Spark session created earlier in this notebook.
spark.stop()