Copyright (c) Microsoft Corporation.

Licensed under the MIT License.

# Load Example Data

This notebook assumes that you have already deployed the solution through this Solution Accelerator's automated deployment process. That process populates Azure Data 
Lake blob storage with the csv files representing sample data for this solution.  This notebook converts each of those csv files into a corresponding table within 
Synapse, and creates several other empty tables needed by the solution.  Note that the data schemas used are from the retail inventory Common Data Model (CDM).

## 1.0 Imports & Read in Data from Azure Data Lake

In [None]:
# The wildcard imports place the pyspark.sql functions and types (for example
# StructType, StructField, IntegerType, StringType and TimestampType used by
# the schema cells below) directly into the notebook namespace.
from pyspark.sql.functions import *
from pyspark.sql.types import *
# NOTE(review): mssparkutils and json are not referenced by any cell visible
# in this notebook -- confirm whether they are still needed.
from notebookutils import mssparkutils
import json
# `spark` is not defined anywhere in this notebook; presumably the SparkSession
# is pre-created by the Synapse notebook runtime -- TODO confirm.
# `sc` is the SparkContext used later to build empty RDDs.
sc = spark.sparkContext

# --- Storage / workspace settings -------------------------------------------
# NOTE(review): the account and workspace values look like placeholders that
# are presumably substituted during deployment -- confirm before running.
data_lake_account_name = "data_lake_account_name"  # Synapse Workspace ADLS
file_system_name = "rawdata"
synapse_workspace_name = "synapse_workspace_name"

# Database that receives every table written by this notebook.
database_name = "ContosoCoolerDatabase"

# Base names of the sample files: "<name>.csv" is read from the data lake and
# saved as table "<name>".
file_names = [
    "InventoryProjected",
    "InventoryTransaction",
    "InventoryTransactionType",
    "InventoryTransactionUnserializedItem",
    "Item",
    "Location",
    "PickList",
    "PickListItem",
    "Cooler",
    "CoolerItemBalance",
]


## 2.0 Run once to create initial template tables
This cell loops through each of the csv files and creates a corresponding table. The file name is used for the table name, and the column names are taken from the header row of each csv file.

In [None]:
# The container URI is the same for every file, so build it once up front.
file_base_path = f'abfss://{file_system_name}@{data_lake_account_name}.dfs.core.windows.net/'

for file_name in file_names:
    # Read <file_name>.csv, taking column names from the header row and
    # letting Spark infer the column types.
    csv_reader = spark.read.format('csv').options(header=True, inferSchema=True)
    df = csv_reader.load(file_base_path + file_name + '.csv')

    # Save as <database_name>.<file_name>, replacing any existing table.
    df.write.mode("overwrite").saveAsTable(f"{database_name}.{file_name}")

## 3.0 Create empty custom tables to support IoT device data, and projection result tables

In [None]:
# Create the RestockProjected schema: three nullable columns.
schema = StructType([
    StructField('CoolerId', IntegerType(), True),
    StructField('ProjectedDateTime', TimestampType(), True),
    StructField('PreviousProjectedDateTime', TimestampType(), True),
])

# Create an empty DataFrame from an empty RDD. emptyRDD() has to be *called*
# and passed directly: wrapping the uncalled method in a list would hand Spark
# a one-element dataset instead of an empty one.
df = spark.createDataFrame(sc.emptyRDD(), schema)
df.printSchema()

# Save as an empty table, replacing any existing RestockProjected table.
df.write.mode("overwrite").saveAsTable(f"{database_name}.RestockProjected")

# Create the IotInventoryAction schema: four nullable columns.
schema = StructType([
    StructField('PickTime', TimestampType(), True),
    StructField('CoolerId', IntegerType(), True),
    StructField('ItemSku', StringType(), True),
    StructField('Quantity', IntegerType(), True),
])

# Create an empty DataFrame from an empty RDD. emptyRDD() has to be *called*
# and passed directly: wrapping the uncalled method in a list would hand Spark
# a one-element dataset instead of an empty one.
df = spark.createDataFrame(sc.emptyRDD(), schema)
df.printSchema()

# NOTE(review): this table is written with format('csv'), unlike the other
# tables in this notebook which use the default format -- presumably
# intentional for whatever writes IoT data into it; confirm.
df.write.format('csv').mode("overwrite").saveAsTable(f"{database_name}.IotInventoryAction")