In [None]:
# This cell is NOT editable. Overwrite variables at your own discretion.
# Any changes other than the script code will NOT BE SAVED!
# All cells are assumed to be script code cells, unless explicitly tagged as 'o9_ignore'

In [None]:
# Query text for each plugin input; each string is handed to
# O9DataLake.register(..., query=...) further down in this cell.

# Weekly Sales_Python measure at Day x Department x Store grain, for the
# CurrentWorkingView version.
_sales = "select ([WalmartTime_Python].[Day_Python] * [Version].[Version Name].[CurrentWorkingView] * [Department_Python].[Department_ID] * [Store_Python].[Store_ID] * {Measure.[Weekly Sales_Python]});"
# Store/day measures (temperature, fuel price, MarkDown1-5, CPI, unemployment)
# at Day x Store grain, for the CurrentWorkingView version. No department level.
_features = "select( [WalmartTime_Python].[Day_Python] * [Version].[Version Name].[CurrentWorkingView] * [Store_Python].[Store_ID] * { Measure.[Temperature_Python], Measure.[Fuel Price_Python], Measure.[MarkDown1_Python] , Measure.[MarkDown2_Python], Measure.[MarkDown3_Python], Measure.[MarkDown4_Python] , Measure.[MarkDown5_Python] , Measure.[CPI_Python] , Measure.[Unemployment_Python]});"
# Store_ID together with the store's Type and Size; no measures and no
# version member are selected here.
_stores = "select([Store_Python].[Store_ID] *[Store_Python].[Type] *[Store_Python].[Size]);"


# Initialize the O9DataLake with the input parameters and dataframes
# Data can be accessed with O9DataLake.get(<Input Name>)
# Overwritten values will not be reflected in the O9DataLake after initialization

from o9_common_utils.O9DataLake import O9DataLake, ResourceType, DataSource,PluginSetting
# Inputs: one registration per query above. The script cell below indexes the
# returned objects by column-name lists and reads `.shape`, so they are
# presumably pandas DataFrames -- TODO confirm against the o9 SDK.
sales = O9DataLake.register("sales",data_source = DataSource.LS, entity_type = ResourceType.IBPL, query = _sales,plugin_setting = PluginSetting.Inputs)
features = O9DataLake.register("features",data_source = DataSource.LS, entity_type = ResourceType.IBPL, query = _features,plugin_setting = PluginSetting.Inputs)
stores = O9DataLake.register("stores",data_source = DataSource.LS, entity_type = ResourceType.IBPL, query = _stores,plugin_setting = PluginSetting.Inputs)

# Output: registered by name only (no query). The script cell assigns a
# variable called PredictedSales_Walmart.
# NOTE(review): presumably the platform picks up that variable by this name
# once the script finishes -- confirm.
O9DataLake.register("PredictedSales_Walmart",data_source = DataSource.LS,entity_type = ResourceType.IBPL,plugin_setting = PluginSetting.Outputs)
# Script parameters: registered with an empty dict; `script_params` is not
# read anywhere in the visible cells.
script_params = O9DataLake.register({}, data_source = DataSource.LS,plugin_setting = PluginSetting.ScriptParam)

In [None]:
"""
EXEC plugin instance [Python_MediumWeight_Plugin]
for measures {[Python_MW_Output]}
using scope (Version.[Version Name].[CurrentWorkingView] * 
[Department_Python].[Department_ID].[1]) 
using arguments {(NumExecutors, 2), (executorMemory,"1G"),
(driverMemory,"1G"),(DataTransferMode, "parquet")};

"""

# Import packages
import os
import numpy as np
import pandas as pd 
import logging

# Initializing logger
# Every message in this script goes through this named logger. The
# module-level logging.info() helper writes to the root logger instead, which
# drops INFO records at its default WARNING level.
logger = logging.getLogger('o9_logger')
logger.info("WMLogger: Reading Input Dataframes")

# Column names shared by several frames. Spelling them out once keeps the
# merges explicit instead of relying on whichever columns happen to overlap.
DAY_KEY = 'WalmartTime_Python.[Day_Python]'
DEPARTMENT_KEY = 'Department_Python.[Department_ID]'
STORE_KEY = 'Store_Python.[Store_ID]'
VERSION_KEY = 'Version.[Version Name]'

# Keep only the columns the script uses from each registered input.
sales_df = sales[[DAY_KEY, DEPARTMENT_KEY, STORE_KEY, 'Weekly Sales_Python']]
features_df = features[[
    STORE_KEY, DAY_KEY,
    'Temperature_Python', 'Fuel Price_Python',
    'MarkDown1_Python', 'MarkDown2_Python', 'MarkDown3_Python',
    'MarkDown4_Python', 'MarkDown5_Python',
    'CPI_Python', 'Unemployment_Python',
]]
stores_df = stores[[STORE_KEY, 'Store_Python.[Type]', 'Store_Python.[Size]']]

# Left joins keep every sales row: store attributes attach by store, feature
# measures attach by (day, store). These are the same keys pandas would infer
# from the shared column names, in the same order.
dataset = (
    sales_df
    .merge(stores_df, how='left', on=STORE_KEY)
    .merge(features_df, how='left', on=[DAY_KEY, STORE_KEY])
)
logger.info("Dataset")
logger.info(dataset.columns)


# Input dataframe

logger.info("WMLogger: INPUT DATAFRAME:")
logger.info(dataset.head())
logger.info(dataset.dtypes)

# Fill missing values with 0 on a new frame so `dataset` keeps the raw merge
# result and this step gives the same answer when the cell is re-run.
input_df = dataset.replace(np.nan, 0)
logger.info(input_df.head())
# One-hot encode the store type column.
input_df = pd.get_dummies(input_df, columns=["Store_Python.[Type]"])


logger.info("WMLogger: Before normalization")


# Explicit copy: adding a column to a bare column-subset of `sales` is the
# pattern that triggers pandas' SettingWithCopyWarning.
PredictedSales_Walmart = sales[[VERSION_KEY, DAY_KEY, DEPARTMENT_KEY, STORE_KEY]].copy()

logger.info("WMLogger: Writing output")

# Placeholder output: a running row number 0..n-1 stored as float64.
# Nothing derived from `input_df` feeds this column.
PredictedSales_Walmart["Python_MW_Output"] = np.arange(
    len(PredictedSales_Walmart), dtype='float64'
)

logger.info("WMLogger: Finishing Plugin Execution")