In [18]:
# IBPL query for product 196426 (registered below as the "predict_df" input).
# Written as adjacent string literals, one query term per line; Python joins
# them at compile time into a single query string.
_predict_df = (
    "select ("
    "Version.[Version Name]"
    "*Product.[Product].[196426]"
    "*Time.FiscalWeek"
    "*SalesAccount.[Account]"
    "*Location.[Location]"
    "*{"
    "Measure.[DPSellOutUnitsActuals],"
    "Measure.[Mean Pricing Save PCT],"
    "Measure.[Placement Count],"
    "Measure.[Promotion Count],"
    "Measure.[DPSellOutPrice]"
    "});"
)
# Same dimensions and measures for product 208821 (registered below as the
# "input_df" input).
_input_df = (
    "select ("
    "Version.[Version Name]"
    "*Product.[Product].[208821]"
    "*Time.FiscalWeek"
    "*SalesAccount.[Account]"
    "*Location.[Location]"
    "*{"
    "Measure.[DPSellOutUnitsActuals],"
    "Measure.[Mean Pricing Save PCT],"
    "Measure.[Placement Count],"
    "Measure.[Promotion Count],"
    "Measure.[DPSellOutPrice]"
    "});"
)

from o9_common_utils.O9DataLake import O9DataLake, ResourceType, DataSource,PluginSetting

# ---- inputs ----
# Two IBPL queries against Live Server, registered under the names the later
# cells fetch them by.
predict_df = O9DataLake.register(
    "predict_df",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    query=_predict_df,
    plugin_setting=PluginSetting.Inputs,
)
input_df = O9DataLake.register(
    "input_df",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    query=_input_df,
    plugin_setting=PluginSetting.Inputs,
)
# Two LiveFrame-backed inputs; no query is passed for these.
liveinput = O9DataLake.register(
    "WeeklySales",
    data_source=DataSource.LIVEFRAME,
    entity_type=ResourceType.LIVEFRAME,
    plugin_setting=PluginSetting.Inputs,
)
DPSelloutPrediction = O9DataLake.register(
    "DPSelloutPrediction",
    data_source=DataSource.LIVEFRAME,
    entity_type=ResourceType.LIVEFRAME,
    plugin_setting=PluginSetting.Inputs,
)

# ---- slice dimension ----
O9DataLake.register(
    "Product.[Product]",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    plugin_setting=PluginSetting.SliceDimension,
)

# ---- outputs ----
O9DataLake.register(
    "output1",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    plugin_setting=PluginSetting.Outputs,
)
O9DataLake.register(
    "output2",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    plugin_setting=PluginSetting.Outputs,
)

# ---- script params ----
# Registered as a name -> value dict; both values are strings.
script_params = O9DataLake.register(
    {"var1": "10", "var2": "Test"},
    data_source=DataSource.LS,
    plugin_setting=PluginSetting.ScriptParam,
)

In [19]:
# Display what O9DataLake holds for the registered inputs. In the recorded
# output each entry carries its name, resource type, data source, query,
# a 'std_count_limit' value and the loaded frame under 'df'.
O9DataLake.inputs

{'predict_df': {'name': 'predict_df',
  'resource_type': <ResourceType.IBPL: 'ibpl_query'>,
  'data_source': <DataSource.LS: 'liveserver'>,
  'query': 'select (Version.[Version Name]*Product.[Product].[196426]*Time.FiscalWeek*SalesAccount.[Account]*Location.[Location]*{Measure.[DPSellOutUnitsActuals],Measure.[Mean Pricing Save PCT],Measure.[Placement Count],Measure.[Promotion Count],Measure.[DPSellOutPrice]});',
  'std_count_limit': '200000',
  'df':    Version.[Version Name]  Product.[Product] Time.[FiscalWeek]  \
  0                      S2             196426          W03-2016   
  1      CurrentWorkingView             196426          W03-2016   
  2                      S2             196426          W05-2016   
  3      CurrentWorkingView             196426          W05-2016   
  4                      S2             196426          W08-2016   
  5      CurrentWorkingView             196426          W08-2016   
  6                      S2             196426          W13-2016   
  7

In [6]:
# fetching inputs
# Look each input up by the name it was registered under and rebind the
# notebook variables to the result. In the recorded run these logged shapes
# (14, 10), (320, 10) and (200000, 4) respectively.
# NOTE(review): "DPSelloutPrediction" is registered as an input but is never
# fetched here - confirm that is intentional.
predict_df = O9DataLake.get('predict_df')
input_df = O9DataLake.get('input_df')
liveinput = O9DataLake.get('WeeklySales')

# fetching script params
# Values were registered as strings ("10" and "Test"); the recorded log shows
# them unchanged. They are only logged by the user script, not used in the model.
value1 = O9DataLake.get_script_param("var1")
value2 = O9DataLake.get_script_param("var2")

In [7]:
#user script
# Fits a decision-tree regressor on input_df (four pricing/promotion measures
# -> DPSellOutUnitsActuals) and scores the rows of predict_df, producing
# output1 = key columns + DPSellOutUnitsFcst. output1 stays None when there is
# nothing to score or nothing to train on.
# Reads predict_df, input_df, liveinput, value1 and value2 from the fetch cell.

# package imports
import logging
from sklearn import tree

# initialize output variables
output1 = None
output2 = None

# initialize logger
logger = logging.getLogger('o9_logger')

# Shapes are read with getattr so that a missing (None) input is logged as
# "None" instead of raising AttributeError before the guard below can run.
logger.debug(f'predict_df dataframe:  {getattr(predict_df, "shape", None)}')
logger.debug(f'input_df dataframe:  {getattr(input_df, "shape", None)}')
logger.debug(f'liveinput dataframe:  {getattr(liveinput, "shape", None)}')
logger.debug(f'script param var1 value: {value1}')
logger.debug(f'script param var2 value: {value2}')

# Predictor columns shared by training and scoring, the training target, and
# the key columns carried through to the output.
feature_cols = ['Mean Pricing Save PCT', 'Placement Count', 'Promotion Count', 'DPSellOutPrice']
target_col = 'DPSellOutUnitsActuals'
key_cols = ['Time.[FiscalWeek]', 'Product.[Product]', 'Version.[Version Name]', 'SalesAccount.[Account]', 'Location.[Location]']

# Both frames are required: predict_df supplies the rows to score and
# input_df supplies the training rows.
has_predict_rows = predict_df is not None and len(predict_df.index) > 0
has_training_rows = input_df is not None and len(input_df.index) > 0

if has_predict_rows and has_training_rows:
    # Missing values are replaced with 0 in both features and target.
    # NOTE(review): zero-filling the target treats missing actuals as zero
    # units sold - confirm that is the intended meaning.
    x = input_df[feature_cols].fillna(0).values
    y = input_df[target_col].fillna(0).values

    # Fixed seed: the tree permutes features at each split, so equally good
    # splits could otherwise yield different trees on re-run.
    clf = tree.DecisionTreeRegressor(random_state=0)
    clf = clf.fit(x, y)

    predict = predict_df[feature_cols].fillna(0).values
    predict_values = clf.predict(predict)

    # assign() returns a new frame, so the fetched predict_df is not modified.
    output1 = predict_df.assign(DPSellOutUnitsFcst=predict_values)[key_cols + ['DPSellOutUnitsFcst']]
    logger.info(output1)
else:
    # Nothing to score or nothing to train on: report it and leave output1 empty.
    logger.warning('Skipping prediction: predict_df or input_df is missing or empty')
    output1 = None

    

2023-10-19 21:40:16,844 - o9_logger - DEBUG  - predict_df dataframe:  (14, 10)
2023-10-19 21:40:16,845 - o9_logger - DEBUG  - input_df dataframe:  (320, 10)
2023-10-19 21:40:16,846 - o9_logger - DEBUG  - liveinput dataframe:  (200000, 4)
2023-10-19 21:40:16,847 - o9_logger - DEBUG  - script param var1 value: 10
2023-10-19 21:40:16,849 - o9_logger - DEBUG  - script param var2 value: Test
2023-10-19 21:40:16,872 - o9_logger - INFO  -    Time.[FiscalWeek]  Product.[Product] Version.[Version Name]  \
0           W03-2016             196426                     S2   
1           W03-2016             196426     CurrentWorkingView   
2           W05-2016             196426                     S2   
3           W05-2016             196426     CurrentWorkingView   
4           W08-2016             196426                     S2   
5           W08-2016             196426     CurrentWorkingView   
6           W13-2016             196426                     S2   
7           W13-2016             196

In [9]:
# Expose the live-frame input as the second output. This binds the same
# object; no copy is made.
output2 = liveinput
# Log only the first rows via head(); the frame logged 200000 rows in the
# recorded run. Uses the logger created in the user-script cell.
logger.debug(f'liveinput dataframe:  {liveinput.head()}')

2023-10-19 21:41:01,287 - o9_logger - DEBUG  - liveinput dataframe:    WalmartTime.[Day]  Department.[Department_ID]  Store.[Store_ID]  \
0        02-05-2010                           1                 1   
1        02-05-2010                           1                10   
2        02-05-2010                           1                11   
3        02-05-2010                           1                12   
4        02-05-2010                           1                13   

   Weekly Sales dummyMeasure  
0      24924.50           10  
1      40212.84           10  
2      19611.13           10  
3      17426.75           10  
4      46761.90           10  


In [10]:
# Push outputs to Live Server (make sure every dimension and measure in the
# frame already exists in Live Server).
# NOTE(review): output1 is None when the user script skips prediction -
# confirm that O9DataLake.put accepts None.
O9DataLake.put('output1', output1)
# NOTE(review): "output2" is registered as an output and assigned above, but
# its push is disabled - confirm that is intentional.
#O9DataLake.put('output2', output2)