In [7]:
# IBPL query for the rows that will be scored (product 196426).
# Adjacent string literals are concatenated by Python, so the value is one
# single-line query string.
_predict_df = (
    "select ("
    "Version.[Version Name]"
    "*Product.[Product].[196426]"
    "*Time.FiscalWeek"
    "*SalesAccount.[Account]"
    "*Location.[Location]"
    "*{"
    "Measure.[DPSellOutUnitsActuals],"
    "Measure.[Mean Pricing Save PCT],"
    "Measure.[Placement Count],"
    "Measure.[Promotion Count],"
    "Measure.[DPSellOutPrice]"
    "});"
)
# Same dimensions and measures, but for product 208821 (used to fit the model).
_input_df = (
    "select ("
    "Version.[Version Name]"
    "*Product.[Product].[208821]"
    "*Time.FiscalWeek"
    "*SalesAccount.[Account]"
    "*Location.[Location]"
    "*{"
    "Measure.[DPSellOutUnitsActuals],"
    "Measure.[Mean Pricing Save PCT],"
    "Measure.[Placement Count],"
    "Measure.[Promotion Count],"
    "Measure.[DPSellOutPrice]"
    "});"
)

from o9_common_utils.O9DataLake import O9DataLake, ResourceType, DataSource,PluginSetting

# Inputs: two IBPL queries plus one liveframe, all registered as plugin inputs.
predict_df = O9DataLake.register(
    "predict_df",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    query=_predict_df,
    plugin_setting=PluginSetting.Inputs,
)
input_df = O9DataLake.register(
    "input_df",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    query=_input_df,
    plugin_setting=PluginSetting.Inputs,
)
liveinput = O9DataLake.register(
    "WeeklySales",
    data_source=DataSource.LIVEFRAME,
    entity_type=ResourceType.LIVEFRAME,
    plugin_setting=PluginSetting.Inputs,
)

# Slice dimension: Product.[Product].
O9DataLake.register(
    "Product.[Product]",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    plugin_setting=PluginSetting.SliceDimension,
)

# Output: a single resource named "output1".
O9DataLake.register(
    "output1",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    plugin_setting=PluginSetting.Outputs,
)

# Script parameters: passed as a dict of string values.
script_params = O9DataLake.register(
    {"var1": "1", "var3": "Test"},
    data_source=DataSource.LS,
    plugin_setting=PluginSetting.ScriptParam,
)

In [5]:
# Display the inputs currently held by O9DataLake; the bare expression is the
# cell's result, so nothing is assigned here.
O9DataLake.inputs

{'predict_df': {'name': 'predict_df',
  'resource_type': <ResourceType.IBPL: 'ibpl_query'>,
  'data_source': <DataSource.LS: 'liveserver'>,
  'query': 'select (Version.[Version Name]*Product.[Product].[196426]*Time.FiscalWeek*SalesAccount.[Account]*Location.[Location]*{Measure.[DPSellOutUnitsActuals],Measure.[Mean Pricing Save PCT],Measure.[Placement Count],Measure.[Promotion Count],Measure.[DPSellOutPrice]});',
  'std_count_limit': '200000',
  'df':    Version.[Version Name]  Product.[Product] Time.[FiscalWeek]  \
  0                      S1             196426          W03-2016   
  1      CurrentWorkingView             196426          W03-2016   
  2                      S1             196426          W05-2016   
  3      CurrentWorkingView             196426          W05-2016   
  4                      S1             196426          W08-2016   
  5      CurrentWorkingView             196426          W08-2016   
  6                      S1             196426          W13-2016   
  7

In [8]:
# `os` is not imported by any other visible cell, so import it here; without
# this the cell raises NameError when the notebook is run on a fresh kernel.
import os

# Show the raw script-parameter payload stored in the environment. The value is
# a plain string, and a missing variable raises KeyError.
os.environ['script_params']

"{'var1': '1', 'var2': '20', 'var3': 'Test'}"

In [6]:
# Pull the three registered inputs from O9DataLake, in registration order.
predict_df, input_df, liveinput = [
    O9DataLake.get(resource_name)
    for resource_name in ('predict_df', 'input_df', 'WeeklySales')
]

# Read the script parameter "var1".
value1 = O9DataLake.get_script_param("var1")

In [8]:
#user script
#
# Fits a decision-tree regressor on input_df (features -> DPSellOutUnitsActuals)
# and scores predict_df, producing output1 with the key columns plus a
# DPSellOutUnitsFcst column. output1 stays None when either frame is missing
# or empty.

# package imports
import logging
from sklearn import tree

# Feature columns shared by training and scoring; defined once so the two
# selections cannot drift apart.
FEATURE_COLUMNS = ['Mean Pricing Save PCT', 'Placement Count', 'Promotion Count', 'DPSellOutPrice']
TARGET_COLUMN = 'DPSellOutUnitsActuals'
FORECAST_COLUMN = 'DPSellOutUnitsFcst'
OUTPUT_KEY_COLUMNS = ['Time.[FiscalWeek]', 'Product.[Product]', 'Version.[Version Name]', 'SalesAccount.[Account]', 'Location.[Location]']
# Fixed seed so repeated runs build the same tree (tie-breaking between
# equally good splits is otherwise random).
RANDOM_STATE = 0


def _shape_or_none(frame):
    """Return ``frame.shape``, or None when ``frame`` itself is None."""
    return None if frame is None else frame.shape


def _has_rows(frame):
    """Return True when ``frame`` is not None and holds at least one row."""
    return frame is not None and len(frame.index) > 0


# initialize output variables
output1 = None
output2 = None

# initialize logger
logger = logging.getLogger('o9_logger')

# Log shapes through the None-safe helper: the guard below allows for missing
# frames, so the logging must not dereference them unconditionally.
logger.debug(f'predict_df dataframe:  {_shape_or_none(predict_df)}')
logger.debug(f'input_df dataframe:  {_shape_or_none(input_df)}')
logger.debug(f'liveinput dataframe:  {_shape_or_none(liveinput)}')
logger.debug(f'script param var1 value: {value1}')


# Both frames are required: input_df trains the model, predict_df is scored.
if _has_rows(predict_df) and _has_rows(input_df):
    # Missing feature/target values are treated as 0, as before.
    train_features = input_df[FEATURE_COLUMNS].fillna(0).values
    train_target = input_df[[TARGET_COLUMN]].fillna(0).values

    clf = tree.DecisionTreeRegressor(random_state=RANDOM_STATE)
    clf = clf.fit(train_features, train_target)

    score_features = predict_df[FEATURE_COLUMNS].fillna(0).values
    predict_values = clf.predict(score_features)

    # Work on a copy so the fetched input frame is not mutated and re-running
    # this cell stays idempotent.
    scored_df = predict_df.copy()
    scored_df[FORECAST_COLUMN] = predict_values

    output1 = scored_df[OUTPUT_KEY_COLUMNS + [FORECAST_COLUMN]]
    logger.info(output1)
else:
    # output1 is already None; record why no forecast was produced.
    logger.warning('predict_df or input_df is missing or empty; no forecast produced')

    

2024-01-17 15:54:14,328 - o9_logger - DEBUG  - predict_df dataframe:  (14, 10)
2024-01-17 15:54:14,330 - o9_logger - DEBUG  - input_df dataframe:  (320, 10)
2024-01-17 15:54:14,331 - o9_logger - DEBUG  - liveinput dataframe:  (421570, 14)
2024-01-17 15:54:14,332 - o9_logger - DEBUG  - script param var1 value: 10
2024-01-17 15:54:14,346 - o9_logger - INFO  -    Time.[FiscalWeek]  Product.[Product] Version.[Version Name]  \
0           W03-2016             196426                     S1   
1           W03-2016             196426     CurrentWorkingView   
2           W05-2016             196426                     S1   
3           W05-2016             196426     CurrentWorkingView   
4           W08-2016             196426                     S1   
5           W08-2016             196426     CurrentWorkingView   
6           W13-2016             196426                     S1   
7           W13-2016             196426     CurrentWorkingView   
8           W48-2016             196426       

In [9]:
# Store the user script's result under the output name 'output1'.
# Note: output1 is None when the user script's guard on predict_df did not pass.
O9DataLake.put('output1', output1)