In [15]:
_predict_df = "select (Version.[Version Name]*Product.[Product].[196426]*Time.FiscalWeek*SalesAccount.[Account]*Location.[Location]*{Measure.[DPSellOutUnitsActuals],Measure.[Mean Pricing Save PCT],Measure.[Placement Count],Measure.[Promotion Count],Measure.[DPSellOutPrice]});"
_input_df = "select (Version.[Version Name]*Product.[Product].[208821]*Time.FiscalWeek*SalesAccount.[Account]*Location.[Location]*{Measure.[DPSellOutUnitsActuals],Measure.[Mean Pricing Save PCT],Measure.[Placement Count],Measure.[Promotion Count],Measure.[DPSellOutPrice]});"

from o9_common_utils.O9DataLake import O9DataLake, ResourceType, DataSource,PluginSetting

# Inputs: two IBPL queries (data source LS) and one LiveFrame named "WeeklySales".
predict_df = O9DataLake.register(
    "predict_df",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    query=_predict_df,
    plugin_setting=PluginSetting.Inputs,
)
input_df = O9DataLake.register(
    "input_df",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    query=_input_df,
    plugin_setting=PluginSetting.Inputs,
)
liveinput = O9DataLake.register(
    "WeeklySales",
    data_source=DataSource.LIVEFRAME,
    entity_type=ResourceType.LIVEFRAME,
    plugin_setting=PluginSetting.Inputs,
)

# Slice dimension: Product.[Product].
O9DataLake.register(
    "Product.[Product]",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    plugin_setting=PluginSetting.SliceDimension,
)

# Outputs: two named outputs, both registered against LS as IBPL resources.
O9DataLake.register(
    "output1",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    plugin_setting=PluginSetting.Outputs,
)
O9DataLake.register(
    "output2",
    data_source=DataSource.LS,
    entity_type=ResourceType.IBPL,
    plugin_setting=PluginSetting.Outputs,
)

# Script parameters: name -> value, both given as strings.
script_params = O9DataLake.register(
    {"var1": "10", "var2": "Test1"},
    data_source=DataSource.LS,
    plugin_setting=PluginSetting.ScriptParam,
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [16]:
# Display the registered inputs as this cell's result. The value shown is a dict keyed by
# input name; each entry lists that input's name, resource_type, data_source, query,
# std_count_limit and a 'df' dataframe.
O9DataLake.inputs

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

{'predict_df': {'name': 'predict_df', 'resource_type': <ResourceType.IBPL: 'ibpl_query'>, 'data_source': <DataSource.LS: 'liveserver'>, 'query': 'select (Version.[Version Name]*Product.[Product].[196426]*Time.FiscalWeek*SalesAccount.[Account]*Location.[Location]*{Measure.[DPSellOutUnitsActuals],Measure.[Mean Pricing Save PCT],Measure.[Placement Count],Measure.[Promotion Count],Measure.[DPSellOutPrice]});', 'std_count_limit': '200000', 'df':    Version.[Version Name]  Product.[Product]  ... Promotion Count DPSellOutPrice
0                      S1             196426  ...             NaN            6.0
1      CurrentWorkingView             196426  ...             NaN            6.0
2                      S1             196426  ...             NaN            9.5
3      CurrentWorkingView             196426  ...             NaN            9.5
4                      S1             196426  ...             NaN            6.0
5      CurrentWorkingView             196426  ...             NaN    

In [17]:
# Fetch the three registered inputs, in registration order, into local names.
predict_df, input_df, liveinput = (
    O9DataLake.get(input_name)
    for input_name in ('predict_df', 'input_df', 'WeeklySales')
)

# Read back the two script parameters registered as "var1" and "var2".
value1, value2 = (
    O9DataLake.get_script_param(param_name)
    for param_name in ("var1", "var2")
)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

In [18]:
#user script
#
# Fits a decision-tree regressor on input_df (features -> DPSellOutUnitsActuals) and
# scores predict_df with it. The forecast is written to a new "DPSellOutUnitsFcst"
# column on predict_df, and output1 is that column together with the key columns.
# output1 stays None when either frame is missing or has no rows.

# package imports
import logging
from sklearn import tree

# Column groups, defined once so training and scoring always use the same features.
FEATURE_COLUMNS = ['Mean Pricing Save PCT', 'Placement Count', 'Promotion Count', 'DPSellOutPrice']
TARGET_COLUMN = 'DPSellOutUnitsActuals'
FORECAST_COLUMN = 'DPSellOutUnitsFcst'
OUTPUT_KEY_COLUMNS = ['Time.[FiscalWeek]', 'Product.[Product]', 'Version.[Version Name]', 'SalesAccount.[Account]', 'Location.[Location]']

# Fixed seed: the tree permutes features at every split, so without it ties between
# equally good splits may be broken differently from one run to the next.
MODEL_SEED = 0


def _shape_or_none(frame):
    """Return ``frame.shape``, or None when ``frame`` itself is None."""
    return None if frame is None else frame.shape


def _has_rows(frame):
    """Return True only for a dataframe that is not None and has at least one row."""
    return frame is not None and len(frame.index) > 0


# initialize output variables
output1 = None
output2 = None

# initialize logger
logger = logging.getLogger('o9_logger')

# Shapes are read through _shape_or_none so a missing input is logged as None
# instead of raising AttributeError before the guard below is reached.
logger.debug(f'predict_df dataframe:  {_shape_or_none(predict_df)}')
logger.debug(f'input_df dataframe:  {_shape_or_none(input_df)}')
logger.debug(f'liveinput dataframe:  {_shape_or_none(liveinput)}')
logger.debug(f'script param var1 value: {value1}')
logger.debug(f'script param var2 value: {value2}')

# Both frames are required: input_df trains the model, predict_df is scored by it.
if _has_rows(predict_df) and _has_rows(input_df):
    # Missing feature/target values are treated as 0 for both training and scoring.
    x = input_df[FEATURE_COLUMNS].fillna(0).values
    y = input_df[[TARGET_COLUMN]].fillna(0).values

    clf = tree.DecisionTreeRegressor(random_state=MODEL_SEED)
    clf = clf.fit(x, y)

    predict = predict_df[FEATURE_COLUMNS].fillna(0).values
    predict_values = clf.predict(predict)

    # The forecast is attached to predict_df itself (as before), then the key columns
    # plus the forecast are selected as the output.
    predict_df[FORECAST_COLUMN] = predict_values
    output1 = predict_df[OUTPUT_KEY_COLUMNS + [FORECAST_COLUMN]]
    logger.info(output1)

else:
    logger.warning('Skipping forecast: predict_df or input_df is missing or has no rows')
    output1 = None

    

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

2023-12-18 07:09:21,079 - o9_logger - DEBUG - predict_df dataframe:  (14, 10)
2023-12-18 07:09:21,079 - o9_logger - DEBUG - input_df dataframe:  (320, 10)
2023-12-18 07:09:21,079 - o9_logger - DEBUG - liveinput dataframe:  (200000, 14)
2023-12-18 07:09:21,079 - o9_logger - DEBUG - script param var1 value: 10
2023-12-18 07:09:21,079 - o9_logger - DEBUG - script param var2 value: Test1
2023-12-18 07:09:21,085 - o9_logger - INFO -    Time.[FiscalWeek]  Product.[Product]  ... Location.[Location] DPSellOutUnitsFcst
0           W03-2016             196426  ...                 ALL                0.0
1           W03-2016             196426  ...                 ALL                0.0
2           W05-2016             196426  ...                 ALL                0.0
3           W05-2016             196426  ...                 ALL                0.0
4           W08-2016             196426  ...                 ALL                0.0
5           W08-2016             196426  ...                 ALL

In [19]:
# The LiveFrame input is handed through unchanged as the second output.
output2 = liveinput

# Log a preview (head) of the frame rather than the whole thing.
liveinput_preview = liveinput.head()
logger.debug(f'liveinput dataframe:  {liveinput_preview}')

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

2023-12-18 07:09:42,735 - o9_logger - DEBUG - liveinput dataframe:    WalmartTime.[Day]  Store.[Store_ID]  ...  Unemployment Weekly Sales
0        02-05-2010                 1  ...         8.106     24924.50
1        02-05-2010                 1  ...         8.106     50605.27
2        02-05-2010                 1  ...         8.106     13740.12
3        02-05-2010                 1  ...         8.106     39954.04
4        02-05-2010                 1  ...         8.106     32229.38

[5 rows x 14 columns]

In [20]:
# Push outputs to Live Server. Make sure every dimension and measure in the
# dataframe is present in Live Server before pushing.
# NOTE(review): output1 is None when the user-script cell takes its else branch —
# confirm that put() accepts None.
O9DataLake.put('output1', output1)
# The output2 push is currently commented out, so only output1 is written.
#O9DataLake.put('output2', output2)

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…