In [None]:
from forecast_process import *

In [None]:
def to_elastic(flat_result_all_cat, index_name, doctype,
               hosts=('192.168.0.179', '192.168.0.178'), port='9200'):
    """Create an Elasticsearch index and bulk-load documents into it.

    Parameters
    ----------
    flat_result_all_cat : documents handed unchanged to ``DocTools.bulk``
        (the callers in this notebook pass lists of flat dicts).
    index_name : name of the index to create and fill.
    doctype : document type; used both as the top-level mapping key and as
        the ``doctype`` argument of the bulk call.
    hosts : iterable of host names / IPs of the Elasticsearch nodes.
        Defaults to the two nodes this notebook was written against.
    port : port appended to every host (default ``'9200'``).

    Returns
    -------
    None. The response of the bulk call is discarded.

    Notes
    -----
    The index is created with ``overwrite=True``; presumably an existing
    index of the same name is replaced -- confirm against ``DocTools``
    before pointing this at an index that must be kept.
    """
    settings = {
        "index": {
            "number_of_shards": 5,
            "number_of_replicas": 1,
            "mapping": {
                "total_fields": {
                    "limit": "1000"
                }
            },
        }
    }

    # Dynamic template: every field detected as a string is mapped to
    # `keyword` instead of the default string mapping.
    mapping = {
        doctype: {
            "dynamic_templates": [
                {"strings": {
                    "match_mapping_type": "string",
                    "mapping": {
                        "type": "keyword"
                    }
                }}
            ]
        }
    }

    uri = ['http://{}:{}'.format(ip, port) for ip in hosts]
    es = DocTools(uri)
    esi = es.indextool()
    esi.create(index_name, overwrite=True, settings=settings, mapping=mapping)
    es.bulk(index_name, flat_result_all_cat, doctype=doctype)

In [None]:
def init():
    """Load the fact table and derive the two inputs used by the notebook.

    Returns
    -------
    (product_quantity_date, product_list)
        product_quantity_date : pandas DataFrame with one row per
            SKU / transaction date / category / brand combination, carrying
            ``daily_quantity`` (sum of quantity, cast to int64) and
            ``daily_price`` (mean unit price); ``transaction_date`` is
            converted with ``pd.to_datetime``.
        product_list : 2-column array of ``[sku_id, sku_name]`` rows, sorted
            by descending total revenue (quantity * unit_price).
    """
    # load fact
    dlf = datalabframework.project.load()
    engine = dlf.engine()
    # Return value is not used below; the call itself is kept as in the
    # original -- presumably it initialises the context, TODO confirm.
    spark = engine.context()

    fact_columns = [
        'sku_id', 'sku_name', 'transaction_date', 'quantity',
        'doc_type', 'unit_price', 'cat_id', 'cat_group_id',
        'cat_root_id', 'cat_name', 'cat_group_name', 'cat_root_name',
        'brand_id', 'brand_name',
    ]
    fact_transaction = engine.load('fact_table').select(*fact_columns)

    # Daily aggregation, restricted to two document types and non-zero prices.
    doc_type_filter = F.expr('doc_type == "PTX"') | F.expr('doc_type == "HDF"')
    group_keys = [
        'sku_id', 'sku_name', 'transaction_date', 'cat_id', 'cat_group_id',
        'cat_root_id', 'cat_name', 'cat_group_name', 'cat_root_name',
        'brand_id', 'brand_name',
    ]
    daily_sales = (
        fact_transaction
        .where(doc_type_filter)
        .where(F.expr('unit_price != 0'))
        .groupby(*group_keys)
        .agg(F.sum('quantity').alias('daily_quantity'),
             F.avg('unit_price').alias('daily_price'))
        .orderBy('transaction_date')
    )

    # NOTE(review): 'product_state_id' is not among the columns selected into
    # fact_transaction above -- confirm Spark resolves it from the source table.
    revenue_by_sku = (
        fact_transaction
        .where(F.col('product_state_id') == 1)
        .select('sku_id', 'sku_name', 'quantity', 'unit_price')
        .withColumn('revenue', F.col('quantity') * F.col('unit_price'))
        .groupby('sku_id', 'sku_name')
        .agg(F.sum('revenue').alias('total_revenue'))
        .sort(F.desc('total_revenue'))
        .toPandas()
    )
    product_list = revenue_by_sku[['sku_id', 'sku_name']].values

    # Bring the daily aggregation to pandas and normalise the dtypes.
    product_quantity_date = daily_sales.toPandas()
    product_quantity_date['daily_quantity'] = product_quantity_date['daily_quantity'].astype(np.int64)
    product_quantity_date['transaction_date'] = pd.to_datetime(product_quantity_date['transaction_date'])
    return product_quantity_date, product_list

In [None]:
def select_trans(product_quantity_date, sku_id):
    """Return the per-date quantity totals of a single SKU.

    Rows of ``product_quantity_date`` whose ``sku_id`` equals the given one
    are grouped by ``transaction_date`` and their ``daily_quantity`` summed.
    The result is a DataFrame with the columns ``transaction_date`` and
    ``daily_quantity`` and a fresh integer index.
    """
    is_requested_sku = product_quantity_date['sku_id'] == sku_id
    sku_rows = product_quantity_date[is_requested_sku]
    return (
        sku_rows
        .groupby('transaction_date')
        .agg({'daily_quantity': 'sum'})
        .reset_index()
    )

In [None]:
# Build the two notebook-wide inputs via init(): the daily per-SKU sales
# frame (pandas) and the revenue-sorted [sku_id, sku_name] array. init()
# calls toPandas() twice, so this cell runs the Spark jobs.
product_quantity_date, product_list = init()

In [None]:
# Reload the results saved by earlier runs so the forecasting loop further
# down can keep appending to them. Context managers make sure every file
# handle is closed as soon as its content has been parsed.
with open('json_output/month_flat_test_result_all_product.json', 'r', encoding='utf8') as test_file:
    flat_test_result_all_product = json.load(test_file)
with open('json_output/month_flat_cv_result_all_product.json', 'r', encoding='utf8') as cv_file:
    flat_cv_result_all_product = json.load(cv_file)
with open('json_output/month_future_predict_all_product.json', 'r', encoding='utf8') as future_file:
    preds_future = json.load(future_file)

In [None]:
# Upload the per-product test results. to_elastic() creates the index with
# overwrite=True, so presumably an existing index of this name is replaced --
# confirm before re-running.
to_elastic(flat_test_result_all_product, 'month_flat_test_result_all_product', 'month_forecast')

In [None]:
# Upload the future demand predictions. to_elastic() creates the index with
# overwrite=True, so presumably an existing index of this name is replaced --
# confirm before re-running.
to_elastic(preds_future, 'month_future_predict_all_product', 'month_forecast')

In [None]:
# Forecast one slice of the revenue-ranked product list and checkpoint the
# accumulated results to disk after every successfully processed product.
#
# The three accumulator lists must already exist: either reloaded from the
# JSON files, or -- for a fresh run -- initialised by uncommenting these lines.
# flat_test_result_all_product = []
# flat_cv_result_all_product = []
# preds_future = []

# Slice of product_list handled by this run; the progress counter starts at
# the same position so the printed number is the index into product_list.
SLICE_START, SLICE_STOP = 3000, 5000


def _dump_json(payload, path):
    """Write `payload` as JSON to `path`, closing the file handle afterwards."""
    with open(path, 'w', encoding='utf8') as handle:
        json.dump(payload, handle)


n = SLICE_START
for product in product_list[SLICE_START:SLICE_STOP]:
    print(n, ':', product)
    n = n + 1
    sku_id = product[0]
    sku_name = product[1]
    try:
        total_by_date = select_trans(product_quantity_date, sku_id)
        flat_result_cv, flat_result_test, preds = adaptive_forecast_process(total_by_date, 'M')
        if not flat_result_test:
            # Nothing usable came back for this product.
            continue

        product_dict = dict(zip(['sku_id', 'sku_name'], product))
        flat_result_test.update(product_dict)
        for result in flat_result_cv:
            result.update(product_dict)

        # Build this product's prediction records locally first, so a failure
        # half-way through cannot leave partial rows in preds_future.
        product_preds = []
        for time_predict, demand_predict in zip(preds.index, preds):
            record = {
                'time_predict': time_predict.isoformat(),
                'demand_predict': demand_predict,
            }
            record.update(product_dict)
            record.update({
                'mape_error': flat_result_test['mape_error'],
                'wape_cv': flat_result_test['wape_cv'],
            })
            product_preds.append(record)

        preds_future.extend(product_preds)
        flat_test_result_all_product.append(flat_result_test)
        flat_cv_result_all_product.extend(flat_result_cv)

        # Checkpoint. The CV file receives the CV results (it was previously
        # written with the test results by mistake).
        _dump_json(flat_test_result_all_product, 'json_output/month_flat_test_result_all_product.json')
        _dump_json(flat_cv_result_all_product, 'json_output/month_flat_cv_result_all_product.json')
        _dump_json(preds_future, 'json_output/month_future_predict_all_product.json')
    except Exception as exc:
        # Best effort: one failing product must not stop the batch, but the
        # failure is reported instead of being silently dropped.
        # KeyboardInterrupt is deliberately not caught, so the run can be stopped.
        print('  skipped', sku_id, '-', repr(exc))
        continue

In [None]:
# NOTE(review): bare literal; it only echoes 1 as the cell output and looks
# like a leftover scratch cell -- candidate for removal.
1

In [None]:
# Preview only the first few records; echoing the whole accumulated list
# floods the notebook output and bloats the saved file.
flat_test_result_all_product[:5]

In [None]:
# Preview only the first few records; echoing the whole accumulated list
# floods the notebook output and bloats the saved file.
preds_future[:5]