In [2]:
import pandas as pd
import numpy as np
import zipfile

In [3]:
%load_ext lab_black

# Подготовка данных

Ниже зачитываем все необходимые данные (справочник номенклатуры и данные о продажах, прогнозах).

Изначально (на этапе проработки) данные читались напрямую из архива с помощью read_csv из pandas. Но так как инструментарий работы с БД не позволял выгружать большие файлы, отчеты могли дробиться на произвольное количество файлов. Для этого случая была написана функция:

In [5]:
def file_reader(env="test", data_dir="/Users/rodion/Desktop/Forecast_analysis/"):
    """Read every forecast and sales CSV part of one environment from its zip archive.

    The archive ``<data_dir>/<env>.zip`` is scanned for members whose names
    contain ``"sales"`` or ``"forecast"`` (entries containing ``"MACOSX"`` are
    ignored). The resulting counts decide how many numbered members
    ``forecast_<env>_<i>.csv`` / ``sales_<env>_<i>.csv`` (``i`` = 1..count)
    are read. Each CSV is ``;``-separated; its first two lines are skipped and
    fixed column names are assigned.

    Parameters
    ----------
    env : str
        Environment name; selects the archive and the member names.
    data_dir : str
        Directory holding the archives. Defaults to the original hard-coded
        location so existing calls keep working.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(forecast, sales)``, each with an extra ``"env"`` column. The row
        index is not reset, so labels restart at 0 for every part.

    Raises
    ------
    KeyError
        If a numbered member implied by the counts is missing in the archive.
    """
    import os

    forecast_columns = [
        "product_link",
        "effective_forecast",
        "date",
        "in_promotion",
        "sales_frequency_class",
    ]
    sales_columns = ["product_link", "sales_qty", "date", "order_type"]

    def _read_parts(archive, prefix, n_parts, columns):
        """Read members <prefix>_<env>_1..n_parts into a list of frames."""
        frames = []
        for part in range(1, n_parts + 1):
            member = prefix + "_" + env + "_" + str(part) + ".csv"
            # Close every member handle as soon as pandas has consumed it.
            with archive.open(member) as handle:
                frames.append(
                    pd.read_csv(handle, sep=";", skiprows=2, names=columns)
                )
        return frames

    # The context manager closes the archive even if a read fails.
    with zipfile.ZipFile(os.path.join(data_dir, env + ".zip")) as zf:
        # NOTE(review): the last archive entry is skipped, as in the original
        # code; presumably it is a non-data entry. If it is a real data part,
        # that part is silently not counted - confirm against the archives.
        members = [name for name in zf.namelist()[:-1] if "MACOSX" not in name]
        number_of_sales_files = sum("sales" in name for name in members)
        # A name containing both words counts as a sales file only.
        number_of_forecast_files = sum(
            "forecast" in name and "sales" not in name for name in members
        )
        forecast_parts = _read_parts(
            zf, "forecast", number_of_forecast_files, forecast_columns
        )
        sales_parts = _read_parts(zf, "sales", number_of_sales_files, sales_columns)

    # Concatenate once instead of growing a frame inside the loop, which
    # re-copied all previously read rows for every part.
    forecast = pd.concat(forecast_parts) if forecast_parts else pd.DataFrame()
    sales = pd.concat(sales_parts) if sales_parts else pd.DataFrame()
    forecast["env"] = env
    sales["env"] = env
    return forecast, sales

In [6]:
%%time

# Load the forecast and sales parts of the "test" archive.
forecast_test, sales_test = file_reader(env="test")

CPU times: user 57.6 s, sys: 7.66 s, total: 1min 5s
Wall time: 1min 8s


In [7]:
%%time

# Load the forecast and sales parts of the "prod" archive.
forecast_prod, sales_prod = file_reader("prod")

CPU times: user 57.8 s, sys: 8.25 s, total: 1min 6s
Wall time: 1min 11s


In [8]:
# Stack the forecasts of both environments row-wise into one frame; the "env"
# column set by file_reader tells the rows apart.
forecast = pd.concat((forecast_test, forecast_prod))

In [9]:
# Preview the first rows of the combined forecast frame.
forecast.head()

Unnamed: 0,product_link,effective_forecast,date,in_promotion,sales_frequency_class,env
0,1000295532 / 230409 (МАГНИТ Вода артезианская ...,0.85,2022-02-07,No,Q1,test
1,1000295532 / 230409 (МАГНИТ Вода артезианская ...,1.01,2022-02-08,No,Q1,test
2,1000295532 / 230418 (МАГНИТ Вода артезианская ...,0.14,2022-02-07,No,Q2,test
3,1000295532 / 230418 (МАГНИТ Вода артезианская ...,0.18,2022-02-08,No,Q2,test
4,"1000031109 / 230495 (МОЯ ЦЕНА Молоко у/паст 2,...",0.75,2022-02-07,No,Q1,test


In [10]:
# Show row count, column dtypes and memory footprint of the combined forecast.
forecast.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38749240 entries, 0 to 2788035
Data columns (total 6 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   product_link           object 
 1   effective_forecast     float64
 2   date                   object 
 3   in_promotion           object 
 4   sales_frequency_class  object 
 5   env                    object 
dtypes: float64(1), object(5)
memory usage: 2.0+ GB


In [11]:
# Drop the per-environment names; their rows were already copied into
# `forecast`, so the memory can be reclaimed.
del forecast_test, forecast_prod

In [12]:
# Same for sales: stack both environments row-wise into one frame.
sales = pd.concat((sales_test, sales_prod))

In [13]:
# Drop the per-environment names; their rows were already copied into `sales`.
del sales_test, sales_prod

In [14]:
# Build one frame holding forecast and sales side by side. Every forecast row
# is kept (left join); a forecast row is repeated when several sales rows share
# its (product_link, date, env) key - presumably one per order_type, TODO confirm.
merged_df = pd.merge(
    forecast,
    sales.loc[:, ["product_link", "sales_qty", "date", "env"]],
    how="left",
    on=["product_link", "date", "env"],
)

In [15]:
# Forecast rows without a matching sales row got NaN from the left merge;
# treat them as zero sales.
merged_df["sales_qty"] = merged_df["sales_qty"].fillna(0)

In [16]:
# Show row count, column dtypes and memory footprint after the merge.
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38970746 entries, 0 to 38970745
Data columns (total 7 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   product_link           object 
 1   effective_forecast     float64
 2   date                   object 
 3   in_promotion           object 
 4   sales_frequency_class  object 
 5   env                    object 
 6   sales_qty              float64
dtypes: float64(2), object(5)
memory usage: 2.3+ GB


In [17]:
# Metrics used to evaluate forecast accuracy.
def accuracy_metrics(df):
    """Add over-/under-forecast metrics to ``df`` in place and return it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``effective_forecast`` and ``sales_qty``. If it already
        has a ``deficite`` column (e.g. after summing a finer-grained frame),
        the existing ``proficite`` / ``deficite`` values are kept and only the
        ratios are recomputed from them.

    Returns
    -------
    pandas.DataFrame
        The same object, with these columns added or overwritten:

        * ``proficite`` - ``max(forecast - sales, 0)`` (over-forecast volume)
        * ``deficite`` - ``max(sales - forecast, 0)`` (under-forecast volume)
        * ``proficite_percent`` / ``deficite_percent`` - the above divided by
          ``sales_qty``, rounded to 4 decimals
        * ``abs`` - ``1 - (deficite_percent + proficite_percent)``
        * ``val`` - ``1 - deficite_percent + proficite_percent``

    Notes
    -----
    Rows with ``sales_qty == 0`` produce ``inf`` / ``NaN`` ratios; callers are
    expected to clean those up downstream.
    """
    if "deficite" not in df.columns:
        # clip() builds numeric columns directly instead of creating integer
        # zero columns and float-assigning into them through .loc, which
        # pandas >= 2.1 deprecates as an incompatible-dtype assignment.
        # fillna(0) keeps the former result for rows with a missing input.
        df["proficite"] = (
            (df["effective_forecast"] - df["sales_qty"]).clip(lower=0).fillna(0)
        )
        df["deficite"] = (
            (df["sales_qty"] - df["effective_forecast"]).clip(lower=0).fillna(0)
        )
    df["proficite_percent"] = (df["proficite"] / df["sales_qty"]).round(4)
    df["deficite_percent"] = (df["deficite"] / df["sales_qty"]).round(4)
    df["abs"] = (1 - (df["deficite_percent"] + df["proficite_percent"])).round(4)
    df["val"] = (1 - df["deficite_percent"] + df["proficite_percent"]).round(4)
    return df

In [18]:
def w_metrics(df):
    """Attach group-level ("weighted") accuracy ratios to every row of ``df``.

    ``proficite``, ``deficite``, ``effective_forecast`` and ``sales_qty`` are
    summed per (``pg3``, ``env``, ``in_promotion``); the ratios
    ``proficite_percent``, ``deficite_percent``, ``abs`` and ``val`` are then
    computed from those sums and left-joined back onto ``df``. Where ``df``
    already has columns with these names, its own columns get the suffix
    ``_normal`` and the group-level ones ``_weighted``.

    Returns a new DataFrame; ``df`` itself is not modified.
    """
    group_keys = ["pg3", "env", "in_promotion"]
    ratio_columns = ["proficite_percent", "deficite_percent", "abs", "val"]

    totals = df.groupby(group_keys, as_index=False).agg(
        dict.fromkeys(
            ["proficite", "deficite", "effective_forecast", "sales_qty"], "sum"
        )
    )
    group_sales = totals["sales_qty"]
    totals["proficite_percent"] = (totals["proficite"] / group_sales).round(4)
    totals["deficite_percent"] = (totals["deficite"] / group_sales).round(4)
    totals["abs"] = (
        1 - (totals["deficite_percent"] + totals["proficite_percent"])
    ).round(4)
    totals["val"] = (
        1 - totals["deficite_percent"] + totals["proficite_percent"]
    ).round(4)

    return df.merge(
        totals[group_keys + ratio_columns],
        how="left",
        on=group_keys,
        suffixes=("_normal", "_weighted"),
    )

Соберем датафрейм для изучения точности прогноза в разрезе классов частоты продаж.

In [19]:
# Frame for studying forecast accuracy by sales-frequency class: one row per
# product_link / date / env / promo flag / frequency class. The merge may have
# repeated a forecast row once per matching sales row, so the forecast is
# collapsed with max while the sales quantities are summed.
accuracy_with_sales_frequency_class = (
    merged_df.groupby(
        ["product_link", "date", "env", "in_promotion", "sales_frequency_class"],
        as_index=False,
    )
    .agg({"effective_forecast": "max", "sales_qty": "sum"})
    .fillna(0)
)

In [13]:
# Scratch check of date parsing. The strings are day-first (DD.MM.YYYY), so the
# format is given explicitly; without it "03.05.2022" may be read month-first
# (March 5) while "28.01.2022" can only be read day-first.
a = pd.DataFrame(["28.01.2022", "03.05.2022"])
a[0] = pd.to_datetime(a[0], format="%d.%m.%Y")
a.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   0       2 non-null      datetime64[ns]
dtypes: datetime64[ns](1)
memory usage: 144.0 bytes


In [20]:
# Keep only rows where something was sold or a positive forecast exists.
accuracy_with_sales_frequency_class = accuracy_with_sales_frequency_class[
    (accuracy_with_sales_frequency_class["sales_qty"] > 0)
    | (accuracy_with_sales_frequency_class["effective_forecast"] > 0)
]
accuracy_with_sales_frequency_class["date"] = pd.to_datetime(
    accuracy_with_sales_frequency_class["date"], format="%Y-%m-%d"
)
# Series.dt.week is deprecated (it emitted a FutureWarning here) and removed in
# pandas 2.0. dt.isocalendar().week yields the same ISO week number; the cast
# keeps a plain integer dtype, as .dt.week produced.
accuracy_with_sales_frequency_class["week"] = (
    accuracy_with_sales_frequency_class["date"].dt.isocalendar().week.astype(int)
)

# Weekly totals per product_link / env / promo flag / frequency class,
# largest sales first.
pl_week_sales_fr_class = (
    accuracy_with_sales_frequency_class.groupby(
        ["product_link", "week", "env", "in_promotion", "sales_frequency_class"]
    )
    .agg({"effective_forecast": "sum", "sales_qty": "sum"})
    .reset_index()
    .sort_values(by="sales_qty", ascending=False)
)

# product_link looks like "1000295532 / 230409 (...)": characters 0-9 hold the
# product code, characters 13-18 the location code.
pl_week_sales_fr_class["product_code"] = pl_week_sales_fr_class.product_link.str[
    0:10
].astype(int)
pl_week_sales_fr_class["location_code"] = pl_week_sales_fr_class.product_link.str[
    13:19
].astype(int)

# NOTE(review): `products` (the product reference table) is not defined in any
# cell visible in this notebook - it must be loaded before this cell runs.
pl_week_sales_fr_class = pl_week_sales_fr_class.merge(
    products[["product_code", "product_name", "pg3"]], on="product_code", how="left"
)
pl_week_sales_fr_class = accuracy_metrics(pl_week_sales_fr_class)
pl_week_sales_fr_class.sample()

  accuracy_with_sales_frequency_class['week'] = accuracy_with_sales_frequency_class['date'].dt.week


Unnamed: 0,product_link,week,env,in_promotion,sales_frequency_class,effective_forecast,sales_qty,product_code,location_code,product_name,pg3,proficite,deficite,proficite_percent,deficite_percent,abs,val
6225733,1000197033 / 992373 (DIROL Жев резин Colors XX...,6,test,No,Q1,2.76,0.0,1000197033,992373,DIROL Жев резин Colors XXL мят вк 19гбум/уп(Мо...,Жевательная резинка и освежающее драже,2.76,0.0,inf,,,


In [21]:
# Report at pg3 / promo flag / frequency-class level, prod and test side by side.
# The product-location-week errors are summed first, so accuracy_metrics only
# recomputes the ratios from the summed proficite/deficite; w_metrics then adds
# the ratios computed over the whole pg3 / env / promo group (*_weighted).
# Each pivot cell receives exactly one row (the groupby keys equal index +
# columns), so aggfunc="sum" only reshapes.
pg3_env_pl_level_sales_fr_class = (
    pl_week_sales_fr_class.groupby(
        ["pg3", "env", "in_promotion", "sales_frequency_class"], as_index=False
    )
    .agg(
        dict.fromkeys(
            ["effective_forecast", "sales_qty", "proficite", "deficite"], "sum"
        )
    )
    .pipe(accuracy_metrics)
    .pipe(w_metrics)
    .pivot_table(
        index=["pg3", "in_promotion", "sales_frequency_class"],
        columns="env",
        values=[
            "effective_forecast",
            "sales_qty",
            "deficite_percent_normal",
            "proficite_percent_normal",
            "abs_normal",
            "val_normal",
            "proficite_percent_weighted",
            "deficite_percent_weighted",
            "abs_weighted",
            "val_weighted",
        ],
        aggfunc="sum",
    )
    .reset_index()[
        [
            "pg3",
            "in_promotion",
            "sales_frequency_class",
            "effective_forecast",
            "sales_qty",
            "abs_normal",
            "val_normal",
            "deficite_percent_normal",
            "proficite_percent_normal",
            "abs_weighted",
            "val_weighted",
            "deficite_percent_weighted",
            "proficite_percent_weighted",
        ]
    ]
    .sort_values(by=("sales_qty", "test"), ascending=False)
    .fillna(0)
    .reset_index(drop=True)
)
pg3_env_pl_level_sales_fr_class.sample()

Unnamed: 0_level_0,pg3,in_promotion,sales_frequency_class,effective_forecast,effective_forecast,sales_qty,sales_qty,abs_normal,abs_normal,val_normal,...,proficite_percent_normal,proficite_percent_normal,abs_weighted,abs_weighted,val_weighted,val_weighted,deficite_percent_weighted,deficite_percent_weighted,proficite_percent_weighted,proficite_percent_weighted
env,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,prod,test,prod,test,prod,test,prod,...,prod,test,prod,test,prod,test,prod,test,prod,test
3319,Батарейки,No,No movement,17.35,21.92,6.0,6.0,-1.145,-1.8667,2.8916,...,2.0183,2.76,0.0704,-0.2744,1.0706,1.6612,0.4295,0.3066,0.5001,0.9678


In [22]:
# Export set at pg3 / product level, split by sales-frequency class, with prod
# and test side by side. Errors are summed from product-location-week level, so
# accuracy_metrics only recomputes the ratios; w_metrics adds the ratios of the
# whole pg3 / env / promo group (*_weighted).
pg3_product_env_pl_level_sales_fr_class = (
    pl_week_sales_fr_class.groupby(
        ["pg3", "product_name", "env", "in_promotion", "sales_frequency_class"],
        as_index=False,
    )
    .agg(
        dict.fromkeys(
            ["effective_forecast", "sales_qty", "proficite", "deficite"], "sum"
        )
    )
    .pipe(accuracy_metrics)
    .pipe(w_metrics)
    .pivot_table(
        index=["pg3", "product_name", "in_promotion", "sales_frequency_class"],
        columns="env",
        values=[
            "effective_forecast",
            "sales_qty",
            "deficite_percent_normal",
            "proficite_percent_normal",
            "abs_normal",
            "val_normal",
            "proficite_percent_weighted",
            "deficite_percent_weighted",
            "abs_weighted",
            "val_weighted",
        ],
        aggfunc="sum",
    )
    .reset_index()[
        [
            "pg3",
            "product_name",
            "in_promotion",
            "sales_frequency_class",
            "effective_forecast",
            "sales_qty",
            "abs_normal",
            "val_normal",
            "deficite_percent_normal",
            "proficite_percent_normal",
            "abs_weighted",
            "val_weighted",
            "deficite_percent_weighted",
            "proficite_percent_weighted",
        ]
    ]
    .sort_values(by=("sales_qty", "test"), ascending=False)
    .fillna(0)
    .reset_index(drop=True)
)
pg3_product_env_pl_level_sales_fr_class.sample()

Unnamed: 0_level_0,pg3,product_name,in_promotion,sales_frequency_class,effective_forecast,effective_forecast,sales_qty,sales_qty,abs_normal,abs_normal,...,proficite_percent_normal,proficite_percent_normal,abs_weighted,abs_weighted,val_weighted,val_weighted,deficite_percent_weighted,deficite_percent_weighted,proficite_percent_weighted,proficite_percent_weighted
env,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,prod,test,prod,test,prod,test,...,prod,test,prod,test,prod,test,prod,test,prod,test
24826,Бараночные изделия,МАГНИТ Баранки традицион горчичные 350г фл/п (...,No,No movement,17.16,12.28,11.0,8.0,-0.9582,-0.5126,...,1.2591,1.0238,0.5968,0.5613,0.9932,1.1297,0.205,0.1545,0.1982,0.2842


In [23]:
# Weekly totals at product-location level (product_link / week / env / promo
# flag), largest sales first.
plc_week = (
    accuracy_with_sales_frequency_class.groupby(
        ["product_link", "week", "env", "in_promotion"], as_index=False
    )
    .agg({"effective_forecast": "sum", "sales_qty": "sum"})
    .sort_values(by="sales_qty", ascending=False)
)

In [24]:
# Spot-check one random row of the product-location-week frame.
plc_week.sample()

Unnamed: 0,product_link,week,env,in_promotion,effective_forecast,sales_qty
2426979,"1000213882 / 237522 (ФРУКТ ОБЛ Йогурт 2,5% клу...",6,test,No,12.56,5.0


In [25]:
# Extract the product code (characters 0-9) and the location code (characters
# 13-18) from product_link, e.g. "1000295532 / 230409 (...)".
plc_week["product_code"] = plc_week["product_link"].str.slice(0, 10).astype(int)
plc_week["location_code"] = plc_week["product_link"].str.slice(13, 19).astype(int)

In [26]:
# Attach the product name and the pg3 group from the product reference table.
# NOTE(review): `products` is not defined in any cell visible in this notebook.
plc_week = pd.merge(
    plc_week,
    products.loc[:, ["product_code", "product_name", "pg3"]],
    how="left",
    on="product_code",
)

In [27]:
# Product-week level: locations are summed away before the errors are
# computed, so over- and under-forecasts of individual locations offset each
# other inside a product.
product_week = accuracy_metrics(
    plc_week.groupby(
        ["pg3", "product_name", "week", "env", "in_promotion"], as_index=False
    )
    .agg({"effective_forecast": "sum", "sales_qty": "sum"})
    .sort_values(by="pg3", ascending=False)
)

In [28]:
# Spot-check one random row of the product-week frame.
product_week.sample()

Unnamed: 0,pg3,product_name,week,env,in_promotion,effective_forecast,sales_qty,proficite,deficite,proficite_percent,deficite_percent,abs,val
12218,Соки и нектары,ДОБРЫЙ Нектар деревенские яблочки 2л т/пак(Мул...,6,test,No,34.46,8.0,26.46,0.0,3.3075,0.0,-2.3075,4.3075


In [29]:
# Compute the errors at product-location-week level as well (the finer of the
# two calculation granularities).
plc_week = plc_week.pipe(accuracy_metrics)

In [30]:
# Product-week metrics pivoted so that every (env, week) pair becomes a column.
product_week_extract = (
    product_week.pivot_table(
        index=["pg3", "product_name", "in_promotion"],
        columns=["env", "week"],
        values=[
            "effective_forecast",
            "sales_qty",
            "deficite_percent",
            "proficite_percent",
            "abs",
            "val",
        ],
        aggfunc="sum",
    )
    .reset_index()
)

# Ratios divided by zero sales come out as inf; report them as 0.
for percent_column in ("proficite_percent", "deficite_percent"):
    product_week_extract[percent_column] = product_week_extract[
        percent_column
    ].replace(np.inf, 0)

# Order rows by test-environment sales in the latest week present in the pivot.
product_week_extract = (
    product_week_extract[
        [
            "pg3",
            "product_name",
            "in_promotion",
            "effective_forecast",
            "sales_qty",
            "abs",
            "val",
            "deficite_percent",
            "proficite_percent",
        ]
    ]
    .sort_values(
        by=(
            "sales_qty",
            "test",
            product_week_extract["sales_qty", "test"].columns.max(),
        ),
        ascending=False,
    )
    .fillna(0)
    .reset_index(drop=True)
)
product_week_extract.sample()

  result = self._run_cell(


Unnamed: 0_level_0,pg3,product_name,in_promotion,effective_forecast,effective_forecast,sales_qty,sales_qty,abs,abs,val,val,deficite_percent,deficite_percent,proficite_percent,proficite_percent
env,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,prod,test,prod,test,prod,test,prod,test,prod,test,prod,test
week,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,6,6,6,6,6,6,6,6,6,6,6,6
5051,Соусы прочие,HEINZ Соус Терияки 230г дой пак(ППК):14,Yes,313.514994,487.872036,258.93,250.0,0.7892,0.0485,1.2108,1.9515,0.0,0.0,0.2108,0.9515


In [31]:
# pg3 / week report built from product-location-week errors: proficite and
# deficite are summed as computed per location, so accuracy_metrics only
# recomputes the ratios from those sums.
pl_level_pg3_week_env = plc_week.groupby(
    ["pg3", "week", "env", "in_promotion"], as_index=False
).agg(
    {
        "effective_forecast": "sum",
        "sales_qty": "sum",
        "proficite": "sum",
        "deficite": "sum",
    }
)
pl_level_pg3_week_env = accuracy_metrics(pl_level_pg3_week_env)
pl_level_pg3_week_env = pl_level_pg3_week_env.pivot_table(
    index=["pg3", "in_promotion"],
    columns=["env", "week"],
    values=[
        "effective_forecast",
        "sales_qty",
        "deficite_percent",
        "proficite_percent",
        "abs",
        "val",
    ],
    aggfunc="sum",
).reset_index()
pl_level_pg3_week_env = (
    pl_level_pg3_week_env[
        [
            "pg3",
            "in_promotion",
            "effective_forecast",
            "sales_qty",
            "abs",
            "val",
            "deficite_percent",
            "proficite_percent",
        ]
    ]
    .sort_values(
        # Sort by test sales in the latest week of THIS frame. The week label
        # was previously taken from product_week_extract (copy-paste), which
        # tied this cell to an unrelated frame and could name a missing column.
        by=(
            "sales_qty",
            "test",
            pl_level_pg3_week_env["sales_qty", "test"].columns.max(),
        ),
        ascending=False,
    )
    .fillna(0)
    .reset_index(drop=True)
)
pl_level_pg3_week_env.sample()

  result = self._run_cell(


Unnamed: 0_level_0,pg3,in_promotion,effective_forecast,effective_forecast,sales_qty,sales_qty,abs,abs,val,val,deficite_percent,deficite_percent,proficite_percent,proficite_percent
env,Unnamed: 1_level_1,Unnamed: 2_level_1,prod,test,prod,test,prod,test,prod,test,prod,test,prod,test
week,Unnamed: 1_level_2,Unnamed: 2_level_2,6,6,6,6,6,6,6,6,6,6,6,6
556,Сельди соленые,Yes,1409.320782,10973.431047,1373.632,1351.246,0.3121,-6.69,1.0259,8.121,0.331,0.2845,0.3569,7.4055


In [32]:
# pg3 / week report built from product-week errors: proficite and deficite are
# summed as computed per product, so accuracy_metrics only recomputes the
# ratios from those sums.
pg3_week_env = product_week.groupby(
    ["pg3", "week", "env", "in_promotion"], as_index=False
).agg(
    {
        "effective_forecast": "sum",
        "sales_qty": "sum",
        "proficite": "sum",
        "deficite": "sum",
    }
)
pg3_week_env = accuracy_metrics(pg3_week_env)
pg3_week_env = pg3_week_env.pivot_table(
    index=["pg3", "in_promotion"],
    columns=["env", "week"],
    values=[
        "effective_forecast",
        "sales_qty",
        "deficite_percent",
        "proficite_percent",
        "abs",
        "val",
    ],
    aggfunc="sum",
).reset_index()
pg3_week_env = (
    pg3_week_env[
        [
            "pg3",
            "in_promotion",
            "effective_forecast",
            "sales_qty",
            "abs",
            "val",
            "deficite_percent",
            "proficite_percent",
        ]
    ]
    .sort_values(
        # Sort by test sales in the latest week of THIS frame. The week label
        # was previously taken from product_week_extract (copy-paste), which
        # tied this cell to an unrelated frame and could name a missing column.
        by=(
            "sales_qty",
            "test",
            pg3_week_env["sales_qty", "test"].columns.max(),
        ),
        ascending=False,
    )
    .fillna(0)
    .reset_index(drop=True)
)
pg3_week_env.sample()

  result = self._run_cell(


Unnamed: 0_level_0,pg3,in_promotion,effective_forecast,effective_forecast,sales_qty,sales_qty,abs,abs,val,val,deficite_percent,deficite_percent,proficite_percent,proficite_percent
env,Unnamed: 1_level_1,Unnamed: 2_level_1,prod,test,prod,test,prod,test,prod,test,prod,test,prod,test
week,Unnamed: 1_level_2,Unnamed: 2_level_2,6,6,6,6,6,6,6,6,6,6,6,6
101,Каши детские,Yes,46598.407353,41808.765596,41180.0,40724.0,0.7722,0.4579,1.1316,1.0267,0.0481,0.2577,0.1797,0.2844


In [33]:
# pg3 report over all weeks, errors summed from product-location-week level;
# prod and test end up side by side, rows ordered by test sales.
pg3_env_pl_week = (
    plc_week.groupby(["pg3", "env", "in_promotion"], as_index=False)
    .agg(
        dict.fromkeys(
            ["effective_forecast", "sales_qty", "proficite", "deficite"], "sum"
        )
    )
    .pipe(accuracy_metrics)
    .pivot_table(
        index=["pg3", "in_promotion"],
        columns="env",
        values=[
            "effective_forecast",
            "sales_qty",
            "deficite_percent",
            "proficite_percent",
            "abs",
            "val",
        ],
        aggfunc="sum",
    )
    .reset_index()[
        [
            "pg3",
            "in_promotion",
            "effective_forecast",
            "sales_qty",
            "abs",
            "val",
            "deficite_percent",
            "proficite_percent",
        ]
    ]
    .sort_values(by=("sales_qty", "test"), ascending=False)
    .fillna(0)
    .reset_index(drop=True)
)
pg3_env_pl_week.sample()

Unnamed: 0_level_0,pg3,in_promotion,effective_forecast,effective_forecast,sales_qty,sales_qty,abs,abs,val,val,deficite_percent,deficite_percent,proficite_percent,proficite_percent
env,Unnamed: 1_level_1,Unnamed: 2_level_1,prod,test,prod,test,prod,test,prod,test,prod,test,prod,test
685,Вина плодовые белые,Yes,239.07683,190.513322,137.0,137.0,-1.0053,-0.7856,1.7451,1.3906,0.6301,0.6975,1.3752,1.0881


In [34]:
# pg3 report over all weeks, errors summed from product-week level; prod and
# test end up side by side, rows ordered by test sales.
pg3_env = (
    product_week.groupby(["pg3", "env", "in_promotion"], as_index=False)
    .agg(
        dict.fromkeys(
            ["effective_forecast", "sales_qty", "proficite", "deficite"], "sum"
        )
    )
    .pipe(accuracy_metrics)
    .pivot_table(
        index=["pg3", "in_promotion"],
        columns="env",
        values=[
            "effective_forecast",
            "sales_qty",
            "deficite_percent",
            "proficite_percent",
            "abs",
            "val",
        ],
        aggfunc="sum",
    )
    .reset_index()[
        [
            "pg3",
            "in_promotion",
            "effective_forecast",
            "sales_qty",
            "abs",
            "val",
            "deficite_percent",
            "proficite_percent",
        ]
    ]
    .sort_values(by=("sales_qty", "test"), ascending=False)
    .fillna(0)
    .reset_index(drop=True)
)
pg3_env.sample()

Unnamed: 0_level_0,pg3,in_promotion,effective_forecast,effective_forecast,sales_qty,sales_qty,abs,abs,val,val,deficite_percent,deficite_percent,proficite_percent,proficite_percent
env,Unnamed: 1_level_1,Unnamed: 2_level_1,prod,test,prod,test,prod,test,prod,test,prod,test,prod,test
165,Одноразовая посуда,No,20358.14,25610.9,21263.0,21217.0,0.9401,0.7929,0.9575,1.2071,0.0512,0.0,0.0087,0.2071


In [35]:
# pg3 / product report over all weeks, errors summed from
# product-location-week level; prod and test side by side, ordered by test sales.
pg3_product_env_pl_level = (
    plc_week.groupby(
        ["pg3", "product_name", "env", "in_promotion"], as_index=False
    )
    .agg(
        dict.fromkeys(
            ["effective_forecast", "sales_qty", "proficite", "deficite"], "sum"
        )
    )
    .pipe(accuracy_metrics)
    .pivot_table(
        index=["pg3", "product_name", "in_promotion"],
        columns="env",
        values=[
            "effective_forecast",
            "sales_qty",
            "deficite_percent",
            "proficite_percent",
            "abs",
            "val",
        ],
        aggfunc="sum",
    )
    .reset_index()[
        [
            "pg3",
            "product_name",
            "in_promotion",
            "effective_forecast",
            "sales_qty",
            "abs",
            "val",
            "deficite_percent",
            "proficite_percent",
        ]
    ]
    .sort_values(by=("sales_qty", "test"), ascending=False)
    .fillna(0)
    .reset_index(drop=True)
)
pg3_product_env_pl_level.sample()

Unnamed: 0_level_0,pg3,product_name,in_promotion,effective_forecast,effective_forecast,sales_qty,sales_qty,abs,abs,val,val,deficite_percent,deficite_percent,proficite_percent,proficite_percent
env,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,prod,test,prod,test,prod,test,prod,test,prod,test,prod,test
3599,Овощная закуска,МАГНИТ Фасоль печеная в аджике 530г ст/бан (Ро...,Yes,783.129156,1322.887529,680.0,678.0,0.2483,-0.3568,1.1517,1.9512,0.3,0.2028,0.4517,1.154


In [36]:
# pg3 / product report over all weeks, errors summed from product-week level;
# prod and test side by side, ordered by test sales.
pg3_product_env = (
    product_week.groupby(
        ["pg3", "product_name", "env", "in_promotion"], as_index=False
    )
    .agg(
        dict.fromkeys(
            ["effective_forecast", "sales_qty", "proficite", "deficite"], "sum"
        )
    )
    .pipe(accuracy_metrics)
    .pivot_table(
        index=["pg3", "product_name", "in_promotion"],
        columns="env",
        values=[
            "effective_forecast",
            "sales_qty",
            "deficite_percent",
            "proficite_percent",
            "abs",
            "val",
        ],
        aggfunc="sum",
    )
    .reset_index()[
        [
            "pg3",
            "product_name",
            "in_promotion",
            "effective_forecast",
            "sales_qty",
            "abs",
            "val",
            "deficite_percent",
            "proficite_percent",
        ]
    ]
    .sort_values(by=("sales_qty", "test"), ascending=False)
    .fillna(0)
    .reset_index(drop=True)
)
pg3_product_env.sample()

Unnamed: 0_level_0,pg3,product_name,in_promotion,effective_forecast,effective_forecast,sales_qty,sales_qty,abs,abs,val,val,deficite_percent,deficite_percent,proficite_percent,proficite_percent
env,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,prod,test,prod,test,prod,test,prod,test,prod,test,prod,test
1100,Горчица,UNI DAN Горчица русская жгучая 170г ст/бан(Бас...,No,3774.1,3813.92,3518.0,3514.0,0.9272,0.9146,1.0728,1.0854,0.0,0.0,0.0728,0.0854


In [37]:
def dfs_tabs(df_list, sheet_list, file_name):
    """Write each DataFrame to its own sheet of one Excel workbook.

    Parameters
    ----------
    df_list : iterable of pandas.DataFrame
        Frames to export.
    sheet_list : iterable of str
        Sheet names, paired with ``df_list`` by position. If the lengths
        differ, the surplus items of the longer one are ignored (``zip``).
    file_name : str
        Path of the ``.xlsx`` file to create; written with the xlsxwriter
        engine.
    """
    # The context manager saves and closes the workbook, also when a sheet
    # fails to write. ExcelWriter.save() is deprecated and was removed in
    # pandas 2.0.
    with pd.ExcelWriter(file_name, engine="xlsxwriter") as writer:
        for dataframe, sheet in zip(df_list, sheet_list):
            dataframe.to_excel(writer, sheet_name=sheet, startrow=0, startcol=0)


# (sheet name, report frame) pairs in workbook order. The "pw_" sheets use
# errors computed at product-week level, the "plw_" sheets errors computed at
# product-location-week level.
report_tabs = [
    ("pw_level_pg3_env", pg3_env),
    ("pw_level_pg3_product_env", pg3_product_env),
    ("pw_level_pg3_week_env", pg3_week_env),
    ("plw_level_pg3_env", pg3_env_pl_week),
    ("plw_level_pg3_product_env", pg3_product_env_pl_level),
    ("plw_level_pg3_week_env", pl_level_pg3_week_env),
    ("pg3_env_sales_fr_class", pg3_env_pl_level_sales_fr_class),
    ("pg3_product_env_sale_fr_class", pg3_product_env_pl_level_sales_fr_class),
]
sheets = [sheet_name for sheet_name, _ in report_tabs]
dfs = [report_frame for _, report_frame in report_tabs]

# Write every report frame to its own sheet of the workbook.
dfs_tabs(dfs, sheets, "/Users/rodion/Desktop/Forecast_analysis/accuracy_report.xlsx")

На выходе получается Excel-документ с несколькими листами, на каждом из которых выводится отчет о точности прогнозирования на нужном уровне агрегации, с двумя возможными уровнями гранулярности расчета метрик.