In [1]:
import os
from datetime import datetime

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt

import seaborn as sns
from sklearn.ensemble import RandomForestRegressor

matplotlib.rcParams['figure.figsize'] = (20, 10)
pd.set_option('display.max_rows', 100)

In [2]:
data_root = os.path.join('..', 'data', 'train')

In [3]:
def add_master_data_mappings(df: pd.DataFrame) -> pd.DataFrame:
    data_root = os.path.join('..', 'data', 'main')
    
    # = Пути к справочникам - откорректировать если в реальной системе будут лежать по другому адресу =
    client_mapping_file = os.path.join(data_root, "client_mapping.csv")
    freight_mapping_file = os.path.join(data_root, "freight_mapping.csv")
    station_mapping_file = os.path.join(data_root, "station_mapping.csv")

    # Клиент - холдинг
    client_mapping = pd.read_csv(
        client_mapping_file,
        sep=";",
        decimal=",",
        encoding="windows-1251",
    )
    df = pd.merge(df, client_mapping, how="left", on="client_sap_id")

    # Груз
    freight_mapping = pd.read_csv(
        freight_mapping_file, sep=";", decimal=",", encoding="windows-1251"
    )
    df = pd.merge(df, freight_mapping, how="left", on="freight_id")

    # Станции
    station_mapping = pd.read_csv(
        station_mapping_file,
        sep=";",
        decimal=",",
        encoding="windows-1251",
    )
    df = pd.merge(
        df,
        station_mapping.add_prefix("sender_"),
        how="left",
        on="sender_station_id",
    )
    df = pd.merge(
        df,
        station_mapping.add_prefix("recipient_"),
        how="left",
        on="recipient_station_id",
    )

    return df


def evaluate(fact: pd.DataFrame, forecast: pd.DataFrame, public: bool = True) -> float:
    # = Параметры для расчета метрики =
    accuracy_granularity = [
        "period",
        "rps",
        "holding_name",
        "sender_department_name",
        "recipient_department_name",
    ]
    fact_value, forecast_value = "real_wagon_count", "forecast_wagon_count"
    if public:
        metric_weight = np.array([0.0, 1.0, 0.0, 0.0, 0.0])
    else:
        metric_weight = np.array([0.1, 0.6, 0.1, 0.1, 0.1])

    # = Собственно расчет метрик =
    # 1. Добавляем сущности верхних уровней гранулярности по справочникам
    fact = add_master_data_mappings(fact)
    forecast = add_master_data_mappings(forecast)
    
    # 2. Расчет KPI
    compare_data = pd.merge(
        fact.groupby(accuracy_granularity, as_index=False)[fact_value].sum(),
        forecast.groupby(accuracy_granularity, as_index=False)[forecast_value].sum(),
        how="outer",
        on=accuracy_granularity,
    ).fillna(0)
    # Против самых хитрых - нецелочисленный прогноз вагоноотправок не принимаем
    compare_data[fact_value] = np.around(compare_data[fact_value]).astype(int)
    compare_data[forecast_value] = np.around(compare_data[forecast_value]).astype(int)

    # 3. Рассчитаем метрики для каждого месяца в выборке
    compare_data["ABS_ERR"] = abs(
        compare_data[forecast_value] - compare_data[fact_value]
    )
    
    compare_data["MAX"] = abs(compare_data[[forecast_value, fact_value]].max(axis=1))
    summary = compare_data.groupby("period")[
        [forecast_value, fact_value, "ABS_ERR", "MAX"]
    ].sum()
    summary["Forecast Accuracy"] = 1 - summary["ABS_ERR"] / summary["MAX"]

    # 4. Взвесим метрики отдельных месяцев для получения одной цифры score
    weighted_summary = summary["Forecast Accuracy"].sort_index(ascending=True) * metric_weight
    
    score = (
        weighted_summary
    ).sum()

    return score, weighted_summary, compare_data


def calc_score_public(fact: pd.DataFrame, forecast: pd.DataFrame) -> float:
    return evaluate(fact, forecast, public=True)


def calc_score_private(fact: pd.DataFrame, forecast: pd.DataFrame) -> float:
    return evaluate(fact, forecast, public=False)

In [4]:
# Факт на задание
SUBMIT_FLAG = True

if SUBMIT_FLAG:
    fact = pd.read_csv(os.path.join('..', 'data', 'main', 'fact_train_test.csv'), sep=";", decimal=",", encoding="windows-1251")
else:
    fact = pd.read_csv(os.path.join(data_root, "train_data.csv"))
    
fact["period"] = fact["period"].astype("datetime64[ns]")
# fact - уже обрезанная выборка

# Пример прогноза - наивный, копия последнего факта
result = []
last_known_fact_month = fact["period"].max()
test_periods = pd.date_range(start=last_known_fact_month, periods=6, freq='MS', inclusive='right')


for period in test_periods:
    print(period)
    month_forecast = fact[fact["period"] == last_known_fact_month].reset_index(drop=True)
    month_forecast["period"] = period
    result.append(month_forecast)
result = pd.concat(result).reset_index(drop=True)
result.rename(columns={"real_wagon_count": "forecast_wagon_count", "real_weight": "forecast_weight"}, inplace=True)
result.to_csv(os.path.join(data_root, "forecast_example.csv"), index=False, sep=";", decimal=",", encoding="windows-1251")

2023-04-01 00:00:00
2023-05-01 00:00:00
2023-06-01 00:00:00
2023-07-01 00:00:00
2023-08-01 00:00:00


In [5]:
# = Примеры файлов для проверки =
validation_file = os.path.join(data_root, "test_data.csv")
forecast_file = os.path.join(data_root, "forecast_example.csv")

if SUBMIT_FLAG:
    # train_file = os.path.join(data_root, 'extended_dataset.csv')
    train_file = os.path.join('..', 'data', 'main', 'fact_train_test.csv')
else:
    train_file = os.path.join(data_root, 'train_data.csv')

# Валидационный датасет
fact = pd.read_csv(validation_file)
print("Валидационный датасет:", fact.shape)
# Прогноз
forecast = pd.read_csv(forecast_file, sep=";", decimal=",", encoding="windows-1251")
print("Прогноз:", forecast.shape)
# Данные для обучения
train_data = pd.read_csv(train_file, sep=";", decimal=",", encoding="windows-1251")

fact['period'] = fact['period'].astype("datetime64[ns]")
forecast['period'] = forecast['period'].astype("datetime64[ns]")
train_data['period'] = train_data['period'].astype("datetime64[ns]")
train_data = add_master_data_mappings(train_data)

if SUBMIT_FLAG:
    fact = pd.concat([fact, result], axis=0)

fact = fact[['period', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'real_weight', 'real_wagon_count']]
forecast = forecast[['period', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'forecast_weight', 'forecast_wagon_count']]
train_file = train_data[['period', 'rps', 'podrod', 'filial', 'client_sap_id', 'freight_id', 'sender_station_id', 'recipient_station_id', 'sender_organisation_id', 'real_weight', 'real_wagon_count']]

Валидационный датасет: (218828, 17)
Прогноз: (239605, 11)


In [6]:
fact

Unnamed: 0,period,rps,podrod,filial,client_sap_id,freight_id,sender_station_id,recipient_station_id,sender_organisation_id,real_weight,real_wagon_count
0,2022-11-01,0,0,0,-1,3495,4893,6913,32616,40.0,1.0
1,2022-11-01,0,0,0,-1,3495,4893,20189,32616,40.0,1.0
2,2022-11-01,0,0,0,-1,15,38963,38966,30964,360.0,6.0
3,2022-11-01,0,0,0,-1,349,38725,38966,27437,329.0,7.0
4,2022-11-01,0,0,0,-1,349,38756,38966,26664,247.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...
239600,2023-08-01,1,5,2,1346,1482,31438,31482,0,,
239601,2023-08-01,1,5,2,1346,1492,31438,36091,27275,,
239602,2023-08-01,1,5,2,1346,1492,31438,35450,27275,,
239603,2023-08-01,1,5,2,1346,1492,31438,31482,0,,


In [7]:
def find_value(data, rps, holding_name, sender_department_name, recipient_department_name):
    return data[(data['rps'] == rps) & (data['holding_name'] == holding_name) & (data['sender_department_name'] == sender_department_name) & (data['recipient_department_name'] == recipient_department_name)]


def fit_predict_series(fact, train_data, rps, holding_name, sender_department_name, recipient_department_name):
    accuracy_granularity = [
        "period",
        "rps",
        "holding_name",
        "sender_department_name",
        "recipient_department_name",
    ]
    
    extended_fact = add_master_data_mappings(fact)
    
    grouped_extended = train_data.groupby(accuracy_granularity, as_index=False).sum()
    grouped_fact = extended_fact.groupby(accuracy_granularity, as_index=False).sum()

    # print(grouped_extended)
    
    temp_data = find_value(grouped_extended, rps, holding_name, sender_department_name, recipient_department_name)
    temp_true = find_value(grouped_fact, rps, holding_name, sender_department_name, recipient_department_name)

    temp_true['period'] = temp_true['period'].astype("datetime64[ns]")

    # print(temp_data)
    
    X_train = np.array([
        temp_data['period'].apply(lambda x: x.month),  # Месяц
        temp_data['period'].apply(lambda x: x.quarter), # Квартал
        temp_data['period'].apply(lambda x: x.month // 3 + 1), # Триместр,
        
    ]
    ).T
    
    y_train = temp_data['real_wagon_count']

    # print(temp_data)
    # print(temp_true)
    
    X_val = np.array([
        temp_true['period'].apply(lambda x: x.month),  # Месяц
        temp_true['period'].apply(lambda x: x.quarter), # Квартал
        temp_true['period'].apply(lambda x: x.month // 3 + 1), # Триместр
    ]
    ).T
    
    y_val = temp_true['real_wagon_count']

    rf_model = RandomForestRegressor()
    rf_model.fit(X_train, y_train)
    
    rf_prediction = rf_model.predict(X_val)

    prediction = temp_true.copy()
    prediction = prediction.rename({'real_wagon_count': 'forecast_wagon_count', 'real_weight': 'forecast_weight'})
    prediction['forecast_wagon_count'] = rf_prediction

    return prediction

In [8]:
def reg_distribution_railcars(wagon_count: int, coef):
    def normalize_array(arr):
        if min(arr) < 0:
            raise ValueError('Negative coefficient')
        sum_arr = sum(arr)
        if sum_arr == 0:
            return np.array([1 / arr.size] * arr.size)
        else:
            normalized_arr = arr / sum(arr)
            return normalized_arr

    coef = normalize_array(coef)
    distributed_integer = np.floor(np.array(coef) * wagon_count).astype(int)

    # Коррекция, чтобы обеспечить сумму равной входному целому числу
    diff = wagon_count - sum(distributed_integer)
    distributed_integer[np.argmax(coef)] += diff

    return distributed_integer


def spread_values_across_rows(input_data, pred_vals, features_list):
    result_list = list()
    
    for ind, item in input_data.groupby(features_list):
        spreading_coefs = item['forecast_wagon_count'] / item['forecast_wagon_count'].sum()

        # print(pred_vals)
        # print(item)
        # print(pred_vals[pred_vals['period'] == item['period'].iloc[0]]['forecast_wagon_count'])
        
        predicted_wagon_count = pred_vals[pred_vals['period'] == item['period'].iloc[0]]['forecast_wagon_count'].values[0]
        
        item['forecast_wagon_count'] = reg_distribution_railcars(predicted_wagon_count, spreading_coefs)

        result_list.append(item)

    return pd.concat(result_list, axis=0)

In [9]:
def update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name):
    forecast = forecast.copy()
    
    # Get prediction for some timeseries 
    prediction_df = fit_predict_series(fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

    # print(prediction_df)
    # assert 1 == 0

    # Add features to the forecast df
    extended_forecast = add_master_data_mappings(forecast)
    selected_data = extended_forecast[(extended_forecast['rps'] == rps) & (extended_forecast['holding_name'] == holding_name) & (extended_forecast['sender_department_name'] == sender_department_name) & (extended_forecast['recipient_department_name'] == recipient_department_name)]

    # Spread prediction across rows
    spreaded_pred = spread_values_across_rows(selected_data, prediction_df, ["period", "rps", "holding_name", "sender_department_name", "recipient_department_name"])

    # Update forecast with new prediction
    forecast.loc[spreaded_pred.index] = spreaded_pred

    return forecast

In [10]:
fact

Unnamed: 0,period,rps,podrod,filial,client_sap_id,freight_id,sender_station_id,recipient_station_id,sender_organisation_id,real_weight,real_wagon_count
0,2022-11-01,0,0,0,-1,3495,4893,6913,32616,40.0,1.0
1,2022-11-01,0,0,0,-1,3495,4893,20189,32616,40.0,1.0
2,2022-11-01,0,0,0,-1,15,38963,38966,30964,360.0,6.0
3,2022-11-01,0,0,0,-1,349,38725,38966,27437,329.0,7.0
4,2022-11-01,0,0,0,-1,349,38756,38966,26664,247.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...
239600,2023-08-01,1,5,2,1346,1482,31438,31482,0,,
239601,2023-08-01,1,5,2,1346,1492,31438,36091,27275,,
239602,2023-08-01,1,5,2,1346,1492,31438,35450,27275,,
239603,2023-08-01,1,5,2,1346,1492,31438,31482,0,,


In [11]:
rps = 1
holding_name = 519
sender_department_name = 79
recipient_department_name = 79

forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")


In [12]:
# Скорим
if not SUBMIT_FLAG:
    score_public = calc_score_public(fact, forecast)
    score_private = calc_score_private(fact, forecast)
    print(f"Public score: {score_public[0]}")
    print(f"Private score: {score_private[0]}")

In [13]:
rps = 1
holding_name = 1983
sender_department_name = 79
recipient_department_name = 78

forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")


In [14]:
# Скорим
if not SUBMIT_FLAG:
    score_public = calc_score_public(fact, forecast)
    score_private = calc_score_private(fact, forecast)
    print(f"Public score: {score_public[0]}")
    print(f"Private score: {score_private[0]}")

In [15]:
rps = 1
holding_name = 1555
sender_department_name = 43
recipient_department_name = 24

forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")


In [16]:
# Скорим
if not SUBMIT_FLAG:
    score_public = calc_score_public(fact, forecast)
    score_private = calc_score_private(fact, forecast)
    print(f"Public score: {score_public[0]}")
    print(f"Private score: {score_private[0]}")

In [17]:
rps = 1
holding_name = 1675
sender_department_name = 75
recipient_department_name = 93

forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")


In [18]:
rps = 1
holding_name = 1118
sender_department_name = 101
recipient_department_name = 103

forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")


In [19]:
rps = 1
holding_name = 2117
sender_department_name = 26
recipient_department_name = 77

forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")


In [20]:
rps = 1
holding_name = 1575
sender_department_name = 78
recipient_department_name = 55

forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")


In [21]:
rps = 1
holding_name = 1675
sender_department_name = 75
recipient_department_name = 93

forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")


In [22]:
series_list = [
 [1, 406, 17, 160],
 [1, 1275, 43, 141],
 [1, 962, 133, 160],
 [1, 1118, 25, 135],
 [1, 1575, 79, 130],
 [1, 406, 130, 17],
 [1, 1462, 26, 134],
 [1, 1462, 133, 103],
 [1, 406, 17, 148],
 [1, 2028, 43, 160],
 [1, 582, 97, 136],
 [1, 1675, 119, 150],
 [1, 777, 79, 148],
 [1, 1048, 133, 27],
 [1, 962, 77, 160],
 [1, 406, 171, 17],
 [1, 962, 9, 160],
 [1, 468, 158, 23],
 [1, 1555, 160, 43],
 [1, 784, 132, 58],
 [1, 1195, 111, 148],
 [1, 962, 103, 160],
 [1, 1011, 160, 134],
 [1, 2071, 43, 141],
 [1, 2129, 79, 154],
 [1, 519, 103, 130],
 [0, 395, 77, 147],
 [1, 1616, 55, 161],
 [1, 2117, 134, 123],
 [1, 962, 141, 160],
 [1, 1011, 149, 76],
 [1, 1011, 160, 101],
 [1, 962, 109, 160],
 [1, 1462, 133, 141],
 [1, 468, 23, 136],
 [1, 1575, 154, 161],
 [1, 1048, 160, 16],
 [1, 784, 132, 123],
 [1, 1356, 148, 17],
 [1, 1454, 11, 130],
 [1, 1118, 101, 140],
 [1, 953, 111, 164],
 [1, 1555, 43, 140],
 [1, 74, 138, 123],
 [1, 204, 171, 141],
 [1, 1195, 148, 123],
 [1, 962, 140, 160],
 [1, 2076, 43, 150],
 [1, 887, 133, 36],
 [1, 468, 158, 76],
 [1, 1196, 171, 99],
 [1, 1621, 80, 141],
 [1, 1048, 51, 132],
 [1, 2076, 103, 141],
 [1, 896, 36, 130],
 [1, 2028, 160, 43],
 [1, 1616, 55, 137],
 [1, 962, 160, 16],
 [1, 1363, 136, 158],
 [1, 1616, 79, 137],
 [1, 1356, 119, 146],
 [1, 1273, 120, 171],
 [1, 1575, 78, 136],
 [1, 2117, 26, 130],
 [1, 1011, 160, 120],
 [1, 962, 78, 160],
 [1, 1141, 123, 134],
 [1, 1048, 160, 132],
 [1, 962, 54, 160],
 [0, 602, 146, 139],
 [1, 784, 132, 118],
 [1, 1048, 86, 148],
 [1, 887, 133, 96],
 [1, 1462, 26, 148],
 [1, 1048, 43, 141],
 [1, 962, 160, 78],
 [1, 1011, 160, 103],
 [1, 1011, 149, 23],
 [1, 2028, 160, 110],
 [1, 962, 119, 160],
 [1, 1356, 55, 137],
 [1, 962, 104, 160],
 [1, 1356, 130, 8],
 [1, 1011, 160, 74],
 [1, 953, 17, 160],
 [1, 1195, 79, 148],
 [1, 519, 103, 160],
 [1, 2028, 79, 147],
 [1, 2117, 100, 134],
 [1, 1366, 147, 147],
 [0, 1356, 146, 23],
 [1, 388, 147, 147],
 [1, 2243, 146, 137],
 [1, 962, 160, 79],
 [1, 1917, 16, 141],
 [1, 1195, 148, 99],
 [1, 1356, 103, 146],
 [1, 953, 17, 133],
 [1, 1663, 119, 130],
 [1, 1575, 161, 161],
 [1, 962, 16, 160],
 [1, 962, 160, 74],
 [1, 1356, 130, 146],
 [1, 1866, 160, 16],
 [1, 1862, 79, 130],
 [1, 962, 160, 160],
 [1, 1048, 160, 24],
 [1, 406, 148, 17],
 [0, 1356, 8, 130],
 [1, 1011, 146, 160],
 [1, 1118, 101, 137],
 [1, 1275, 43, 139],
 [1, 1462, 26, 130],
 [1, 962, 43, 160],
 [1, 962, 160, 141],
 [1, 1814, 43, 160],
 [1, 962, 160, 123],
 [1, 1616, 79, 141],
 [1, 1195, 17, 148],
 [0, 602, 146, 154],
 [1, 887, 133, 97],
 [1, 962, 160, 119],
 [1, 1011, 160, 54],
 [1, 1575, 78, 158],
 [1, 962, 160, 24],
 [1, 1196, 43, 133],
 [1, 1454, 11, 147],
 [1, 1575, 154, 23],
 [1, 1533, 160, 109],
 [1, 2028, 103, 160],
 [1, 1363, 8, 136],
 [1, 962, 160, 103],
 [1, 953, 17, 164],
 [1, 1462, 26, 133],
 [1, 1011, 16, 160],
 [1, 962, 160, 36],
 [1, 1011, 43, 160],
 [1, 1356, 55, 154],
 [0, 1489, 138, 143]]

In [23]:
train_data

Unnamed: 0,period,rps,podrod,filial,client_sap_id,freight_id,sender_station_id,recipient_station_id,sender_organisation_id,real_weight,real_wagon_count,holding_name,freight_group_name,sender_department_name,sender_railway_name,recipient_department_name,recipient_railway_name
0,2012-07-01,1,5,1,328,1193,30252,13005,10036,71.0,1,1423.0,21,153,12,75,4
1,2012-10-01,1,5,1,328,1193,30252,11376,10036,210.0,3,1423.0,21,153,12,99,23
2,2014-03-01,0,1,1,328,3472,30252,29548,10036,67.0,1,1423.0,30,153,12,16,11
3,2014-03-01,0,1,1,328,3472,30252,29158,10036,67.0,1,1423.0,30,153,12,118,11
4,2014-03-01,0,2,1,328,3472,30252,27484,10036,66.0,1,1423.0,30,153,12,94,39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3559227,2023-03-01,1,5,2,1346,1482,31438,31482,0,69.0,1,2226.0,22,73,14,143,35
3559228,2023-03-01,1,5,2,1346,1492,31438,36091,27275,70.0,1,2226.0,22,73,14,79,10
3559229,2023-03-01,1,5,2,1346,1492,31438,35450,27275,70.0,1,2226.0,22,73,14,104,10
3559230,2023-03-01,1,5,2,1346,1492,31438,31482,0,207.0,3,2226.0,22,73,14,143,35


In [24]:
for rps, holding_name, sender_department_name, recipient_department_name in series_list:
    # print(rps, holding_name, sender_department_name, recipient_department_name)
    forecast = update_forecast_with_series(forecast, fact, train_data, rps, holding_name, sender_department_name, recipient_department_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_true['period'] = temp_true['period'].astype("datetime64[ns]")
A va

KeyboardInterrupt: 

In [None]:
if SUBMIT_FLAG:
    forecast.to_csv(os.path.join(data_root, 'submition.csv'), index=False, sep=";", decimal=",", encoding="windows-1251")

In [None]:
# score_public[2].sort_values('ABS_ERR')