In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
from datetime import date, timedelta
import json
from tqdm import tqdm
import os 
from exponenta import get_stat
from get_data_all_sources import get_data, get_mpstats_year, make_mpstats_request
from make_prediction import Seq2Seq, BiLSTMAutoencoder
from net_expect import ExpectParams
from net_trend_bi import PredictTrendBi

WEIGHTS_DIR = 'data/'
PRODUCTS_DIR = 'data/products/'
NUM_DAYS_TO_PREDICT = 14
# параметры BiLSTM 
INPUT_DIM = 2
HIDDEN_DIM = 128
NUM_LAYERS = 1
# параметры GRU  
INPUT_SIZE = 2
HIDDEN_SIZE = 256
OUTPUT_SIZE = 1
# параметры новой сетки
NEW_INPUT_SIZE = 3
NEW_NUM_LAYERS = 2
EPS = 1e-5

In [2]:
%load_ext autoreload
%autoreload 2

# Старый метод


In [3]:
class OldBiLSTMMethod:
    """ предсказывает на один день вперёд """
    def __init__(self, df:pd.DataFrame, tomm_price, min_price, max_price) -> None:
        self.df = df
        self.tomm_price = tomm_price
        self.min_price = min_price
        self.max_price = max_price
        
    def find_trend(self, data):
        """
        data: np.array of shape (1, price.size, 2)
        """
        model = BiLSTMAutoencoder(INPUT_DIM, HIDDEN_DIM, NUM_LAYERS)
        model.load_state_dict(torch.load(WEIGHTS_DIR+'trend_bilstm', map_location='cpu'))
        
        trend = model(data)
        trend = trend.detach().numpy().squeeze()
        trend = trend/trend[-1]
        return trend
    
    def predict_demand(self, data, b):
        model = Seq2Seq(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
        model.load_state_dict(torch.load(WEIGHTS_DIR+'trend_gru', map_location='cpu'))
        predicted_trend = model(data, num_step=1).detach().numpy().flatten()
        
        new_demand = np.exp(b[0]+b[1]*self.tomm_price)*predicted_trend
        return new_demand
    
    def run(self,):
        self.df.sales = self.df.sales.astype(float)
        self.df.price = self.df.price.astype(int)
        order = self.df.sales.to_numpy(copy=True)
        price = self.df.price.to_numpy(copy=True)
        price_norm = (price-self.min_price)/(self.max_price-self.min_price)
        # extract trend
        data = torch.zeros(1, price_norm.size, 2, dtype=torch.float32)
        data[:,:,0] = torch.tensor(price_norm)
        data[:,:,1] = torch.tensor(order)
        trend = self.find_trend(data)
        discount_order = order/trend
        # exponenta model; returns vector of shape (5, -1) or (2, 1)
        b = get_stat(price_norm, discount_order)
        data_perm = data.permute([1,0,2])
        new_demand = self.predict_demand(data_perm, b)
        return new_demand

# Новый метод

In [33]:
class NewMethod:
    """ предсказывает на один день вперёд """
    def __init__(self, df:pd.DataFrame, tomm_price, min_price, max_price) -> None:
        self.df = df
        self.tomm_price = tomm_price
        self.min_price = min_price
        self.max_price = max_price
        self.expect_params = ExpectParams(3, 128)
        self.expect_params.load_state_dict(torch.load(WEIGHTS_DIR+'expect_128_1', weights_only=False, map_location='cpu'))
        
    def predict_demand(self, data):
        # print(self.min_price, self.max_price)
        a, b = self.expect_params(data).detach().numpy().flatten() 
        # print(data.shape)
        # print(data[:, -1, :])
        # print(a, b)
        new_demand = np.exp(b - a*self.tomm_price)
        # print(new_demand)
        return new_demand
    
    def run(self,):
        self.df.sales = self.df.sales.astype(float)
        self.df.price = self.df.price.astype(float)
        self.df.balance = self.df.balance.astype(float)
        order = self.df.sales.to_numpy(copy=True)
        price = self.df.price.to_numpy(copy=True)
        balance = self.df.balance.to_numpy(copy=True)
        # adding mask for predicting the next day
        bal = np.ones_like(balance)
        bal[balance<order] = 0
        balance = bal.copy()
        # balance[balance<order] = 0
        price_norm = (price-self.min_price)/(self.max_price-self.min_price)
        # extract trend
        data = torch.zeros(1, price_norm.size, 3, dtype=torch.float32)
        data[:,:,0] = torch.tensor(balance)
        data[:,:,1] = torch.tensor(price_norm)
        data[:,:,2] = torch.tensor(order)
        new_demand = self.predict_demand(data)
        return new_demand

In [5]:
class NewV2Method:
    """ предсказывает на один день вперёд """
    def __init__(self, df:pd.DataFrame, tomm_price, min_price, max_price) -> None:
        self.df = df
        self.tomm_price = tomm_price
        self.min_price = min_price
        self.max_price = max_price
        
    def find_trend(self, data):
        """
        data: np.array of shape (1, price.size, 3)
        """
        model = PredictTrendBi(NEW_INPUT_SIZE, HIDDEN_DIM, NEW_NUM_LAYERS)
        model.load_state_dict(torch.load(WEIGHTS_DIR+'trend_2enc_1dec_128_bi', weights_only=False, \
            map_location='cpu'))
        
        trend = model(data)
        trend = trend.detach().numpy().squeeze()
        trend = trend/trend[-1]
        return trend
    
    def predict_demand(self, data, b):
        model = Seq2Seq(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
        model.load_state_dict(torch.load(WEIGHTS_DIR+'trend_gru', map_location='cpu'))
        predicted_trend = model(data, num_step=1).detach().numpy().flatten()
        
        new_demand = np.exp(b[0]+b[1]*self.tomm_price)*predicted_trend
        return new_demand
    
    def run(self,):
        self.df.sales = self.df.sales.astype(float)
        self.df.price = self.df.price.astype(int)
        self.df.balance = self.df.balance.astype(int)
        order = self.df.sales.to_numpy(copy=True)
        price = self.df.price.to_numpy(copy=True)
        balance = self.df.balance.to_numpy(copy=True)
        price_norm = (price-self.min_price)/(self.max_price-self.min_price)
        # extract trend
        data = torch.zeros(1, price_norm.size, 3, dtype=torch.float32)
        data[:,:,0] = torch.tensor(balance)
        data[:,:,1] = torch.tensor(price_norm)
        data[:,:,2] = torch.tensor(order)
        trend = self.find_trend(data)
        discount_order = order/trend
        # exponenta model; returns vector of shape (5, -1) or (2, 1)
        # B = balance == 1
        # b = get_stat(price_norm[B], discount_order[B])
        b = get_stat(price_norm, discount_order)
        data_perm = data[:, :, 1:]
        data_perm = data_perm.permute([1,0,2])
        new_demand = self.predict_demand(data_perm, b)
        return new_demand

In [247]:
data_t = pd.read_csv(PRODUCTS_DIR+f'{52711355}.csv')
num_days_to_predict = 30
df = data_t[:-num_days_to_predict].copy()
df_test = data_t[-num_days_to_predict:].copy()
df.sales = df.sales.astype(float)
df.price = df.price.astype(int)
df.balance = df.balance.astype(int)

min_price, max_price = df.price.min(), df.price.max()
order = df.sales.to_numpy(copy=True)
price = df.price.to_numpy(copy=True)
balance = df['balance'].to_numpy(copy=True)
df.loc[:, 'balance'] = 1
balance[balance<=order] = 0
df.loc[:, 'balance'] = balance
price_norm = (price-min_price)/(max_price-min_price)
# extract trend
data = torch.zeros(1, price_norm.size, 3, dtype=torch.float32)
data[:,:,0] = torch.tensor(balance)
data[:,:,1] = torch.tensor(price_norm)
data[:,:,2] = torch.tensor(order)

In [220]:
model = PredictTrendBi(NEW_INPUT_SIZE, HIDDEN_DIM, NEW_NUM_LAYERS)
model.load_state_dict(torch.load(WEIGHTS_DIR+'trend_2enc_1dec_128_bi', weights_only=False, \
    map_location='cpu'))

trend = model(data)
trend = trend.detach().numpy().squeeze()
trend = trend/trend[-1]

In [250]:
def predict_demand(data, b, tomm_price):
    model = Seq2Seq(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
    model.load_state_dict(torch.load(WEIGHTS_DIR+'trend_gru', map_location='cpu'))
    predicted_trend = model(data, num_step=1).detach().numpy().flatten()
    new_demand = np.exp(b[0]+b[1]*tomm_price)*predicted_trend
    return new_demand

discount_order = order/trend
B = balance == 1
b = get_stat(price_norm[B], discount_order[B])
b = get_stat(price_norm, discount_order)
data_perm = data[:, :, 1:]
data_perm = data_perm.permute([1,0,2])
tomm_price = df_test.sales.iloc[0]
new_demand = predict_demand(data_perm, b, tomm_price)
new_demand

LinAlgError: Singular matrix

In [34]:
def simulate_all_skus(skus, num_days_to_predict, **kwargs):
    problems = set()
    diverged = set()
    res_old = list()
    res_new = list()
    res_new_v2 = list()
    res_skus = list()
    for sku in tqdm(skus):
        print(sku)
        print('================================')
        demands_old = list()
        demands_new = list()
        demands_new_v2 = list()
        path_file = os.path.join(PRODUCTS_DIR, f'{sku}.csv')
        if os.path.isfile(path_file):
            df = pd.read_csv(path_file)
        else:
            df = get_data(sku)
            df.to_csv(path_file, index=False)
        order = df.sales.to_numpy(copy=True)
        df.loc[:, 'balance'] = 1
        balance = df.balance.to_numpy(copy=True)
        balance[balance<order] = 0
        df.loc[:, 'balance'] = balance
        train_df = df[:-num_days_to_predict].copy()
        test_df = df[-num_days_to_predict:].copy()
        demands_true = test_df.sales.to_numpy(copy=True)
        # средняя продажа больше 1
        # if demands_true.mean() < (1 - 1e-6):
        #     continue
        try:
            for step in range(test_df.shape[0]):
                # сразу добавляем последнюю цену, чтобы она учитывалась в краевых значениях
                train_df = pd.concat([train_df.copy(), test_df.iloc[step].to_frame().transpose()])
                min_price, max_price = train_df.price.min(), train_df.price.max()
                tomm_price = test_df.copy().price.iloc[step]
                tomm_price = (tomm_price-min_price)/(max_price-min_price)
                
                # old = OldBiLSTMMethod(train_df.iloc[:, :-1].copy(), tomm_price, min_price, max_price)
                # demand_old = old.run(**kwargs)
                # demands_old.append(demand_old)
                
                new = NewMethod(train_df.iloc[:-1, :].copy(), tomm_price, min_price, max_price)
                demand_new = new.run(**kwargs)
                demands_new.append(demand_new)
                
                # new_v2 = NewV2Method(train_df.copy(), tomm_price, min_price, max_price)
                # demand_new_v2 = new_v2.run(**kwargs)
                # demands_new_v2.append(demand_new_v2)
        except (RuntimeError, RuntimeWarning):
                diverged.add(sku)
        except (ValueError, TypeError):
                problems.add(sku)
        # demands_old = np.array(demands_old).flatten() 
        demands_new = np.array(demands_new).flatten()
        # demands_new_v2 = np.array(demands_new_v2).flatten()
        # res_old.append((demands_old.sum() - demands_true.sum())/(demands_true.sum() + EPS))
        res_new.append((demands_new.sum() - demands_true.sum())/demands_true.sum())
        # res_new_v2.append((demands_new_v2.sum() - demands_true.sum())/(demands_true.sum() + EPS))
        res_skus.append(sku)
    # res_df = pd.DataFrame([np.abs(res_old), np.abs(res_new), np.abs(res_new_v2), \
        # res_skus]).T.reset_index(drop=True)
    res_df = pd.DataFrame([np.abs(res_new), res_skus]).T.dropna().reset_index(drop=True)
    # res_df.columns = ['old', 'new', 'new_v2', 'sku'] 
    res_df.columns = ['new', 'sku']
    res_df.sku = res_df.sku.astype(int)
    return res_df, problems, diverged

# Первый список

In [35]:
# sample = pd.read_excel('Выборки_товаров_для_ИИ_итоговый_24_05_06.xlsx', sheet_name=3)
# skus = sample['Номенклатура'].to_numpy(copy=True)

In [36]:
L = os.listdir('data/products/')
skus = [int(sku[:-4]) for sku in L]

In [None]:
res_df, problems, diveged = simulate_all_skus(skus, 30)

In [60]:
problems, diveged

(set(), set())

In [88]:
res_df.mean().tolist()

[0.3087477201145238, 84626283.26829268]

In [17]:
res_df.mean().tolist()

[0.44134537964393156,
 0.6495521126868835,
 0.22970833237212573,
 84626283.26829268]

In [None]:
res_df

In [170]:
res_df.mean().tolist()

[0.44134537964393156,
 0.23078187370694683,
 0.22525083071975918,
 84626283.26829268]

In [167]:
res_df_ch = res_df.copy()

In [171]:
res_df_nch = res_df.copy()

In [192]:
res_df_ch.drop(columns='sku')[res_df_ch != res_df_nch].dropna()

Unnamed: 0,old,new,new_v2
17,0.949054,0.952772,1.0
24,0.955756,0.969433,1.0
28,0.972675,0.974825,1.0
32,0.953416,0.960418,1.0
37,0.937864,0.959995,1.0
40,0.95401,0.954797,1.0


In [197]:
t = pd.read_csv(PRODUCTS_DIR+f'{52711355}.csv')
t.sales[-30:].sum()

139

In [191]:
res_df_ch.drop([17, 24, 28, 32, 37, 40]).mean().tolist()

[0.43898622975315993,
 0.24983580205218361,
 0.7309191021415326,
 88876117.14285715]

In [187]:
res_df_nch.iloc[17].tolist()

[0.6144228705900813, 0.1840638095546471, 0.11792013294245896, 52711355.0]

In [32]:
res_df[res_df.new < res_df.old].shape[0]/res_df.shape[0], res_df[res_df.old < res_df.new].shape[0]/res_df.shape[0]

(0.4, 0.6)

In [33]:
res_df.mean().to_numpy(copy=True).tolist()[:2]

[0.46356538883995935, 0.6475075012041599]

# Второй список

In [None]:
sample = pd.read_excel('Выборки_товаров_для_ИИ_21.06.xlsx', sheet_name=3)
skus = sample['NM ID'].to_numpy(copy=True)
res_df2, _, _ = simulate_all_skus(skus, 30)

In [17]:
res_df2.drop([36, 42, 31]).mean().to_numpy(copy=True).tolist()[:2]

[0.38418689468856015, 2.460955168762658]

In [None]:
res_df2

In [None]:
t = pd.read_csv(PRODUCTS_DIR+f'{52711355}.csv')
t.sales.value_counts()
t.iloc[-30:]