In [62]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
import datetime
import pickle

from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso
from sklearn.model_selection import train_test_split 
from sklearn import metrics, preprocessing, svm

from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters

In [115]:
# Load the training set and discard the two descriptive columns; the code in
# this notebook only references the `id`, `date` and `cashback` columns.
df = pd.read_csv('data/train_data.csv').drop(columns=['merchant_name', 'category'])
df

Unnamed: 0,date,cashback,id
0,2022-02-10,681.27,0
1,2022-02-11,1171.52,0
2,2022-02-12,1068.24,0
3,2022-02-13,1023.07,0
4,2022-02-14,559.56,0
...,...,...,...
27297,2022-10-29,195.97,2547
27298,2022-10-30,223.59,2547
27299,2022-10-31,153.72,2547
27300,2022-11-01,387.80,2547


In [65]:
def model_predict(data, model):
    """Return the model's first prediction for ``data`` as an ``int``.

    tsfresh features are extracted from the ``cashback`` column of ``data``
    (grouped by ``id``, ordered by ``date``) using the comprehensive feature
    set. Only the nine feature columns listed below are passed on to
    ``model.predict``; the first element of its result is converted with
    ``int()`` (i.e. truncated towards zero).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least the columns ``id``, ``date`` and ``cashback``.
    model : object
        Anything exposing ``predict(X)`` that accepts the selected columns.

    Returns
    -------
    int
        Truncated first prediction.
    """
    # Feature columns handed to the model, in this exact order.
    selected_features = [
        'cashback__kurtosis',
        'cashback__mean_second_derivative_central',
        'cashback__fft_aggregated__aggtype_"variance"',
        'cashback__agg_autocorrelation__f_agg_"var"__maxlag_40',
        'cashback__fft_aggregated__aggtype_"centroid"',
        'cashback__cid_ce__normalize_True',
        'cashback__cid_ce__normalize_False',
        'cashback__variation_coefficient',
        'cashback__skewness',
    ]

    feature_table = extract_features(
        data,
        column_id='id',
        column_sort='date',
        column_value='cashback',
        default_fc_parameters=ComprehensiveFCParameters(),
        # tsfresh's `impute` post-processes the feature table so that no
        # NaN values reach the model.
        impute_function=impute,
    )

    model_input = feature_table[selected_features]
    first_prediction = model.predict(model_input)[0]
    return int(first_prediction)

In [154]:
def test_model(all_data, model, tests=10):
    s_mae = 0
    s_mse = 0
    s_tinkoff_metric = 0

    s_tests = 0
    while s_tests < tests:
        rid = int(np.random.random()*100)%len(set(all_data['id']))
        dx = all_data.loc[all_data['id'] == rid]
        if len(dx.index) > 10:
            r_ind = 1+int(np.random.random()*100)%(len(dx.index)-6)
            test_data = dx[:r_ind]
            test_out = dx[r_ind:r_ind+5]['cashback'].sum()
            pred = model_predict(test_data, model)
            
            s_mae += abs(test_out-pred)
            s_mse += (test_out-pred)**2
            s_tinkoff_metric += (test_out - 5*pred)
            s_tests += 1
            
    return (tests, s_mae/tests, s_mse**0.5/tests, s_tinkoff_metric/tests)
        

In [232]:
def test_site(all_data, tests=10):
    s_tinkoff_metric = 0

    s_tests = 0
    batch_size = 3
    
    while s_tests < tests:
        rid = int(np.random.random()*100)%len(set(all_data['id']))
        dx = all_data.loc[all_data['id'] == rid]

        budget = (dx['cashback'].sum())*np.random.random()
        # POST partner
        
        if len(dx.index) > 10:
            for i in range(0, len(dx.index), batch_size):
                pass
                # PUT cashback from dx[i:i+batch_size]
                # GET partner info
            s_tests+=1
        # GET partner info -> spent_budget
        s_tinkoff_metric += budget - 5*spent_budget
    return s_tinkoff_metric/s_tests

In [233]:
#test_site(df)

In [162]:
# import model
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# model files that come from a trusted source.
# No placeholder estimator is created beforehand: if loading fails, `model`
# must not be left pointing at an unfitted object.
with open('models/model_2.0.pkl', 'rb') as inp:
    model = pickle.load(inp)
print(model.coef_)

[ 3.33953680e+02 -2.59930007e+01 -4.15590525e+00 -1.22823178e+04
 -6.13099526e+02  4.62742613e+02  1.34316447e+00  5.50078129e+03
 -5.73484366e+03]


In [165]:
# Back-test the loaded model with the default number of trials (tests=10); the
# tuple returned by test_model is shown as the cell output. Sampling inside
# test_model is random, so the numbers differ from run to run.
test_model(df, model)

Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.26it/s]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.31it/s]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.33it/s]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.29it/s]
Feature Extraction: 100%|██████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.14it/s]
Featur

(10, 5251.195999999999, 2035.1593806461449, -33101.191999999995)