In [1]:
import optuna

import pandas as pd
import numpy as np

from prepare_data import read_data

%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Both sheets live in the same workbook, so open and parse the file once and
# pick the two frames out of the returned {sheet_name: DataFrame} mapping
# instead of reading the whole .xlsx twice.
TERMINAL_WORKBOOK_PATH = '../initial_data/terminal_data_hackathon v4.xlsx'

_terminal_sheets = pd.read_excel(TERMINAL_WORKBOOK_PATH, sheet_name=['TIDS', 'Incomes'])
poi_coords = _terminal_sheets['TIDS']    # terminal ids with longitude / latitude
cash_ts = _terminal_sheets['Incomes']    # per-terminal amounts, one column per day

In [4]:
# Load the "times" CSV that sits in the same directory as the terminal workbook.
travel_times_csv = '../initial_data/times v4.csv'
travel_times = pd.read_csv(travel_times_csv)

In [2]:
# read_data() (imported from prepare_data) yields three values; only the first
# one is kept and bound to `cash_ts`, the other two are discarded.
# NOTE(review): the discarded values are presumably the other input tables — confirm in prepare_data.
cash_ts, _, _ = read_data()

In [3]:
# Label rows by terminal id so that cash_ts[column].to_dict() is keyed by TID;
# the 'TID' column itself stays in the frame.
cash_ts.index = cash_ts['TID']

In [6]:
# Peek at the first two rows of the terminal coordinates table.
poi_coords.head(2)

Unnamed: 0,TID,longitude,latitude
0,692835,37.646257,55.742062
1,698656,37.666136,55.731231


In [7]:
# Peek at the first two rows of the per-terminal cash table.
cash_ts.head(2)

Unnamed: 0_level_0,TID,остаток на 31.08.2022 (входящий),2022-09-01 00:00:00,2022-09-02 00:00:00,2022-09-03 00:00:00,2022-09-04 00:00:00,2022-09-05 00:00:00,2022-09-06 00:00:00,2022-09-07 00:00:00,2022-09-08 00:00:00,...,2022-11-21 00:00:00,2022-11-22 00:00:00,2022-11-23 00:00:00,2022-11-24 00:00:00,2022-11-25 00:00:00,2022-11-26 00:00:00,2022-11-27 00:00:00,2022-11-28 00:00:00,2022-11-29 00:00:00,2022-11-30 00:00:00
TID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
406136,406136,160000,90000,105000,99000,107000,110000,60000,75000,89000,...,91000,78000,0,165000,0,189000,106000,94000,75000,74000
406139,406139,387000,103000,206000,168000,124000,78000,165000,164000,174000,...,164000,153000,151000,157000,206000,182000,123000,138000,112000,179000


In [4]:
# Daily rate: 2 / 100 / 365.
# NOTE(review): presumably a 2% annual cost of funds prorated per day — confirm.
PCT = 0.02 / 365

# Collection ("incassation") fee: a fraction of the amount with a fixed floor
# (see incassation_price).
INCASSATION_PCT = 1e-4
INCASSATION_MIN = 100

# Per-terminal limits used by PoiStats: consecutive days without a visit and
# accumulated amount.
MAX_DOWNTIME = 14
MAX_AMT = 10 ** 6

# Armored-car parameters.
# NOTE(review): time units are presumably minutes (12 * 60) — confirm.
ARMORED_CAR_PRICE = 20 * 1000
MAX_TRAVEL_TIME = 60 * 12
STOP_TIME = 10

In [5]:
def incassation_price(amount):
    """Return the collection fee for ``amount``.

    The fee is ``INCASSATION_PCT * amount`` but never less than
    ``INCASSATION_MIN``.
    """
    proportional_fee = INCASSATION_PCT * amount
    if proportional_fee > INCASSATION_MIN:
        return proportional_fee
    return INCASSATION_MIN


def incassation_price_fix(amount, n_cars, n_points):
    """Return the collection fee plus a per-point share of the car cost.

    The share is ``(n_cars / n_points) * ARMORED_CAR_PRICE``, i.e. the total
    price of ``n_cars`` cars split evenly across ``n_points`` points.
    ``n_points`` must be non-zero.
    """
    collection_fee = incassation_price(amount)
    car_share = (n_cars / n_points) * ARMORED_CAR_PRICE
    return collection_fee + car_share


def incassation_price_by_time(amount, waiting_days):
    """Return the collection fee for ``amount`` multiplied by ``waiting_days``.

    Original intent (from the author's note): if a terminal already holds
    some money and the next mandatory collection is ``waiting_days`` away,
    it may be more profitable to collect now; this value is meant to price
    that waiting period.

    NOTE(review): the author described the result as "percents" accrued until
    the mandatory collection, yet the code scales the collection fee rather
    than a funding rate such as PCT — confirm which was intended.
    """
    fee_per_collection = incassation_price(amount)
    return fee_per_collection * waiting_days


def amount_price(amount):
    """Return ``amount`` scaled by ``INCASSATION_PCT`` (no minimum applied).

    NOTE(review): unlike incassation_price() there is no INCASSATION_MIN
    floor here; confirm whether INCASSATION_PCT (rather than PCT) is the
    intended rate.
    """
    return INCASSATION_PCT * amount


def estimate_waiting_days(amt_history):
    """Placeholder — not implemented yet; ignores its input and returns ``None``."""
    return None

In [24]:
class PoiStats():
    """Day-by-day bookkeeping of terminal balances and collection visits.

    For every terminal (POI) the object tracks the accumulated amount
    (``sum_dict``) and the number of days since the last visit
    (``state_dict``). Each call to :meth:`update_day` picks the terminals
    visited that day, resets them, accrues the day's income on the others and
    counts rule violations.

    Args:
        sum_dict: mapping ``terminal id -> opening amount``. It is copied, so
            the caller's mapping is never modified.
        top_k: maximum number of terminals that can be visited per day.
        start_date: anything ``pd.to_datetime`` accepts; required.
        weights: ``(amount_weight, downtime_weight)`` used to rank optional
            visits.
        max_amt: amount at which a visit becomes mandatory; defaults to the
            module-level ``MAX_AMT``.
        max_downtime: days-without-visit limit; defaults to the module-level
            ``MAX_DOWNTIME``.

    Raises:
        ValueError: if ``start_date`` is ``None``.
    """

    def __init__(self, sum_dict, top_k=100, start_date=None, weights=(0.5, 0.5),
                 max_amt=None, max_downtime=None):
        if start_date is None:
            raise ValueError('start_date must be non empty')
        self.start_date = pd.to_datetime(start_date)

        self._state_date = self.start_date
        # update_day() rewrites balances in place; work on a private copy so
        # the mapping handed in by the caller stays untouched.
        self.sum_dict = dict(sum_dict)
        self.state_dict = {elem: 0 for elem in self.sum_dict}
        self.top_k = top_k

        # None means "use the module-level constant" (resolved lazily below).
        self._max_amt = max_amt
        self._max_downtime = max_downtime

        self._required_list = []
        self._optional_list = []
        self._daily_list = []
        self._normalized_dict = {}

        self._n_violations = 0
        self._weights = weights

    @property
    def _amt_limit(self):
        """Amount threshold that makes a visit mandatory."""
        return MAX_AMT if self._max_amt is None else self._max_amt

    @property
    def _downtime_limit(self):
        """Number of days without a visit that counts as a violation."""
        return MAX_DOWNTIME if self._max_downtime is None else self._max_downtime

    def update_day(self, day_sum_dict, daily_list=None):
        """Advance the simulation by one day.

        Args:
            day_sum_dict: mapping ``terminal id -> income of the day``; must
                contain every tracked terminal.
            daily_list: terminals visited today. When ``None`` the list is
                built automatically: all mandatory terminals followed by the
                best-ranked optional ones.
        """
        if daily_list is None:
            self._daily_list = self._get_required_poi() + self._get_optional_poi()
        else:
            self._daily_list = daily_list

        # Set lookup keeps the per-terminal loop linear instead of quadratic.
        visited = set(self._daily_list)
        for elem in self.sum_dict:
            day_income = day_sum_dict[elem]
            if elem in visited:
                # Visited: downtime counter restarts and the balance restarts
                # from today's income.
                self.state_dict[elem] = 0
                self.sum_dict[elem] = day_income
            else:
                self.state_dict[elem] += 1
                self.sum_dict[elem] += day_income

        self._check_violations()
        self._state_date += pd.Timedelta('24 hours')

    def _check_violations(self):
        """Add today's violations to the running ``_n_violations`` counter.

        One violation per terminal whose downtime counter has reached the
        downtime limit, plus one per mandatory terminal exceeding ``top_k``.

        NOTE(review): when update_day() is given an explicit ``daily_list``,
        ``_required_list`` still holds the value from the last automatic
        selection.
        """
        downtime_limit = self._downtime_limit
        self._n_violations += sum(1 for val in self.state_dict.values() if val >= downtime_limit)
        self._n_violations += max(0, len(self._required_list) - self.top_k)

    def _get_required_poi(self):
        """Return (and remember) terminals whose amount reached the amount limit."""
        amt_limit = self._amt_limit
        self._required_list = [elem for elem, val in self.sum_dict.items() if val >= amt_limit]
        return self._required_list

    def _get_optional_poi(self):
        """Return (and remember) the best-ranked non-mandatory terminals.

        The score is a weighted sum of the amount relative to the amount limit
        and the downtime relative to the downtime limit. At most
        ``top_k - len(required)`` terminals are returned, highest score first.
        """
        amt_limit = self._amt_limit
        downtime_limit = self._downtime_limit
        self._normalized_dict = {
            elem: (self._weights[0] * amount / amt_limit +
                   self._weights[1] * self.state_dict[elem] / downtime_limit)
            for elem, amount in self.sum_dict.items()
        }

        required = set(self._required_list)
        free_slots = max(0, self.top_k - len(self._required_list))
        ranked = sorted(self._normalized_dict.items(), key=lambda x: x[1], reverse=True)
        self._optional_list = [elem for elem, _ in ranked if elem not in required][:free_slots]

        return self._optional_list

In [10]:
def objective_function(trial):
    """Optuna objective: smallest daily visit capacity with no violations.

    Samples ``amt_weight`` and scans capacities 10, 20, ..., 210. A candidate
    scores ``n_points`` when the full replay finishes without violations and
    ``10**5 + n_points`` otherwise; the minimum score is returned
    (``10**6`` would only be returned if no candidate were evaluated).

    Relies on the notebook globals ``cash_ts``, ``days`` and ``PoiStats``.
    """
    amt_weight = trial.suggest_float("amt_weight", 0.01, 0.99)
    violation_penalty = 10**5
    obj = 10**6

    # Loop-invariant work: convert every column to a dict once per trial
    # instead of once per candidate capacity.
    opening_balances = cash_ts[days[0]].to_dict()
    daily_incomes = [cash_ts[day].to_dict() for day in days[1:]]

    for n_points in range(10, 220, 10):
        # Fresh copy per candidate: the simulation rewrites balances in place.
        stat_obj = PoiStats(dict(opening_balances), n_points, start_date='2022-08-31 00:00:00',
                            weights=(amt_weight, 1 - amt_weight))

        for day_income in daily_incomes:
            stat_obj.update_day(day_income)

        feasible = stat_obj._n_violations == 0
        step_obj = (0 if feasible else violation_penalty) + n_points

        if obj > step_obj:
            obj = step_obj

        if feasible:
            # Candidates are scanned in ascending order, so every later one
            # scores strictly higher than this feasible capacity.
            break

    return obj

In [11]:
# Column labels replayed by the simulation: every column after the first one.
# days[0] supplies the opening amounts and days[1:] are fed to update_day()
# one at a time (see objective_function).
days = cash_ts.columns[1:]

In [12]:
# Fixed sampler seed so the search can be repeated.
tpe_sampler = optuna.samplers.TPESampler(seed=2023)
study = optuna.create_study(sampler=tpe_sampler)
study.optimize(objective_function, n_trials=10)

print(f"{study.best_params} {study.best_value}")

[32m[I 2023-05-24 19:36:05,967][0m A new study created in memory with name: no-name-b671209e-f073-4088-94d2-a8d623d6a633[0m
[32m[I 2023-05-24 19:36:11,236][0m Trial 0 finished with value: 160.0 and parameters: {'amt_weight': 0.3255485379121284}. Best is trial 0 with value: 160.0.[0m
[32m[I 2023-05-24 19:36:16,094][0m Trial 1 finished with value: 100010.0 and parameters: {'amt_weight': 0.8826140027536912}. Best is trial 0 with value: 160.0.[0m
[32m[I 2023-05-24 19:36:21,104][0m Trial 2 finished with value: 100010.0 and parameters: {'amt_weight': 0.5862912103414644}. Best is trial 0 with value: 160.0.[0m
[32m[I 2023-05-24 19:36:26,444][0m Trial 3 finished with value: 130.0 and parameters: {'amt_weight': 0.13406417163420542}. Best is trial 3 with value: 130.0.[0m
[32m[I 2023-05-24 19:36:31,800][0m Trial 4 finished with value: 130.0 and parameters: {'amt_weight': 0.14851439812335535}. Best is trial 3 with value: 130.0.[0m
[32m[I 2023-05-24 19:36:36,938][0m Trial 5 finis

{'amt_weight': 0.031647868385793594} 120.0


In [13]:
# The objective adds 10**5 when any violation occurred, so a best value larger
# than the number of rows in cash_ts is treated as "no violation-free
# parameters were found".
no_feasible_params = study.best_value > len(cash_ts)
if no_feasible_params:
    raise NameError("There is no parameters without 14-days rule violation")

In [14]:
# Replay the whole period with the best parameters found by the study and
# log, per day: simulation date, number of mandatory terminals, violations.
chosen_top_k = int(study.best_value)
chosen_amt_weight = study.best_params['amt_weight']

stat_obj = PoiStats(
    cash_ts[days[0]].to_dict(),
    chosen_top_k,
    start_date='2022-08-31 00:00:00',
    weights=(chosen_amt_weight, 1 - chosen_amt_weight),
)

for day in days[1:]:
    stat_obj.update_day(cash_ts[day].to_dict())
    print(stat_obj._state_date, len(stat_obj._required_list), stat_obj._n_violations)
    # More mandatory terminals than daily capacity: the amount limit cannot be met.
    if len(stat_obj._required_list) > chosen_top_k:
        raise NameError("There is no parameters without 1 mln RUB rule violation")

2022-09-01 00:00:00 11 0
2022-09-02 00:00:00 0 0
2022-09-03 00:00:00 2 0
2022-09-04 00:00:00 2 0
2022-09-05 00:00:00 2 0
2022-09-06 00:00:00 3 0
2022-09-07 00:00:00 11 0
2022-09-08 00:00:00 13 0
2022-09-09 00:00:00 23 0
2022-09-10 00:00:00 25 0
2022-09-11 00:00:00 28 0
2022-09-12 00:00:00 33 0
2022-09-13 00:00:00 27 0
2022-09-14 00:00:00 51 0
2022-09-15 00:00:00 30 0
2022-09-16 00:00:00 27 0
2022-09-17 00:00:00 32 0
2022-09-18 00:00:00 17 0
2022-09-19 00:00:00 23 0
2022-09-20 00:00:00 21 0
2022-09-21 00:00:00 23 0
2022-09-22 00:00:00 20 0
2022-09-23 00:00:00 26 0
2022-09-24 00:00:00 32 0
2022-09-25 00:00:00 26 0
2022-09-26 00:00:00 25 0
2022-09-27 00:00:00 21 0
2022-09-28 00:00:00 27 0
2022-09-29 00:00:00 27 0
2022-09-30 00:00:00 25 0
2022-10-01 00:00:00 16 0
2022-10-02 00:00:00 26 0
2022-10-03 00:00:00 30 0
2022-10-04 00:00:00 26 0
2022-10-05 00:00:00 19 0
2022-10-06 00:00:00 29 0
2022-10-07 00:00:00 29 0
2022-10-08 00:00:00 28 0
2022-10-09 00:00:00 23 0
2022-10-10 00:00:00 27 0
2022-

In [27]:
def objective_function(trial):
    """Optuna objective over ``amt_weight`` and the daily capacity ``n_points``.

    Replays the whole period once with the sampled parameters and returns
    ``n_points`` when no violation occurred, ``10**5 + n_points`` otherwise,
    so minimising it looks for the smallest violation-free capacity.

    Relies on the notebook globals ``cash_ts``, ``days`` and ``PoiStats``.
    NOTE: this redefines the earlier ``objective_function`` of the notebook
    (which scanned capacities inside a single trial).
    """
    amt_weight = trial.suggest_float("amt_weight", 0.01, 0.99)
    n_points = trial.suggest_int("n_points", 90, 150)

    stat_obj = PoiStats(cash_ts[days[0]].to_dict(), n_points, start_date='2022-08-31 00:00:00',
                        weights=(amt_weight, 1 - amt_weight))

    for day in days[1:]:
        stat_obj.update_day(cash_ts[day].to_dict())

    # The former `obj = 10**6` cap was dead logic: the score never exceeds
    # 10**5 + 150, so the score itself is always what gets returned.
    violation_penalty = 10**5 if stat_obj._n_violations > 0 else 0
    return violation_penalty + n_points

In [28]:
# Replayed columns: everything after the first column of cash_ts.
days = cash_ts.columns[1:]

# Joint search over amt_weight and n_points, with a fixed sampler seed.
tpe_sampler = optuna.samplers.TPESampler(seed=2023)
study = optuna.create_study(sampler=tpe_sampler)
study.optimize(objective_function, n_trials=100)

print(f"{study.best_params} {study.best_value}")

[32m[I 2023-05-27 00:42:15,943][0m A new study created in memory with name: no-name-e8843c9d-a8cd-46b4-8627-8d7500968ab5[0m
[32m[I 2023-05-27 00:42:16,196][0m Trial 0 finished with value: 100144.0 and parameters: {'amt_weight': 0.3255485379121284, 'n_points': 144}. Best is trial 0 with value: 100144.0.[0m
[32m[I 2023-05-27 00:42:16,440][0m Trial 1 finished with value: 100097.0 and parameters: {'amt_weight': 0.5862912103414644, 'n_points': 97}. Best is trial 1 with value: 100097.0.[0m
[32m[I 2023-05-27 00:42:16,693][0m Trial 2 finished with value: 100118.0 and parameters: {'amt_weight': 0.14851439812335535, 'n_points': 118}. Best is trial 1 with value: 100097.0.[0m
[32m[I 2023-05-27 00:42:16,953][0m Trial 3 finished with value: 134.0 and parameters: {'amt_weight': 0.031647868385793594, 'n_points': 134}. Best is trial 3 with value: 134.0.[0m
[32m[I 2023-05-27 00:42:17,195][0m Trial 4 finished with value: 100123.0 and parameters: {'amt_weight': 0.5238995954267781, 'n_poin

{'amt_weight': 0.07387688919651192, 'n_points': 133} 133.0


In [13]:
# Hard-coded parameters for the replay.
# NOTE(review): presumably copied from an Optuna run not shown here — confirm.
best_amt_weight, best_value = 0.040993543384900855, 116
top_k_limit = int(best_value)

stat_obj = PoiStats(
    cash_ts[days[0]].to_dict(),
    top_k_limit,
    start_date='2022-08-31 00:00:00',
    weights=(best_amt_weight, 1 - best_amt_weight),
)

# column label -> terminals visited on that day
day_required_terminals = {}

for day in days[1:]:
    stat_obj.update_day(cash_ts[day].to_dict())
    day_required_terminals[day] = stat_obj._daily_list
    # Debug trace:
    # print(stat_obj._state_date, len(stat_obj._required_list), stat_obj._n_violations)
    if len(stat_obj._required_list) > top_k_limit:
        raise NameError("There is no parameters without 1 mln RUB rule violation")

In [15]:
# Number of days on which terminal 699664 is in the visit list.
sum(699664 in terminals for terminals in day_required_terminals.values())

7

In [19]:
# Days on which terminal 699664 is in the visit list.
[visit_day for visit_day, terminals in day_required_terminals.items() if 699664 in terminals]

['2022-09-12 00:00:00',
 '2022-09-24 00:00:00',
 '2022-10-06 00:00:00',
 '2022-10-18 00:00:00',
 '2022-10-30 00:00:00',
 '2022-11-11 00:00:00',
 '2022-11-23 00:00:00']

In [25]:
# Reload the cash table from scratch and index it by terminal id.
cash_ts, _, _ = read_data()
cash_ts.index = cash_ts['TID']

# Replayed columns: everything after the first column.
days = cash_ts.columns[1:]

# Parameter search kept for reference (disabled):
# study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=2023))
# study.optimize(objective_function, n_trials=10)
# print(study.best_params, study.best_value)

# Hard-coded parameters used instead of the search above.
best_amt_weight, best_value = 0.040993543384900855, 116
top_k_limit = int(best_value)

stat_obj = PoiStats(
    cash_ts[days[0]].to_dict(),
    top_k_limit,
    start_date='2022-08-31 00:00:00',
    weights=(best_amt_weight, 1 - best_amt_weight),
)

# column label -> terminals visited on that day
day_required_terminals = {}

for day in days[1:]:
    stat_obj.update_day(cash_ts[day].to_dict())
    day_required_terminals[day] = stat_obj._daily_list
    # Debug trace:
    # print(stat_obj._state_date, len(stat_obj._required_list), stat_obj._n_violations)
    if len(stat_obj._required_list) > top_k_limit:
        raise NameError("There is no parameters without 1 mln RUB rule violation")

In [23]:
# Days on which terminal 699664 is in the visit list.
[visit_day for visit_day in day_required_terminals if 699664 in day_required_terminals[visit_day]]

['2022-09-12 00:00:00',
 '2022-09-24 00:00:00',
 '2022-10-06 00:00:00',
 '2022-10-18 00:00:00',
 '2022-10-30 00:00:00',
 '2022-11-11 00:00:00',
 '2022-11-23 00:00:00']