In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import random
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import math
import gc

from sklearn.metrics import roc_auc_score

import lightgbm as lgbm

import pickle
def dump_pkl(data, filename):
  with open(filename, 'wb') as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_pkl(filename):
  with open(filename, 'rb') as handle:
    data = pickle.load(handle)
  return data

In [4]:
pd.set_option('display.max_rows', 5)
pd.set_option('display.max_columns', 50)

In [7]:
df = load_pkl('data/feat_engin/lgbm/data_1hour_1day.pkl')

In [10]:
model = load_pkl('./model.pkl')

features_fe = ['close_w14_min/close_w14_max',
 'close_1day_w5_norm_std/close_1day_w100_norm_std',
 'close_1day/close_1day_w100_ma',
 'volume_1day_w20_rsi',
 'close_1day_w3_std/close_1day_w100_std',
 'close_1day_w20_roc',
 'close_w70_lvl_1.02-1.03',
 'tmos_close_1day_w5_norm_std',
 'tmos_close_w14_ma/tmos_close_w70_ma',
 'close_1day_w5_ma/close_1day_w100_ma',
 'tmos_close/tmos_close_w70_ma',
 'tmos_close_w14_max/tmos_close',
 'close_1day_w20_norm_std',
 'close_w14_mean_abs_pct/close_w70_mean_abs_pct',
 'tmos_close_1day_w20_rsi',
 'tmos_close_w70_lvl_1-1.005',
 'tmos_close_1day_w5_min/tmos_close_1day_w5_max',
 'close_1day_w100_lvl_1.02-1.03',
 'close_1day_w20_min/close_1day_w20_max',
 'close_w14_mean_abs_pct']

THRESHOLD = 0.3487470259478544

In [12]:
df['y_pred'] = model.predict(df[features_fe])

In [14]:
df['y_pred']

0          0.228674
1          0.228674
             ...   
1006241    0.125887
1006242    0.126093
Name: y_pred, Length: 1006243, dtype: float64

In [16]:
D_sample = lgbm.Dataset(df[features_fe], feature_name=features_fe, categorical_feature=[], free_raw_data=False)

In [18]:
(df['y_pred'].values == model.predict(D_sample.data)).all()

True

In [131]:
hour = 20

mask_hour = np.array((pd.Timestamp(f'2025-01-29 {hour-1}:00:00') <= df['time']) & (df['time'] < pd.Timestamp(f'2025-01-29 {hour}:00:00')))
mask_hour.sum()

125

In [133]:
df_hour = df.loc[mask_hour, ['time', 'ticker', 'close', 'y_pred']+features_fe].sort_values(by='ticker')
df_hour

Unnamed: 0,time,ticker,close,y_pred,close_w14_min/close_w14_max,close_1day_w5_norm_std/close_1day_w100_norm_std,close_1day/close_1day_w100_ma,volume_1day_w20_rsi,close_1day_w3_std/close_1day_w100_std,close_1day_w20_roc,close_w70_lvl_1.02-1.03,tmos_close_1day_w5_norm_std,tmos_close_w14_ma/tmos_close_w70_ma,close_1day_w5_ma/close_1day_w100_ma,tmos_close/tmos_close_w70_ma,tmos_close_w14_max/tmos_close,close_1day_w20_norm_std,close_w14_mean_abs_pct/close_w70_mean_abs_pct,tmos_close_1day_w20_rsi,tmos_close_w70_lvl_1-1.005,tmos_close_1day_w5_min/tmos_close_1day_w5_max,close_1day_w100_lvl_1.02-1.03,close_1day_w20_min/close_1day_w20_max,close_w14_mean_abs_pct
418070,2025-01-29 19:00:00,ABIO,89.639999,0.123648,0.992471,0.111778,1.051965,49.351826,0.114424,0.082790,5.0,0.006858,1.00308,1.055154,1.002856,1.004673,0.044446,0.764855,64.625847,21.0,0.982919,1.0,0.826998,0.002132
401371,2025-01-29 19:00:00,ABRD,190.600006,0.172919,0.981308,0.375051,0.997651,49.647011,0.198203,0.076136,6.0,0.006858,1.00308,1.018510,1.002856,1.004673,0.044309,0.717117,64.625847,21.0,0.982919,0.0,0.855289,0.003529
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213197,2025-01-29 19:00:00,YAKG,60.500000,0.163792,0.960769,0.215715,1.032106,50.911797,0.056447,0.102004,2.0,0.006858,1.00308,1.013170,1.002856,1.004673,0.030550,1.082757,64.625847,21.0,0.982919,1.0,0.896694,0.008697
379915,2025-01-29 19:00:00,YDEX,4091.000000,0.144124,0.971605,0.150446,1.067156,40.791328,0.199303,0.065963,0.0,0.006858,1.00308,1.062374,1.002856,1.004673,0.025060,1.562253,64.625847,21.0,0.982919,2.0,0.919917,0.005079


#### load dev data

In [137]:
#prediction
df_dev_res = load_pkl(f'./dev/data/result/data_result_2025-01-29_{hour}.pkl')
df_dev_res = df_dev_res.sort_values(by='ticker')
df_dev_res

Unnamed: 0,time,ticker,close,y_pred,y_pred_bin,lower_bound,upper_bound
52,2025-01-29 19:00:00,ABIO,89.64,0.123648,0,87.8472,93.2256
50,2025-01-29 19:00:00,ABRD,190.60,0.172919,0,186.7880,198.2240
...,...,...,...,...,...,...,...
25,2025-01-29 19:00:00,YAKG,60.50,0.163792,0,59.2900,62.9200
47,2025-01-29 19:00:00,YDEX,4091.00,0.144124,0,4009.1800,4254.6400


In [139]:
#features
df_dev_lgbm = load_pkl(f'./dev/data/lgbm/data_lgbm_2025-01-29_{hour}.pkl')
df_dev_lgbm = df_dev_lgbm.sort_values(by='ticker')
df_dev_lgbm

Unnamed: 0,time,ticker,close,time_1day,close_w14_min/close_w14_max,close_1day_w5_norm_std/close_1day_w100_norm_std,close_1day/close_1day_w100_ma,volume_1day_w20_rsi,close_1day_w3_std/close_1day_w100_std,close_1day_w20_roc,close_w70_lvl_1.02-1.03,tmos_close_1day_w5_norm_std,tmos_close_w14_ma/tmos_close_w70_ma,close_1day_w5_ma/close_1day_w100_ma,tmos_close/tmos_close_w70_ma,tmos_close_w14_max/tmos_close,close_1day_w20_norm_std,close_w14_mean_abs_pct/close_w70_mean_abs_pct,tmos_close_1day_w20_rsi,tmos_close_w70_lvl_1-1.005,tmos_close_1day_w5_min/tmos_close_1day_w5_max,close_1day_w100_lvl_1.02-1.03,close_1day_w20_min/close_1day_w20_max,close_w14_mean_abs_pct
52,2025-01-29 19:00:00,ABIO,89.64,2025-01-28 03:00:00,0.992471,0.111778,1.051965,49.351825,0.114424,0.082790,5.0,0.006858,1.00308,1.055154,1.002856,1.004673,0.044446,0.764855,64.62585,21.0,0.982919,1.0,0.826998,0.002132
50,2025-01-29 19:00:00,ABRD,190.60,2025-01-28 03:00:00,0.981308,0.375051,0.997651,49.647009,0.198203,0.076136,6.0,0.006858,1.00308,1.018510,1.002856,1.004673,0.044309,0.717117,64.62585,21.0,0.982919,0.0,0.855289,0.003529
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,2025-01-29 19:00:00,YAKG,60.50,2025-01-28 03:00:00,0.960769,0.215715,1.032106,50.911797,0.056447,0.102004,2.0,0.006858,1.00308,1.013170,1.002856,1.004673,0.030550,1.082757,64.62585,21.0,0.982919,1.0,0.896694,0.008697
47,2025-01-29 19:00:00,YDEX,4091.00,2025-01-28 03:00:00,0.971605,0.150446,1.067156,40.791327,0.199303,0.065963,0.0,0.006858,1.00308,1.062374,1.002856,1.004673,0.025060,1.562253,64.62585,21.0,0.982919,2.0,0.919917,0.005079


In [141]:
(df_dev_res['ticker'] == df_dev_lgbm['ticker']).all(), (df_dev_res['time'] == df_dev_lgbm['time']).all()

(True, True)

In [144]:


df_dev = df_dev_res.merge(df_dev_lgbm, on=['time', 'ticker'], how='inner', suffixes=('', '2'))

df_dev

Unnamed: 0,time,ticker,close,y_pred,y_pred_bin,lower_bound,upper_bound,close2,time_1day,close_w14_min/close_w14_max,close_1day_w5_norm_std/close_1day_w100_norm_std,close_1day/close_1day_w100_ma,volume_1day_w20_rsi,close_1day_w3_std/close_1day_w100_std,close_1day_w20_roc,close_w70_lvl_1.02-1.03,tmos_close_1day_w5_norm_std,tmos_close_w14_ma/tmos_close_w70_ma,close_1day_w5_ma/close_1day_w100_ma,tmos_close/tmos_close_w70_ma,tmos_close_w14_max/tmos_close,close_1day_w20_norm_std,close_w14_mean_abs_pct/close_w70_mean_abs_pct,tmos_close_1day_w20_rsi,tmos_close_w70_lvl_1-1.005,tmos_close_1day_w5_min/tmos_close_1day_w5_max,close_1day_w100_lvl_1.02-1.03,close_1day_w20_min/close_1day_w20_max,close_w14_mean_abs_pct
0,2025-01-29 19:00:00,ABIO,89.64,0.123648,0,87.8472,93.2256,89.64,2025-01-28 03:00:00,0.992471,0.111778,1.051965,49.351825,0.114424,0.082790,5.0,0.006858,1.00308,1.055154,1.002856,1.004673,0.044446,0.764855,64.62585,21.0,0.982919,1.0,0.826998,0.002132
1,2025-01-29 19:00:00,ABRD,190.60,0.172919,0,186.7880,198.2240,190.60,2025-01-28 03:00:00,0.981308,0.375051,0.997651,49.647009,0.198203,0.076136,6.0,0.006858,1.00308,1.018510,1.002856,1.004673,0.044309,0.717117,64.62585,21.0,0.982919,0.0,0.855289,0.003529
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,2025-01-29 19:00:00,YAKG,60.50,0.163792,0,59.2900,62.9200,60.50,2025-01-28 03:00:00,0.960769,0.215715,1.032106,50.911797,0.056447,0.102004,2.0,0.006858,1.00308,1.013170,1.002856,1.004673,0.030550,1.082757,64.62585,21.0,0.982919,1.0,0.896694,0.008697
124,2025-01-29 19:00:00,YDEX,4091.00,0.144124,0,4009.1800,4254.6400,4091.00,2025-01-28 03:00:00,0.971605,0.150446,1.067156,40.791327,0.199303,0.065963,0.0,0.006858,1.00308,1.062374,1.002856,1.004673,0.025060,1.562253,64.62585,21.0,0.982919,2.0,0.919917,0.005079


In [146]:
del df_dev_lgbm, df_dev_res

In [150]:
(df_dev['ticker'].values == df_hour['ticker'].values).all()

True

In [152]:
df_union = pd.concat([df_dev[['time', 'ticker', 'close', 'y_pred', 'y_pred_bin']].reset_index(drop=True), df_hour[['time', 'ticker', 'close', 'y_pred']].reset_index(drop=True)], axis=1)
df_union

Unnamed: 0,time,ticker,close,y_pred,y_pred_bin,time.1,ticker.1,close.1,y_pred.1
0,2025-01-29 19:00:00,ABIO,89.64,0.123648,0,2025-01-29 19:00:00,ABIO,89.639999,0.123648
1,2025-01-29 19:00:00,ABRD,190.60,0.172919,0,2025-01-29 19:00:00,ABRD,190.600006,0.172919
...,...,...,...,...,...,...,...,...,...
123,2025-01-29 19:00:00,YAKG,60.50,0.163792,0,2025-01-29 19:00:00,YAKG,60.500000,0.163792
124,2025-01-29 19:00:00,YDEX,4091.00,0.144124,0,2025-01-29 19:00:00,YDEX,4091.000000,0.144124


In [155]:
(df_dev['close'].values != df_hour['y_pred'].values).sum(), (df_dev['close'].values != df_hour['close'].values).mean()

(125, 0.704)

In [157]:
#close

# > 0.01% = 0
# > 0.0001% = 0
mask = (np.abs(df_dev['close'].values - df_hour['close'].values) / df_hour['close'].values) > 0.00_00_01
df_union.loc[mask]

Unnamed: 0,time,ticker,close,y_pred,y_pred_bin,time.1,ticker.1,close.1,y_pred.1


In [159]:
# C close все норм

In [162]:
(df_dev['y_pred'].values != df_hour['y_pred'].values).sum(), (df_dev['y_pred'].values != df_hour['y_pred'].values).mean()

(0, 0.0)

In [164]:
mask = (np.abs(df_dev['y_pred'].values - df_hour['y_pred'].values) / df_hour['y_pred'].values) > 0.00_00_01
print(mask.sum())
df_union.loc[mask]

0


Unnamed: 0,time,ticker,close,y_pred,y_pred_bin,time.1,ticker.1,close.1,y_pred.1


In [166]:
#Все отлично

#### features

In [170]:
#по сэмплам
(np.abs(df_dev[features_fe].values - df_hour[features_fe].values) / df_hour[features_fe].values > 0.00_00_01).any(axis=1).mean()

  (np.abs(df_dev[features_fe].values - df_hour[features_fe].values) / df_hour[features_fe].values > 0.00_00_01).any(axis=1).mean()


0.0

In [172]:
#по фичам
(np.abs(df_dev[features_fe].values - df_hour[features_fe].values) / df_hour[features_fe].values > 0.00_01).any(axis=0).mean()

  (np.abs(df_dev[features_fe].values - df_hour[features_fe].values) / df_hour[features_fe].values > 0.00_01).any(axis=0).mean()


0.0

In [174]:
#все четко

In [176]:
list(zip(features_fe, (np.abs(df_dev[features_fe].values - df_hour[features_fe].values) / df_hour[features_fe].values > 0.00_00_01).any(axis=0)))

  list(zip(features_fe, (np.abs(df_dev[features_fe].values - df_hour[features_fe].values) / df_hour[features_fe].values > 0.00_00_01).any(axis=0)))


[('close_w14_min/close_w14_max', False),
 ('close_1day_w5_norm_std/close_1day_w100_norm_std', False),
 ('close_1day/close_1day_w100_ma', False),
 ('volume_1day_w20_rsi', False),
 ('close_1day_w3_std/close_1day_w100_std', False),
 ('close_1day_w20_roc', False),
 ('close_w70_lvl_1.02-1.03', False),
 ('tmos_close_1day_w5_norm_std', False),
 ('tmos_close_w14_ma/tmos_close_w70_ma', False),
 ('close_1day_w5_ma/close_1day_w100_ma', False),
 ('tmos_close/tmos_close_w70_ma', False),
 ('tmos_close_w14_max/tmos_close', False),
 ('close_1day_w20_norm_std', False),
 ('close_w14_mean_abs_pct/close_w70_mean_abs_pct', False),
 ('tmos_close_1day_w20_rsi', False),
 ('tmos_close_w70_lvl_1-1.005', False),
 ('tmos_close_1day_w5_min/tmos_close_1day_w5_max', False),
 ('close_1day_w100_lvl_1.02-1.03', False),
 ('close_1day_w20_min/close_1day_w20_max', False),
 ('close_w14_mean_abs_pct', False)]

In [84]:
[('close_w14_min/close_w14_max', False),
 ('close_1day_w5_norm_std/close_1day_w100_norm_std', True),
 ('close_1day/close_1day_w100_ma', True),
 ('volume_1day_w20_rsi', False),
 ('close_1day_w3_std/close_1day_w100_std', True),
 ('close_1day_w20_roc', True),
 ('close_w70_lvl_1.02-1.03', False),
 ('tmos_close_1day_w5_norm_std', False),
 ('tmos_close_w14_ma/tmos_close_w70_ma', False),
 ('close_1day_w5_ma/close_1day_w100_ma', True),
 ('tmos_close/tmos_close_w70_ma', False),
 ('tmos_close_w14_max/tmos_close', False),
 ('close_1day_w20_norm_std', True),
 ('close_w14_mean_abs_pct/close_w70_mean_abs_pct', False),
 ('tmos_close_1day_w20_rsi', False),
 ('tmos_close_w70_lvl_1-1.005', False),
 ('tmos_close_1day_w5_min/tmos_close_1day_w5_max', False),
 ('close_1day_w100_lvl_1.02-1.03', True),
 ('close_1day_w20_min/close_1day_w20_max', True),
 ('close_w14_mean_abs_pct', False)]

In [None]:
все тмос верны (1day_w5, 1day_w20, w14, w70)
все плохие close_1day и наоборот

In [None]:
Может я неправильно сджоинил 1day и 1hour?

In [None]:
любой df + анализ джоина. Хотя тмос правильно

In [128]:
df_dev

Unnamed: 0,time,ticker,close,y_pred,y_pred_bin,lower_bound,upper_bound,close2,time_1day,close_w14_min/close_w14_max,close_1day_w5_norm_std/close_1day_w100_norm_std,close_1day/close_1day_w100_ma,volume_1day_w20_rsi,close_1day_w3_std/close_1day_w100_std,close_1day_w20_roc,close_w70_lvl_1.02-1.03,tmos_close_1day_w5_norm_std,tmos_close_w14_ma/tmos_close_w70_ma,close_1day_w5_ma/close_1day_w100_ma,tmos_close/tmos_close_w70_ma,tmos_close_w14_max/tmos_close,close_1day_w20_norm_std,close_w14_mean_abs_pct/close_w70_mean_abs_pct,tmos_close_1day_w20_rsi,tmos_close_w70_lvl_1-1.005,tmos_close_1day_w5_min/tmos_close_1day_w5_max,close_1day_w100_lvl_1.02-1.03,close_1day_w20_min/close_1day_w20_max,close_w14_mean_abs_pct
0,2025-01-29 16:00:00,ABIO,89.72,0.134791,0,87.9256,93.3088,89.72,2025-01-28 03:00:00,0.984057,0.111778,1.051965,49.351825,0.114424,0.082790,5.0,0.006858,1.002097,1.055154,0.999531,1.007812,0.044446,0.847723,64.62585,20.0,0.982919,1.0,0.826998,0.002535
1,2025-01-29 16:00:00,ABRD,191.00,0.175697,0,187.1800,198.6400,191.00,2025-01-28 03:00:00,0.978193,0.375051,0.997651,49.647009,0.198203,0.076136,2.0,0.006858,1.002097,1.018510,0.999531,1.007812,0.044309,0.649455,64.62585,20.0,0.982919,0.0,0.855289,0.003909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,2025-01-29 16:00:00,YAKG,60.00,0.163792,0,58.8000,62.4000,60.00,2025-01-28 03:00:00,0.960769,0.215715,1.032106,50.911797,0.056447,0.102004,4.0,0.006858,1.002097,1.013170,0.999531,1.007812,0.030550,1.208297,64.62585,20.0,0.982919,1.0,0.896694,0.009874
126,2025-01-29 16:00:00,YDEX,4065.00,0.133013,0,3983.7000,4227.6000,4065.00,2025-01-28 03:00:00,0.969067,0.150446,1.067156,40.791327,0.199303,0.065963,0.0,0.006858,1.002097,1.062374,0.999531,1.007812,0.025060,1.334686,64.62585,20.0,0.982919,2.0,0.919917,0.004385


In [132]:
df_hour

Unnamed: 0,time,ticker,close,y_pred,close_w14_min/close_w14_max,close_1day_w5_norm_std/close_1day_w100_norm_std,close_1day/close_1day_w100_ma,volume_1day_w20_rsi,close_1day_w3_std/close_1day_w100_std,close_1day_w20_roc,close_w70_lvl_1.02-1.03,tmos_close_1day_w5_norm_std,tmos_close_w14_ma/tmos_close_w70_ma,close_1day_w5_ma/close_1day_w100_ma,tmos_close/tmos_close_w70_ma,tmos_close_w14_max/tmos_close,close_1day_w20_norm_std,close_w14_mean_abs_pct/close_w70_mean_abs_pct,tmos_close_1day_w20_rsi,tmos_close_w70_lvl_1-1.005,tmos_close_1day_w5_min/tmos_close_1day_w5_max,close_1day_w100_lvl_1.02-1.03,close_1day_w20_min/close_1day_w20_max,close_w14_mean_abs_pct
418120,2025-01-29 16:00:00,ABIO,89.720001,0.134791,0.984057,0.111778,1.051965,49.351826,0.114424,0.082790,5.0,0.006858,1.002097,1.055154,0.999531,1.007812,0.044446,0.847723,64.625847,20.0,0.982919,1.0,0.826998,0.002535
401419,2025-01-29 16:00:00,ABRD,191.000000,0.175697,0.978193,0.320435,0.994810,49.647011,0.191604,0.055866,2.0,0.006858,1.002097,1.012917,0.999531,1.007812,0.041034,0.649455,64.625847,20.0,0.982919,2.0,0.858000,0.003909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213220,2025-01-29 16:00:00,YAKG,60.000000,0.155132,0.960769,0.226300,1.029707,50.911797,0.117746,0.089189,4.0,0.006858,1.002097,1.004327,0.999531,1.007812,0.027263,1.208297,64.625847,20.0,0.982919,3.0,0.890819,0.009874
379960,2025-01-29 16:00:00,YDEX,4065.000000,0.133013,0.969067,0.150446,1.067156,40.791328,0.199303,0.065963,0.0,0.006858,1.002097,1.062374,0.999531,1.007812,0.025060,1.334686,64.625847,20.0,0.982919,2.0,0.919917,0.004385


# Сверим в данных

In [164]:
ticker = 'YAKG'
df_dev_ticker = load_pkl('./dev/data/preproc/1day/2025-01-29/YAKG.pkl')
df_dev_ticker

Unnamed: 0,time,close,volume,ticker,tmos_close
0,2024-09-03 03:00:00,55.75,11937.0,YAKG,5.42
1,2024-09-04 03:00:00,59.00,5712.0,YAKG,5.58
...,...,...,...,...,...
99,2025-01-27 03:00:00,60.05,13029.0,YAKG,6.33
100,2025-01-28 03:00:00,60.50,12241.0,YAKG,6.41


In [161]:
df_1day = load_pkl('./df_1day_fe.pkl')
df_1day

Unnamed: 0_level_0,index_1day,time,open_1day,close_1day,volume_1day,low_1day,high_1day,ticker,tmos_close_1day,index_start_1day,close_1day_w1_roc,close_1day_w1_diff,volume_1day_w1_roc,volume_1day_w1_diff,tmos_close_1day_w1_roc,tmos_close_1day_w1_diff,close_1day_w3_ma,close_1day_w3_std,close_1day_w3_norm_std,close_1day_w3_ma_low_2std,close_1day_w3_ma_up_2std,close_1day_w3_ma_low_3std,close_1day_w3_ma_up_3std,close_1day_w3_mean_abs_pct,close_1day_w3_alpha,...,tmos_close_1day_w100_ma_low_2std,tmos_close_1day_w100_ma_up_2std,tmos_close_1day_w100_ma_low_3std,tmos_close_1day_w100_ma_up_3std,tmos_close_1day_w100_mean_abs_pct,tmos_close_1day_w100_alpha,tmos_close_1day_w100_min,tmos_close_1day_w100_max,tmos_close_1day_w100_rsi,tmos_close_1day_w100_roc,tmos_close_1day_w100_diff,tmos_close_1day_w100_lvl_1-1.005,tmos_close_1day_w100_lvl_-0.995-1,tmos_close_1day_w100_lvl_1.005-1.01,tmos_close_1day_w100_lvl_-0.99-0.995,tmos_close_1day_w100_lvl_1.01-1.015,tmos_close_1day_w100_lvl_-0.985-0.99,tmos_close_1day_w100_lvl_1.015-1.02,tmos_close_1day_w100_lvl_-0.98-0.985,tmos_close_1day_w100_lvl_1.02-1.03,tmos_close_1day_w100_lvl_-0.97-0.98,tmos_close_1day_w100_lvl_1.03-1.04,tmos_close_1day_w100_lvl_-0.96-0.97,tmos_close_1day_w100_lvl_1.04-1.05,tmos_close_1day_w100_lvl_-0.95-0.96
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
0,0,2022-05-04 23:00:00,32.120,31.200,1138130.0,31.04,32.68,AFLT,4.18,0,,,,,,,31.200000,,,,,,,,,...,,,,,,,4.18,4.18,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2022-05-05 23:00:00,31.260,30.700,664101.0,30.40,31.74,AFLT,4.22,0,-0.016026,-0.50,-0.416498,-474029.0,0.009569,0.04,30.950000,0.353553,0.011423,30.242893,31.657107,29.889340,32.010660,0.016026,-2.000000,...,4.143431,4.256569,4.115147,4.284853,0.009569,25.000000,4.18,4.22,100.000000,,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87796,87796,2025-01-28 23:00:00,11.300,11.405,8429.0,11.06,11.36,RBCM,6.41,87104,0.005289,0.06,0.059985,477.0,0.012638,0.08,11.428333,0.097125,0.008499,11.234083,11.622584,11.136957,11.719709,0.011954,-6.890459,...,5.207021,6.463579,4.892881,6.777719,0.012770,20.627572,5.22,6.45,56.734694,0.182657,0.99,4.0,0.0,1.0,1.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,2.0,0.0,3.0
87797,87797,2025-01-29 23:00:00,11.485,11.265,10983.0,11.22,11.51,RBCM,6.42,87104,-0.012275,-0.14,0.303002,2554.0,0.001560,0.01,11.338333,0.070238,0.006195,11.197858,11.478809,11.127620,11.549046,0.011345,-8.108108,...,5.206809,6.480591,4.888364,6.799036,0.012490,21.672261,5.22,6.45,55.833333,0.150538,0.84,4.0,1.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,3.0


In [178]:
mask_time = (pd.Timestamp('2024-09-03') < df_1day['time']) & (df_1day['time'] < pd.Timestamp('2025-01-29'))
mask = (df_1day['ticker'] == ticker) & mask_time
df_1day_ticker = df_1day[mask]
df_1day_ticker

Unnamed: 0_level_0,index_1day,time,open_1day,close_1day,volume_1day,low_1day,high_1day,ticker,tmos_close_1day,index_start_1day,close_1day_w1_roc,close_1day_w1_diff,volume_1day_w1_roc,volume_1day_w1_diff,tmos_close_1day_w1_roc,tmos_close_1day_w1_diff,close_1day_w3_ma,close_1day_w3_std,close_1day_w3_norm_std,close_1day_w3_ma_low_2std,close_1day_w3_ma_up_2std,close_1day_w3_ma_low_3std,close_1day_w3_ma_up_3std,close_1day_w3_mean_abs_pct,close_1day_w3_alpha,...,tmos_close_1day_w100_ma_low_2std,tmos_close_1day_w100_ma_up_2std,tmos_close_1day_w100_ma_low_3std,tmos_close_1day_w100_ma_up_3std,tmos_close_1day_w100_mean_abs_pct,tmos_close_1day_w100_alpha,tmos_close_1day_w100_min,tmos_close_1day_w100_max,tmos_close_1day_w100_rsi,tmos_close_1day_w100_roc,tmos_close_1day_w100_diff,tmos_close_1day_w100_lvl_1-1.005,tmos_close_1day_w100_lvl_-0.995-1,tmos_close_1day_w100_lvl_1.005-1.01,tmos_close_1day_w100_lvl_-0.99-0.995,tmos_close_1day_w100_lvl_1.01-1.015,tmos_close_1day_w100_lvl_-0.985-0.99,tmos_close_1day_w100_lvl_1.015-1.02,tmos_close_1day_w100_lvl_-0.98-0.985,tmos_close_1day_w100_lvl_1.02-1.03,tmos_close_1day_w100_lvl_-0.97-0.98,tmos_close_1day_w100_lvl_1.03-1.04,tmos_close_1day_w100_lvl_-0.96-0.97,tmos_close_1day_w100_lvl_1.04-1.05,tmos_close_1day_w100_lvl_-0.95-0.96
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
17886,17886,2024-09-03 23:00:00,57.35,55.35,11937.0,54.70,59.6,YAKG,5.42,17294,-0.039062,-2.25,-0.663860,-23575.0,-0.003676,-0.02,58.716667,4.042380,0.068846,50.631907,66.801426,46.589528,70.843806,0.057181,-0.240196,...,5.642473,7.374127,5.209559,7.807041,0.009861,-63.403811,5.42,7.17,37.559429,-0.224607,-1.57,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
17887,17887,2024-09-04 23:00:00,55.95,59.00,5712.0,55.75,60.9,YAKG,5.58,17294,0.065944,3.65,-0.521488,-6225.0,0.029520,0.16,57.316667,1.841422,0.032127,53.633824,60.999510,51.792402,62.840931,0.064538,0.206439,...,5.614663,7.373137,5.175044,7.812756,0.010113,-62.535609,5.42,7.17,38.819876,-0.205128,-1.44,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,0.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17985,17985,2025-01-27 23:00:00,59.80,59.40,13029.0,58.50,61.3,YAKG,6.33,17294,0.000000,0.00,-0.747044,-38478.0,-0.017081,-0.11,58.916667,0.837158,0.014209,57.242351,60.590982,56.405193,61.428140,0.010366,1.034483,...,5.202533,6.448267,4.891100,6.759700,0.012680,20.106052,5.22,6.45,56.104252,0.163603,0.89,3.0,0.0,0.0,1.0,1.0,1.0,4.0,1.0,0.0,3.0,0.0,3.0,0.0,4.0
17986,17986,2025-01-28 23:00:00,59.05,60.45,12241.0,59.00,61.5,YAKG,6.41,17294,0.017677,1.05,-0.060480,-788.0,0.012638,0.08,59.750000,0.606218,0.010146,58.537564,60.962436,57.931347,61.568653,0.014233,1.428571,...,5.207021,6.463579,4.892881,6.777719,0.012770,20.627572,5.22,6.45,56.734694,0.182657,0.99,4.0,0.0,1.0,1.0,0.0,3.0,0.0,0.0,0.0,3.0,0.0,2.0,0.0,3.0


In [190]:
#close, volume, tmos_close
df_dev_ticker['close'].values == df_1day_ticker['close_1day'].values

array([False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False])

In [200]:
pd.concat([df_dev_ticker['close'].reset_index(drop=True), df_1day_ticker['close_1day'].reset_index(drop=True)], axis=1)

Unnamed: 0,close,close_1day
0,55.75,55.35
1,59.00,59.00
...,...,...
99,60.05,59.40
100,60.50,60.45


In [192]:
df_dev_ticker['volume'].values == df_1day_ticker['volume_1day'].values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

In [196]:
df_dev_ticker['tmos_close'].values == df_1day_ticker['tmos_close_1day'].values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

In [202]:
#super check
df_ticker = load_pkl('./data/preproc/1day/YAKG.pkl')
df_ticker


Unnamed: 0,time,open,close,volume,low,high,ticker,tmos_close
0,2022-05-04 03:00:00,121.85,123.50,3455.0,115.10,128.80,YAKG,4.18
1,2022-05-05 03:00:00,124.80,119.50,2293.0,119.00,124.85,YAKG,4.22
...,...,...,...,...,...,...,...,...
692,2025-01-28 03:00:00,59.05,60.50,12241.0,59.00,61.50,YAKG,6.41
693,2025-01-29 03:00:00,60.95,60.55,37457.0,58.95,63.50,YAKG,6.42


In [208]:
mask_time = (pd.Timestamp('2024-09-03') < df_1day['time']) & (df_1day['time'] < pd.Timestamp('2025-01-29'))
df_ticker = df_ticker[mask_time]
df_ticker

  df_ticker = df_ticker[mask_time]


Unnamed: 0,time,open,close,volume,low,high,ticker,tmos_close
592,2024-09-03 03:00:00,57.35,55.75,11937.0,54.70,59.6,YAKG,5.42
593,2024-09-04 03:00:00,55.95,59.00,5712.0,55.75,60.9,YAKG,5.58
...,...,...,...,...,...,...,...,...
691,2025-01-27 03:00:00,59.80,60.05,13029.0,58.50,61.3,YAKG,6.33
692,2025-01-28 03:00:00,59.05,60.50,12241.0,59.00,61.5,YAKG,6.41


In [212]:
df_dev_ticker['close'].values == df_ticker['close'].values

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

In [None]:
#Бага найдена