In [1]:
"""Получение и вывод в консоль свечей с часовым интервалом за n дней"""
import pandas as pd
import os
from pprint import pprint
from datetime import timedelta

from tinkoff.invest import CandleInterval, Client
from tinkoff.invest.schemas import CandleSource
from tinkoff.invest.utils import now

TOKEN = os.environ["TINKOFF_TOKEN"]


def main(n):
    candles = []
    with Client(TOKEN) as client:
        for candle in client.get_all_candles(
            instrument_id="BBG004730N88",
            from_=now() - timedelta(days=n),
            interval=CandleInterval.CANDLE_INTERVAL_HOUR,
            candle_source_type=CandleSource.CANDLE_SOURCE_UNSPECIFIED,
        ):
            candles.append(candle)
            
    return candles
# df = pd.DataFrame(main)
# df.to_csv('tinkoff.csv', index=False)
# df
candle_list = main(60) 

# if __name__ == "__main__":
#     main()

In [2]:
from datetime import datetime, timezone

# Эмуляция класса Quotation
class Quotation:
    def __init__(self, units, nano):
        self.units = units
        self.nano = nano

# Эмуляция класса HistoricCandle
class HistoricCandle:
    def __init__(self, open, high, low, close, volume, time, is_complete, candle_source):
        self.open = open
        self.high = high
        self.low = low
        self.close = close
        self.volume = volume
        self.time = time
        self.is_complete = is_complete
        self.candle_source = candle_source

# Эмуляция Enum для CandleSource
class CandleSource:
    CANDLE_SOURCE_EXCHANGE = 1

# Функция для преобразования 'Quotation' в полное значение
def to_full_value(quotation):
    return quotation.units + quotation.nano / 1e9

# Преобразование списка объектов в список словарей
candle_dicts = []
for candle in candle_list:
    candle_dict = {
        'open': to_full_value(candle.open),
        'high': to_full_value(candle.high),
        'low': to_full_value(candle.low),
        'close': to_full_value(candle.close),
        'volume': candle.volume,
        'time': candle.time.strftime('%Y-%m-%d %H:%M:%S')
    }
    candle_dicts.append(candle_dict)

# Результат
# pprint(candle_dicts)


In [3]:
df = pd.DataFrame(candle_dicts, index=list(range(0, len(candle_dicts))))
df.head()

Unnamed: 0,open,high,low,close,volume,time
0,244.62,245.02,244.08,245.02,19204,2024-10-28 06:00:00
1,245.07,245.85,243.34,245.51,1317708,2024-10-28 07:00:00
2,245.52,248.47,244.98,247.8,1208087,2024-10-28 08:00:00
3,247.81,249.0,247.16,247.18,932857,2024-10-28 09:00:00
4,247.19,247.2,245.8,246.93,518793,2024-10-28 10:00:00


In [4]:
df.tail()

Unnamed: 0,open,high,low,close,volume,time
1035,269.88,270.18,269.1,269.32,243300,2024-12-26 19:00:00
1036,269.3,270.0,269.2,269.56,86719,2024-12-26 20:00:00
1037,270.35,270.35,268.75,269.68,2179,2024-12-27 04:00:00
1038,269.68,269.8,269.22,269.22,2599,2024-12-27 05:00:00
1039,269.22,270.65,269.2,269.47,3672,2024-12-27 06:00:00


In [5]:
df.shape

(1040, 6)

In [6]:
df['y'] = (df.open - df.close).apply(lambda x: 0 if x<0 else 1) 

In [7]:
df.head()

Unnamed: 0,open,high,low,close,volume,time,y
0,244.62,245.02,244.08,245.02,19204,2024-10-28 06:00:00,0
1,245.07,245.85,243.34,245.51,1317708,2024-10-28 07:00:00,0
2,245.52,248.47,244.98,247.8,1208087,2024-10-28 08:00:00,0
3,247.81,249.0,247.16,247.18,932857,2024-10-28 09:00:00,1
4,247.19,247.2,245.8,246.93,518793,2024-10-28 10:00:00,1


## transform data

In [8]:
data = df.copy()

In [9]:
# Преобразование времени в формат datetime и сортировка данных по времени
data['time'] = pd.to_datetime(data['time'])
data = data.sort_values(by='time').reset_index(drop=True)

# Сдвиг метки `y` для предсказания следующей свечи
data['y_next'] = data['y'].shift(-1)

# Удаление последних данных, где `y_next` будет NaN
data = data[:-1]

# Проверка результата
data.head()

Unnamed: 0,open,high,low,close,volume,time,y,y_next
0,244.62,245.02,244.08,245.02,19204,2024-10-28 06:00:00,0,0.0
1,245.07,245.85,243.34,245.51,1317708,2024-10-28 07:00:00,0,0.0
2,245.52,248.47,244.98,247.8,1208087,2024-10-28 08:00:00,0,1.0
3,247.81,249.0,247.16,247.18,932857,2024-10-28 09:00:00,1,1.0
4,247.19,247.2,245.8,246.93,518793,2024-10-28 10:00:00,1,1.0


In [10]:
# Создание временных признаков: информация о предыдущих свечах
def create_lag_features(data, lags):
    for lag in range(1, lags + 1):
        data[f'open_lag{lag}'] = data['open'].shift(lag)
        data[f'high_lag{lag}'] = data['high'].shift(lag)
        data[f'low_lag{lag}'] = data['low'].shift(lag)
        data[f'close_lag{lag}'] = data['close'].shift(lag)
        data[f'volume_lag{lag}'] = data['volume'].shift(lag)
    return data

# Добавляем признаки за последние 3 свечи
data = create_lag_features(data, lags=3)

# Удаляем строки с NaN, образовавшиеся из-за сдвигов
data = data.dropna().reset_index(drop=True)

# Проверка результата
data.head()

Unnamed: 0,open,high,low,close,volume,time,y,y_next,open_lag1,high_lag1,...,open_lag2,high_lag2,low_lag2,close_lag2,volume_lag2,open_lag3,high_lag3,low_lag3,close_lag3,volume_lag3
0,247.81,249.0,247.16,247.18,932857,2024-10-28 09:00:00,1,1.0,245.52,248.47,...,245.07,245.85,243.34,245.51,1317708.0,244.62,245.02,244.08,245.02,19204.0
1,247.19,247.2,245.8,246.93,518793,2024-10-28 10:00:00,1,1.0,247.81,249.0,...,245.52,248.47,244.98,247.8,1208087.0,245.07,245.85,243.34,245.51,1317708.0
2,246.93,246.95,244.64,244.91,568411,2024-10-28 11:00:00,1,0.0,247.19,247.2,...,247.81,249.0,247.16,247.18,932857.0,245.52,248.47,244.98,247.8,1208087.0
3,244.91,246.35,244.5,244.99,452983,2024-10-28 12:00:00,0,1.0,246.93,246.95,...,247.19,247.2,245.8,246.93,518793.0,247.81,249.0,247.16,247.18,932857.0
4,244.99,245.44,243.81,243.94,407611,2024-10-28 13:00:00,1,0.0,244.91,246.35,...,246.93,246.95,244.64,244.91,568411.0,247.19,247.2,245.8,246.93,518793.0


## tree

In [None]:
# import numpy as np
# import pandas as pd
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.model_selection import GridSearchCV

# y_train = train.y
# X_train = train.drop('y', axis=1)

# x_test=test

# parameters = {'max_depth':(1, 11),'min_samples_split':range(2,11),'min_samples_leaf':range(1,11)}
# dt = DecisionTreeClassifier()
# search = GridSearchCV(dt,parameters,cv=5)

# search.fit(X_train,y_train)
# best_tree=search.best_estimator_
# predictions=best_tree.predict(x_test)


In [11]:
# from sklearn.model_selection import train_test_split
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score, classification_report

# # Определение признаков и целевой переменной
# features = [col for col in data.columns if col not in ['y', 'y_next', 'time']]
# target = 'y_next'

# # Разделение данных на обучающую и тестовую выборки
# X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

# # Обучение модели Random Forest
# model = RandomForestClassifier(random_state=42)
# model.fit(X_train, y_train)

# # Предсказания на тестовых данных
# y_pred = model.predict(X_test)

# # Оценка качества модели
# accuracy = accuracy_score(y_test, y_pred)
# report = classification_report(y_test, y_pred)

# accuracy, report

## best_tree

In [33]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# Определение признаков и целевой переменной
features = [col for col in data.columns if col not in ['y', 'y_next', 'time']]
target = 'y_next'

# Разделение данных на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

parameters = {'max_depth':(10, 100),'min_samples_split':range(2,11),'min_samples_leaf':range(1,11)}
dt = DecisionTreeClassifier()
search = GridSearchCV(dt,parameters,cv=5)

search.fit(X_train,y_train)
best_tree=search.best_estimator_
predictions=best_tree.predict(X_test)

In [34]:
best_tree

In [35]:
# Обучение модели Random Forest
# model = RandomForestClassifier(random_state=42)
# model = best_tree
# model.fit(X_train, y_train)

# Предсказания на тестовых данных
from sklearn.metrics import accuracy_score, classification_report
y_pred = best_tree.predict(X_test)

# Оценка качества модели
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

accuracy, report

(0.5288461538461539,
 '              precision    recall  f1-score   support\n\n         0.0       0.50      0.53      0.51        99\n         1.0       0.55      0.53      0.54       109\n\n    accuracy                           0.53       208\n   macro avg       0.53      0.53      0.53       208\nweighted avg       0.53      0.53      0.53       208\n')

In [20]:
X_test.head()

Unnamed: 0,open,high,low,close,volume,open_lag1,high_lag1,low_lag1,close_lag1,volume_lag1,open_lag2,high_lag2,low_lag2,close_lag2,volume_lag2,open_lag3,high_lag3,low_lag3,close_lag3,volume_lag3
622,231.48,231.69,230.7,230.8,96559,231.52,231.86,230.57,231.52,167463.0,231.79,232.49,231.37,231.52,245329.0,231.25,231.8,230.29,231.8,369064.0
109,240.3,240.3,239.79,239.99,3329,239.29,241.45,239.05,240.3,14121.0,239.27,239.9,238.91,239.13,2447.0,239.31,239.33,239.02,239.27,817.0
649,226.29,227.0,225.15,225.5,332993,225.19,226.7,224.51,226.3,567206.0,226.27,227.69,225.02,225.19,700468.0,224.6,227.44,224.06,226.29,1113000.0
458,236.1,236.22,236.01,236.02,572,236.33,236.33,236.01,236.19,652.0,236.55,236.55,236.01,236.33,507.0,236.03,236.21,236.02,236.03,542.0
541,229.56,229.75,227.8,228.9,9171,229.29,229.3,228.53,228.56,95403.0,228.8,229.33,228.53,229.29,104202.0,228.45,229.0,228.41,228.81,128453.0


In [21]:
data[features].head()

Unnamed: 0,open,high,low,close,volume,open_lag1,high_lag1,low_lag1,close_lag1,volume_lag1,open_lag2,high_lag2,low_lag2,close_lag2,volume_lag2,open_lag3,high_lag3,low_lag3,close_lag3,volume_lag3
0,247.81,249.0,247.16,247.18,932857,245.52,248.47,244.98,247.8,1208087.0,245.07,245.85,243.34,245.51,1317708.0,244.62,245.02,244.08,245.02,19204.0
1,247.19,247.2,245.8,246.93,518793,247.81,249.0,247.16,247.18,932857.0,245.52,248.47,244.98,247.8,1208087.0,245.07,245.85,243.34,245.51,1317708.0
2,246.93,246.95,244.64,244.91,568411,247.19,247.2,245.8,246.93,518793.0,247.81,249.0,247.16,247.18,932857.0,245.52,248.47,244.98,247.8,1208087.0
3,244.91,246.35,244.5,244.99,452983,246.93,246.95,244.64,244.91,568411.0,247.19,247.2,245.8,246.93,518793.0,247.81,249.0,247.16,247.18,932857.0
4,244.99,245.44,243.81,243.94,407611,244.91,246.35,244.5,244.99,452983.0,246.93,246.95,244.64,244.91,568411.0,247.19,247.2,245.8,246.93,518793.0


In [22]:
accuracy

0.5288461538461539

In [23]:
report

'              precision    recall  f1-score   support\n\n         0.0       1.00      0.01      0.02        99\n         1.0       0.53      1.00      0.69       109\n\n    accuracy                           0.53       208\n   macro avg       0.76      0.51      0.35       208\nweighted avg       0.75      0.53      0.37       208\n'

## model test

In [40]:
def main_h(n):
    candles_h = []
    with Client(TOKEN) as client:
        for candle in client.get_all_candles(
            instrument_id="BBG004730N88",
            from_=now() - timedelta(hours=n),
            interval=CandleInterval.CANDLE_INTERVAL_HOUR,
            candle_source_type=CandleSource.CANDLE_SOURCE_UNSPECIFIED,
        ):
            candles_h.append(candle)
    return candles_h    

In [41]:
# Создание временных признаков: информация о предыдущих свечах
def create_lag_features_h(data, lags=3):
    for lag in range(1, lags + 1):
        data[f'open_lag{lag}'] = data['open'].shift(lag)
        data[f'high_lag{lag}'] = data['high'].shift(lag)
        data[f'low_lag{lag}'] = data['low'].shift(lag)
        data[f'close_lag{lag}'] = data['close'].shift(lag)
        data[f'volume_lag{lag}'] = data['volume'].shift(lag)
    return data.dropna().reset_index(drop=True).drop(columns=['time'])


## !!! run !!!

In [42]:
from tinkoff.invest import CandleInterval, Client
from tinkoff.invest.schemas import CandleSource
from tinkoff.invest.utils import now

TOKEN = os.environ["TINKOFF_TOKEN"]

In [99]:
candle_hour = main_h(10)

candle_hours = []
for candle in candle_hour:
    candle_hour = {
        'open': to_full_value(candle.open),
        'high': to_full_value(candle.high),
        'low': to_full_value(candle.low),
        'close': to_full_value(candle.close),
        'volume': candle.volume,
        'time': candle.time.strftime('%Y-%m-%d %H:%M:%S')
    }
    candle_hours.append(candle_hour)

df_h = pd.DataFrame(candle_hours, index=list(range(0, len(candle_hours))))
# df_h['y'] = (df_h.open - df_h.close).apply(lambda x: 0 if x<0 else 1) 
df_h

Unnamed: 0,open,high,low,close,volume,time
0,269.68,269.8,269.22,269.22,2599,2024-12-27 05:00:00
1,269.61,270.65,269.2,269.5,6554,2024-12-27 06:00:00
2,269.69,271.6,268.57,270.86,988636,2024-12-27 07:00:00
3,270.88,272.16,270.51,271.3,667636,2024-12-27 08:00:00
4,271.3,271.85,270.88,271.52,455486,2024-12-27 09:00:00
5,271.52,271.52,269.7,269.84,766649,2024-12-27 10:00:00
6,269.84,270.52,269.2,270.42,466248,2024-12-27 11:00:00
7,270.4,270.56,269.76,270.52,349937,2024-12-27 12:00:00
8,270.52,271.4,270.29,270.78,346790,2024-12-27 13:00:00
9,270.8,270.81,270.29,270.57,289066,2024-12-27 14:00:00


In [100]:
x_now = create_lag_features_h(df_h)
x_now

Unnamed: 0,open,high,low,close,volume,open_lag1,high_lag1,low_lag1,close_lag1,volume_lag1,open_lag2,high_lag2,low_lag2,close_lag2,volume_lag2,open_lag3,high_lag3,low_lag3,close_lag3,volume_lag3
0,270.88,272.16,270.51,271.3,667636,269.69,271.6,268.57,270.86,988636.0,269.61,270.65,269.2,269.5,6554.0,269.68,269.8,269.22,269.22,2599.0
1,271.3,271.85,270.88,271.52,455486,270.88,272.16,270.51,271.3,667636.0,269.69,271.6,268.57,270.86,988636.0,269.61,270.65,269.2,269.5,6554.0
2,271.52,271.52,269.7,269.84,766649,271.3,271.85,270.88,271.52,455486.0,270.88,272.16,270.51,271.3,667636.0,269.69,271.6,268.57,270.86,988636.0
3,269.84,270.52,269.2,270.42,466248,271.52,271.52,269.7,269.84,766649.0,271.3,271.85,270.88,271.52,455486.0,270.88,272.16,270.51,271.3,667636.0
4,270.4,270.56,269.76,270.52,349937,269.84,270.52,269.2,270.42,466248.0,271.52,271.52,269.7,269.84,766649.0,271.3,271.85,270.88,271.52,455486.0
5,270.52,271.4,270.29,270.78,346790,270.4,270.56,269.76,270.52,349937.0,269.84,270.52,269.2,270.42,466248.0,271.52,271.52,269.7,269.84,766649.0
6,270.8,270.81,270.29,270.57,289066,270.52,271.4,270.29,270.78,346790.0,270.4,270.56,269.76,270.52,349937.0,269.84,270.52,269.2,270.42,466248.0
7,270.57,270.64,270.46,270.5,4824,270.8,270.81,270.29,270.57,289066.0,270.52,271.4,270.29,270.78,346790.0,270.4,270.56,269.76,270.52,349937.0


In [101]:
x_now = create_lag_features_h(df_h)
# y_now = model.predict(x_now)
y_now = best_tree.predict(x_now)
y_now

array([1., 1., 0., 0., 1., 1., 1., 1.])

In [102]:
df_h_y = (df_h.open - df_h.close).apply(lambda x: 0 if x<0 else 1) 
df_res = pd.DataFrame(df_h_y)
df_res

Unnamed: 0,0
0,1
1,1
2,0
3,0
4,0
5,1
6,0
7,0
8,0
9,1


In [103]:
df_next = pd.Series(y_now)
df_next

0    1.0
1    1.0
2    0.0
3    0.0
4    1.0
5    1.0
6    1.0
7    1.0
dtype: float64

In [104]:
df_res['now'] = df_res[0].shift(-4)
df_res

Unnamed: 0,0,now
0,1,0.0
1,1,1.0
2,0,0.0
3,0,0.0
4,0,0.0
5,1,1.0
6,0,1.0
7,0,
8,0,
9,1,


In [105]:
#df_res['now'] = df_
df_res['next'] = df_next
df_res

Unnamed: 0,0,now,next
0,1,0.0,1.0
1,1,1.0,1.0
2,0,0.0,0.0
3,0,0.0,0.0
4,0,0.0,1.0
5,1,1.0,1.0
6,0,1.0,1.0
7,0,,1.0
8,0,,
9,1,,


In [106]:
df_res = df_res.drop(columns = [0])

In [107]:
df_res.dropna(subset='next')

Unnamed: 0,now,next
0,0.0,1.0
1,1.0,1.0
2,0.0,0.0
3,0.0,0.0
4,0.0,1.0
5,1.0,1.0
6,1.0,1.0
7,,1.0
