# 01. BQuant Data Module Testing

Тестирование функций загрузки, обработки и валидации данных из модуля `bquant.data`.

## План тестирования:
1. **Настройка окружения** - импорты и установка директории данных
2. **Тестирование loader** - загрузка CSV файлов  
3. **Обзор доступных данных** - символы и таймфреймы

In [1]:
# Настройка окружения
import sys
from pathlib import Path
import pandas as pd
import numpy as np

# Make the bquant package importable: the project root is taken to be two
# directory levels above the current working directory.
project_root = Path.cwd().parent.parent
sys.path.insert(0, str(project_root))

# Report the resolved root, the first sys.path entry and the pandas version.
print(
    f"📁 Project root: {project_root}",
    f"🐍 Python path: {sys.path[0]}",
    f"📊 Pandas version: {pd.__version__}",
    sep="\n",
)

📁 Project root: c:\Users\Ivan\Documents\pro\bquant
🐍 Python path: c:\Users\Ivan\Documents\pro\bquant
📊 Pandas version: 2.3.1


In [2]:
# Configure bquant logging before the rest of the package is used.
from bquant.core.logging_config import setup_logging

# Apply the "research" profile; the call's return value (a Logger) is shown
# as this cell's output.
setup_logging(profile="research")

16:16:26 - bquant - INFO - Система логгирования BQuant инициализирована (консоль: INFO)
16:16:26 - bquant - INFO - Логи сохраняются в файл: c:\Users\Ivan\Documents\pro\bquant\logs\bquant.log (уровень: INFO)


<Logger bquant (DEBUG)>

In [3]:
# Imports from bquant

# Directory configuration helpers and the project root constant.
from bquant.core.config import (
    PROJECT_ROOT,
    get_data_dir,
    reset_directories_to_defaults,
    set_data_dir,
)

# Loader API: file/symbol loading plus discovery helpers.
from bquant.data.loader import (
    get_available_symbols,
    get_available_timeframes,
    get_data_info,
    load_ohlcv_data,
    load_symbol_data,
)

# Built-in sample datasets.
from bquant.data.samples import (
    get_dataset_info,
    get_sample_data,
    list_datasets,
)

print("✅ BQuant modules imported successfully")

✅ BQuant modules imported successfully


In [4]:
# Reset bquant's directory configuration before the data directory is
# overridden in the next cell (presumably restores the package defaults —
# confirm against bquant.core.config).
reset_directories_to_defaults()

In [5]:
# Show the project root and the current data directory, then redirect the
# data directory to <PROJECT_ROOT>/data/row and print it again to confirm.
# NOTE(review): the folder really is named "row" (not "raw") — the recorded
# output shows this exact path in use.
row_data_dir = PROJECT_ROOT / "data" / "row"

print(PROJECT_ROOT)
print(get_data_dir())
set_data_dir(row_data_dir)
print(get_data_dir())

c:\Users\Ivan\Documents\pro\bquant
c:\Users\Ivan\Documents\pro\bquant\data
c:\Users\Ivan\Documents\pro\bquant\data\row


## Шаг 1: Тестирование загрузки OANDA_XAUUSD, 60.csv

Загружаем данные в формате TradingView (котировки OANDA) с валидацией.

In [6]:
# Print each available symbol together with its available timeframes.
symbols = get_available_symbols()
for sym in symbols:
    timeframes = get_available_timeframes(sym)
    print(sym, timeframes)


XAUUSD ['1D', '60', 'H1', 'M15']


In [7]:
# Load XAUUSD on the "1h" timeframe, selecting the TradingView data source
# with OANDA as the quote provider.
dftv = load_symbol_data(
    "XAUUSD",
    "1h",
    data_source="tradingview",
    quote_provider="oanda",
)

In [8]:
# Preview the first rows of the TradingView frame.
dftv.head()

Unnamed: 0_level_0,open,high,low,close,volume,accumulation/distribution,macd,signal,rsi,rsi-based ma,regular bullish,regular bullish label,regular bearish,regular bearish label
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-01-03 06:00:00+07:00,1828.775,1831.835,1828.09,1828.51,687,-532.906542,,,,,,,,
2022-01-03 07:00:00+07:00,1828.51,1831.095,1827.126,1830.37,998,100.492803,,,,,,,,
2022-01-03 08:00:00+07:00,1830.37,1831.646,1827.705,1828.915,1032,-297.800016,,,,,,,,
2022-01-03 09:00:00+07:00,1828.915,1828.955,1825.71,1826.24,737,-794.054254,,,,,,,,
2022-01-03 10:00:00+07:00,1826.24,1826.976,1824.285,1825.82,624,-706.170196,,,,,,,,


In [9]:
# Collect summary metadata for the TradingView frame and display which
# keys the resulting mapping provides.
dftv_info = get_data_info(dftv)
dftv_info.keys()

dict_keys(['rows', 'columns', 'date_range', 'memory_usage_mb', 'missing_values', 'data_types'])

In [10]:
# Summarise the TradingView frame: row count, memory usage and the
# per-column missing-value counts reported by get_data_info.
tv_summary_lines = [
    "\n📊 Data Info:",
    f"  Rows: {dftv_info['rows']}",
    f"  Memory usage: {dftv_info['memory_usage_mb']:.2f} MB",
    f"  Missing values: {dftv_info['missing_values']}",
]
print("\n".join(tv_summary_lines))


📊 Data Info:
  Rows: 21357
  Memory usage: 2.44 MB
  Missing values: {'open': 0, 'high': 0, 'low': 0, 'close': 0, 'volume': 0, 'accumulation/distribution': 0, 'macd': 25, 'signal': 33, 'rsi': 14, 'rsi-based ma': 27, 'regular bullish': 21357, 'regular bullish label': 21357, 'regular bearish': 21357, 'regular bearish label': 21357}


In [20]:
# Print the index range, per-column non-null counts and dtypes of the
# TradingView frame.
dftv.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 21357 entries, 2022-01-03 06:00:00+07:00 to 2025-08-12 13:00:00+07:00
Data columns (total 14 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   open                       21357 non-null  float64
 1   high                       21357 non-null  float64
 2   low                        21357 non-null  float64
 3   close                      21357 non-null  float64
 4   volume                     21357 non-null  int64  
 5   accumulation/distribution  21357 non-null  float64
 6   macd                       21332 non-null  float64
 7   signal                     21324 non-null  float64
 8   rsi                        21343 non-null  float64
 9   rsi-based ma               21330 non-null  float64
 10  regular bullish            0 non-null      float64
 11  regular bullish label      0 non-null      float64
 12  regular bearish            0 non-null      float64
 13 

In [21]:
# Check the index type of the TradingView frame (DatetimeIndex in the
# recorded output).
type(dftv.index)

pandas.core.indexes.datetimes.DatetimeIndex

## Шаг 2: Тестирование загрузки XAUUSDH1.csv

Загружаем данные в формате MetaTrader.

In [11]:
# Load the XAUUSDH1.csv file from the configured data directory by explicit
# path, with validation enabled.
mt_file = get_data_dir() / "XAUUSDH1.csv"
dfmt = load_ohlcv_data(mt_file, symbol="XAUUSD", timeframe="1h", validate_data=True)

# Summarise the loaded frame: row count, memory usage and the per-column
# missing-value counts.
dfmt_info = get_data_info(dfmt)
print(
    "\n📊 Data Info:",
    f"  Rows: {dfmt_info['rows']}",
    f"  Memory usage: {dfmt_info['memory_usage_mb']:.2f} MB",
    f"  Missing values: {dfmt_info['missing_values']}",
    sep="\n",
)



📊 Data Info:
  Rows: 85689
  Memory usage: 4.58 MB
  Missing values: {'open': 0, 'high': 0, 'low': 0, 'close': 0, 'volume': 0, 'col_6': 0}


In [17]:
# Print the index range, per-column non-null counts and dtypes of the
# MetaTrader frame.
dfmt.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 85689 entries, 2000-06-20 00:00:00 to 2025-08-22 16:00:00
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    85689 non-null  float64
 1   high    85689 non-null  float64
 2   low     85689 non-null  float64
 3   close   85689 non-null  float64
 4   volume  85689 non-null  int64  
 5   col_6   85689 non-null  int64  
dtypes: float64(4), int64(2)
memory usage: 4.6 MB


In [18]:
# Check the index type of the MetaTrader frame (DatetimeIndex in the
# recorded output).
type(dfmt.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [19]:
# Preview the first rows of the MetaTrader frame.
dfmt.head()

Unnamed: 0_level_0,open,high,low,close,volume,col_6
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-06-20,285.25,288.1,284.85,286.75,501,0
2000-06-21,285.75,288.05,285.1,286.4,526,0
2000-06-22,285.65,289.95,284.4,285.9,1086,0
2000-06-23,285.05,287.15,282.2,283.4,826,0
2000-06-26,282.9,285.2,281.55,284.2,601,0


# Работа со встроенными sample-данными

In [12]:
# list_datasets()

In [13]:
# get_dataset_info('tv_xauusd_1h')

In [14]:
# Load the built-in 'tv_xauusd_1h' sample dataset and preview its first rows.
# NOTE(review): in the recorded output this frame keeps `time` as a regular
# column with a RangeIndex, unlike the loader results above, which are
# indexed by a DatetimeIndex.
df_sample_tv = get_sample_data('tv_xauusd_1h')
df_sample_tv.head()

Unnamed: 0,time,open,high,low,close,volume,accumulation_distribution,macd,signal,rsi,rsi_based_ma,regular_bullish,regular_bullish_label,regular_bearish,regular_bearish_label
0,2025-06-11 20:00:00+07:00,3336.94,3344.77,3327.95,3330.0,54323.0,6642770.0,1.940145,2.765371,47.827521,55.231967,,,,
1,2025-06-11 21:00:00+07:00,3329.98,3341.31,3328.575,3337.635,37324.0,6658553.0,2.0303,2.618357,53.656385,55.138886,,,,
2,2025-06-11 22:00:00+07:00,3337.715,3342.93,3329.655,3333.22,22725.0,6648033.0,1.725604,2.439806,50.166113,55.035372,,,,
3,2025-06-11 23:00:00+07:00,3333.265,3334.57,3328.185,3330.39,16495.0,6642931.0,1.241461,2.200137,48.010293,54.292673,,,,
4,2025-06-12 00:00:00+07:00,3330.41,3331.435,3319.38,3324.365,21476.0,6639217.0,0.367373,1.833584,43.704225,53.189298,,,,


In [16]:
# Print the index range, per-column non-null counts and dtypes of the
# sample frame.
df_sample_tv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 15 columns):
 #   Column                     Non-Null Count  Dtype                    
---  ------                     --------------  -----                    
 0   time                       1000 non-null   datetime64[ns, UTC+07:00]
 1   open                       1000 non-null   float64                  
 2   high                       1000 non-null   float64                  
 3   low                        1000 non-null   float64                  
 4   close                      1000 non-null   float64                  
 5   volume                     1000 non-null   float64                  
 6   accumulation_distribution  1000 non-null   float64                  
 7   macd                       1000 non-null   float64                  
 8   signal                     1000 non-null   float64                  
 9   rsi                        1000 non-null   float64                  
 10  r