# Notebook for API debug

## Coinlore API -> [website](https://www.coinlore.com/cryptocurrency-data-api)

In [None]:
import requests

class Client:

    def getglobal(self):
        '''
        Get global crypto statistics, including the total count of coins, overall market capitalization, BTC dominance, total trading volume, ATH market capitalization, and more
        '''
        self.r = requests.get('https://api.coinlore.com/api/global/')
        return self.r.json()[0]

    def getcoin(self, coin):
        '''
        coin: ID
        Retrieve tick data for specific coin, you should pass ID which will be given by /api/tickers/ endpoint, data includes details such as name, ID, symbol, price, price change, market cap, volume, and supply for each ticker.
        '''
        self.r = requests.get('https://api.coinlore.com/api/ticker/?id=' + str(coin))
        return self.r.json()[0]

    def getcoins(self, start='0', limit='100'):
        '''
        start: str numeric - start index of coins sorted by market cap
        limit: str numeric - limit number of coins to return
        MAXIMUM LIMIT: 100 per request
        
        Retrieve tick data for multiple crypto coins, sorted by market cap. The data includes details such as name, ID, symbol, price, price change, market cap, volume, and supply for each ticker.
        '''
        self.r = requests.get('https://api.coinlore.com/api/tickers/?start=' + str(start) + '&limit=' + str(limit))
        return self.r.json()

    def getmarkets(self, coin):
        self.r = requests.get('https://api.coinlore.com/api/coin/markets/?id=' + str(coin))
        return self.r.json()

    def getsocial(self, coin):
        self.r = requests.get('https://api.coinlore.com/api/coin/social_stats/?id=' + str(coin))
        return self.r.json()

In [None]:
from coinlore.client import Client

client = Client()

#Get global market info
print(client.getglobal())

#Get Bitcoin Info (Bitcoin)
print(client.getcoin(90))

#Get coins from 0 to 100
print(client.getcoins("0", "100"))

#Get coin markets (Bitcoin)
print(client.getmarkets(90))

#Get social stats (Ethereum)
print(client.getsocial(80))


{'coins_count': 14542, 'active_markets': 36086, 'total_mcap': 3452793498816.4067, 'total_volume': 120001669176.84729, 'btc_d': '62.77', 'eth_d': '9.17', 'mcap_change': '0.83', 'volume_change': '-2.08', 'avg_change_percent': '0.19', 'volume_ath': 344187126292428700, 'mcap_ath': 33242498693028.46}
{'id': '90', 'symbol': 'BTC', 'name': 'Bitcoin', 'nameid': 'bitcoin', 'rank': 1, 'price_usd': '109079.33', 'percent_change_24h': '0.28', 'percent_change_1h': '0.22', 'percent_change_7d': '3.29', 'price_btc': '1.00', 'market_cap_usd': '2167032982582.20', 'volume24': 24249258801.352173, 'volume24a': 26400008140.4453, 'csupply': '19866577.00', 'tsupply': '19866577', 'msupply': '21000000'}
{'data': [{'id': '90', 'symbol': 'BTC', 'name': 'Bitcoin', 'nameid': 'bitcoin', 'rank': 1, 'price_usd': '109079.33', 'percent_change_24h': '0.28', 'percent_change_1h': '0.22', 'percent_change_7d': '3.29', 'price_btc': '1.00', 'market_cap_usd': '2167032982582.20', 'volume24': 24249258801.352173, 'volume24a': 26400

# Alpha Vantage <- only 1 hours interval. 1min for premium

In [None]:
from alpha_vantage.cryptocurrencies import CryptoCurrencies
import matplotlib.pyplot as plt

API_KEY = ''
CRYPTO_SYMBOL = 'BTC'
MARKET = 'USD'  
INTERVAL = '1min'  
OUTPUT_SIZE = 'compact' 

cc = CryptoCurrencies(
    key=API_KEY,
    output_format='pandas',
    indexing_type='date'     
)

data, meta_data = cc.get_digital_currency_daily(
    symbol=CRYPTO_SYMBOL,
    market=MARKET,
    # interval=INTERVAL,
    # outputsize=OUTPUT_SIZE
)


print("Метаданные:")
print(meta_data)
print("\nПоследние 5 записей:")
print(data.head())



Метаданные:
{'1. Information': 'Daily Prices and Volumes for Digital Currency', '2. Digital Currency Code': 'BTC', '3. Digital Currency Name': 'Bitcoin', '4. Market Code': 'USD', '5. Market Name': 'United States Dollar', '6. Last Refreshed': '2025-07-09 00:00:00', '7. Time Zone': 'UTC'}

Последние 5 записей:
              1. open    2. high     3. low   4. close    5. volume
date                                                               
2025-07-09  108953.58  108987.73  108806.75  108849.69    35.528228
2025-07-08  108271.49  109255.99  107438.33  108958.04  3785.390742
2025-07-07  109217.98  109741.64  107507.00  108269.84  4455.083530
2025-07-06  108246.66  109736.64  107837.70  109217.98  1651.181202
2025-07-05  108028.60  108454.19  107783.10  108246.65  1513.154286


KeyError: '4b. close (USD)'

# Binance API <- храни тебя господь родной

In [None]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
import json
import os

class BinanceDataCollector:
    def __init__(self):
        self.base_url = "https://api.binance.com/api/v3"
        self.session = requests.Session()
        
    def get_klines(self, symbol, interval, start_time, end_time, limit=1000):
        """
        Получает данные свечей с Binance API
        
        Args:
            symbol: торговая пара (например, 'BTCUSDT')
            interval: интервал ('1m', '5m', '1h', '1d')
            start_time: начальное время (timestamp в миллисекундах)
            end_time: конечное время (timestamp в миллисекундах)
            limit: количество свечей за запрос (макс 1000)
        """
        url = f"{self.base_url}/klines"
        params = {
            'symbol': symbol,
            'interval': interval,
            'startTime': start_time,
            'endTime': end_time,
            'limit': limit
        }
        
        try:
            response = self.session.get(url, params=params)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Ошибка при запросе данных: {e}")
            return None
    
    def collect_historical_data(self, symbol, interval, days_back=730):
        """
        Собирает исторические данные за указанный период
        
        Args:
            symbol: торговая пара
            interval: интервал
            days_back: количество дней назад (по умолчанию 730 = 2 года)
        """
        print(f"Начинаем сбор данных для {symbol} с интервалом {interval}")
        
        end_time = datetime.now()
        start_time = end_time - timedelta(days=days_back)
        
        start_timestamp = int(start_time.timestamp() * 1000)
        end_timestamp = int(end_time.timestamp() * 1000)
        
        all_data = []
        current_start = start_timestamp
        
        interval_ms = self._get_interval_ms(interval)
        
        while current_start < end_timestamp:
            current_end = min(current_start + (1000 * interval_ms), end_timestamp)
            
            print(f"Загружаем данные с {datetime.fromtimestamp(current_start/1000)} по {datetime.fromtimestamp(current_end/1000)}")
            
            klines = self.get_klines(symbol, interval, current_start, current_end)
            if klines:
                all_data.extend(klines)
                print(f"Получено {len(klines)} свечей")
            else:
                print("Ошибка при получении данных")
                break
            
            current_start = current_end + interval_ms
            
            time.sleep(0.1)
        
        return all_data
    
    def _get_interval_ms(self, interval):
        """Конвертирует интервал в миллисекунды"""
        intervals = {
            '1m': 60 * 1000,
            '3m': 3 * 60 * 1000,
            '5m': 5 * 60 * 1000,
            '15m': 15 * 60 * 1000,
            '30m': 30 * 60 * 1000,
            '1h': 60 * 60 * 1000,
            '2h': 2 * 60 * 60 * 1000,
            '4h': 4 * 60 * 60 * 1000,
            '6h': 6 * 60 * 60 * 1000,
            '8h': 8 * 60 * 60 * 1000,
            '12h': 12 * 60 * 60 * 1000,
            '1d': 24 * 60 * 60 * 1000,
            '3d': 3 * 24 * 60 * 60 * 1000,
            '1w': 7 * 24 * 60 * 60 * 1000,
            '1M': 30 * 24 * 60 * 60 * 1000
        }
        return intervals.get(interval, 60 * 1000)
    
    def process_data_to_dataframe(self, raw_data):
        """
        Преобразует сырые данные в pandas DataFrame
        """
        if not raw_data:
            return pd.DataFrame()
        
        df = pd.DataFrame(raw_data, columns=[
            'open_time', 'open', 'high', 'low', 'close', 'volume',
            'close_time', 'quote_asset_volume', 'number_of_trades',
            'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
        ])
        
        df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
        df['close_time'] = pd.to_datetime(df['close_time'], unit='ms')
        
        price_volume_columns = ['open', 'high', 'low', 'close', 'volume', 
                               'quote_asset_volume', 'taker_buy_base_asset_volume', 
                               'taker_buy_quote_asset_volume']
        
        for col in price_volume_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        
        df['number_of_trades'] = pd.to_numeric(df['number_of_trades'], errors='coerce')
        
        df = df.drop('ignore', axis=1)
        
        df = df.sort_values('open_time').reset_index(drop=True)
        
        return df
    
    def save_data(self, df, filename):
        """Сохраняет данные в CSV файл"""
        os.makedirs('data', exist_ok=True)
        filepath = f'data/{filename}'
        df.to_csv(filepath, index=False)
        print(f"Данные сохранены в {filepath}")
    
    def load_data(self, filename):
        """Загружает данные из CSV файла"""
        filepath = f'data/{filename}'
        if os.path.exists(filepath):
            df = pd.read_csv(filepath)
            df['open_time'] = pd.to_datetime(df['open_time'])
            df['close_time'] = pd.to_datetime(df['close_time'])
            return df
        else:
            print(f"Файл {filepath} не найден")
            return None

def main():
    collector = BinanceDataCollector()
    
    symbol = 'BTCUSDT'  
    interval = '5m'     
    days_back = 365     # Days back 
    
    print("Начинаем сбор данных...")
    raw_data = collector.collect_historical_data(symbol, interval, days_back)
    
    if raw_data:
        print(f"Собрано {len(raw_data)} свечей")
        
        df = collector.process_data_to_dataframe(raw_data)
        
        print("\nИнформация о данных:")
        print(f"Период: с {df['open_time'].min()} по {df['open_time'].max()}")
        print(f"Количество записей: {len(df)}")
        print(f"Размер данных: {df.shape}")
        
        print("\nПервые 5 записей:")
        print(df.head())
        
        filename = f'{symbol}_{interval}_{days_back}days.csv'
        collector.save_data(df, filename)
        
        print("\nБазовая статистика по ценам:")
        print(df[['open', 'high', 'low', 'close', 'volume']].describe())
    
    else:
        print("Не удалось собрать данные")

def collect_multiple_coins():
    collector = BinanceDataCollector()
    
    symbols = ['BTCUSDT', 'ETHUSDT', 'ADAUSDT', 'DOTUSDT']
    interval = '5m'  
    days_back = 365  
    
    for symbol in symbols:
        print(f"\n{'='*50}")
        print(f"Собираем данные для {symbol}")
        print(f"{'='*50}")
        
        raw_data = collector.collect_historical_data(symbol, interval, days_back)
        
        if raw_data:
            df = collector.process_data_to_dataframe(raw_data)
            filename = f'{symbol}_{interval}_{days_back}days.csv'
            collector.save_data(df, filename)
            
            print(f"✅ Данные для {symbol} сохранены: {len(df)} записей")
        else:
            print(f"❌ Ошибка при сборе данных для {symbol}")
        
        time.sleep(1)

if __name__ == "__main__":
    main()
    
    # Раскомментируйте для сбора данных по нескольким монетам
    # collect_multiple_coins()

Начинаем сбор данных...
Начинаем сбор данных для BTCUSDT с интервалом 5m
Загружаем данные с 2024-07-09 15:38:57.203000 по 2024-07-13 02:58:57.203000
Получено 1000 свечей
Загружаем данные с 2024-07-13 03:03:57.203000 по 2024-07-16 14:23:57.203000
Получено 1000 свечей
Загружаем данные с 2024-07-16 14:28:57.203000 по 2024-07-20 01:48:57.203000
Получено 1000 свечей
Загружаем данные с 2024-07-20 01:53:57.203000 по 2024-07-23 13:13:57.203000
Получено 1000 свечей
Загружаем данные с 2024-07-23 13:18:57.203000 по 2024-07-27 00:38:57.203000
Получено 1000 свечей
Загружаем данные с 2024-07-27 00:43:57.203000 по 2024-07-30 12:03:57.203000
Получено 1000 свечей
Загружаем данные с 2024-07-30 12:08:57.203000 по 2024-08-02 23:28:57.203000
Получено 1000 свечей
Загружаем данные с 2024-08-02 23:33:57.203000 по 2024-08-06 10:53:57.203000
Получено 1000 свечей
Загружаем данные с 2024-08-06 10:58:57.203000 по 2024-08-09 22:18:57.203000
Получено 1000 свечей
Загружаем данные с 2024-08-09 22:23:57.203000 по 2024-

# Applying TA to data

In [3]:
import sys
import os
sys.path.insert(0, "C:/Users/shari/PycharmProjects/StockPrediction")

In [4]:
from graphsParsing import BinanceDataCollector
collector = BinanceDataCollector()

In [6]:
symbol = 'BTCUSDT'  
interval = '5m'     
days_back = 1

raw_data = collector.collect_historical_data(symbol, interval, days_back)
raw_data

[[1752237000000,
  '117751.04000000',
  '117902.31000000',
  '117750.29000000',
  '117870.16000000',
  '90.17243000',
  1752237299999,
  '10623686.63666800',
  10477,
  '69.13522000',
  '8145141.50692370',
  '0'],
 [1752237300000,
  '117870.16000000',
  '117873.18000000',
  '117803.55000000',
  '117819.76000000',
  '72.71120000',
  1752237599999,
  '8568062.83274400',
  9249,
  '51.62790000',
  '6083584.28564520',
  '0'],
 [1752237600000,
  '117819.75000000',
  '117895.18000000',
  '117750.01000000',
  '117762.86000000',
  '67.47368000',
  1752237899999,
  '7950193.55067780',
  8869,
  '29.72966000',
  '3502933.18625280',
  '0'],
 [1752237900000,
  '117762.86000000',
  '117819.71000000',
  '117741.94000000',
  '117752.01000000',
  '56.48698000',
  1752238199999,
  '6652399.04821020',
  9674,
  '36.82332000',
  '4336645.63188610',
  '0'],
 [1752238200000,
  '117752.00000000',
  '117869.21000000',
  '117752.00000000',
  '117790.66000000',
  '95.91928000',
  1752238499999,
  '11299060.243

In [11]:
raw_data.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume
0,2024-07-09 12:40:00,57269.87,57340.28,57269.86,57312.01,199.42157,2024-07-09 12:44:59.999,11427200.0,8996,97.54525,5589154.0
1,2024-07-09 12:45:00,57312.01,57473.8,57304.0,57471.38,282.96334,2024-07-09 12:49:59.999,16234270.0,7193,157.24835,9022225.0
2,2024-07-09 12:50:00,57471.38,57560.0,57394.0,57428.15,339.54283,2024-07-09 12:54:59.999,19514320.0,9051,167.57547,9632002.0
3,2024-07-09 12:55:00,57428.15,57428.15,57307.2,57338.01,127.04527,2024-07-09 12:59:59.999,7285382.0,6138,69.1135,3963107.0
4,2024-07-09 13:00:00,57338.01,57378.0,57280.83,57290.41,153.66697,2024-07-09 13:04:59.999,8811832.0,6321,81.70153,4684797.0


In [37]:
from finta import TA
import pandas as pd
import numpy as np
raw_data.shape[0]

105015

In [20]:
check = TA.SMA(raw_data, 42)
check.isna().sum()

np.int64(41)

In [None]:
# All indicators

# * Simple Moving Average 'SMA'
# * Simple Moving Median 'SMM'
# * Smoothed Simple Moving Average 'SSMA'
# * Exponential Moving Average 'EMA'
# * Double Exponential Moving Average 'DEMA'
# * Triple Exponential Moving Average 'TEMA'
# * Triangular Moving Average 'TRIMA'
# * Triple Exponential Moving Average Oscillator 'TRIX'
# * Volume Adjusted Moving Average 'VAMA'
# * Kaufman Efficiency Indicator 'ER'
# * Kaufman's Adaptive Moving Average 'KAMA'
# * Zero Lag Exponential Moving Average 'ZLEMA'
# * Weighted Moving Average 'WMA'
# * Hull Moving Average 'HMA'
# * Elastic Volume Moving Average 'EVWMA'
# * Volume Weighted Average Price 'VWAP'
# * Smoothed Moving Average 'SMMA'
# * Fractal Adaptive Moving Average 'FRAMA'
# * Moving Average Convergence Divergence 'MACD'
# * Percentage Price Oscillator 'PPO'
# * Volume-Weighted MACD 'VW_MACD'
# * Elastic-Volume weighted MACD 'EV_MACD'
# * Market Momentum 'MOM'
# * Rate-of-Change 'ROC'
# * Relative Strenght Index 'RSI'
# * Inverse Fisher Transform RSI 'IFT_RSI'
# * True Range 'TR'
# * Average True Range 'ATR'
# * Stop-and-Reverse 'SAR'
# * Bollinger Bands 'BBANDS'
# * Bollinger Bands Width 'BBWIDTH'
# * Momentum Breakout Bands 'MOBO'
# * Percent B 'PERCENT_B'
# * Keltner Channels 'KC'
# * Donchian Channel 'DO'
# * Directional Movement Indicator 'DMI'
# * Average Directional Index 'ADX'
# * Pivot Points 'PIVOT'
# * Fibonacci Pivot Points 'PIVOT_FIB'
# * Stochastic Oscillator %K 'STOCH'
# * Stochastic oscillator %D 'STOCHD'
# * Stochastic RSI 'STOCHRSI'
# * Williams %R 'WILLIAMS'
# * Ultimate Oscillator 'UO'
# * Awesome Oscillator 'AO'
# * Mass Index 'MI'
# * Vortex Indicator 'VORTEX'
# * Know Sure Thing 'KST'
# * True Strength Index 'TSI'
# * Typical Price 'TP'
# * Accumulation-Distribution Line 'ADL'
# * Chaikin Oscillator 'CHAIKIN'
# * Money Flow Index 'MFI'
# * On Balance Volume 'OBV'
# * Weighter OBV 'WOBV'
# * Volume Zone Oscillator 'VZO'
# * Price Zone Oscillator 'PZO'
# * Elder's Force Index 'EFI'
# * Cummulative Force Index 'CFI'
# * Bull power and Bear Power 'EBBP'
# * Ease of Movement 'EMV'
# * Commodity Channel Index 'CCI'
# * Coppock Curve 'COPP'
# * Buy and Sell Pressure 'BASP'
# * Normalized BASP 'BASPN'
# * Chande Momentum Oscillator 'CMO'
# * Chandelier Exit 'CHANDELIER'
# * Qstick 'QSTICK'
# * Twiggs Money Index 'TMF'
# * Wave Trend Oscillator 'WTO'
# * Fisher Transform 'FISH'
# * Ichimoku Cloud 'ICHIMOKU'
# * Adaptive Price Zone 'APZ'
# * Squeeze Momentum Indicator 'SQZMI'
# * Volume Price Trend 'VPT'
# * Finite Volume Element 'FVE'
# * Volume Flow Indicator 'VFI'
# * Moving Standard deviation 'MSD'
# * Schaff Trend Cycle 'STC'
# * Mark Whistler's WAVE PM 'WAVEPM'

def add_all_indicators_finta(data) -> pd.DataFrame:
        """
        Добавляет все популярные индикаторы используя finta
        """
            
        result_df = data.copy()
        
        # Скользящие средние
        result_df['SMA_20'] = TA.SMA(result_df, 20)
        result_df['EMA_12'] = TA.EMA(result_df, 12)
        
        # Momentum индикаторы
        result_df['RSI'] = TA.RSI(result_df)
        result_df['MACD'] = TA.MACD(result_df)['MACD']
        result_df['MACD_signal'] = TA.MACD(result_df)['SIGNAL']
        result_df['Stoch_K'] = TA.STOCH(result_df)
        result_df['Williams_R'] = TA.WILLIAMS(result_df)
        
        # Volatility индикаторы
        result_df['ATR'] = TA.ATR(result_df)
        bb = TA.BBANDS(result_df)
        result_df['BB_upper'] = bb['BB_UPPER']
        result_df['BB_middle'] = bb['BB_MIDDLE']
        result_df['BB_lower'] = bb['BB_LOWER']
        
        # Volume индикаторы
        result_df['OBV'] = TA.OBV(result_df)
        
        return result_df

In [None]:
processed = add_all_indicators_finta(raw_data)

In [25]:
processed.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,...,RSI,MACD,MACD_signal,Stoch_K,Williams_R,ATR,BB_upper,BB_middle,BB_lower,OBV
0,2024-07-09 12:40:00,57269.87,57340.28,57269.86,57312.01,199.42157,2024-07-09 12:44:59.999,11427200.0,8996,97.54525,...,,0.0,0.0,,,,,,,
1,2024-07-09 12:45:00,57312.01,57473.8,57304.0,57471.38,282.96334,2024-07-09 12:49:59.999,16234270.0,7193,157.24835,...,100.0,3.575609,1.986449,,,,,,,282.96334
2,2024-07-09 12:50:00,57471.38,57560.0,57394.0,57428.15,339.54283,2024-07-09 12:54:59.999,19514320.0,9051,167.57547,...,77.39211,3.215476,2.490149,,,,,,,-56.57949
3,2024-07-09 12:55:00,57428.15,57428.15,57307.2,57338.01,127.04527,2024-07-09 12:59:59.999,7285382.0,6138,69.1135,...,51.332439,-0.281593,1.551212,,,,,,,-183.62476
4,2024-07-09 13:00:00,57338.01,57378.0,57280.83,57290.41,153.66697,2024-07-09 13:04:59.999,8811832.0,6321,81.70153,...,43.082556,-4.070034,-0.120982,,,,,,,-337.29173


In [31]:
processed.isna().sum()

open_time                         0
open                              0
high                              0
low                               0
close                             0
volume                            0
close_time                        0
quote_asset_volume                0
number_of_trades                  0
taker_buy_base_asset_volume       0
taker_buy_quote_asset_volume      0
SMA_20                           19
EMA_12                            0
RSI                               1
MACD                              0
MACD_signal                       0
Stoch_K                          13
Williams_R                       13
ATR                              13
BB_upper                         19
BB_middle                        19
BB_lower                         19
OBV                             229
dtype: int64

In [None]:
processed[processed['OBV'].isna() == True]

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,...,RSI,MACD,MACD_signal,Stoch_K,Williams_R,ATR,BB_upper,BB_middle,BB_lower,OBV
0,2024-07-09 12:40:00,57269.87,57340.28,57269.86,57312.01,199.42157,2024-07-09 12:44:59.999,1.142720e+07,8996,97.54525,...,,0.000000,0.000000,,,,,,,
1055,2024-07-13 04:40:00,57779.26,57779.26,57779.25,57779.26,6.76736,2024-07-13 04:44:59.999,3.910130e+05,1062,4.60155,...,40.305935,-23.878766,-17.135502,6.981216,-93.018784,41.585000,57932.799927,57835.9100,57739.020073,
1260,2024-07-13 21:45:00,58644.84,58644.85,58644.84,58644.85,4.46686,2024-07-13 21:49:59.999,2.619583e+05,708,2.50864,...,52.228154,6.884237,0.842229,58.954545,-41.045455,33.312857,58692.880689,58619.8135,58546.746311,
1670,2024-07-15 07:55:00,62840.11,62893.02,62825.40,62840.12,68.68153,2024-07-15 07:59:59.999,4.316560e+06,3827,33.63522,...,50.516389,35.330387,50.442306,27.846154,-72.153846,108.736429,63000.441435,62880.4395,62760.437565,
1868,2024-07-16 00:25:00,64576.10,64620.00,64576.10,64576.11,138.09278,2024-07-16 00:29:59.999,8.920337e+06,8311,92.66467,...,54.438168,158.580253,204.880345,19.464199,-80.535801,171.719286,64971.706096,64608.3310,64244.955904,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104138,2025-07-06 11:30:00,107993.28,107993.29,107993.28,107993.28,4.17248,2025-07-06 11:34:59.999,4.505998e+05,867,3.22161,...,52.181051,-1.338280,-4.703879,28.908751,-71.091249,28.417143,108058.210463,107966.8630,107875.515537,
104146,2025-07-06 12:10:00,108053.99,108054.00,108053.99,108054.00,4.23537,2025-07-06 12:14:59.999,4.576487e+05,724,2.31107,...,60.135234,20.447640,12.849633,74.382005,-25.617995,15.439286,108087.608647,108016.3390,107945.069353,
104569,2025-07-07 23:25:00,108179.76,108179.77,108179.76,108179.76,7.06673,2025-07-07 23:29:59.999,7.644772e+05,666,2.78064,...,58.637415,37.090276,22.290692,78.304106,-21.695894,51.567857,108232.692236,108085.4850,107938.277764,
104592,2025-07-08 01:20:00,108019.24,108096.00,108019.23,108019.24,17.28469,2025-07-08 01:24:59.999,1.867818e+06,5797,10.25004,...,38.484733,-12.640261,31.410838,0.001977,-99.998023,125.846429,108497.954523,108268.3505,108038.746477,


In [41]:
processed.shape[0]

105015

In [None]:
ids = np.arange(processed.shape[0])[processed['OBV'].isna() == True]
prev_ids = ids - 1
prev_ids[0] = 0
next_ids = ids + 1

In [54]:
ids.shape, prev_ids.shape, next_ids.shape

((229,), (229,), (229,))

In [57]:
ids[:10], prev_ids[:10], next_ids[:10]

(array([   0, 1055, 1260, 1670, 1868, 2126, 2195, 3721, 4052, 4182]),
 array([   0, 1054, 1259, 1669, 1867, 2125, 2194, 3720, 4051, 4181]),
 array([   1, 1056, 1261, 1671, 1869, 2127, 2196, 3722, 4053, 4183]))

In [71]:
processed['OBV'].iloc[prev_ids]

0                  NaN
1054       3580.576210
1259       5073.810690
1669      11154.780730
1867      16115.172660
              ...     
104137    20601.865536
104145    20599.669736
104568    21019.829556
104591    21049.859726
104861    21106.825026
Name: OBV, Length: 229, dtype: float64

In [64]:
processed['OBV'].iloc[next_ids]

1           282.963340
1056       3611.105800
1261       5083.032010
1671      11223.904030
1869      16016.767220
              ...     
104139    20606.084886
104147    20614.538746
104570    21036.865826
104593    21070.268606
104863    21089.247946
Name: OBV, Length: 229, dtype: float64

In [69]:
avg = (processed['OBV'].iloc[prev_ids].values + processed['OBV'].iloc[next_ids].values) / 2
processed.loc[ids, 'OBV'] = avg

In [70]:
processed['OBV'].isna().sum()

np.int64(9)

In [75]:
na_mx = max(processed.isna().sum())
na_mx

19

In [79]:
processed.iloc[na_mx + 1:].fillna(method='bfill')

  processed.iloc[na_mx + 1:].fillna(method='bfill')


Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,...,RSI,MACD,MACD_signal,Stoch_K,Williams_R,ATR,BB_upper,BB_middle,BB_lower,OBV
20,2024-07-09 14:20:00,57618.01,57746.76,57279.99,57424.18,551.47027,2024-07-09 14:24:59.999,3.172668e+07,25331,259.98718,...,52.863768,40.685335,10.932361,41.630267,-58.369733,261.870000,57672.657408,57327.5270,56982.396592,-1777.869360
21,2024-07-09 14:25:00,57424.18,57646.00,57283.29,57590.64,245.66025,2024-07-09 14:29:59.999,1.411941e+07,13029,130.75776,...,57.748183,50.351471,18.874788,55.374910,-44.625090,281.707857,57692.910545,57333.4900,56974.069455,-1532.209110
22,2024-07-09 14:30:00,57590.63,57668.45,57471.98,57657.12,212.38178,2024-07-09 14:34:59.999,1.223064e+07,13551,102.08074,...,59.550919,61.767608,27.504291,60.864180,-39.135820,288.143571,57730.677533,57344.9385,56959.199467,-1319.827330
23,2024-07-09 14:35:00,57657.12,57696.14,57560.00,57640.00,117.20424,2024-07-09 14:39:59.999,6.755750e+06,9152,58.29276,...,58.854511,68.728266,35.788206,59.450578,-40.549422,288.855000,57767.656951,57360.0380,56952.419049,-1437.031570
24,2024-07-09 14:40:00,57639.99,57697.80,57545.68,57631.60,78.23097,2024-07-09 14:44:59.999,4.510249e+06,6176,45.11485,...,58.493067,72.749152,43.208428,58.756988,-41.243012,286.812857,57800.692410,57377.0975,56953.502590,-1515.262540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105010,2025-07-09 12:15:00,109388.01,109455.00,109373.64,109455.00,70.32726,2025-07-09 12:19:59.999,7.694696e+06,9147,35.95689,...,81.000254,131.026739,95.814464,100.000000,-0.000000,84.287857,109369.834385,109044.8920,108719.949615,21528.945046
105011,2025-07-09 12:20:00,109455.00,109488.22,109400.00,109488.21,62.11457,2025-07-09 12:24:59.999,6.798150e+06,7155,34.03593,...,81.864314,147.465812,106.144734,99.998210,-0.001790,88.629286,109444.912415,109076.0560,108707.199585,21591.059616
105012,2025-07-09 12:25:00,109488.22,109500.00,109445.92,109493.99,86.02215,2025-07-09 12:29:59.999,9.417819e+06,6549,40.91945,...,82.017594,159.125989,116.740985,98.946150,-1.053850,89.479286,109507.845283,109106.6590,108705.472717,21677.081766
105013,2025-07-09 12:30:00,109493.98,109498.76,109444.23,109498.72,53.02305,2025-07-09 12:34:59.999,5.805158e+06,7931,24.34970,...,82.150547,166.825376,126.757863,99.775553,-0.224447,88.120000,109557.838999,109138.8835,108719.928001,21730.104816


In [80]:
final_data = processed.iloc[na_mx + 1:].bfill()

In [81]:
final_data.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,...,RSI,MACD,MACD_signal,Stoch_K,Williams_R,ATR,BB_upper,BB_middle,BB_lower,OBV
20,2024-07-09 14:20:00,57618.01,57746.76,57279.99,57424.18,551.47027,2024-07-09 14:24:59.999,31726680.0,25331,259.98718,...,52.863768,40.685335,10.932361,41.630267,-58.369733,261.87,57672.657408,57327.527,56982.396592,-1777.86936
21,2024-07-09 14:25:00,57424.18,57646.0,57283.29,57590.64,245.66025,2024-07-09 14:29:59.999,14119410.0,13029,130.75776,...,57.748183,50.351471,18.874788,55.37491,-44.62509,281.707857,57692.910545,57333.49,56974.069455,-1532.20911
22,2024-07-09 14:30:00,57590.63,57668.45,57471.98,57657.12,212.38178,2024-07-09 14:34:59.999,12230640.0,13551,102.08074,...,59.550919,61.767608,27.504291,60.86418,-39.13582,288.143571,57730.677533,57344.9385,56959.199467,-1319.82733
23,2024-07-09 14:35:00,57657.12,57696.14,57560.0,57640.0,117.20424,2024-07-09 14:39:59.999,6755750.0,9152,58.29276,...,58.854511,68.728266,35.788206,59.450578,-40.549422,288.855,57767.656951,57360.038,56952.419049,-1437.03157
24,2024-07-09 14:40:00,57639.99,57697.8,57545.68,57631.6,78.23097,2024-07-09 14:44:59.999,4510249.0,6176,45.11485,...,58.493067,72.749152,43.208428,58.756988,-41.243012,286.812857,57800.69241,57377.0975,56953.50259,-1515.26254
