## Data Preparation

The first step is to get the data. 

In this example, let us get the data from binance.com. We can use the binance python api.

https://python-binance.readthedocs.io/en/latest/

In [6]:
import pandas as pd
import numpy as np

In [1]:
from binance.client import Client
client = Client()

In [2]:
info = client.get_symbol_info('BTCUSDT')
info

{'symbol': 'BTCUSDT',
 'status': 'TRADING',
 'baseAsset': 'BTC',
 'baseAssetPrecision': 8,
 'quoteAsset': 'USDT',
 'quotePrecision': 8,
 'quoteAssetPrecision': 8,
 'baseCommissionPrecision': 8,
 'quoteCommissionPrecision': 8,
 'orderTypes': ['LIMIT',
  'LIMIT_MAKER',
  'MARKET',
  'STOP_LOSS_LIMIT',
  'TAKE_PROFIT_LIMIT'],
 'icebergAllowed': True,
 'ocoAllowed': True,
 'quoteOrderQtyMarketAllowed': True,
 'isSpotTradingAllowed': True,
 'isMarginTradingAllowed': True,
 'filters': [{'filterType': 'PRICE_FILTER',
   'minPrice': '0.01000000',
   'maxPrice': '1000000.00000000',
   'tickSize': '0.01000000'},
  {'filterType': 'PERCENT_PRICE',
   'multiplierUp': '5',
   'multiplierDown': '0.2',
   'avgPriceMins': 5},
  {'filterType': 'LOT_SIZE',
   'minQty': '0.00001000',
   'maxQty': '9000.00000000',
   'stepSize': '0.00001000'},
  {'filterType': 'MIN_NOTIONAL',
   'minNotional': '10.00000000',
   'applyToMarket': True,
   'avgPriceMins': 5},
  {'filterType': 'ICEBERG_PARTS', 'limit': 10},
  

In [9]:
klines = client.get_historical_klines("BTCUSDT", Client.KLINE_INTERVAL_1DAY, "1 Jan, 2020")
len(klines)

690

In [10]:
bitcoin_df = pd.DataFrame(klines)
bitcoin_df.columns = ["open_time", "open", "high", "low", "close", "volume", "close_time", "quote", "no_trades", "base_buy", "quote_buy", "ignore"]
bitcoin_df

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote,no_trades,base_buy,quote_buy,ignore
0,1577836800000,7195.24000000,7255.00000000,7175.15000000,7200.85000000,16792.38816500,1577923199999,121214452.11606228,194010,8946.95553500,64597785.21233434,0
1,1577923200000,7200.77000000,7212.50000000,6924.74000000,6965.71000000,31951.48393200,1578009599999,225982341.30114030,302667,15141.61134000,107060829.07806464,0
2,1578009600000,6965.49000000,7405.00000000,6871.04000000,7344.96000000,68428.50045100,1578095999999,495098582.96203543,519854,35595.49627300,257713113.85172859,0
3,1578096000000,7345.00000000,7404.00000000,7272.21000000,7354.11000000,29987.97497700,1578182399999,219874240.93994811,279370,16369.38224800,120035111.72407165,0
4,1578182400000,7354.19000000,7495.00000000,7318.00000000,7358.75000000,38331.08560400,1578268799999,284848683.78917621,329209,19455.36956400,144600094.38965074,0
...,...,...,...,...,...,...,...,...,...,...,...,...
685,1637020800000,63606.73000000,63617.31000000,58574.07000000,60058.87000000,77455.15609000,1637107199999,4705744041.29964738,2353681,36375.32415000,2209839153.36972330,0
686,1637107200000,60058.87000000,60840.23000000,58373.00000000,60344.87000000,46289.38491000,1637193599999,2767802055.20042280,1471305,22545.24375000,1348537946.95269170,0
687,1637193600000,60344.86000000,60976.00000000,56474.26000000,56891.62000000,62146.99931000,1637279999999,3644824179.14485240,1960419,29742.20507000,1745202989.74238890,0
688,1637280000000,56891.62000000,58320.00000000,55600.00000000,58052.24000000,50715.88726000,1637366399999,2896189975.91077430,1560459,24072.28575000,1374771758.23673160,0


In [11]:
import datetime
bitcoin_df["date"] = bitcoin_df["open_time"].apply(lambda x: datetime.datetime.fromtimestamp(x/1000))
bitcoin_df

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote,no_trades,base_buy,quote_buy,ignore,date
0,1577836800000,7195.24000000,7255.00000000,7175.15000000,7200.85000000,16792.38816500,1577923199999,121214452.11606228,194010,8946.95553500,64597785.21233434,0,2020-01-01 05:30:00
1,1577923200000,7200.77000000,7212.50000000,6924.74000000,6965.71000000,31951.48393200,1578009599999,225982341.30114030,302667,15141.61134000,107060829.07806464,0,2020-01-02 05:30:00
2,1578009600000,6965.49000000,7405.00000000,6871.04000000,7344.96000000,68428.50045100,1578095999999,495098582.96203543,519854,35595.49627300,257713113.85172859,0,2020-01-03 05:30:00
3,1578096000000,7345.00000000,7404.00000000,7272.21000000,7354.11000000,29987.97497700,1578182399999,219874240.93994811,279370,16369.38224800,120035111.72407165,0,2020-01-04 05:30:00
4,1578182400000,7354.19000000,7495.00000000,7318.00000000,7358.75000000,38331.08560400,1578268799999,284848683.78917621,329209,19455.36956400,144600094.38965074,0,2020-01-05 05:30:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,1637020800000,63606.73000000,63617.31000000,58574.07000000,60058.87000000,77455.15609000,1637107199999,4705744041.29964738,2353681,36375.32415000,2209839153.36972330,0,2021-11-16 05:30:00
686,1637107200000,60058.87000000,60840.23000000,58373.00000000,60344.87000000,46289.38491000,1637193599999,2767802055.20042280,1471305,22545.24375000,1348537946.95269170,0,2021-11-17 05:30:00
687,1637193600000,60344.86000000,60976.00000000,56474.26000000,56891.62000000,62146.99931000,1637279999999,3644824179.14485240,1960419,29742.20507000,1745202989.74238890,0,2021-11-18 05:30:00
688,1637280000000,56891.62000000,58320.00000000,55600.00000000,58052.24000000,50715.88726000,1637366399999,2896189975.91077430,1560459,24072.28575000,1374771758.23673160,0,2021-11-19 05:30:00


In [12]:
bitcoin_df.to_csv("bitcoin_daily_prices.csv", index=False)