<a href="https://colab.research.google.com/github/kennyxu256/investigatingCryptoPumpAndDumps/blob/main/april2021DataExtraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.flush_and_unmount()
drive.mount('/content/gdrive')
%cd /content/gdrive/MyDrive/Senior Year/busi 496/dataAnalysis

Drive not mounted, so nothing to flush and unmount.
Mounted at /content/gdrive
/content/gdrive/MyDrive/Senior Year/IRP/IRPAnalysis


In [None]:
!pip install ccxt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ccxt
  Downloading ccxt-3.0.54-py2.py3-none-any.whl (3.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m45.3 MB/s[0m eta [36m0:00:00[0m
Collecting aiodns>=1.1.1
  Downloading aiodns-3.0.0-py3-none-any.whl (5.0 kB)
Collecting aiohttp>=3.8
  Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m50.6 MB/s[0m eta [36m0:00:00[0m
Collecting yarl>=1.7.2
  Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.6/264.6 KB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pycares>=4.0.0
  Downloading pycares-4.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (288 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [None]:
import ccxt
import pandas as pd
import time
import os

In [None]:
def create_ohlcv_df(data):
    header = ['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume']
    df = pd.DataFrame(data, columns=header)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='ms', origin='unix')  # convert timestamp to datetime
    return df

In [None]:
# Pulls the raw data from exchanges. Returns list of fetched symbols.
# -- EXAMPLE INPUT --
# exchange = 'binance'
# from_date = '2019-11-20 00:00:00'
# number_candles = 240 - number of candles
# candle_size = '12h' - candlesticks
# f_path = '../data' - CSV OHLCV file path for output
def pull_data(exchange, from_date, n_candles, c_size, f_path, skip=False):
    count = 1
    millisec = 1000
    hold = 1

    missing_symbols = []

    # -- create a folder --
    newpath = f_path + '/' + exchange + '/'
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    # -- load exchange --
    exc_instance = getattr(ccxt, exchange)()
    exc_instance.load_markets()
    from_timestamp = exc_instance.parse8601(from_date)

    # -- pull ohlcv --
    for symbol in exc_instance.symbols:
        for attempt in range(2):  # 3 attempts max
            try:
                print('Pulling:', exchange, ':', symbol, '[{}/{}]'.format(count, len(exc_instance.symbols)))
                data = exc_instance.fetch_ohlcv(symbol, c_size, from_timestamp, n_candles)

                # if < n_candles returned, skip this pair
                if len(data) < n_candles and skip is True:
                    continue

                # -- create DF --
                df = create_ohlcv_df(data)

                # -- save CSV --
                symbol = symbol.replace("/", "-")
                filename = newpath + '{}_{}_[{}]-TO-[{}].csv'.format(exchange, symbol, df['Timestamp'].iloc[0],
                                                                     df['Timestamp'].iloc[-1])
                filename = filename.replace(":", ".")
                df.to_csv(filename)

            except (ccxt.ExchangeError, ccxt.AuthenticationError, ccxt.ExchangeNotAvailable, ccxt.RequestTimeout,
                    IndexError) as error:
                print('Got an error', type(error).__name__, error.args, ', retrying in', hold, 'seconds...')
                time.sleep(hold)
            else:  # if no error, proceed to next iteration
                break
        else:  # we failed all attempts
            print('All attempts failed, skipping:', symbol)
            missing_symbols.append(symbol)
            continue

        count += 1
        # -- wait for rate limit --
        time.sleep((exc_instance.rateLimit / millisec) + 5)  # rate limit +5 seconds to just to be safe

    # print out any symbols we could not obtain
    if len(missing_symbols) > 0:
        print('Unable to obtain:', missing_symbols)

    return missing_symbols

# Specify the exchanges and from date and other parameters here.
# exchanges = 'binance'
# from_date = '2019-04-20 00:00:00'
# number_candles = 240 - number of candles
# candle_size = '1h' - candlesticks
# f_path = '../data' - CSV OHLCV file path for output

In [None]:
from_date = '2021-04-01 00:00:00'
localFolder = 'freqPumps'
numberCandles = 480

In [None]:
pull_data('bittrex', from_date, numberCandles, '1h', localFolder)

Pulling: bittrex : 1ECO/BTC [1/941]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
Pulling: bittrex : 1ECO-BTC [1/941]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
All attempts failed, skipping: 1ECO-BTC
Pulling: bittrex : 1ECO/USDT [1/941]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
Pulling: bittrex : 1ECO-USDT [1/941]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
All attempts failed, skipping: 1ECO-USDT
Pulling: bittrex : 1INCH/BTC [1/941]
Pulling: bittrex : 1INCH/ETH [2/941]
Pulling: bittrex : 1INCH/USD [3/941]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
Pulling: bittrex : 1INCH-USD [3/941]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
All attempts failed, skipping: 

['1ECO-BTC',
 '1ECO-USDT',
 '1INCH-USD',
 '1PECO-BTC',
 '1PECO-USDT',
 'ACH-USD',
 'ADA-USDC',
 'AGV-USDT',
 'AIN-USDT',
 'AKT-BTC',
 'AKT-USDT',
 'AKTIO-USDT',
 'ALTA-USDT',
 'AMP-ETH',
 'AMP-USD',
 'AMP-USDT',
 'ANKR-USD',
 'ANT-USD',
 'ANTE-USDT',
 'APE-USD',
 'APE-USDT',
 'API3-USDT',
 'APXP-USDT',
 'AR-USD',
 'ARDX-BTC',
 'ARDX-ETH',
 'ARDX-USDT',
 'ARIA20-BTC',
 'ARTIC-USDT',
 'ARTII-BTC',
 'ARV-USDT',
 'ARW-USDT',
 'ASM-USDT',
 'ATRI-USDT',
 'ATTR-USDT',
 'AUDT-USDT',
 'AVAX-USD',
 'AVT-ETH',
 'AVT-USDT',
 'AXS-USD',
 'AXS-USDT',
 'B2M-USDT',
 'BAAS-USDT',
 'BADGER-USD',
 'BAX-USDT',
 'BBF-USDT',
 'BEE-USDT',
 'BERRY-USDT',
 'BFC-BTC',
 'BIOT-USDT',
 'BIST-USDT',
 'BITCI-USDT',
 'BKR-USDT',
 'BLOCK-USDT',
 'BMP-BTC',
 'BMP-USDT',
 'BNA-USDT',
 'BNT-USD',
 'BOND-ETH',
 'BOND-USDT',
 'BOSON-BTC',
 'BOSON-USDT',
 'BST-USDT',
 'BTBS-USDT',
 'BTC-USDC',
 'BTD-USDT',
 'BTRIPS-BTC',
 'BTRST-USD',
 'CADX-BTC',
 'CAIZ-USDT',
 'CAST-USDT',
 'CBANK-USDT',
 'CDEX-USDT',
 'CEDS-USDT',
 'CEL-

In [None]:
pull_data('kraken', from_date, numberCandles, '1h', localFolder)

Pulling: kraken : 1INCH/EUR [1/645]
Pulling: kraken : 1INCH/USD [2/645]
Pulling: kraken : AAVE/BTC [3/645]
Pulling: kraken : AAVE/ETH [4/645]
Pulling: kraken : AAVE/EUR [5/645]
Pulling: kraken : AAVE/GBP [6/645]
Pulling: kraken : AAVE/USD [7/645]
Pulling: kraken : ACA/EUR [8/645]
Pulling: kraken : ACA/USD [9/645]
Pulling: kraken : ACH/EUR [10/645]
Pulling: kraken : ACH/USD [11/645]
Pulling: kraken : ADA/AUD [12/645]
Pulling: kraken : ADA/BTC [13/645]
Pulling: kraken : ADA/ETH [14/645]
Pulling: kraken : ADA/EUR [15/645]
Pulling: kraken : ADA/GBP [16/645]
Pulling: kraken : ADA/USD [17/645]
Pulling: kraken : ADA/USDT [18/645]
Pulling: kraken : ADX/EUR [19/645]
Pulling: kraken : ADX/USD [20/645]
Pulling: kraken : AGLD/EUR [21/645]
Pulling: kraken : AGLD/USD [22/645]
Pulling: kraken : AIR/EUR [23/645]
Pulling: kraken : AIR/USD [24/645]
Pulling: kraken : AKT/EUR [25/645]
Pulling: kraken : AKT/USD [26/645]
Pulling: kraken : ALCX/EUR [27/645]
Pulling: kraken : ALCX/USD [28/645]
Pulling: kraken

['BTC-AED', 'ETH-AED', 'USD-AED']

In [None]:
pull_data('kucoin', from_date, numberCandles, '1h', localFolder)

Pulling: kucoin : 1EARTH/USDT [1/1297]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
Pulling: kucoin : 1EARTH-USDT [1/1297]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
All attempts failed, skipping: 1EARTH-USDT
Pulling: kucoin : 1INCH/USDT [1/1297]
Pulling: kucoin : 2CRZ/USDT [2/1297]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
Pulling: kucoin : 2CRZ-USDT [2/1297]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
All attempts failed, skipping: 2CRZ-USDT
Pulling: kucoin : AAVE/BTC [2/1297]
Pulling: kucoin : AAVE/KCS [3/1297]
Pulling: kucoin : AAVE/USDT [4/1297]
Pulling: kucoin : AAVE3L/USDT [5/1297]
Got an error IndexError ('single positional indexer is out-of-bounds',) , retrying in 1 seconds...
Pulling: kucoin : AAVE3L-USDT [5/1297]
Got an error IndexError ('single positional 

RateLimitExceeded: ignored

In [None]:
pull_data('lbank', from_date, numberCandles, '1h', localFolder)

Pulling: lbank : 1INCH/USDT [1/957]
Pulling: lbank : 1INCH3L/USDT [2/957]
Pulling: lbank : 1INCH3S/USDT [3/957]
Pulling: lbank : 1INCH5L/USDT [4/957]
Pulling: lbank : 1INCH5S/USDT [5/957]
Pulling: lbank : 3ULL/USDT [6/957]
Pulling: lbank : 4JNET/USDT [7/957]
Pulling: lbank : 7PXS/USDT [8/957]
Pulling: lbank : A1A/USDT [9/957]
Pulling: lbank : AAVE/USDT [10/957]
Pulling: lbank : AAVE3L/USDT [11/957]
Pulling: lbank : AAVE3S/USDT [12/957]
Pulling: lbank : ABEY/USDT [13/957]
Pulling: lbank : ACA/USDT [14/957]
Pulling: lbank : ACS/USDT [15/957]
Pulling: lbank : ADA/USDT [16/957]
Pulling: lbank : ADA3L/USDT [17/957]
Pulling: lbank : ADA3S/USDT [18/957]
Pulling: lbank : AGIX/USDT [19/957]
Pulling: lbank : AGIX3L/USDT [20/957]
Pulling: lbank : AGIX3S/USDT [21/957]
Pulling: lbank : AGLD/USDT [22/957]
Pulling: lbank : AGLD3L/USDT [23/957]
Pulling: lbank : AGLD3S/USDT [24/957]
Pulling: lbank : ALGO/USDT [25/957]
Pulling: lbank : ALGO3L/USDT [26/957]
Pulling: lbank : ALGO3S/USDT [27/957]
Pulling: 

[]