# Get cryptocurrency data

In [1]:
from yahooquery import Screener
import yfinance as yf
from pathlib import Path 

### Get data

In [2]:
# Query Yahoo Finance's predefined 'all_cryptocurrencies_us' screener
# (count=250 quotes requested) and keep only each quote's ticker symbol.
screener = Screener()
screener_result = screener.get_screeners(['all_cryptocurrencies_us'], count=250)
quotes = screener_result['all_cryptocurrencies_us']['quotes']

symbols = [quote['symbol'] for quote in quotes]

print(f'# of cryptocurrencies: {len(symbols)}')

# of cryptocurrencies: 250


In [3]:
# Download two years of hourly bars for every screened symbol in one request.
df_raw = yf.download(
    tickers=symbols,
    period="2y",
    interval="1h",
)

[*********************100%***********************]  250 of 250 completed


### Data engineering

In [4]:
# Forward-fill gaps of at most 10 consecutive rows, then drop every column
# that still contains a NaN.
df = df_raw.ffill(limit=10).dropna(axis=1)

# Remove the duplicated 'Adj Close' column. errors='ignore' keeps this cell
# working when the download has no 'Adj Close' column at all (e.g. when
# yfinance returns auto-adjusted prices).
df = df.drop(columns=['Adj Close'], errors='ignore')

# Select the top 10 cryptocurrencies by mean volume (highest first)
tickers_hg_volume = df['Volume'].mean().nlargest(n=10).index

In [5]:
# Put the ticker on the outer column level and the price field on the inner
# one. swaplevel(axis=1) works directly on the columns, so the frame does not
# have to be transposed twice.
# NOTE: this cell rebinds `df`, so it is not idempotent - re-run the previous
# cell before running this one again.
df = df.swaplevel(0, 1, axis=1)

# Keep only the 10 highest-volume tickers, in the order of `tickers_hg_volume`
df = df[tickers_hg_volume]

In [6]:
# Preview the first three rows of the reordered frame
df.head(n=3)

Unnamed: 0_level_0,USDT-USD,USDT-USD,USDT-USD,USDT-USD,USDT-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,...,ADA-USD,ADA-USD,ADA-USD,ADA-USD,ADA-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD
Unnamed: 0_level_1,Close,High,Low,Open,Volume,Close,High,Low,Open,Volume,...,Close,High,Low,Open,Volume,Close,High,Low,Open,Volume
2020-04-07 09:00:00+00:00,1.000498,1.002541,1.000122,1.001162,0.0,7376.598145,7376.598145,7340.555176,7354.05127,0.0,...,0.036377,0.036377,0.036196,0.03626,0.0,0.198263,0.198263,0.197084,0.197758,0.0
2020-04-07 10:00:00+00:00,1.001372,1.00351,0.999029,1.000605,574005248.0,7420.056152,7427.939453,7360.825684,7377.067871,268472320.0,...,0.036766,0.036869,0.036326,0.036408,113320.0,0.199927,0.200089,0.198244,0.198244,64615.0
2020-04-07 11:00:00+00:00,1.001356,1.003241,0.999101,1.001339,35094528.0,7370.503418,7419.23877,7370.503418,7419.23877,0.0,...,0.036246,0.036799,0.036246,0.036799,0.0,0.197452,0.199911,0.197452,0.199911,0.0


### Some info on the dataset

In [7]:
# Tickers selected by nlargest(n=10) on mean volume, highest first
tickers_hg_volume

Index(['USDT-USD', 'BTC-USD', 'XRP-USD', 'TRX-USD', 'ETH-USD', 'ETC-USD',
       'BCH-USD', 'EOS-USD', 'ADA-USD', 'BNT-USD'],
      dtype='object')

In [8]:
# Show the first five rows, followed by the (rows, columns) shape
display(df.head(), df.shape)

Unnamed: 0_level_0,USDT-USD,USDT-USD,USDT-USD,USDT-USD,USDT-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,...,ADA-USD,ADA-USD,ADA-USD,ADA-USD,ADA-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD
Unnamed: 0_level_1,Close,High,Low,Open,Volume,Close,High,Low,Open,Volume,...,Close,High,Low,Open,Volume,Close,High,Low,Open,Volume
2020-04-07 09:00:00+00:00,1.000498,1.002541,1.000122,1.001162,0.0,7376.598145,7376.598145,7340.555176,7354.05127,0.0,...,0.036377,0.036377,0.036196,0.03626,0.0,0.198263,0.198263,0.197084,0.197758,0.0
2020-04-07 10:00:00+00:00,1.001372,1.00351,0.999029,1.000605,574005248.0,7420.056152,7427.939453,7360.825684,7377.067871,268472320.0,...,0.036766,0.036869,0.036326,0.036408,113320.0,0.199927,0.200089,0.198244,0.198244,64615.0
2020-04-07 11:00:00+00:00,1.001356,1.003241,0.999101,1.001339,35094528.0,7370.503418,7419.23877,7370.503418,7419.23877,0.0,...,0.036246,0.036799,0.036246,0.036799,0.0,0.197452,0.199911,0.197452,0.199911,0.0
2020-04-07 12:00:00+00:00,1.003253,1.006415,0.994773,1.00073,801333248.0,7349.172363,7399.154785,7345.446777,7376.987305,632246272.0,...,0.035958,0.036601,0.035958,0.036336,0.0,0.19798,0.199121,0.197759,0.19858,0.0
2020-04-07 13:00:00+00:00,1.002527,1.003258,0.999762,1.00313,0.0,7358.981445,7375.606445,7345.770996,7346.891602,0.0,...,0.036212,0.036239,0.035875,0.035934,0.0,0.198289,0.199271,0.19758,0.198028,0.0


(17205, 50)

In [9]:
# Earliest and latest timestamps present in the index
display(df.index.min(), df.index.max())

Timestamp('2020-04-07 09:00:00+0000', tz='UTC')

Timestamp('2022-04-07 09:06:00+0000', tz='UTC')

In [10]:
# Summarize column dtypes and non-null counts, to check that no NaN remains
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17205 entries, 2020-04-07 09:00:00+00:00 to 2022-04-07 09:06:00+00:00
Data columns (total 50 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   (USDT-USD, Close)   17205 non-null  float64
 1   (USDT-USD, High)    17205 non-null  float64
 2   (USDT-USD, Low)     17205 non-null  float64
 3   (USDT-USD, Open)    17205 non-null  float64
 4   (USDT-USD, Volume)  17205 non-null  float64
 5   (BTC-USD, Close)    17205 non-null  float64
 6   (BTC-USD, High)     17205 non-null  float64
 7   (BTC-USD, Low)      17205 non-null  float64
 8   (BTC-USD, Open)     17205 non-null  float64
 9   (BTC-USD, Volume)   17205 non-null  float64
 10  (XRP-USD, Close)    17205 non-null  float64
 11  (XRP-USD, High)     17205 non-null  float64
 12  (XRP-USD, Low)      17205 non-null  float64
 13  (XRP-USD, Open)     17205 non-null  float64
 14  (XRP-USD, Volume)   17205 non-null  float64
 15  (TRX-U

### Save data

In [11]:
# Destination of the exported dataset (relative path, resolved against the
# working directory at save time)
filepath = Path("../csv/crypto_market_data.csv")

In [12]:
# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing.
filepath.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(filepath)