# Get cryptocurrency data

In [17]:
from yahooquery import Screener
import yfinance as yf
from pathlib import Path 

### Get data

In [18]:
# Ask the screener for 250 entries of the cryptocurrency list.
screener_id = 'all_cryptocurrencies_us'
s = Screener()
data = s.get_screeners([screener_id], count=250)

# The response is keyed by screener id; every quote is a dict that
# carries its ticker under 'symbol'.
dicts = data[screener_id]['quotes']
symbols = [quote['symbol'] for quote in dicts]

print(f'# of cryptocurrencies: {len(symbols)}')

# of cryptocurrencies: 250


In [19]:
# Download two years of hourly bars for every screened symbol.
df_raw = yf.download(
    tickers=symbols,
    period="2y",
    interval="1h",
)

[*********************100%***********************]  250 of 250 completed


### Data engineering

In [41]:
# Handle missing values: forward-fill gaps of at most 10 consecutive rows,
# then discard every column that still contains a NaN.
df = (
    df_raw
    .ffill(limit=10)
    .dropna(axis="columns")
)

# Keep the 10 tickers with the highest mean volume.
mean_volume = df["Volume"].mean()
tickers_hg_volume = mean_volume.nlargest(10).index

In [42]:
# Reorder the column levels so the ticker comes first, then keep only the
# selected high-volume tickers (in the order given by tickers_hg_volume).
# swaplevel(axis=1) swaps the two column levels directly, which avoids
# transposing the whole frame twice.
# NOTE: this cell reassigns `df`; re-run the cleaning cell before running
# it a second time, otherwise the levels are swapped back.
df = df.swaplevel(0, 1, axis=1)
df = df[tickers_hg_volume]

### Some info on the dataset

In [46]:
# Show the tickers that were selected by mean volume.
tickers_hg_volume

Index(['USDT-USD', 'BTC-USD', 'XRP-USD', 'TRX-USD', 'ETH-USD', 'ETC-USD',
       'BCH-USD', 'EOS-USD', 'ADA-USD', 'BNT-USD'],
      dtype='object')

In [44]:
# Preview the first rows and the (rows, columns) shape of the dataset.
display(df.head(), df.shape)

Unnamed: 0_level_0,USDT-USD,USDT-USD,USDT-USD,USDT-USD,USDT-USD,USDT-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,...,ADA-USD,ADA-USD,ADA-USD,ADA-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD
Unnamed: 0_level_1,Adj Close,Close,High,Low,Open,Volume,Adj Close,Close,High,Low,...,High,Low,Open,Volume,Adj Close,Close,High,Low,Open,Volume
2020-04-07 00:00:00+00:00,0.998488,0.998488,1.000555,0.990544,0.99349,0.0,7366.915527,7366.915527,7407.44873,7268.242188,...,0.036678,0.035576,0.035681,0.0,0.198634,0.198634,0.200798,0.196712,0.196712,0.0
2020-04-07 01:00:00+00:00,0.999708,0.999708,1.000824,0.994457,0.998814,681816100.0,7355.73291,7355.73291,7383.069824,7350.611816,...,0.036497,0.036083,0.036118,1867280.0,0.19731,0.19731,0.198583,0.197102,0.198583,5230.0
2020-04-07 02:00:00+00:00,1.000362,1.000362,1.004532,0.997544,1.00019,1158300000.0,7311.450195,7311.450195,7355.026367,7290.237793,...,0.036182,0.035433,0.036182,2287464.0,0.195936,0.195936,0.197029,0.195246,0.196872,49303.0
2020-04-07 03:00:00+00:00,0.999697,0.999697,1.002091,0.998613,1.000571,782573600.0,7317.592285,7317.592285,7317.592285,7280.316895,...,0.035941,0.035405,0.035751,1956104.0,0.196226,0.196226,0.196335,0.194484,0.196049,94744.0
2020-04-07 04:00:00+00:00,1.001208,1.001208,1.00313,0.998775,0.998775,0.0,7271.876953,7271.876953,7316.8125,7261.082031,...,0.036013,0.035715,0.035956,0.0,0.194846,0.194846,0.196651,0.194554,0.196163,0.0


(17200, 60)

In [47]:
# Time span covered by the dataset: earliest and latest timestamp.
# NOTE(review): the latest timestamp can fall off the hour (possibly a bar
# still in progress at download time) — confirm before relying on the last row.
display(df.index.min(), df.index.max())

Timestamp('2020-04-07 00:00:00+0000', tz='UTC')

Timestamp('2022-04-07 00:34:00+0000', tz='UTC')

In [48]:
# Summarise the index range, column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17200 entries, 2020-04-07 00:00:00+00:00 to 2022-04-07 00:34:00+00:00
Data columns (total 60 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   (USDT-USD, Adj Close)  17200 non-null  float64
 1   (USDT-USD, Close)      17200 non-null  float64
 2   (USDT-USD, High)       17200 non-null  float64
 3   (USDT-USD, Low)        17200 non-null  float64
 4   (USDT-USD, Open)       17200 non-null  float64
 5   (USDT-USD, Volume)     17200 non-null  float64
 6   (BTC-USD, Adj Close)   17200 non-null  float64
 7   (BTC-USD, Close)       17200 non-null  float64
 8   (BTC-USD, High)        17200 non-null  float64
 9   (BTC-USD, Low)         17200 non-null  float64
 10  (BTC-USD, Open)        17200 non-null  float64
 11  (BTC-USD, Volume)      17200 non-null  float64
 12  (XRP-USD, Adj Close)   17200 non-null  float64
 13  (XRP-USD, Close)       17200 non-null  float64
 14  (XRP-US

### Save data

In [58]:
# Destination of the exported dataset (relative path).
filepath = Path("../csv/crypto_market_data.csv")

In [59]:
# Create the target folder first: to_csv raises an OSError when the parent
# directory does not exist.
filepath.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(filepath)