# Get cryptocurrency data

In [16]:
from yahooquery import Screener
import yfinance as yf
from pathlib import Path 

### Get data

In [2]:
# Screener queried for the symbol list, and the maximum number of quotes requested.
# Kept as named constants so the request and the response lookup cannot drift apart.
SCREENER_ID = 'all_cryptocurrencies_us'
MAX_SYMBOLS = 250

s = Screener()
data = s.get_screeners([SCREENER_ID], count=MAX_SYMBOLS)

# Fail early with a readable message if the response does not carry a quote list,
# instead of an opaque TypeError/KeyError from the lookups below.
payload = data.get(SCREENER_ID) if isinstance(data, dict) else None
if not isinstance(payload, dict) or 'quotes' not in payload:
    raise RuntimeError(
        f'Unexpected response for screener {SCREENER_ID!r}: {str(data)[:200]}'
    )

dicts = payload['quotes']
symbols = [d['symbol'] for d in dicts]

# An empty symbol list would only surface later as an empty download.
if not symbols:
    raise RuntimeError(f'Screener {SCREENER_ID!r} returned no symbols')

print(f'# of cryptocurrencies: {len(symbols)}')

# of cryptocurrencies: 250


In [3]:
# Download hourly bars over the trailing two years for every ticker in `symbols`
df_raw = yf.download(
    tickers=symbols,
    period="2y",
    interval="1h",
)

[*********************100%***********************]  250 of 250 completed


### Data engineering

In [4]:
# Bridge short gaps: forward-fill at most 5 consecutive missing bars per column
df_filled = df_raw.ffill(limit=5)

# Keep only columns without any remaining NaN, separately per field.
# Dropping NaN columns on the combined frame removes each (field, ticker)
# column independently, so a ticker could keep its Volume column while losing
# its Close column and make the final Close selection raise a KeyError.
close_prices = df_filled['Close'].dropna(axis=1)
volumes = df_filled['Volume'].dropna(axis=1)

# Tickers that are complete in BOTH fields (order follows the Volume columns)
complete_tickers = volumes.columns.intersection(close_prices.columns)

# Select top 10 crypto - highest mean volume
tickers_hg_volume = volumes[complete_tickers].mean().nlargest(n=10).index
df = close_prices[tickers_hg_volume]

### Some info on the dataset

In [5]:
# Preview the first rows, then show the (rows, columns) shape
display(df.head(), df.shape)

Unnamed: 0,XRP-USD,TRX-USD,ETC-USD,BCH-USD,EOS-USD,ADA-USD,BNT-USD,LTC-USD,DOGE-USD,BAT-USD
2020-04-06 23:00:00+00:00,0.195506,0.013658,5.56681,255.91951,2.731257,0.035616,0.196221,44.391121,0.00198,0.160199
2020-04-07 00:00:00+00:00,0.200237,0.013799,5.660561,256.643097,2.756998,0.036091,0.198634,45.314709,0.001996,0.163322
2020-04-07 01:00:00+00:00,0.199604,0.01375,5.659618,256.708099,2.749161,0.036236,0.19731,45.576633,0.002013,0.162904
2020-04-07 02:00:00+00:00,0.197019,0.013551,5.561947,254.985825,2.712875,0.035746,0.195936,45.07613,0.001987,0.162139
2020-04-07 03:00:00+00:00,0.197864,0.013596,5.555479,256.180786,2.72286,0.035925,0.196226,45.455006,0.00198,0.164334


(17204, 10)

In [6]:
# Show the earliest and latest timestamps covered by the index
display(df.index.min(), df.index.max())

Timestamp('2020-04-06 23:00:00+0000', tz='UTC')

Timestamp('2022-04-06 23:01:00+0000', tz='UTC')

In [7]:
# Print the frame summary: index range, per-column non-null counts, dtypes, memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 17204 entries, 2020-04-06 23:00:00+00:00 to 2022-04-06 23:01:00+00:00
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   XRP-USD   17204 non-null  float64
 1   TRX-USD   17204 non-null  float64
 2   ETC-USD   17204 non-null  float64
 3   BCH-USD   17204 non-null  float64
 4   EOS-USD   17204 non-null  float64
 5   ADA-USD   17204 non-null  float64
 6   BNT-USD   17204 non-null  float64
 7   LTC-USD   17204 non-null  float64
 8   DOGE-USD  17204 non-null  float64
 9   BAT-USD   17204 non-null  float64
dtypes: float64(10)
memory usage: 1.4 MB


### Save data

In [14]:
# Destination CSV for the close prices, relative to the notebook's working directory
filepath = Path('..') / 'csv' / 'crypto_close_price.csv'

In [15]:
# Create the target directory if needed so the write does not fail on a fresh checkout
filepath.parent.mkdir(parents=True, exist_ok=True)

# Write the close-price frame (timestamp index + one column per ticker) to CSV
df.to_csv(filepath)