# **Fetch Data From Exchanges**
This notebook fetches data from major exchanges.

In [6]:
#%% Import Libraries
import pandas as pd
import numpy as np

# The "ccxt" library facilitates connection to various exchanges, including Binance
import ccxt

# The "pycoingecko" library is used to fetch cryptocurrency data from the CoinGecko API
from pycoingecko import CoinGeckoAPI

In [7]:
#%% Extracting Data from Binance

# List of symbols to fetch data for
symbols = ['BTC/USDT','ETH/USDT','BNB/USDT','SOL/USDT','XRP/USDT','AVAX/USDT']

# Binance API configuration
binance = ccxt.binance()

# List of crypto to fetch data for
timeframe = '1w'  # Timeframe (1 week)

'''
Available timeframes:
'1m' - 1 minute
'1h' - 1 hour
'1d' - 1 day
'1w' - 1 week
'1M' - 1 week
'''

limit = 410  # Number of candles

# Create a list to store the data
all_data = []

# Loop to fetch data for each asset
for symbol in symbols:
    try:
        # Fetch OHLCV data from Binance
        OHLCV = binance.fetch_ohlcv(symbol, timeframe, limit=limit)
        
        print(f"\nSymbol: {symbol}")
        print(f"Number of candles retrieved: {len(OHLCV)}")
        print(f"Date range: {pd.to_datetime(OHLCV[0][0], unit='ms')} to {pd.to_datetime(OHLCV[-1][0], unit='ms')}")
        
        # Process all candles
        for candle in OHLCV:
            data = {
                'symbols': symbol.split('/')[0],
                'timestamp': pd.to_datetime(candle[0], unit='ms'),
                'open': candle[1],
                'high': candle[2],
                'low': candle[3],
                'close': candle[4],
                'volume': candle[5]
            }
            all_data.append(data)
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")

# Convert to a DataFrame
ccxt_df = pd.DataFrame(all_data)

# Calculating Average Price
ccxt_df['average_price'] = round((ccxt_df['high'] + ccxt_df['low']) / 2, 2)

# Group by week and calculate the average price
ccxt_df['timestamp'] = pd.to_datetime(ccxt_df['timestamp']).dt.to_period('W')

print(ccxt_df.info())


Symbol: BTC/USDT
Number of candles retrieved: 404
Date range: 2017-08-14 00:00:00 to 2025-05-05 00:00:00

Symbol: ETH/USDT
Number of candles retrieved: 404
Date range: 2017-08-14 00:00:00 to 2025-05-05 00:00:00

Symbol: BNB/USDT
Number of candles retrieved: 392
Date range: 2017-11-06 00:00:00 to 2025-05-05 00:00:00

Symbol: SOL/USDT
Number of candles retrieved: 248
Date range: 2020-08-10 00:00:00 to 2025-05-05 00:00:00

Symbol: XRP/USDT
Number of candles retrieved: 367
Date range: 2018-04-30 00:00:00 to 2025-05-05 00:00:00

Symbol: AVAX/USDT
Number of candles retrieved: 242
Date range: 2020-09-21 00:00:00 to 2025-05-05 00:00:00
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2057 entries, 0 to 2056
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype        
---  ------         --------------  -----        
 0   symbols        2057 non-null   object       
 1   timestamp      2057 non-null   period[W-SUN]
 2   open           2057 non-null   float64      
 3   hi

In [8]:
#%% Grouping and Sorting Data

# Group by 'symbols' and 'timestamp' to get the average price for each week
avg_df = ccxt_df[['symbols', 'timestamp', 'average_price']]

# Convert symbols to Categorical with specific order from the original crypto DataFrame
avg_df['symbols'] = pd.Categorical(avg_df['symbols'])

# Sort first by symbols (to group all records for each symbol together)
# Then by timestamp (descending) within each symbol group
avg_df = avg_df.sort_values(
    ['symbols', 'timestamp'], 
    ascending=[True, False]
).reset_index(drop=True)

print(avg_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2057 entries, 0 to 2056
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype        
---  ------         --------------  -----        
 0   symbols        2057 non-null   category     
 1   timestamp      2057 non-null   period[W-SUN]
 2   average_price  2057 non-null   float64      
dtypes: category(1), float64(1), period[W-SUN](1)
memory usage: 34.5 KB
None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  avg_df['symbols'] = pd.Categorical(avg_df['symbols'])


In [9]:
#%% Save the DataFrame to a CSV file

avg_df = pd.DataFrame.to_csv(avg_df, 'files/avg_df.csv', index=False)