# **Cryptocurrency Market Analysis**
This notebook analyzes historical price data for major cryptocurrencies (BTC, ETH, BNB, SOL) using data from Binance exchange.

In [1]:
#%% Import Libraries

import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# The "ccxt" library facilitates connection to various exchanges, including Binance
import ccxt

In [2]:
#%% Creating DataFrame

crypto = pd.DataFrame({
    'symbols': ["BTC/USDT", "ETH/USDT", "BNB/USDT", "SOL/USDT"]
})

In [3]:
#%% Extracting Data from Binance

# Binance API configuration
binance = ccxt.binance()

# List of crypto to fetch data for
symbols = crypto['symbols'].tolist()  # Add other pairs as necessary
timeframe = '1M'  # Timeframe (1 month)

'''
Available timeframes:
'1m' - 1 minute
'1h' - 1 hour
'1d' - 1 day
'1w' - 1 week
'1M' - 1 month
'''

limit = 120  # Number of candles

# Create a list to store the data
all_data = []

# Loop to fetch data for each asset
for symbol in symbols:
    try:
        # Fetch OHLCV data from Binance
        OHLCV = binance.fetch_ohlcv(symbol, timeframe, limit=limit)
        
        print(f"\nSymbol: {symbol}")
        print(f"Number of candles retrieved: {len(OHLCV)}")
        print(f"Date range: {pd.to_datetime(OHLCV[0][0], unit='ms')} to {pd.to_datetime(OHLCV[-1][0], unit='ms')}")
        
        # Process all candles
        for candle in OHLCV:
            data = {
                'symbols': symbol,
                'timestamp': pd.to_datetime(candle[0], unit='ms'),
                'open': candle[1],
                'high': candle[2],
                'low': candle[3],
                'close': candle[4],
                'volume': candle[5]
            }
            all_data.append(data)
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")

# Convert to a DataFrame
crypto_monthly_price = pd.DataFrame(all_data)

# Calculating Average Price
crypto_monthly_price['average_price'] = round((crypto_monthly_price['high'] + crypto_monthly_price['low']) / 2, 2)

# Group by month and calculate the average price
crypto_monthly_price['timestamp'] = pd.to_datetime(crypto_monthly_price['timestamp']).dt.to_period('M')

print(crypto_monthly_price.info())


Symbol: BTC/USDT
Number of candles retrieved: 93
Date range: 2017-08-01 00:00:00 to 2025-04-01 00:00:00

Symbol: ETH/USDT
Number of candles retrieved: 93
Date range: 2017-08-01 00:00:00 to 2025-04-01 00:00:00

Symbol: BNB/USDT
Number of candles retrieved: 90
Date range: 2017-11-01 00:00:00 to 2025-04-01 00:00:00

Symbol: SOL/USDT
Number of candles retrieved: 57
Date range: 2020-08-01 00:00:00 to 2025-04-01 00:00:00
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype    
---  ------         --------------  -----    
 0   symbols        333 non-null    object   
 1   timestamp      333 non-null    period[M]
 2   open           333 non-null    float64  
 3   high           333 non-null    float64  
 4   low            333 non-null    float64  
 5   close          333 non-null    float64  
 6   volume         333 non-null    float64  
 7   average_price  333 non-null    float64  
dtypes: float64(

In [4]:
#%% Grouping and Sorting Data

# Group by 'symbols' and 'timestamp' to get the average price for each month
crypto_monthly_avg = crypto_monthly_price[['symbols', 'timestamp', 'average_price']]

# Convert symbols to Categorical with specific order from the original crypto DataFrame
crypto_monthly_avg['symbols'] = pd.Categorical(
    crypto_monthly_avg['symbols'],
    categories=symbols,
    ordered=True
)

# Sort first by symbols (to group all records for each symbol together)
# Then by timestamp (descending) within each symbol group
crypto_monthly_avg = crypto_monthly_avg.sort_values(
    ['symbols', 'timestamp'], 
    ascending=[True, False]
).reset_index(drop=True)

print(crypto_monthly_avg.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype    
---  ------         --------------  -----    
 0   symbols        333 non-null    category 
 1   timestamp      333 non-null    period[M]
 2   average_price  333 non-null    float64  
dtypes: category(1), float64(1), period[M](1)
memory usage: 5.9 KB
None


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crypto_monthly_avg['symbols'] = pd.Categorical(


In [None]:
#%% Creating Interactive Price Growth Graph

# Convert Period to datetime for plotting
crypto_monthly_avg['timestamp'] = crypto_monthly_avg['timestamp'].astype(str).apply(lambda x: pd.to_datetime(x))

# Create the figure
fig = px.line(
    crypto_monthly_avg,
    x='timestamp',
    y='average_price',
    color='symbols',
    title='Cryptocurrency Price Growth (Monthly Average)',
    labels={
        'timestamp': 'Date',
        'average_price': 'Price (USD)',
        'symbols': 'Cryptocurrency'
    }
)

# Customize the layout
fig.update_layout(
    template='plotly_dark',
    hovermode='x unified',
    font_family="IBM Plex Sans",
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    ),
    xaxis_title="Date",
    yaxis_title="Price (USD)",
)

# Add range slider
fig.update_xaxes(rangeslider_visible=True)

# Show the plot
fig.show()

# **Cryptocurrency Relative Price Performance Analysis**
Each cryptocurrency price was normalized to 100% from July 2020 to enable direct comparison of growth rates.

In [6]:
#%% Creating Normalized Price Data

# Create a copy of the DataFrame to avoid modifying the original
normalized_df = crypto_monthly_avg.copy()

# Convert timestamp to datetime if not already
normalized_df['timestamp'] = pd.to_datetime(normalized_df['timestamp'])

# Filter data starting from July 2020
start_date = pd.to_datetime('2020-07-01')
normalized_df = normalized_df[normalized_df['timestamp'] >= start_date]

# Sort by timestamp ascending to get correct base prices
normalized_df = normalized_df.sort_values(['symbols', 'timestamp'], ascending=[True, True])

# Get the first value for each symbol (July 2020)
base_values = normalized_df.groupby('symbols').first()['average_price']

# Calculate normalized prices (July 2020 = 100%)
normalized_df['normalized_price'] = normalized_df.apply(
    lambda x: (x['average_price'] / base_values[x['symbols']]) * 100,
    axis=1
)

# Verify the first month is 100% for each symbol
print("\nFirst month values (should be 100%):")
print(normalized_df.groupby('symbols')['normalized_price'].first())


First month values (should be 100%):
symbols
BTC/USDT    100.0
ETH/USDT    100.0
BNB/USDT    100.0
SOL/USDT    100.0
Name: normalized_price, dtype: float64








In [7]:
#%% Create the normalized price evolution graph

# Create the figure
fig = px.line(
    normalized_df,
    x='timestamp',
    y='normalized_price',
    color='symbols',
    title='Cryptocurrency Relative Price Performance (Normalized to July 2020)',
    labels={
        'timestamp': 'Date',
        'normalized_price': 'Price Change (%)',
        'symbols': 'Cryptocurrency'
    }
)

# Customize the layout
fig.update_layout(
    template='plotly_dark',
    hovermode='x unified',
    font_family="IBM Plex Sans",
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    ),
    xaxis_title="Date",
    yaxis_title="Price Change (%)",
    yaxis=dict(tickformat=',d', ticksuffix='%')  # Format y-axis as percentage
)

# Add range slider
fig.update_xaxes(rangeslider_visible=True)

# Show the plot
fig.show()

In [8]:
#%% Separating Symbols into Individual DataFrames
# Create individual DataFrames for each symbol and sort each DataFrame by timestamp in descending order
'''
btc_data = crypto_monthly_avg[crypto_monthly_price['symbols'] == 'BTC/USDT'].reset_index(drop=True)
btc_data = btc_data.sort_values('timestamp', ascending=False).reset_index(drop=True)

eth_data = crypto_monthly_avg[crypto_monthly_price['symbols'] == 'ETH/USDT'].reset_index(drop=True)
eth_data = eth_data.sort_values('timestamp', ascending=False).reset_index(drop=True)

bnb_data = crypto_monthly_avg[crypto_monthly_price['symbols'] == 'BNB/USDT'].reset_index(drop=True)
bnb_data = bnb_data.sort_values('timestamp', ascending=False).reset_index(drop=True)

sol_data = crypto_monthly_avg[crypto_monthly_price['symbols'] == 'SOL/USDT'].reset_index(drop=True)
sol_data = sol_data.sort_values('timestamp', ascending=False).reset_index(drop=True)

'''

"\nbtc_data = crypto_monthly_avg[crypto_monthly_price['symbols'] == 'BTC/USDT'].reset_index(drop=True)\nbtc_data = btc_data.sort_values('timestamp', ascending=False).reset_index(drop=True)\n\neth_data = crypto_monthly_avg[crypto_monthly_price['symbols'] == 'ETH/USDT'].reset_index(drop=True)\neth_data = eth_data.sort_values('timestamp', ascending=False).reset_index(drop=True)\n\nbnb_data = crypto_monthly_avg[crypto_monthly_price['symbols'] == 'BNB/USDT'].reset_index(drop=True)\nbnb_data = bnb_data.sort_values('timestamp', ascending=False).reset_index(drop=True)\n\nsol_data = crypto_monthly_avg[crypto_monthly_price['symbols'] == 'SOL/USDT'].reset_index(drop=True)\nsol_data = sol_data.sort_values('timestamp', ascending=False).reset_index(drop=True)\n\n"