# Downloading the Data based on Category:

In [None]:
import os
import time
import requests
import pandas as pd

def get_coin_data(category):
    url = f'https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&category={category}&order=market_cap_desc&per_page=100&page=1&sparkline=false&locale=en'
    response = requests.get(url)
    data = response.json()
    id_list = [coin['id'] for coin in data]
    return id_list

def get_coin_chart(id):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    url = f'https://www.coingecko.com/price_charts/{id}/usd/365_days.json'
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        stats_df = pd.DataFrame(data['stats'], columns=['timestamp', 'price'])
        volumes_df = pd.DataFrame(data['total_volumes'], columns=['timestamp', 'volume'])
        stats_df['timestamp'] = pd.to_datetime(stats_df['timestamp'], unit='ms')
        volumes_df['timestamp'] = pd.to_datetime(volumes_df['timestamp'], unit='ms')
        
        #remove the last row as it is the current day and the data is incomplete
        stats_df = stats_df[:-1]
        volumes_df = volumes_df[:-1]
        
        return stats_df, volumes_df
    else:
        print(f"Request for {id} failed with status code {response.status_code}")
        return None, None

def save_data(category):
    id_list = get_coin_data(category)

    for id in id_list:
        stats_df, volumes_df = get_coin_chart(id)

        if stats_df is not None and volumes_df is not None:
            os.makedirs(f'{category}/stats', exist_ok=True)
            os.makedirs(f'{category}/volume', exist_ok=True)

            stats_df.to_json(f'{category}/stats/{id}.json', orient='records', date_format='iso')
            volumes_df.to_json(f'{category}/volume/{id}.json', orient='records', date_format='iso')

        # Sleep for 2 seconds to avoid hitting rate limits
        time.sleep(2)

# Usage:
save_data('artificial-intelligence')

# Read the prices on to a Dataframe

In [None]:
import pandas as pd
import os

def read_price(file):
    try:
        # Read the JSON data into a DataFrame
        df = pd.read_json(file, orient='records')

        # Convert 'timestamp' to datetime
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        # Get the coin ID from the filename
        coin_id = os.path.splitext(os.path.basename(file))[0]

        # Rename the 'price' column to the coin ID
        df = df.rename(columns={'price': coin_id})

        return df

    except Exception as e:
        #print(f"Error reading file {file}: {e}")
        return None

def merge_all_coins(coin_ids, base_path='artificial-intelligence/stats/'):
    # Initialize an empty DataFrame
    merged_df = pd.DataFrame()

    for coin_id in coin_ids:
        # Construct the file path
        file = os.path.join(base_path, f"{coin_id}.json")

        # Read the coin data
        coin_df = read_price(file)

        # If read_price returned None, skip this coin
        if coin_df is None:
            #print(f"Skipping {coin_id} because its data could not be read")
            continue

        # If this is the first coin, assign its data to merged_df
        if merged_df.empty:
            merged_df = coin_df
        else:
            # Otherwise, merge the coin data into merged_df
            merged_df = pd.merge(merged_df, coin_df, on='timestamp', how='outer')

    # Sort the merged dataframe by 'timestamp'
    merged_df = merged_df.sort_values('timestamp')

    return merged_df


def get_merged_df(category):
    # Get the coin IDs for the category
    coin_ids = get_coin_data(category)

    # Merge all coins
    merged_df = merge_all_coins(coin_ids)

    # Filter columns that have at least 80% of the data
    merged_df.dropna(thresh=200, axis=1, inplace=True)

    # Copy the DataFrame
    df = merged_df.copy()

    # Convert 'timestamp' to datetime and set it as the index
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

    # Drop rows with missing values
    df.dropna(axis=0, inplace=True)

    return df

df = get_merged_df('artificial-intelligence')

df

# Calculating Betas

In [None]:
import numpy as np

def calc_beta(df):
    np_array = df.values
    #Market index is first column
    m = np_array[:,0]
    beta = []
    for ind, col in enumerate(df):
        if ind > 0:
            #coin returns indexed by ind
            s = np_array[:,ind]
            #covariance between coin returns and market index
            covariance = np.cov(s,m)
            beta.append(covariance[0,1]/covariance[1,1])
    return pd.Series(beta, df.columns[1:], name='beta')

log_returns = np.log(df).diff().dropna()
beta = calc_beta(log_returns)
beta

# Convert the series to a DataFrame
beta_df = beta.to_frame()

# Sort the DataFrame by beta from highest to lowest
sorted_beta_df = beta_df.sort_values(by='beta', ascending=False)

sorted_beta_df 

In [None]:
# Find the asset with the highest beta
highest_beta_asset = beta.idxmax()
print(f"The asset with the highest beta is: {highest_beta_asset}")

# Find the asset with the lowest beta
lowest_beta_asset = beta.idxmin()
print(f"The asset with the lowest beta is: {lowest_beta_asset}")

# Complete Code

In [107]:
import os
import time
import requests
import pandas as pd
import numpy as np

def get_coin_data(category):
    url = f'https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&category={category}&order=market_cap_desc&per_page=100&page=1&sparkline=false&locale=en'
    response = requests.get(url)
    data = response.json()
    id_list = [coin['id'] for coin in data]
    return id_list

def get_coin_chart(id):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    url = f'https://www.coingecko.com/price_charts/{id}/usd/365_days.json'
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        stats_df = pd.DataFrame(data['stats'], columns=['timestamp', 'price'])
        volumes_df = pd.DataFrame(data['total_volumes'], columns=['timestamp', 'volume'])
        stats_df['timestamp'] = pd.to_datetime(stats_df['timestamp'], unit='ms')
        volumes_df['timestamp'] = pd.to_datetime(volumes_df['timestamp'], unit='ms')
        
        #remove the last row as it is the current day and the data is incomplete
        stats_df = stats_df[:-1]
        volumes_df = volumes_df[:-1]
        
        return stats_df, volumes_df
    else:
        print(f"Request for {id} failed with status code {response.status_code}")
        return None, None

def save_data(category):
    id_list = get_coin_data(category)

    for id in id_list:
        stats_df, volumes_df = get_coin_chart(id)

        if stats_df is not None and volumes_df is not None:
            os.makedirs(f'{category}/stats', exist_ok=True)
            os.makedirs(f'{category}/volume', exist_ok=True)

            stats_df.to_json(f'{category}/stats/{id}.json', orient='records', date_format='iso')
            volumes_df.to_json(f'{category}/volume/{id}.json', orient='records', date_format='iso')

        # Sleep for 2 seconds to avoid hitting rate limits
        time.sleep(2)

def read_price(file):
    try:
        # Read the JSON data into a DataFrame
        df = pd.read_json(file, orient='records')

        # Convert 'timestamp' to datetime
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        # Get the coin ID from the filename
        coin_id = os.path.splitext(os.path.basename(file))[0]

        # Rename the 'price' column to the coin ID
        df = df.rename(columns={'price': coin_id})

        return df

    except Exception as e:
        #print(f"Error reading file {file}: {e}")
        return None

def merge_all_coins(coin_ids, category):
    base_path=f'{category}/stats/'
    
    # Initialize an empty DataFrame
    merged_df = pd.DataFrame()

    for coin_id in coin_ids:
        # Construct the file path
        file = os.path.join(base_path, f"{coin_id}.json")

        # Read the coin data
        coin_df = read_price(file)

        # If read_price returned None, skip this coin
        if coin_df is None:
            #print(f"Skipping {coin_id} because its data could not be read")
            continue

        # If this is the first coin, assign its data to merged_df
        if merged_df.empty:
            merged_df = coin_df
        else:
            # Otherwise, merge the coin data into merged_df
            merged_df = pd.merge(merged_df, coin_df, on='timestamp', how='outer')

    # Sort the merged dataframe by 'timestamp'
    merged_df = merged_df.sort_values('timestamp')

    return merged_df

def get_merged_df(category):
    # Get the coin IDs for the category
    coin_ids = get_coin_data(category)

    # Merge all coins
    merged_df = merge_all_coins(coin_ids, category)

    # Filter columns that have at least 80% of the data
    merged_df.dropna(thresh=200, axis=1, inplace=True)

    # Copy the DataFrame
    df = merged_df.copy()

    # Convert 'timestamp' to datetime and set it as the index
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

    # Drop rows with missing values
    df.dropna(axis=0, inplace=True)

    return df

def calc_beta(df):
    np_array = df.values
    #Market index is first column
    m = np_array[:,0]
    beta = []
    for ind, col in enumerate(df):
        if ind > 0:
            #coin returns indexed by ind
            s = np_array[:,ind]
            #covariance between coin returns and market index
            covariance = np.cov(s,m)
            beta.append(covariance[0,1]/covariance[1,1])
    return pd.Series(beta, df.columns[1:], name='beta')

def get_sorted_beta(category):
    
    # Usage:
    save_data(category=category)
    
    # Get the merged DataFrame for the category
    df = get_merged_df(category)

    # Calculate the log returns
    log_returns = np.log(df).diff().dropna()

    # Calculate the beta values
    beta = calc_beta(log_returns)

    # Convert the series to a DataFrame
    beta_df = beta.to_frame()

    # Sort the DataFrame by beta from highest to lowest
    sorted_beta_df = beta_df.sort_values(by='beta', ascending=False)

    return sorted_beta_df

sorted_beta_df = get_sorted_beta('layer-1')
print(sorted_beta_df)

                        beta
chihuahua-token     4.322121
moonriver           3.352360
jackal-protocol     2.914854
injective-protocol  2.560886
avalanche-2         2.535628
...                      ...
humanode            0.005064
canto              -0.173394
dynex              -0.281944
kylacoin           -0.610929
electra-protocol   -1.023402

[77 rows x 1 columns]


In [108]:
sorted_beta_df

Unnamed: 0,beta
chihuahua-token,4.322121
moonriver,3.352360
jackal-protocol,2.914854
injective-protocol,2.560886
avalanche-2,2.535628
...,...
humanode,0.005064
canto,-0.173394
dynex,-0.281944
kylacoin,-0.610929


In [106]:
category = 'layer-1'

coin_ids = get_coin_data(category)

merged_df = merge_all_coins(coin_ids)

merged_df

Unnamed: 0,timestamp,aioz-network,oraichain-token,dynex,jackal-protocol,phantasma,octaspace,thought
0,2023-01-03,0.031979,1.785423,0.035718,,0.150064,,0.006676
1,2023-01-04,0.032170,1.680263,0.033208,0.126208,0.146529,,0.007653
2,2023-01-05,0.032257,1.869101,0.034101,0.126208,0.144524,,0.007361
3,2023-01-06,0.031674,1.768468,0.044987,0.125672,0.146287,,0.008186
4,2023-01-07,0.031636,2.086059,0.072792,0.128324,0.146067,,0.008449
...,...,...,...,...,...,...,...,...
360,2023-12-29,0.125577,6.763191,0.665686,0.334329,0.189080,0.724558,0.022512
361,2023-12-30,0.138144,6.484129,0.728981,0.315041,0.177755,0.722032,0.023267
362,2023-12-31,0.132326,6.622116,0.763791,0.310527,0.168377,0.723070,0.022809
363,2024-01-01,0.141277,6.656221,0.775636,0.318701,0.180936,0.691838,0.021547
