# Downloading the Data based on Category:

In [46]:
import os
import time
import requests
import pandas as pd

def get_coin_data(category):
    url = f'https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&category={category}&order=market_cap_desc&per_page=100&page=1&sparkline=false&locale=en'
    response = requests.get(url)
    data = response.json()
    id_list = [coin['id'] for coin in data]
    return id_list

def get_coin_chart(id):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    url = f'https://www.coingecko.com/price_charts/{id}/usd/365_days.json'
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        stats_df = pd.DataFrame(data['stats'], columns=['timestamp', 'price'])
        volumes_df = pd.DataFrame(data['total_volumes'], columns=['timestamp', 'volume'])
        stats_df['timestamp'] = pd.to_datetime(stats_df['timestamp'], unit='ms')
        volumes_df['timestamp'] = pd.to_datetime(volumes_df['timestamp'], unit='ms')
        
        #remove the last row as it is the current day and the data is incomplete
        stats_df = stats_df[:-1]
        volumes_df = volumes_df[:-1]
        
        return stats_df, volumes_df
    else:
        print(f"Request for {id} failed with status code {response.status_code}")
        return None, None

def save_data(category):
    id_list = get_coin_data(category)

    for id in id_list:
        stats_df, volumes_df = get_coin_chart(id)

        if stats_df is not None and volumes_df is not None:
            os.makedirs(f'{category}/stats', exist_ok=True)
            os.makedirs(f'{category}/volume', exist_ok=True)

            stats_df.to_json(f'{category}/stats/{id}.json', orient='records', date_format='iso')
            volumes_df.to_json(f'{category}/volume/{id}.json', orient='records', date_format='iso')

        # Sleep for 2 seconds to avoid hitting rate limits
        time.sleep(2)

# Usage:
save_data('artificial-intelligence')

# Read the prices on to a Dataframe

In [44]:
import glob

def read_data(category):
    # Step 1: Get a list of all JSON files
    files = glob.glob(f'{category}/stats/*.json')

    # Initialize an empty DataFrame
    main_df = pd.DataFrame()

    # Step 2: For each file
    for file in files:
        try:
            # Read the JSON data into a DataFrame
            df = pd.read_json(file, orient='records')

            # Get the coin ID from the filename
            coin_id = os.path.splitext(os.path.basename(file))[0]

            # Set 'timestamp' as the index and resample to daily frequency
            df = df.set_index('timestamp').resample('D').mean()

            # Interpolate missing values
            df = df.interpolate()

            # Rename the 'price' column to the coin ID
            df = df.rename(columns={'price': coin_id})

            # Merge the DataFrame with the main DataFrame
            if main_df.empty:
                main_df = df
            else:
                main_df = pd.merge(main_df, df, on='timestamp', how='outer')

        except Exception as e:
            print(f"Error reading file {file}: {e}")

    return main_df

# Usage:
df = read_data('artificial-intelligence')
df.dropna(inplace=True, axis=1)
df

Unnamed: 0_level_0,presearch,mbd-financials,cere-network,forta,numeraire,singularitynet,thought,botto,dynex,insure,...,dotmoovs,selfkey,synesis-one,checkdot,ispolink,vaiot,origintrail,chirpley,iexec-rlc,trace-network-labs
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-02,0.061145,0.000127,0.008224,0.150590,21.684500,0.544794,0.008238,0.410044,0.081841,0.002631,...,0.008796,0.011760,0.007104,0.357382,0.001541,0.166831,0.369530,0.001731,1.999774,0.030577
2023-03-03,0.060709,0.000068,0.008304,0.150500,20.467756,0.526491,0.008125,0.358593,0.082031,0.002586,...,0.007853,0.010772,0.007840,0.361685,0.001500,0.155614,0.343968,0.001752,1.901262,0.029224
2023-03-04,0.056827,0.000086,0.007654,0.152640,19.595320,0.457746,0.008112,0.292387,0.066411,0.003345,...,0.007411,0.010928,0.007719,0.340440,0.001377,0.135269,0.320692,0.001604,1.818173,0.029891
2023-03-05,0.054829,0.000075,0.007421,0.149665,19.055534,0.475851,0.008109,0.282798,0.078524,0.002902,...,0.007452,0.010234,0.007933,0.297969,0.001369,0.129841,0.337777,0.001709,1.710606,0.027941
2023-03-06,0.057254,0.000130,0.007300,0.149951,18.718434,0.465326,0.007856,0.292811,0.067210,0.002938,...,0.007502,0.010960,0.007611,0.304813,0.001413,0.126973,0.335029,0.001927,1.731929,0.028525
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-25,0.065658,0.000170,0.008009,0.148881,19.586091,0.445283,0.008282,0.333019,0.085691,0.002620,...,0.006432,0.011768,0.006644,0.231860,0.001305,0.128620,0.372958,0.001687,1.900972,0.027575
2023-02-26,0.061903,0.000070,0.007770,0.148984,19.094615,0.404964,0.008234,0.367873,0.079952,0.002565,...,0.006317,0.012559,0.006084,0.248369,0.001302,0.120267,0.359380,0.001587,1.893438,0.026890
2023-02-27,0.067466,0.000120,0.007919,0.149936,19.903846,0.414806,0.008206,0.378450,0.080102,0.002655,...,0.006674,0.013535,0.006008,0.262277,0.001384,0.132421,0.394189,0.001622,1.936023,0.028959
2023-02-28,0.062988,0.000130,0.008011,0.153042,19.595102,0.442910,0.008121,0.368575,0.076630,0.002631,...,0.006596,0.012552,0.007490,0.320843,0.001494,0.130229,0.363707,0.001680,1.932966,0.029154


In [45]:
import glob
import os
import pandas as pd

def read_data(category):
    # Step 1: Get a list of all JSON files
    files = glob.glob(f'{category}/stats/*.json')

    # Initialize an empty DataFrame
    main_df = pd.DataFrame()

    # Step 2: For each file
    for file in files:
        try:
            # Read the JSON data into a DataFrame
            df = pd.read_json(file, orient='records')

            # Get the coin ID from the filename
            coin_id = os.path.splitext(os.path.basename(file))[0]

            # Set 'timestamp' as the index and resample to daily frequency
            df = df.set_index('timestamp').resample('D').mean()

            # Interpolate missing values
            df = df.interpolate()

            # Rename the 'price' column to the coin ID
            df = df.rename(columns={'price': coin_id})

            # Merge the DataFrame with the main DataFrame
            if main_df.empty:
                main_df = df
            else:
                main_df = pd.merge(main_df, df, on='timestamp', how='outer')

        except Exception as e:
            print(f"Error reading file {file}: {e}")

    return main_df

# Usage:
df = read_data('artificial-intelligence')

# Drop columns with less than 200 non-NaN values
df.dropna(thresh=200, inplace=True, axis=1)

df

Unnamed: 0_level_0,echelon-prime,presearch,mbd-financials,lilai,cere-network,forta,numeraire,singularitynet,thought,botto,...,synesis-one,checkdot,ispolink,noisegpt,vaiot,origintrail,chirpley,kwai,iexec-rlc,trace-network-labs
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-03-02,3.786070,0.061145,0.000127,,0.008224,0.150590,21.684500,0.544794,0.008238,0.410044,...,0.007104,0.357382,0.001541,,0.166831,0.369530,0.001731,0.149172,1.999774,0.030577
2023-03-03,3.786070,0.060709,0.000068,,0.008304,0.150500,20.467756,0.526491,0.008125,0.358593,...,0.007840,0.361685,0.001500,,0.155614,0.343968,0.001752,0.150700,1.901262,0.029224
2023-03-04,3.101659,0.056827,0.000086,,0.007654,0.152640,19.595320,0.457746,0.008112,0.292387,...,0.007719,0.340440,0.001377,,0.135269,0.320692,0.001604,0.149175,1.818173,0.029891
2023-03-05,3.556111,0.054829,0.000075,,0.007421,0.149665,19.055534,0.475851,0.008109,0.282798,...,0.007933,0.297969,0.001369,,0.129841,0.337777,0.001709,0.139520,1.710606,0.027941
2023-03-06,3.440172,0.057254,0.000130,,0.007300,0.149951,18.718434,0.465326,0.007856,0.292811,...,0.007611,0.304813,0.001413,,0.126973,0.335029,0.001927,0.135851,1.731929,0.028525
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-25,,0.065658,0.000170,,0.008009,0.148881,19.586091,0.445283,0.008282,0.333019,...,0.006644,0.231860,0.001305,,0.128620,0.372958,0.001687,0.150226,1.900972,0.027575
2023-02-26,,0.061903,0.000070,,0.007770,0.148984,19.094615,0.404964,0.008234,0.367873,...,0.006084,0.248369,0.001302,,0.120267,0.359380,0.001587,0.158258,1.893438,0.026890
2023-02-27,,0.067466,0.000120,,0.007919,0.149936,19.903846,0.414806,0.008206,0.378450,...,0.006008,0.262277,0.001384,,0.132421,0.394189,0.001622,0.162157,1.936023,0.028959
2023-02-28,,0.062988,0.000130,,0.008011,0.153042,19.595102,0.442910,0.008121,0.368575,...,0.007490,0.320843,0.001494,,0.130229,0.363707,0.001680,0.157715,1.932966,0.029154
