In [73]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import time
from pycoingecko import CoinGeckoAPI

In [25]:
class Analyzer:
    def __init__(self):
        self.cg = CoinGeckoAPI()
        self.data = None
        self.signals = None
        self.returns = None

    def collectData(self, nCoins = 100):
        print(f"Collecting data for top {nCoins} coins from CoinGecko API")
        markets = self.cg

In [26]:
analyzer = Analyzer()

In [27]:
# following coingecko tutorial here to grab and get the data we need
# open high low close chart, in list form
ohlc = analyzer.cg.get_coin_ohlc_by_id(id = "ethereum", vs_currency = "usd", days = "30")
print(ohlc)

[[1755979200000, 4749.13, 4757.02, 4730.2, 4756.72], [1755993600000, 4753.32, 4793.47, 4738.27, 4773.88], [1756008000000, 4782.43, 4806.26, 4759.44, 4791.16], [1756022400000, 4795.08, 4795.08, 4759.53, 4764.41], [1756036800000, 4754.03, 4781.01, 4725.52, 4748.14], [1756051200000, 4746.3, 4822.05, 4741.02, 4796.88], [1756065600000, 4797.24, 4946.05, 4774.72, 4774.72], [1756080000000, 4806.63, 4815.73, 4738.24, 4778.11], [1756094400000, 4780.26, 4793.26, 4686.34, 4746.77], [1756108800000, 4731.37, 4733.4, 4588.09, 4588.09], [1756123200000, 4591.03, 4608.27, 4538.58, 4593.88], [1756137600000, 4601.74, 4667.76, 4594.25, 4615.85], [1756152000000, 4601.95, 4636.68, 4462.14, 4462.8], [1756166400000, 4435.27, 4438.26, 4352.68, 4381.63], [1756180800000, 4375.38, 4446.52, 4341.54, 4403.14], [1756195200000, 4407.08, 4445.89, 4399.73, 4427.03], [1756209600000, 4430.91, 4448.89, 4408.77, 4417.25], [1756224000000, 4423.17, 4563.51, 4411.96, 4519.53], [1756238400000, 4520.35, 4591.03, 4511.02, 4591.0

In [28]:
## TURN OHLC LIST INTO READABLE DATAFRAME
# convert list to dataframe
ethDF = pd.DataFrame(ohlc)

# columns of an ohlc chart
ethDF.columns = ["date", "open", "high", "low", "close"]

# convert date column to datetime
ethDF["date"] = pd.to_datetime(ethDF["date"], unit = "ms")

# make date the rows
ethDF.set_index("date", inplace=True)

# show
display(ethDF)

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-08-23 20:00:00,4749.13,4757.02,4730.20,4756.72
2025-08-24 00:00:00,4753.32,4793.47,4738.27,4773.88
2025-08-24 04:00:00,4782.43,4806.26,4759.44,4791.16
2025-08-24 08:00:00,4795.08,4795.08,4759.53,4764.41
2025-08-24 12:00:00,4754.03,4781.01,4725.52,4748.14
...,...,...,...,...
2025-09-22 00:00:00,4494.25,4494.25,4447.14,4452.87
2025-09-22 04:00:00,4445.96,4455.20,4290.60,4308.37
2025-09-22 08:00:00,4305.50,4307.17,4145.53,4200.64
2025-09-22 12:00:00,4193.07,4206.57,4151.84,4181.75


In [None]:
# getting multiple currencies data at once
parameters = {
    "vs_currency": "usd",
    "order": "market_cap_desc",
    "per_page": 100,
    "page": 1,
    "sparkline": False,
}

coinMarketData = analyzer.cg.get_coins_markets(**parameters)
print(coinMarketData)

[{'id': 'bitcoin', 'symbol': 'btc', 'name': 'Bitcoin', 'image': 'https://coin-images.coingecko.com/coins/images/1/large/bitcoin.png?1696501400', 'current_price': 112809, 'market_cap': 2245839384059, 'market_cap_rank': 1, 'fully_diluted_valuation': 2245878161214, 'total_volume': 63041857722, 'high_24h': 115617, 'low_24h': 112293, 'price_change_24h': -2773.6550090310047, 'price_change_percentage_24h': -2.39973, 'market_cap_change_24h': -56714337121.79004, 'market_cap_change_percentage_24h': -2.46311, 'circulating_supply': 19923296.0, 'total_supply': 19923640.0, 'max_supply': 21000000.0, 'ath': 124128, 'ath_change_percentage': -9.21484, 'ath_date': '2025-08-14T00:37:02.582Z', 'atl': 67.81, 'atl_change_percentage': 166087.14198, 'atl_date': '2013-07-06T00:00:00.000Z', 'roi': None, 'last_updated': '2025-09-22T18:36:00.547Z'}, {'id': 'ethereum', 'symbol': 'eth', 'name': 'Ethereum', 'image': 'https://coin-images.coingecko.com/coins/images/279/large/ethereum.png?1696501628', 'current_price': 4

In [37]:
# convert this data to readable dataframe
marketDF = pd.DataFrame(coinMarketData)
display(marketDF)

Unnamed: 0,id,symbol,name,image,current_price,market_cap,market_cap_rank,fully_diluted_valuation,total_volume,high_24h,...,total_supply,max_supply,ath,ath_change_percentage,ath_date,atl,atl_change_percentage,atl_date,roi,last_updated
0,bitcoin,btc,Bitcoin,https://coin-images.coingecko.com/coins/images...,112809.000000,2245839384059,1,2245878161214,6.304186e+10,115617.000000,...,1.992364e+07,2.100000e+07,124128.000000,-9.21484,2025-08-14T00:37:02.582Z,67.810000,1.660871e+05,2013-07-06T00:00:00.000Z,,2025-09-22T18:36:00.547Z
1,ethereum,eth,Ethereum,https://coin-images.coingecko.com/coins/images...,4177.710000,503849444713,2,503849444713,5.037707e+10,4497.460000,...,1.207038e+08,,4946.050000,-15.55727,2025-08-24T19:21:03.333Z,0.432979,9.645149e+05,2015-10-20T00:00:00.000Z,"{'times': 48.499597958607175, 'currency': 'btc...",2025-09-22T18:36:00.572Z
2,tether,usdt,Tether,https://coin-images.coingecko.com/coins/images...,1.001000,172133538435,3,172133538435,1.390064e+11,1.001000,...,1.720180e+11,,1.320000,-24.37050,2018-07-24T00:00:00.000Z,0.572521,7.477967e+01,2015-03-02T00:00:00.000Z,,2025-09-22T18:35:59.043Z
3,ripple,xrp,XRP,https://coin-images.coingecko.com/coins/images...,2.850000,170493069910,4,285173514173,8.975972e+09,2.990000,...,9.998580e+10,1.000000e+11,3.650000,-21.59664,2025-07-18T03:40:53.808Z,0.002686,1.063283e+05,2014-05-22T00:00:00.000Z,,2025-09-22T18:35:57.464Z
4,binancecoin,bnb,BNB,https://coin-images.coingecko.com/coins/images...,991.580000,137987141376,5,137987141376,3.556660e+09,1052.970000,...,1.391863e+08,2.000000e+08,1079.070000,-8.01378,2025-09-21T02:07:58.820Z,0.039818,2.492741e+06,2017-10-19T00:00:00.000Z,,2025-09-22T18:36:03.617Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,jupiter-exchange-solana,jup,Jupiter,https://coin-images.coingecko.com/coins/images...,0.468409,1456570924,96,3276154875,6.721754e+07,0.527726,...,6.999012e+09,1.000000e+10,2.000000,-76.57086,2024-01-31T15:02:47.304Z,0.306358,5.295365e+01,2025-04-07T06:56:22.719Z,,2025-09-22T18:35:55.904Z
96,paypal-usd,pyusd,PayPal USD,https://coin-images.coingecko.com/coins/images...,1.000000,1410213008,97,1410460190,8.282196e+07,1.001000,...,1.409772e+09,,1.021000,-2.00248,2023-10-23T22:44:57.056Z,0.959426,4.259960e+00,2024-12-05T22:31:13.430Z,,2025-09-22T18:35:56.025Z
97,tether-gold,xaut,Tether Gold,https://coin-images.coingecko.com/coins/images...,3754.570000,1410097350,98,1410097350,9.147294e+07,3755.350000,...,3.755722e+05,,3755.350000,0.00618,2025-09-22T18:18:06.910Z,1447.840000,1.593922e+02,2020-03-19T13:45:41.821Z,,2025-09-22T18:35:58.408Z
98,polygon-bridged-usdt-polygon,usdt,Polygon Bridged USDT (Polygon),https://coin-images.coingecko.com/coins/images...,1.000000,1406018691,99,1406018691,2.889202e+07,1.001000,...,1.405376e+09,,1.027000,-2.56449,2024-04-14T06:51:56.926Z,0.945933,5.763480e+00,2024-12-05T22:30:30.789Z,,2025-09-22T18:35:56.514Z


In [79]:
def getDailyPriceData(coinId, days = 180):
    try:
        priceData = analyzer.cg.get_coin_market_chart_by_id(
            id = coinId,
            vs_currency = "usd",
            days = days,
            interval = "daily"
        )
    except Exception as e:
        print(f"Error: {e}")

    priceDataDF = pd.DataFrame(priceData)
    dates = [x[0] for x in priceDataDF["prices"]]
    dates = pd.to_datetime(dates, unit="ms")

    for i in range(len(priceDataDF)):
        priceDataDF.loc[i, "prices"] = priceDataDF.loc[i, "prices"][1]
        priceDataDF.loc[i, "market_caps"] = priceDataDF.loc[i, "market_caps"][1]
        priceDataDF.loc[i, "total_volumes"] = priceDataDF.loc[i, "total_volumes"][1]
    
    priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100

    priceDataDF["date"] = dates
    priceDataDF.set_index("date", inplace=True)
    
    return priceDataDF


In [80]:
bitcoinSampleData = getDailyPriceData(coinId = "bitcoin",days = 180)
display(bitcoinSampleData)
ethSampleData = getDailyPriceData(coinId = "ethereum", days = 180)
display(ethSampleData)

  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100


Unnamed: 0_level_0,prices,market_caps,total_volumes,daily_returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-03-27 00:00:00,86960.855549,1724854479045.672363,25522877558.700558,
2025-03-28 00:00:00,87227.27158,1729729987159.482178,24620445368.489468,0.306363
2025-03-29 00:00:00,84359.469155,1673182800974.10376,31674486232.097313,-3.287736
2025-03-30 00:00:00,82679.172554,1640665503855.143799,16837439419.924389,-1.991829
2025-03-31 00:00:00,82356.381112,1633996304187.259277,13428975108.070734,-0.390414
...,...,...,...,...
2025-09-19 00:00:00,117145.49509,2333705338232.097168,43642467075.258377,0.592111
2025-09-20 00:00:00,115655.810464,2303469041668.799805,34296721252.330009,-1.271653
2025-09-21 00:00:00,115715.515368,2305371926506.301758,18774953731.290394,0.051623
2025-09-22 00:00:00,115304.479994,2297332085237.64502,18659113982.495464,-0.355212


  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100


Unnamed: 0_level_0,prices,market_caps,total_volumes,daily_returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-03-27 00:00:00,2009.883543,242347645799.662201,14024859854.906677,
2025-03-28 00:00:00,2003.303424,241580699763.479614,12136188059.422693,-0.327388
2025-03-29 00:00:00,1896.91736,228784244898.248444,18419487648.397388,-5.310532
2025-03-30 00:00:00,1829.274521,220262156018.885254,12924117064.878807,-3.565935
2025-03-31 00:00:00,1805.337878,217784813538.976074,10056531345.160347,-1.308532
...,...,...,...,...
2025-09-19 00:00:00,4590.636737,554210013387.613647,29381752919.130959,-0.024654
2025-09-20 00:00:00,4470.479474,539242810184.069458,28156319659.267269,-2.617442
2025-09-21 00:00:00,4481.799472,540941822331.274048,14314030981.454111,0.253217
2025-09-22 00:00:00,4452.87113,537406899904.397278,15910684640.581882,-0.645463


In [81]:
def getCoinList(top_n=20):
    try:
        coins_data = analyzer.cg.get_coins_markets(
            vs_currency='usd',
            order='market_cap_desc', 
            per_page=top_n,
            page=1
        )
        
        coin_list = []
        for coin in coins_data:
            coin_list.append({
                'id': coin['id'],
                'symbol': coin['symbol'], 
                'name': coin['name'],
                'market_cap_rank': coin['market_cap_rank'],
                'current_price': coin['current_price'],
                'market_cap': coin['market_cap']
            })
            
        return pd.DataFrame(coin_list)
        
    except Exception as e:
        return None
    

def collectAllDailyData(coin_df, days=30): 
    all_price_data = []
    
    for idx, coin in coin_df.iterrows():
        coin_id = coin['id']
        
        price_data = getDailyPriceData(coin_id, days)
        
        if price_data is not None:
            price_data['symbol'] = coin['symbol']
            price_data['name'] = coin['name']
            price_data['market_cap_rank'] = coin['market_cap_rank']
            
            all_price_data.append(price_data)
        
        time.sleep(0.5)  # 500ms delay between requests
    
    if all_price_data:
        combinedDf = pd.concat(all_price_data, ignore_index=True)
        return combinedDf
    else:
        return None

In [83]:
coin_universe = getCoinList(20)
combinedDf = collectAllDailyData(coin_universe, days = 30)

  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = priceDataDF["prices"].pct_change() * 100
  priceDataDF["daily_returns"] = price

In [84]:
pd.to_pickle(combinedDf, "./combinedDf_20.pkl")