In [27]:
import csv
import json
import math
import os
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from scipy.stats import norm, percentileofscore

In [6]:
# Get every single token and convert to pd dataframe
# Later cells look coins up through the 'name' and 'id' columns of this frame.

url = "https://api.coingecko.com/api/v3/coins/list"

# Read the key from the environment so a real credential never has to be saved in the
# notebook; the original placeholder is kept as the fallback.
headers = {"x-cg-pro-api-key": os.environ.get("COINGECKO_API_KEY", "CG-xxx")}

# A timeout stops a stalled connection from hanging the kernel, and raise_for_status()
# surfaces HTTP errors here instead of as a confusing KeyError on 'name' further down.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
response_list = response.json()

all_coins = pd.DataFrame(response_list)
# Names are lower-cased because the category file's names are lower-cased before matching
all_coins['name'] = all_coins['name'].str.lower()

In [None]:
# Get all token categories and convert names to id
# coins:         lower-cased first CSV column -> remaining columns of that row
# updated_coins: same values, re-keyed by the matching 'id' from all_coins

coins = dict()
updated_coins = dict()

# 'utf-8-sig' drops a leading byte-order mark when the file has one (and is a no-op when it
# does not), so the first key no longer has to be patched by hand after reading.
with open('token-categories.csv', 'r', encoding='utf-8-sig', newline='') as file:
    reader = csv.reader(file)
    for row in reader:
        if not row:
            # blank line in the CSV: nothing to record
            continue
        coins[row[0].strip().lower()] = row[1:]

unmatched = []
for coin in coins:
    # Try the coin's name first, then fall back to treating the CSV value as an id
    matches = all_coins[all_coins['name'] == coin]
    if matches.empty:
        matches = all_coins[all_coins['id'] == coin]
    if matches.empty:
        # Neither lookup found anything; report it below instead of failing on .iloc[0]
        unmatched.append(coin)
        continue

    # When several rows match, the first one is used
    coin_id = matches.iloc[0]['id']
    updated_coins[coin_id] = coins[coin]

if unmatched:
    print(f'No match in all_coins for {len(unmatched)} token(s): {unmatched}')


In [8]:
# Load the top-100 table and keep only the coins with at least one appearance
# (original note: 247 coins have had at least one appearance in the top 100)

top100_df = pd.read_excel('top100.xlsx').set_index('date_rebal_Q')

# A column that sums to zero is dropped
# NOTE(review): presumably cells are 0/1 membership flags per rebalance date - confirm
sums = top100_df.sum()
zerosum = sums.index[(sums == 0).to_numpy()]

top100_df = top100_df.drop(columns=zerosum)

In [9]:
# Group every top-100 coin under one category label.
#   - coin missing from updated_coins (or with no labels at all) -> 'n.a.'
#   - second label equal to 'Memecoin'                            -> 'Memecoin'
#   - otherwise                                                   -> first label
categories = {}

for coin in top100_df.columns:
    labels = updated_coins.get(coin)
    if not labels:
        key = 'n.a.'
    elif len(labels) > 1 and labels[1] == 'Memecoin':
        # length guard: a row with a single label must not raise IndexError
        key = 'Memecoin'
    else:
        key = labels[0]
    categories.setdefault(key, set()).add(coin)

#convert categories to csv
# Columns are padded with None to a common length so they fit in one DataFrame.
# Members are sorted because set iteration order changes between interpreter runs,
# which would otherwise make the exported file differ on every execution.
max_len = max((len(members) for members in categories.values()), default=0)
categories_list = {
    key: sorted(members, key=str) + [None] * (max_len - len(members))
    for key, members in categories.items()
}
categories_df = pd.DataFrame(categories_list)
categories_df.to_csv('top100_categories.csv')

In [10]:
# Flatten the per-category sets into a single set containing every coin
top100 = set().union(*categories.values())

In [11]:
# Create a price df for all coins and for each category (one per category + 1 for 'All')

# The first CSV column is renamed to 'Date', parsed as datetimes and used as the index
raw_prices = pd.read_csv('prices.csv')
all_coin_price = (
    raw_prices
    .rename(columns={raw_prices.columns[0]: 'Date'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

# Only keep price columns for coins that appear in top100
top_coins_data = all_coin_price[all_coin_price.columns.intersection(top100)]

cat_dfs = {
    category: top_coins_data[top_coins_data.columns.intersection(values)]
    for category, values in categories.items()
}
cat_dfs['All'] = top_coins_data

# convert top coins to csv
top_coins_data.to_csv('top100_price_data.csv')

In [12]:
# Weekly percentage change per category, with the week summarised three ways:
# last price, median price and mean price of the week.

last_weekly, median_weekly, mean_weekly = {}, {}, {}
weekly_stores = (('last', last_weekly), ('median', median_weekly), ('mean', mean_weekly))

for category, df in cat_dfs.items():
    # Start at 2019-12-29 so the first week of 2020 has a previous week to compare against
    weekly_bins = df.loc[df.index >= '2019-12-29'].resample('W')

    for agg_name, store in weekly_stores:
        pct = getattr(weekly_bins, agg_name)().pct_change() * 100
        # Rows dated before 2020 are dropped once the change has been computed
        store[category] = pct[pct.index.year >= 2020]

In [None]:
# sample to get outperforming weeks -> last
#
# Null model: for each week, a category's return is the average of n draws from
# Normal(Mean, StdDev), where Mean/StdDev are taken across all top coins for that week and
# n is the number of that category's coins with a value that week. A week counts for
# Memecoin when no other category's simulated average is greater than Memecoin's.
#
# The average of n independent Normal(mu, sigma) draws is itself Normal(mu, sigma / sqrt(n)),
# so each simulated average is drawn directly from that distribution, for all simulations
# at once, instead of looping in Python over simulations, weeks and coins.

N_SIMULATIONS = 10_000
rng = np.random.default_rng(2020)  # fixed seed so a rerun reproduces the same distribution

df_memecoin = last_weekly['Memecoin']
exclude = ['Memecoin', 'All']
else_dfs = {key : val for key, val in last_weekly.items() if key not in exclude}

top_pc_2020 = top_coins_data[top_coins_data.index >= '2019-12-29'].resample('W').last().pct_change() * 100
top_pc_2020 = top_pc_2020[top_pc_2020.index.year >= 2020]

all_mean = top_pc_2020.mean(axis=1)
all_std = top_pc_2020.std(axis=1)
stats = pd.concat([all_mean, all_std], axis=1, keys=['Mean', 'StdDev'])

weeks = df_memecoin.index
week_mean = stats['Mean'].reindex(weeks).to_numpy(dtype=float)
week_std = stats['StdDev'].reindex(weeks).to_numpy(dtype=float)
sim_shape = (N_SIMULATIONS, len(weeks))

meme_means = None
meme_has_data = None
beaten = np.zeros(sim_shape, dtype=bool)  # True where some other category beat Memecoin

# Memecoin is processed first so every other category can be compared against it
with np.errstate(divide='ignore', invalid='ignore'):
    for name, frame in [('Memecoin', df_memecoin), *else_dfs.items()]:
        n_obs = frame.reindex(weeks).notna().sum(axis=1).to_numpy()
        # A category with no values in a week gets NaN instead of a division by zero;
        # NaN never compares greater, so such a category cannot beat Memecoin that week.
        std_err = np.where(n_obs > 0, week_std / np.sqrt(n_obs), np.nan)
        sim_means = week_mean + std_err * rng.standard_normal(sim_shape)
        if name == 'Memecoin':
            meme_means, meme_has_data = sim_means, n_obs > 0
        else:
            beaten |= sim_means > meme_means

# Weeks in which Memecoin itself has no data are never counted as outperformance
num_top_last = (~beaten & meme_has_data).sum(axis=1).tolist()

print(len(num_top_last))

In [None]:
# sample to get outperforming weeks -> median
#
# Null model: for each week, a category's return is the average of n draws from
# Normal(Mean, StdDev), where Mean/StdDev are taken across all top coins for that week and
# n is the number of that category's coins with a value that week. A week counts for
# Memecoin when no other category's simulated average is greater than Memecoin's.
#
# The average of n independent Normal(mu, sigma) draws is itself Normal(mu, sigma / sqrt(n)),
# so each simulated average is drawn directly from that distribution, for all simulations
# at once, instead of looping in Python over simulations, weeks and coins.

N_SIMULATIONS = 10_000
rng = np.random.default_rng(2021)  # fixed seed so a rerun reproduces the same distribution

df_memecoin = median_weekly['Memecoin']
exclude = ['Memecoin', 'All']
else_dfs = {key : val for key, val in median_weekly.items() if key not in exclude}

top_pc_2020 = top_coins_data[top_coins_data.index >= '2019-12-29'].resample('W').median().pct_change() * 100
top_pc_2020 = top_pc_2020[top_pc_2020.index.year >= 2020]

all_mean = top_pc_2020.mean(axis=1)
all_std = top_pc_2020.std(axis=1)
stats = pd.concat([all_mean, all_std], axis=1, keys=['Mean', 'StdDev'])

weeks = df_memecoin.index
week_mean = stats['Mean'].reindex(weeks).to_numpy(dtype=float)
week_std = stats['StdDev'].reindex(weeks).to_numpy(dtype=float)
sim_shape = (N_SIMULATIONS, len(weeks))

meme_means = None
meme_has_data = None
beaten = np.zeros(sim_shape, dtype=bool)  # True where some other category beat Memecoin

# Memecoin is processed first so every other category can be compared against it
with np.errstate(divide='ignore', invalid='ignore'):
    for name, frame in [('Memecoin', df_memecoin), *else_dfs.items()]:
        n_obs = frame.reindex(weeks).notna().sum(axis=1).to_numpy()
        # A category with no values in a week gets NaN instead of a division by zero;
        # NaN never compares greater, so such a category cannot beat Memecoin that week.
        std_err = np.where(n_obs > 0, week_std / np.sqrt(n_obs), np.nan)
        sim_means = week_mean + std_err * rng.standard_normal(sim_shape)
        if name == 'Memecoin':
            meme_means, meme_has_data = sim_means, n_obs > 0
        else:
            beaten |= sim_means > meme_means

# Weeks in which Memecoin itself has no data are never counted as outperformance
num_top_median = (~beaten & meme_has_data).sum(axis=1).tolist()

print(sum(num_top_median))

In [None]:
# sample to get outperforming weeks -> mean
#
# Null model: for each week, a category's return is the average of n draws from
# Normal(Mean, StdDev), where Mean/StdDev are taken across all top coins for that week and
# n is the number of that category's coins with a value that week. A week counts for
# Memecoin when no other category's simulated average is greater than Memecoin's.
#
# The average of n independent Normal(mu, sigma) draws is itself Normal(mu, sigma / sqrt(n)),
# so each simulated average is drawn directly from that distribution, for all simulations
# at once, instead of looping in Python over simulations, weeks and coins.

N_SIMULATIONS = 10_000
rng = np.random.default_rng(2022)  # fixed seed so a rerun reproduces the same distribution

df_memecoin = mean_weekly['Memecoin']
exclude = ['Memecoin', 'All']
else_dfs = {key : val for key, val in mean_weekly.items() if key not in exclude}

top_pc_2020 = top_coins_data[top_coins_data.index >= '2019-12-29'].resample('W').mean().pct_change() * 100
top_pc_2020 = top_pc_2020[top_pc_2020.index.year >= 2020]

all_mean = top_pc_2020.mean(axis=1)
all_std = top_pc_2020.std(axis=1)
stats = pd.concat([all_mean, all_std], axis=1, keys=['Mean', 'StdDev'])

weeks = df_memecoin.index
week_mean = stats['Mean'].reindex(weeks).to_numpy(dtype=float)
week_std = stats['StdDev'].reindex(weeks).to_numpy(dtype=float)
sim_shape = (N_SIMULATIONS, len(weeks))

meme_means = None
meme_has_data = None
beaten = np.zeros(sim_shape, dtype=bool)  # True where some other category beat Memecoin

# Memecoin is processed first so every other category can be compared against it
with np.errstate(divide='ignore', invalid='ignore'):
    for name, frame in [('Memecoin', df_memecoin), *else_dfs.items()]:
        n_obs = frame.reindex(weeks).notna().sum(axis=1).to_numpy()
        # A category with no values in a week gets NaN instead of a division by zero;
        # NaN never compares greater, so such a category cannot beat Memecoin that week.
        std_err = np.where(n_obs > 0, week_std / np.sqrt(n_obs), np.nan)
        sim_means = week_mean + std_err * rng.standard_normal(sim_shape)
        if name == 'Memecoin':
            meme_means, meme_has_data = sim_means, n_obs > 0
        else:
            beaten |= sim_means > meme_means

# Weeks in which Memecoin itself has no data are never counted as outperformance
num_top_mean = (~beaten & meme_has_data).sum(axis=1).tolist()

# Report this cell's own result (the original printed the length of num_top_last here)
print(len(num_top_mean))

In [None]:
# Size and average of the simulated week counts -> last

length_last = len(num_top_last)
mean_last = sum(num_top_last) / length_last

print(length_last, mean_last, sep='\n')

In [17]:
# Bar chart of how often each simulated week count occurred -> last

last_counts = Counter(num_top_last)

# x: distinct week counts (in first-seen order), y: number of simulations with that count
nums = list(last_counts)
frequencies = [last_counts[n] for n in nums]

last_counts_histo = go.Figure(data=[go.Bar(x=nums, y=frequencies)])
last_counts_histo.update_layout(
    title='Frequencies of # of Weeks of Memecoin Outperformance (Last)',
    xaxis_title='# of Weeks of Memecoin Outperformance',
    yaxis_title='Frequency',
)
last_counts_histo.show()

In [18]:
# Bar chart of how often each simulated week count occurred -> median

median_counts = Counter(num_top_median)

# x: distinct week counts (in first-seen order), y: number of simulations with that count
nums = list(median_counts)
frequencies = [median_counts[n] for n in nums]

median_counts_histo = go.Figure(data=[go.Bar(x=nums, y=frequencies)])
median_counts_histo.update_layout(
    title='Frequencies of # of Weeks of Memecoin Outperformance (Median)',
    xaxis_title='# of Weeks of Memecoin Outperformance',
    yaxis_title='Frequency',
)
median_counts_histo.show()

In [33]:
# Bar chart of how often each simulated week count occurred -> mean

mean_counts = Counter(num_top_mean)

# x: distinct week counts (in first-seen order), y: number of simulations with that count
nums = list(mean_counts)
frequencies = [mean_counts[n] for n in nums]

mean_counts_histo = go.Figure(data=[go.Bar(x=nums, y=frequencies)])
mean_counts_histo.update_layout(
    title='Frequencies of # of Weeks of Memecoin Outperformance (Mean)',
    xaxis_title='# of Weeks of Memecoin Outperformance',
    yaxis_title='Frequency',
)
mean_counts_histo.show()

In [41]:
# Observed number of weeks each category beat every other category, from the "last" weekly
# returns. A week is credited only on a strict win over the best of the other categories.

# -> last, mean: a category's weekly figure is the mean across its coins
cats_last_mean = [returns.mean(axis=1).to_frame(name) for name, returns in last_weekly.items()]
all_last_mean = pd.concat(cats_last_mean, axis=1).drop(columns='All')

ct_last_mean = {}
for coin in all_last_mean.columns:
    best_rival = all_last_mean.drop(columns=coin).max(axis=1)
    ct_last_mean[coin] = int((all_last_mean[coin] > best_rival).sum())

# -> last, median: a category's weekly figure is the median across its coins
cats_last_median = [returns.median(axis=1).to_frame(name) for name, returns in last_weekly.items()]
all_last_median = pd.concat(cats_last_median, axis=1).drop(columns='All')

ct_last_median = {}
for coin in all_last_median.columns:
    best_rival = all_last_median.drop(columns=coin).max(axis=1)
    ct_last_median[coin] = int((all_last_median[coin] > best_rival).sum())

In [42]:
# Observed number of weeks each category beat every other category, from the "median" weekly
# returns. A week is credited only on a strict win over the best of the other categories.

# -> median, mean: a category's weekly figure is the mean across its coins
cats_median_mean = [returns.mean(axis=1).to_frame(name) for name, returns in median_weekly.items()]
all_median_mean = pd.concat(cats_median_mean, axis=1).drop(columns='All')

ct_median_mean = {}
for coin in all_median_mean.columns:
    best_rival = all_median_mean.drop(columns=coin).max(axis=1)
    ct_median_mean[coin] = int((all_median_mean[coin] > best_rival).sum())

# -> median, median: a category's weekly figure is the median across its coins
cats_median_median = [returns.median(axis=1).to_frame(name) for name, returns in median_weekly.items()]
all_median_median = pd.concat(cats_median_median, axis=1).drop(columns='All')

ct_median_median = {}
for coin in all_median_median.columns:
    best_rival = all_median_median.drop(columns=coin).max(axis=1)
    ct_median_median[coin] = int((all_median_median[coin] > best_rival).sum())

In [43]:
# Observed number of weeks each category beat every other category, from the "mean" weekly
# returns. A week is credited only on a strict win over the best of the other categories.

# -> mean, mean: a category's weekly figure is the mean across its coins
cats_mean_mean = [returns.mean(axis=1).to_frame(name) for name, returns in mean_weekly.items()]
all_mean_mean = pd.concat(cats_mean_mean, axis=1).drop(columns='All')

ct_mean_mean = {}
for coin in all_mean_mean.columns:
    best_rival = all_mean_mean.drop(columns=coin).max(axis=1)
    ct_mean_mean[coin] = int((all_mean_mean[coin] > best_rival).sum())

# -> mean, median: a category's weekly figure is the median across its coins
cats_mean_median = [returns.median(axis=1).to_frame(name) for name, returns in mean_weekly.items()]
all_mean_median = pd.concat(cats_mean_median, axis=1).drop(columns='All')

ct_mean_median = {}
for coin in all_mean_median.columns:
    best_rival = all_mean_median.drop(columns=coin).max(axis=1)
    ct_mean_median[coin] = int((all_mean_median[coin] > best_rival).sum())

In [44]:
# Pie graphs of top weeks since 2020 -> (last, mean) then (last, median)

pie_specs = [
    (ct_last_mean, 'Weeks as Top Performing Sector by Category (lwa)'),
    (ct_last_median, 'Weeks as Top Performing Sector by Category (lwm)'),
]

for week_counts, pie_title in pie_specs:
    cats_top_labels = list(week_counts)
    cats_top_vals = [week_counts[label] for label in cats_top_labels]
    text_vals = [f'{val} weeks' for val in cats_top_vals]

    trace = go.Pie(labels=cats_top_labels, values=cats_top_vals, text=text_vals)
    layout = go.Layout(
        title=pie_title,
        legend=dict(orientation='h', yanchor='bottom', y=-5),
        margin=dict(b=90),
    )
    top_perf_graph = go.Figure(data=[trace], layout=layout)

    top_perf_graph.show()

In [46]:
# Pie graphs of top weeks since 2020 -> (median, mean) then (median, median)

pie_specs = [
    (ct_median_mean, 'Weeks as Top Performing Sector by Category (mwa)'),
    (ct_median_median, 'Weeks as Top Performing Sector by Category (mwm)'),
]

for week_counts, pie_title in pie_specs:
    cats_top_labels = list(week_counts)
    cats_top_vals = [week_counts[label] for label in cats_top_labels]
    text_vals = [f'{val} weeks' for val in cats_top_vals]

    trace = go.Pie(labels=cats_top_labels, values=cats_top_vals, text=text_vals)
    layout = go.Layout(
        title=pie_title,
        legend=dict(orientation='h', yanchor='bottom', y=-5),
        margin=dict(b=90),
    )
    top_perf_graph = go.Figure(data=[trace], layout=layout)

    top_perf_graph.show()

In [47]:
# Pie graphs of top weeks since 2020 -> (mean, mean) then (mean, median)

pie_specs = [
    (ct_mean_mean, 'Weeks as Top Performing Sector by Category (awa)'),
    (ct_mean_median, 'Weeks as Top Performing Sector by Category (awm)'),
]

for week_counts, pie_title in pie_specs:
    cats_top_labels = list(week_counts)
    cats_top_vals = [week_counts[label] for label in cats_top_labels]
    text_vals = [f'{val} weeks' for val in cats_top_vals]

    trace = go.Pie(labels=cats_top_labels, values=cats_top_vals, text=text_vals)
    layout = go.Layout(
        title=pie_title,
        legend=dict(orientation='h', yanchor='bottom', y=-5),
        margin=dict(b=90),
    )
    top_perf_graph = go.Figure(data=[trace], layout=layout)

    top_perf_graph.show()

In [40]:


# Compare the observed Memecoin week counts with the simulated distributions.

def _tally_moments(tally, n_sims):
    """Return (mean, variance, std) of the simulated counts from a count -> frequency tally.

    The variance divides by n_sims (population form), and the std is its square root.
    """
    mean = sum(freq * num for num, freq in tally.items()) / n_sims
    var = sum((num - mean)**2 * freq for num, freq in tally.items()) / n_sims
    return mean, var, var**0.5


def _score_against_simulation(observed, sim_mean, sim_std, sim_counts):
    """Return (z, p, percentile) for an observed count.

    z is the distance from the simulated mean in simulated standard deviations, p is the
    normal CDF evaluated at z (lower tail), and percentile is the observed count's
    percentile rank within the raw simulated counts.
    """
    z = (observed - sim_mean) / sim_std
    return z, norm.cdf(z), percentileofscore(sim_counts, observed)


mean_last, var_last, std_last = _tally_moments(last_counts, len(num_top_last))
mean_median, var_median, std_median = _tally_moments(median_counts, len(num_top_median))
mean_mean, var_mean, std_mean = _tally_moments(mean_counts, len(num_top_mean))


# LWA
ct = ct_last_mean['Memecoin']
lwa_z, lwa_p, lwa_percentile = _score_against_simulation(ct, mean_last, std_last, num_top_last)
print(f'lwa-- z: {lwa_z}, p: {lwa_p}, percentile: {lwa_percentile}')

# LWM
ct = ct_last_median['Memecoin']
lwm_z, lwm_p, lwm_percentile = _score_against_simulation(ct, mean_last, std_last, num_top_last)
print(f'lwm-- z: {lwm_z}, p: {lwm_p}, percentile: {lwm_percentile}')

# MWA
ct = ct_median_mean['Memecoin']
mwa_z, mwa_p, mwa_percentile = _score_against_simulation(ct, mean_median, std_median, num_top_median)
print(f'mwa-- z: {mwa_z}, p: {mwa_p}, percentile: {mwa_percentile}')

# MWM
ct = ct_median_median['Memecoin']
mwm_z, mwm_p, mwm_percentile = _score_against_simulation(ct, mean_median, std_median, num_top_median)
print(f'mwm-- z: {mwm_z}, p: {mwm_p}, percentile: {mwm_percentile}')

# AWA
ct = ct_mean_mean['Memecoin']
awa_z, awa_p, awa_percentile = _score_against_simulation(ct, mean_mean, std_mean, num_top_mean)
print(f'awa-- z: {awa_z}, p: {awa_p}, percentile: {awa_percentile}')

# AWM
ct = ct_mean_median['Memecoin']
awm_z, awm_p, awm_percentile = _score_against_simulation(ct, mean_mean, std_mean, num_top_mean)
print(f'awm-- z: {awm_z}, p: {awm_p}, percentile: {awm_percentile}')


lwa-- z: 0.3462175554592354, p: 0.6354103888557403, percentile: 64.145
lwm-- z: 0.3462175554592354, p: 0.6354103888557403, percentile: 64.145
mwa-- z: 0.002449540759861955, p: 0.5009772243994104, percentile: 51.14
mwm-- z: -0.6686575167364951, p: 0.2518569867382141, percentile: 25.385
awa-- z: -0.34336354696131605, p: 0.365662489836234, percentile: 37.26
awm-- z: -0.34336354696131605, p: 0.365662489836234, percentile: 37.26
