In [162]:
import csv
import json
import requests
import math
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.formula.api as sm
from pandas.tseries.offsets import MonthEnd
from collections import Counter
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr

In [163]:
# Get every single token and convert to pd dataframe

url = "https://api.coingecko.com/api/v3/coins/list"

headers = {"x-cg-pro-api-key": "CG-UHjW13FD2AW8Wxd4DiaJKJ1H"}

response = requests.get(url, headers=headers)
response_list = json.loads(response.text)

all_coins = pd.DataFrame(response_list)
all_coins['name'] = all_coins['name'].str.lower()
#print(all_coins)

In [164]:
# Get all token categories and convert names to id

coins = dict()
updated_coins = dict()

with open('token-categories.csv', 'r') as file:
    reader = csv.reader(file)
    for row in reader:
        coins[row[0].strip().lower()] = row[1:]

coins['bitcoin'] = coins['\ufeffbitcoin']
del coins['\ufeffbitcoin']
#print(coins)

for coin in coins:
    row = all_coins[all_coins['name'] == coin]
    if row.empty:
        row = all_coins[all_coins['id'] == coin]

    id = row.iloc[0]['id']
    updated_coins[id] = coins[coin]
    #print(id, coin)

#print(updated_coins)
#print(coins['ordi'])


In [165]:
# Get all coins from top100 list and construct dataframe of those with at least 1 appearance
# 247 coins that have had at least one appearance in the top 100

top100_df = pd.read_excel('top100.xlsx')
top100_df = top100_df.set_index('date_rebal_Q')
sums = top100_df.sum()
zerosum = sums[sums == 0].index

top100_df = top100_df.drop(zerosum, axis=1)
#print(top100_df)

In [166]:
categories = dict(set())
other = []

for coin in top100_df.columns:
    if coin in updated_coins:
        if updated_coins[coin][1] == 'Memecoin':
            if 'Memecoin' in categories:
                categories['Memecoin'].add(coin)
            else:
                categories['Memecoin'] = {coin}
        else:
            if updated_coins[coin][0] in categories:
                categories[updated_coins[coin][0]].add(coin)
            else:
                categories[updated_coins[coin][0]] = {coin}
    else:
        if 'n.a.' in categories:
            categories['n.a.'].add(coin)
        else:
            categories['n.a.'] = {coin}

#convert categories to csv
max_len = max(len(arr) for arr in categories.values())
categories_list = {key: list(value) + [None] * (max_len - len(value)) for key, value in categories.items()}
categories_df = pd.DataFrame(categories_list)
categories_df.to_csv('top100_categories.csv')

#print(categories)
#print(categories_df)

In [167]:
# Create list of all coins
top100 = set()
for category in categories.values():
    top100 = top100.union(category)

#print(top100)

In [168]:
# Create df for all and each catagory (10 categories + 1 for All)

all_coin_price = pd.read_csv('prices.csv')
all_coin_price.rename(columns={all_coin_price.columns[0] : 'Date'}, inplace=True)
all_coin_price['Date'] = pd.to_datetime(all_coin_price['Date'])
all_coin_price.set_index('Date', inplace=True)

top_coins_data = all_coin_price[all_coin_price.columns.intersection(top100)]

cat_dfs = {}
for category, values in categories.items():
    cat_dfs[category] = top_coins_data[top_coins_data.columns.intersection(values)]

cat_dfs['All'] = top_coins_data

# convert top coins to csv
top_coins_data.to_csv('top100_price_data.csv')

all_wo_mc = top_coins_data
for col in top_coins_data.columns:
    if col in cat_dfs['Memecoin'].columns:
        all_wo_mc = all_wo_mc.drop(columns=[col], axis=1)

cat_dfs['AllwoMC'] = all_wo_mc

#print(all_wo_mc)
#print(cat_dfs['Memecoin'])
#print(top_coins_data)

In [169]:
# Get weekly pct change for last, median, mean

last_weekly = {}
median_weekly = {}
mean_weekly = {}

for category, df in cat_dfs.items():

    last = df[df.index >= '2019-12-29'].resample('W').last().pct_change() * 100
    median = df[df.index >= '2019-12-29'].resample('W').median().pct_change() * 100
    mean = df[df.index >= '2019-12-29'].resample('W').mean().pct_change() * 100

    last = last[last.index.year >= 2020]
    median = median[median.index.year >= 2020]
    mean = mean[mean.index.year >= 2020]

    last_weekly[category] = last
    median_weekly[category] = median
    mean_weekly[category] = mean

#print(last_weekly['Memecoin'])
#print(median_weekly['Memecoin'])
#print(mean_weekly['Memecoin'])

In [170]:
# Get monthly pct change for last, median, mean

last_monthly = {}
median_monthly = {}
mean_monthly = {}

for category, df in cat_dfs.items():

    last = df[df.index >= '2019-12-29'].resample('M').last().pct_change() * 100
    median = df[df.index >= '2019-12-29'].resample('M').median().pct_change() * 100
    mean = df[df.index >= '2019-12-29'].resample('M').mean().pct_change() * 100

    last = last[last.index.year >= 2020]
    median = median[median.index.year >= 2020]
    mean = mean[mean.index.year >= 2020]

    last_monthly[category] = last
    median_monthly[category] = median
    mean_monthly[category] = mean

#print(last_monthly['Memecoin'])
#print(median_monthly['Memecoin'])
#print(mean_monthly['Memecoin'])

In [171]:
# Get mean/median for each category -> weekly

last_weekly_comb = {}
median_weekly_comb = {}
mean_weekly_comb = {}

for category, df in last_weekly.items():
    row_means = df.mean(axis=1)
    row_medians = df.median(axis=1)

    combined_df = pd.DataFrame({'Mean': row_means, 'Median': row_medians})

    last_weekly_comb[category] = combined_df

for category, df in median_weekly.items():
    row_means = df.mean(axis=1)
    row_medians = df.median(axis=1)

    combined_df = pd.DataFrame({'Mean': row_means, 'Median': row_medians})

    median_weekly_comb[category] = combined_df

for category, df in mean_weekly.items():
    row_means = df.mean(axis=1)
    row_medians = df.median(axis=1)

    combined_df = pd.DataFrame({'Mean': row_means, 'Median': row_medians})

    mean_weekly_comb[category] = combined_df


#print(last_weekly_comb['Memecoin'])
#print(median_weekly_comb['Memecoin'])
#print(mean_weekly_comb['Memecoin'])

In [172]:
# Get mean/median for each category -> monthly

last_monthly_comb = {}
median_monthly_comb = {}
mean_monthly_comb = {}

for category, df in last_monthly.items():
    row_means = df.mean(axis=1)
    row_medians = df.median(axis=1)

    combined_df = pd.DataFrame({'Mean': row_means, 'Median': row_medians})

    last_monthly_comb[category] = combined_df

for category, df in median_monthly.items():
    row_means = df.mean(axis=1)
    row_medians = df.median(axis=1)

    combined_df = pd.DataFrame({'Mean': row_means, 'Median': row_medians})

    median_monthly_comb[category] = combined_df

for category, df in mean_monthly.items():
    row_means = df.mean(axis=1)
    row_medians = df.median(axis=1)

    combined_df = pd.DataFrame({'Mean': row_means, 'Median': row_medians})

    mean_monthly_comb[category] = combined_df

#print(last_monthly_comb['All'])
#print(median_monthly_comb['Memecoin'])
#print(mean_monthly_comb['Memecoin'])

In [173]:
# find weeks where MC outperformed market

df_all = last_weekly_comb['All']
df_meme = last_weekly_comb['Memecoin']
last_w_avg = df_all[df_meme['Mean'] > df_all['Mean']]['Mean'].to_frame()
last_w_med = df_all[df_meme['Median'] > df_all['Median']]['Median'].to_frame()

df_all = median_weekly_comb['All']
df_meme = median_weekly_comb['Memecoin']
median_w_avg = df_all[df_meme['Mean'] > df_all['Mean']]['Mean'].to_frame()
median_w_med = df_all[df_meme['Median'] > df_all['Median']]['Median'].to_frame()

df_all = mean_weekly_comb['All']
df_meme = mean_weekly_comb['Memecoin']
mean_w_avg = df_all[df_meme['Mean'] > df_all['Mean']]['Mean'].to_frame()
mean_w_med = df_all[df_meme['Median'] > df_all['Median']]['Median'].to_frame()

print(median_w_avg)

                 Mean
Date                 
2020-01-05  -1.098211
2020-01-12   3.942846
2020-01-26  -0.131479
2020-03-01 -13.216326
2020-03-15 -15.055490
...               ...
2023-12-24   3.507885
2024-01-14  -2.422849
2024-02-18  10.287053
2024-03-03  11.643690
2024-03-10  15.149879

[97 rows x 1 columns]


In [174]:
# find months where MC outperformed market

df_all = last_monthly_comb['All']
df_meme = last_monthly_comb['Memecoin']
last_m_avg = df_all[df_meme['Mean'] > df_all['Mean']]['Mean'].to_frame()
last_m_med = df_all[df_meme['Median'] > df_all['Median']]['Median'].to_frame()

df_all = median_monthly_comb['All']
df_meme = median_monthly_comb['Memecoin']
median_m_avg = df_all[df_meme['Mean'] > df_all['Mean']]['Mean'].to_frame()
median_m_med = df_all[df_meme['Median'] > df_all['Median']]['Median'].to_frame()

df_all = mean_monthly_comb['All']
df_meme = mean_monthly_comb['Memecoin']
mean_m_avg = df_all[df_meme['Mean'] > df_all['Mean']]['Mean'].to_frame()
mean_m_med = df_all[df_meme['Median'] > df_all['Median']]['Median'].to_frame()

print(median_m_avg)

                  Mean
Date                  
2020-03-31  -39.356719
2020-07-31   21.889810
2020-10-31  -11.521961
2021-01-31   40.052885
2021-02-28  167.764018
2021-03-31   84.496346
2021-04-30   59.305166
2021-05-31   21.937481
2021-06-30  -39.220624
2021-07-31  -10.902123
2021-10-31   11.887622
2021-11-30   32.917036
2022-01-31   -9.268217
2022-02-28  -18.958761
2022-05-31  -47.015844
2022-06-30  -22.512403
2022-09-30  -13.529852
2022-10-31   -5.620179
2022-11-30  -14.683361
2023-01-31   20.046657
2023-02-28   27.770374
2023-04-30    2.775594
2023-05-31  -14.599699
2023-11-30   43.847381
2023-12-31   28.438163
2024-03-31   36.834583


In [175]:
# Convert rows of prices to lists for linear regression

out_perf_weeks = {'lwa': [], 'lwm': [], 'mwa': [], 'mwm': [], 'awa': [], 'awm': [], 'lma': [], 'lmm': [], 'mma': [], 'mmm': [], 'ama': [], 'amm': []}

#last, avg, weekly
lwa_ret_all = []
lwa_ret_meme = []
lwa_same_all = []
lwa_ret_awoc = []
lwa_next_awoc = []

for row in last_w_avg.iterrows():
    lwa_ret_meme.append(last_weekly_comb['Memecoin'].loc[row[0]]['Mean'])
    lwa_ret_awoc.append(last_weekly_comb['AllwoMC'].loc[row[0]]['Mean'])
    out_perf_weeks['lwa'].append(row[0])


for date in last_w_avg.index.tolist():
    try:
        next_week = last_weekly_comb['All'].loc[(date + pd.DateOffset(weeks=1))]['Mean']
        same_week = last_weekly_comb['All'].loc[date]['Mean']
        next_awoc = last_weekly_comb['AllwoMC'].loc[(date + pd.DateOffset(weeks=1))]['Mean']
        lwa_ret_all.append(next_week)
        lwa_same_all.append(same_week)
        lwa_next_awoc.append(next_awoc)
    except KeyError:
        continue

lwa_ret_meme = lwa_ret_meme[:-1]
lwa_ret_awoc = lwa_ret_awoc[:-1]
lwa_ret_all = np.array(lwa_ret_all)
lwa_ret_meme = np.array(lwa_ret_meme)
lwa_same_all = np.array(lwa_same_all)
lwa_ret_awoc = np.array(lwa_ret_awoc)
lwa_next_awoc = np.array(lwa_next_awoc)

r_val, p_val = pearsonr(lwa_ret_meme, lwa_ret_all)
print(f'last, avg, weekly-- r val: {r_val}, p val: {p_val}')


#last, med, weekly
lwm_ret_all = []
lwm_ret_meme = []
lwm_same_all = []
lwm_ret_awoc = []
lwm_next_awoc = []

for row in last_w_med.iterrows():
    lwm_ret_meme.append(last_weekly_comb['Memecoin'].loc[row[0]]['Median'])
    lwm_ret_awoc.append(last_weekly_comb['AllwoMC'].loc[row[0]]['Median'])
    out_perf_weeks['lwm'].append(row[0])

for date in last_w_med.index.tolist():
    try:
        next_week = last_weekly_comb['All'].loc[(date + pd.DateOffset(weeks=1))]['Median']
        same_week = last_weekly_comb['All'].loc[date]['Median']
        next_awoc = last_weekly_comb['AllwoMC'].loc[(date + pd.DateOffset(weeks=1))]['Median']
        lwm_ret_all.append(next_week)
        lwm_same_all.append(same_week)
        lwm_next_awoc.append(next_awoc)
    except KeyError:
        continue

lwm_ret_meme = lwm_ret_meme[:-1]
lwm_ret_awoc = lwm_ret_awoc[:-1]
lwm_ret_all = np.array(lwm_ret_all)
lwm_ret_meme = np.array(lwm_ret_meme)
lwm_same_all = np.array(lwm_same_all)
lwm_ret_awoc = np.array(lwm_ret_awoc)
lwm_next_awoc = np.array(lwm_next_awoc)

r_val, p_val = pearsonr(lwm_ret_meme, lwm_ret_all)
print(f'last, med, weekly-- r val: {r_val}, p val: {p_val}')


#median, avg, weekly
mwa_ret_all = []
mwa_ret_meme = []
mwa_same_all = []
mwa_ret_awoc = []
mwa_next_awoc = []

for row in median_w_avg.iterrows():
    mwa_ret_meme.append(median_weekly_comb['Memecoin'].loc[row[0]]['Mean'])
    mwa_ret_awoc.append(median_weekly_comb['AllwoMC'].loc[row[0]]['Mean'])
    out_perf_weeks['mwa'].append(row[0])

for date in median_w_avg.index.tolist():
    try:
        next_week = median_weekly_comb['All'].loc[(date + pd.DateOffset(weeks=1))]['Mean']
        same_week = median_weekly_comb['All'].loc[date]['Mean']
        next_awoc = median_weekly_comb['AllwoMC'].loc[(date + pd.DateOffset(weeks=1))]['Mean']
        mwa_ret_all.append(next_week)
        mwa_same_all.append(same_week)
        mwa_next_awoc.append(next_awoc)
    except KeyError:
        continue

mwa_ret_meme = mwa_ret_meme[:-1]
mwa_ret_awoc = mwa_ret_awoc[:-1]
mwa_ret_all = np.array(mwa_ret_all)
mwa_ret_meme = np.array(mwa_ret_meme)
mwa_same_all = np.array(mwa_same_all)
mwa_ret_awoc = np.array(mwa_ret_awoc)
mwa_next_awoc = np.array(mwa_next_awoc)

r_val, p_val = pearsonr(mwa_ret_meme, mwa_ret_all)
print(f'median, avg, weekly-- r val: {r_val}, p val: {p_val}')


#median, med, weekly
mwm_ret_all = []
mwm_ret_meme = []
mwm_same_all = []
mwm_ret_awoc = []
mwm_next_awoc = []

for row in median_w_med.iterrows():
    mwm_ret_meme.append(median_weekly_comb['Memecoin'].loc[row[0]]['Median'])
    mwm_ret_awoc.append(median_weekly_comb['AllwoMC'].loc[row[0]]['Median'])
    out_perf_weeks['mwm'].append(row[0])


for date in median_w_med.index.tolist():
    try:
        next_week = median_weekly_comb['All'].loc[(date + pd.DateOffset(weeks=1))]['Median']
        same_week = median_weekly_comb['All'].loc[date]['Median']
        next_awoc = median_weekly_comb['AllwoMC'].loc[(date + pd.DateOffset(weeks=1))]['Median']
        mwm_ret_all.append(next_week)
        mwm_same_all.append(same_week)
        mwm_next_awoc.append(next_awoc)
    except KeyError:
        continue

mwm_ret_meme = mwm_ret_meme[:-1]
mwm_ret_awoc = mwm_ret_awoc[:-1]
mwm_ret_all = np.array(mwm_ret_all)
mwm_ret_meme = np.array(mwm_ret_meme)
mwm_same_all = np.array(mwm_same_all)
mwm_ret_awoc = np.array(mwm_ret_awoc)
mwm_next_awoc = np.array(mwm_next_awoc)

r_val, p_val = pearsonr(mwm_ret_meme, mwm_ret_all)
print(f'median, med, weekly-- r val: {r_val}, p val: {p_val}')


#mean, avg, weekly
awa_ret_all = []
awa_ret_meme = []
awa_same_all = []
awa_ret_awoc = []
awa_next_awoc = []

for row in mean_w_avg.iterrows():
    awa_ret_meme.append(mean_weekly_comb['Memecoin'].loc[row[0]]['Mean'])
    awa_ret_awoc.append(mean_weekly_comb['AllwoMC'].loc[row[0]]['Mean'])
    out_perf_weeks['awa'].append(row[0])

for date in mean_w_avg.index.tolist():
    try:
        next_week = mean_weekly_comb['All'].loc[(date + pd.DateOffset(weeks=1))]['Mean']
        same_week = mean_weekly_comb['All'].loc[date]['Mean']
        next_awoc = mean_weekly_comb['AllwoMC'].loc[(date + pd.DateOffset(weeks=1))]['Mean']
        awa_ret_all.append(next_week)
        awa_same_all.append(same_week)
        awa_next_awoc.append(next_awoc)
    except KeyError:
        continue

awa_ret_meme = awa_ret_meme[:-1]
awa_ret_awoc = awa_ret_awoc[:-1]
awa_ret_all = np.array(awa_ret_all)
awa_ret_meme = np.array(awa_ret_meme)
awa_same_all = np.array(awa_same_all)
awa_ret_awoc = np.array(awa_ret_awoc)
awa_next_awoc = np.array(awa_next_awoc)

r_val, p_val = pearsonr(awa_ret_meme, awa_ret_all)
print(f'mean, avg, weekly-- r val: {r_val}, p val: {p_val}')


#mean, med, weekly
awm_ret_all = []
awm_ret_meme = []
awm_same_all = []
awm_ret_awoc = []
awm_next_awoc = []

for row in mean_w_med.iterrows():
    awm_ret_meme.append(mean_weekly_comb['Memecoin'].loc[row[0]]['Median'])
    awm_ret_awoc.append(mean_weekly_comb['AllwoMC'].loc[row[0]]['Median'])
    out_perf_weeks['awm'].append(row[0])

for date in mean_w_med.index.tolist():
    try:
        next_week = mean_weekly_comb['All'].loc[(date + pd.DateOffset(weeks=1))]['Median']
        same_week = mean_weekly_comb['All'].loc[date]['Median']
        next_awoc = mean_weekly_comb['AllwoMC'].loc[(date + pd.DateOffset(weeks=1))]['Median']
        awm_ret_all.append(next_week)
        awm_same_all.append(same_week)
        awm_next_awoc.append(next_awoc)
    except KeyError:
        continue

awm_ret_meme = awm_ret_meme[:-1]
awm_ret_awoc = awm_ret_awoc[:-1]
awm_ret_all = np.array(awm_ret_all)
awm_ret_meme = np.array(awm_ret_meme)
awm_same_all = np.array(awm_same_all)
awm_ret_awoc = np.array(awm_ret_awoc)
awm_next_awoc = np.array(awm_next_awoc)

r_val, p_val = pearsonr(awm_ret_meme, awm_ret_all)
print(f'mean, med, weekly-- r val: {r_val}, p val: {p_val}')

last, avg, weekly-- r val: 0.02573914109065292, p val: 0.8075824666365163
last, med, weekly-- r val: 0.012877570902992411, p val: 0.8957625684682027
median, avg, weekly-- r val: 0.40079722798482165, p val: 5.197596737525357e-05
median, med, weekly-- r val: 0.3270938769630538, p val: 0.0011425872810769413
mean, avg, weekly-- r val: 0.3019495374915806, p val: 0.003823875376534725
mean, med, weekly-- r val: 0.2785664670361525, p val: 0.007500496329899813


In [176]:
# Convert rows of prices to lists for linear regression

#last, avg, monthly
lma_ret_all = []
lma_ret_meme = []
lma_same_all = []
lma_ret_awoc = []
lma_next_awoc = []

for row in last_m_avg.iterrows():
    lma_ret_meme.append(last_monthly_comb['Memecoin'].loc[row[0]]['Mean'])
    lma_ret_awoc.append(last_monthly_comb['AllwoMC'].loc[row[0]]['Mean'])
    out_perf_weeks['lma'].append(row[0])

for date in last_m_avg.index.tolist():
    try:
        next_month = last_monthly_comb['All'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Mean']
        same_month = last_monthly_comb['All'].loc[date]['Mean']
        next_awoc = last_monthly_comb['AllwoMC'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Mean']
        lma_ret_all.append(next_month)
        lma_same_all.append(same_month)
        lma_next_awoc.append(next_awoc)
    except KeyError:
        continue

lma_ret_meme = lma_ret_meme[:-1]
lma_ret_awoc = lma_ret_awoc[:-1]
lma_ret_all = np.array(lma_ret_all)
lma_ret_meme = np.array(lma_ret_meme)
lma_same_all = np.array(lma_same_all)
lma_ret_awoc = np.array(lma_ret_awoc)
lma_next_awoc = np.array(lma_next_awoc)

r_val, p_val = pearsonr(lma_ret_meme, lma_ret_all)
print(f'last, avg, monthly-- r val: {r_val}, p val: {p_val}')


#last, med, monthly
lmm_ret_all = []
lmm_ret_meme = []
lmm_same_all = []
lmm_ret_awoc = []
lmm_next_awoc = []

for row in last_m_med.iterrows():
    lmm_ret_meme.append(last_monthly_comb['Memecoin'].loc[row[0]]['Median'])
    lmm_ret_awoc.append(last_monthly_comb['AllwoMC'].loc[row[0]]['Median'])
    out_perf_weeks['lmm'].append(row[0])

for date in last_m_med.index.tolist():
    try:
        next_month = last_monthly_comb['All'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Median']
        same_month = last_monthly_comb['All'].loc[date]['Median']
        next_awoc = last_monthly_comb['AllwoMC'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Median']
        lmm_ret_all.append(next_month)
        lmm_same_all.append(same_month)
        lmm_next_awoc.append(next_awoc)
    except KeyError:
        continue

lmm_ret_meme = lmm_ret_meme[:-1]
lmm_ret_awoc = lmm_ret_awoc[:-1]
lmm_ret_all = np.array(lmm_ret_all)
lmm_ret_meme = np.array(lmm_ret_meme)
lmm_same_all = np.array(lmm_same_all)
lmm_ret_awoc = np.array(lmm_ret_awoc)
lmm_next_awoc = np.array(lmm_next_awoc)

r_val, p_val = pearsonr(lmm_ret_meme, lmm_ret_all)
print(f'last, med, monthly-- r val: {r_val}, p val: {p_val}')


#median, avg, monthly
mma_ret_all = []
mma_ret_meme = []
mma_same_all = []
mma_ret_awoc = []
mma_next_awoc = []


for row in median_m_avg.iterrows():
    mma_ret_meme.append(median_monthly_comb['Memecoin'].loc[row[0]]['Mean'])
    mma_ret_awoc.append(median_monthly_comb['AllwoMC'].loc[row[0]]['Mean'])
    out_perf_weeks['mma'].append(row[0])

for date in median_m_avg.index.tolist():
    try:
        next_month = median_monthly_comb['All'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Mean']
        same_month = median_monthly_comb['All'].loc[date]['Mean']
        next_awoc = median_monthly_comb['AllwoMC'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Mean']
        mma_ret_all.append(next_month)
        mma_same_all.append(same_month)
        mma_next_awoc.append(next_awoc)
    except KeyError:
        continue

mma_ret_meme = mma_ret_meme[:-1]
mma_ret_awoc = mma_ret_awoc[:-1]
mma_ret_all = np.array(mma_ret_all)
mma_ret_meme = np.array(mma_ret_meme)
mma_same_all = np.array(mma_same_all)
mma_ret_awoc = np.array(mma_ret_awoc)
mma_next_awoc = np.array(mma_next_awoc)

r_val, p_val = pearsonr(mma_ret_meme, mma_ret_all)
print(f'median, avg, monthly-- r val: {r_val}, p val: {p_val}')


#median, med, monthly
mmm_ret_all = []
mmm_ret_meme = []
mmm_same_all = []
mmm_ret_awoc = []
mmm_next_awoc = []

for row in median_m_med.iterrows():
    mmm_ret_meme.append(median_monthly_comb['Memecoin'].loc[row[0]]['Median'])
    mmm_ret_awoc.append(median_monthly_comb['AllwoMC'].loc[row[0]]['Median'])
    out_perf_weeks['mmm'].append(row[0])

for date in median_m_med.index.tolist():
    try:
        next_month = median_monthly_comb['All'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Median']
        same_month = median_monthly_comb['All'].loc[date]['Median']
        next_awoc = median_monthly_comb['AllwoMC'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Median']
        mmm_ret_all.append(next_month)
        mmm_same_all.append(same_month)
        mmm_next_awoc.append(next_awoc)
    except KeyError:
        continue

mmm_ret_meme = mmm_ret_meme[:-1]
mmm_ret_awoc = mmm_ret_awoc[:-1]
mmm_ret_all = np.array(mmm_ret_all)
mmm_ret_meme = np.array(mmm_ret_meme)
mmm_same_all = np.array(mmm_same_all)
mmm_ret_awoc = np.array(mmm_ret_awoc)
mmm_next_awoc = np.array(mmm_next_awoc)

r_val, p_val = pearsonr(mmm_ret_meme, mmm_ret_all)
print(f'median, med, monthly-- r val: {r_val}, p val: {p_val}')


#mean, avg, monthly
ama_ret_all = []
ama_ret_meme = []
ama_same_all = []
ama_ret_awoc = []
ama_next_awoc = []

for row in mean_m_avg.iterrows():
    ama_ret_meme.append(mean_monthly_comb['Memecoin'].loc[row[0]]['Mean'])
    ama_ret_awoc.append(mean_monthly_comb['AllwoMC'].loc[row[0]]['Mean'])
    out_perf_weeks['ama'].append(row[0])

for date in mean_m_avg.index.tolist():
    try:
        next_month = mean_monthly_comb['All'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Mean']
        same_month = mean_monthly_comb['All'].loc[date]['Mean']
        next_awoc = mean_monthly_comb['AllwoMC'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Mean']
        ama_ret_all.append(next_month)
        ama_same_all.append(same_month)
        ama_next_awoc.append(next_awoc)
    except KeyError:
        continue

ama_ret_meme = ama_ret_meme[:-1]
ama_ret_awoc = ama_ret_awoc[:-1]
ama_ret_all = np.array(ama_ret_all)
ama_ret_meme = np.array(ama_ret_meme)
ama_same_all = np.array(ama_same_all)
ama_ret_awoc = np.array(ama_ret_awoc)
ama_next_awoc = np.array(ama_next_awoc)

r_val, p_val = pearsonr(ama_ret_meme, ama_ret_all)
print(f'mean, avg, monthly-- r val: {r_val}, p val: {p_val}')


#mean, med, monthly
amm_ret_all = []
amm_ret_meme = []
amm_same_all = []
amm_ret_awoc = []
amm_next_awoc = []

for row in mean_m_med.iterrows():
    amm_ret_meme.append(mean_monthly_comb['Memecoin'].loc[row[0]]['Median'])
    amm_ret_awoc.append(mean_monthly_comb['AllwoMC'].loc[row[0]]['Median'])
    out_perf_weeks['amm'].append(row[0])

for date in mean_m_med.index.tolist():
    try:
        next_month = mean_monthly_comb['All'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Median']
        same_month = mean_monthly_comb['All'].loc[date]['Median']
        next_awoc = mean_monthly_comb['AllwoMC'].loc[(date + pd.DateOffset(months=1)) + MonthEnd(0)]['Median']
        amm_ret_all.append(next_month)
        amm_same_all.append(same_month)
        amm_next_awoc.append(next_awoc)
    except KeyError:
        continue

amm_ret_meme = amm_ret_meme[:-1]
amm_ret_awoc = amm_ret_awoc[:-1]
amm_ret_all = np.array(amm_ret_all)
amm_ret_meme = np.array(amm_ret_meme)
amm_same_all = np.array(amm_same_all)
amm_ret_awoc = np.array(amm_ret_awoc)
amm_next_awoc = np.array(amm_next_awoc)

r_val, p_val = pearsonr(amm_ret_meme, amm_ret_all)
print(f'mean, med, monthly-- r val: {r_val}, p val: {p_val}')

last, avg, monthly-- r val: 0.5023888533472182, p val: 0.01048882922132753
last, med, monthly-- r val: 0.3641805548866921, p val: 0.06183044044717421
median, avg, monthly-- r val: 0.19127620662578707, p val: 0.35971168180643215
median, med, monthly-- r val: 0.05105930188538512, p val: 0.8003281245261388
mean, avg, monthly-- r val: 0.25474910133189477, p val: 0.20914085861866552
mean, med, monthly-- r val: 0.09828253685057897, p val: 0.6257556638893881


In [177]:
df = pd.DataFrame({'A': amm_ret_all, 'M': amm_ret_meme})
result = sm.ols(formula='A ~ M', data=df).fit()
print(result.params)
print(result.summary())

Intercept    7.389556
M            0.007241
dtype: float64
                            OLS Regression Results                            
Dep. Variable:                      A   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                 -0.030
Method:                 Least Squares   F-statistic:                    0.2438
Date:                Mon, 25 Mar 2024   Prob (F-statistic):              0.626
Time:                        10:21:36   Log-Likelihood:                -128.98
No. Observations:                  27   AIC:                             262.0
Df Residuals:                      25   BIC:                             264.5
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------

In [178]:


everything = {'lwa': last_weekly_comb, 'lwm': last_weekly_comb, 'mwa': median_weekly_comb, 'mwm': median_weekly_comb, 'awa': mean_weekly_comb, 'awm': mean_weekly_comb, 'lma': last_monthly_comb, 'lmm': last_monthly_comb, 'mma': median_monthly_comb, 'mmm': median_monthly_comb, 'ama': mean_monthly_comb, 'amm': mean_monthly_comb}

for key, dfs in everything.items():
    col_names = dfs.keys()
    if key[-1] == 'a':
        new_df = pd.concat([df['Mean'] for df in dfs.values()], axis=1)
        everything[key] = new_df
    elif key[-1] == 'm':
        new_df = pd.concat([df['Median'] for df in dfs.values()], axis=1)
        everything[key] = new_df
    everything[key].columns = dfs.keys()

mc_outmarket_1b = {}
mcs = {}
for key, df in everything.items():
    new_df = df[df.index.isin(out_perf_weeks[key])]
    if key[1] == 'w':
        mcs[key] = new_df['Memecoin'][1:].to_frame()
    else:
        mcs[key] = new_df['Memecoin'].to_frame()

    if key[1] == 'w':
        weeks_before = pd.to_datetime(out_perf_weeks[key]) - pd.DateOffset(weeks=1)
        filtered_df = df[df.index.isin(weeks_before)]
        mc_outmarket_1b[key] = filtered_df
    elif key[1] == 'm':
        months_before = (pd.to_datetime(out_perf_weeks[key]) - pd.DateOffset(months=1)) + MonthEnd(0)
        filtered_df = df[df.index.isin(months_before)]
        mc_outmarket_1b[key] = filtered_df

rp_vals_1 = {'lwa': {}, 'lwm': {}, 'mwa': {}, 'mwm': {}, 'awa': {}, 'awm': {}, 'lma': {}, 'lmm': {}, 'mma': {}, 'mmm': {}, 'ama': {}, 'amm': {}}
for key, df in mc_outmarket_1b.items():
    #if key[0] == 'l': pass
    mc_array = mcs[key]['Memecoin'].values
    for col in df.columns:
        if col == 'Memecoin': continue
        new_df = pd.DataFrame()
        cat_array = df[col].values
        if len(mc_array) != len(cat_array):
            new_df = pd.DataFrame({'Cat': cat_array, 'MC': mc_array[1:]})
        else:
            new_df = pd.DataFrame({'Cat': cat_array, 'MC': mc_array})
        result = sm.ols(formula='MC ~ Cat', data=new_df).fit()
        rp_vals_1[key][col] = result.pvalues[-1]

sig_rp_vals = {}
sig_categories = {}
for cat, vals in rp_vals_1.items():
    sig_rp_vals[cat] = {}
    sig_categories[cat] = []
    for sector, rp in vals.items():
        if rp <= 0.05:
            sig_rp_vals[cat][sector] = rp
            sig_categories[cat].append(sector)

sig_categories_list = sum(sig_categories.values(), [])
print(Counter(sig_categories_list))

Counter({'Entertainment': 9, 'Modular Layer': 5, 'Exchange': 4, 'DeFi': 4, 'Infrastructure': 4, 'All': 4, 'AllwoMC': 4, 'Currency': 4, 'Layer 1': 3, 'n.a.': 1, 'Other': 1})


In [179]:
rp_vals_2 = {'lwa': {}, 'lwm': {}, 'mwa': {}, 'mwm': {}, 'awa': {}, 'awm': {}, 'lma': {}, 'lmm': {}, 'mma': {}, 'mmm': {}, 'ama': {}, 'amm': {}}
for key, df in mc_outmarket_1b.items():
    #if key[0] == 'l': pass
    mc_array = mcs[key]['Memecoin'].values
    for i in range(len(df.columns)):
        if df.columns[i] == 'Memecoin': continue
        for j in range(i+1, len(df.columns)):
            if df.columns[j] == 'Memecoin' or i == j: continue

            cat1_array = df[df.columns[i]].values
            cat2_array = df[df.columns[j]].values

            if len(mc_array) != len(cat2_array) or len(mc_array) != len(cat1_array):
                new_df = pd.DataFrame({'Cat1': cat1_array, 'Cat2': cat2_array, 'MC': mc_array[1:]})
            else:
                new_df = pd.DataFrame({'Cat1': cat1_array, 'Cat2': cat2_array, 'MC': mc_array})
            result = sm.ols(formula='MC ~ Cat1 + Cat2', data=new_df).fit()
            rp_vals_2[key][df.columns[i] + ',' + df.columns[j]] = result.pvalues[1:3]

sig_rp_vals_2 = {}
sig_categories_2 = {}
for cat, vals in rp_vals_2.items():
    sig_rp_vals_2[cat] = {}
    sig_categories_2[cat] = []
    for sector, rp in vals.items():
        if rp[0] <= 0.05 and rp[1] <= 0.05:
            sig_rp_vals_2[cat][sector] = rp
            sig_categories_2[cat].append(sector)

sig_categories_2_list = sum(sig_categories_2.values(), [])
tups2 = [tuple(pair.split(',')) for pair in sig_categories_2_list]
print(Counter(tups2))

Counter({('DeFi', 'Other'): 4, ('Currency', 'Layer 1'): 3, ('Currency', 'All'): 3, ('n.a.', 'Entertainment'): 3, ('Currency', 'DeFi'): 3, ('Layer 1', 'DeFi'): 3, ('DeFi', 'n.a.'): 3, ('DeFi', 'All'): 3, ('DeFi', 'AllwoMC'): 3, ('n.a.', 'All'): 3, ('n.a.', 'AllwoMC'): 3, ('Currency', 'n.a.'): 2, ('Currency', 'AllwoMC'): 2, ('Currency', 'Entertainment'): 2, ('All', 'AllwoMC'): 2, ('Entertainment', 'Other'): 2, ('Entertainment', 'All'): 2, ('Entertainment', 'AllwoMC'): 2, ('Currency', 'Exchange'): 1, ('DeFi', 'Entertainment'): 1, ('Infrastructure', 'n.a.'): 1, ('Infrastructure', 'Other'): 1, ('Layer 1', 'Entertainment'): 1, ('Infrastructure', 'Entertainment'): 1, ('Layer 1', 'Exchange'): 1, ('Exchange', 'Entertainment'): 1, ('Exchange', 'All'): 1, ('Exchange', 'AllwoMC'): 1, ('Layer 1', 'All'): 1, ('Layer 1', 'AllwoMC'): 1})


In [180]:
rp_vals_3 = {'lwa': {}, 'lwm': {}, 'mwa': {}, 'mwm': {}, 'awa': {}, 'awm': {}, 'lma': {}, 'lmm': {}, 'mma': {}, 'mmm': {}, 'ama': {}, 'amm': {}}
for key, df in mc_outmarket_1b.items():
    #if key[0] == 'l': pass
    mc_array = mcs[key]['Memecoin'].values
    for i in range(len(df.columns)):
        if df.columns[i] == 'Memecoin': continue
        for j in range(i+1, len(df.columns)):
            if df.columns[j] == 'Memecoin' or i == j: continue
            for k in range(j+1, len(df.columns)):
                if df.columns[k] == 'Memecoin' or j == k: continue

                cat1_array = df[df.columns[i]].values
                cat2_array = df[df.columns[j]].values
                cat3_array = df[df.columns[k]].values

                if len(mc_array) != len(cat2_array) or len(mc_array) != len(cat1_array) or len(mc_array) != len(cat3_array):
                    nnew_df = pd.DataFrame({'Cat1': cat1_array, 'Cat2': cat2_array, 'Cat3': cat3_array, 'MC': mc_array[1:]})
                else:
                    new_df = pd.DataFrame({'Cat1': cat1_array, 'Cat2': cat2_array, 'Cat3': cat3_array, 'MC': mc_array})
                result = sm.ols(formula='MC ~ Cat1 + Cat2 + Cat3', data=new_df).fit()
                rp_vals_3[key][df.columns[i] + ',' + df.columns[j] + ',' + df.columns[k]] = result.pvalues

sig_rp_vals_3 = {}
sig_categories_3 = {}
for cat, vals in rp_vals_3.items():
    sig_rp_vals_3[cat] = {}
    sig_categories_3[cat] = []
    for sector, rp in vals.items():
        if rp[0] <= 0.05 and rp[1] <= 0.05 and rp[2] <= 0.05:
            sig_rp_vals_3[cat][sector] = rp
            sig_categories_3[cat].append(sector)

sig_categories_3_list = sum(sig_categories_3.values(), [])
tups = [tuple(pair.split(',')) for pair in sig_categories_3_list]
print(Counter(tups))

Counter({('Currency', 'All', 'AllwoMC'): 2, ('Currency', 'DeFi', 'Entertainment'): 2, ('Currency', 'n.a.', 'All'): 2, ('Layer 1', 'n.a.', 'All'): 2, ('Layer 1', 'Exchange', 'DeFi'): 2, ('Currency', 'Layer 1', 'DeFi'): 1, ('Currency', 'Layer 1', 'Other'): 1, ('Layer 1', 'Other', 'All'): 1, ('Layer 1', 'Other', 'AllwoMC'): 1, ('DeFi', 'Other', 'All'): 1, ('DeFi', 'Other', 'AllwoMC'): 1, ('Infrastructure', 'Other', 'All'): 1, ('n.a.', 'Other', 'All'): 1, ('n.a.', 'Other', 'AllwoMC'): 1, ('Currency', 'Exchange', 'DeFi'): 1, ('Currency', 'DeFi', 'Infrastructure'): 1, ('Currency', 'DeFi', 'Modular Layer'): 1, ('Currency', 'n.a.', 'AllwoMC'): 1, ('Layer 1', 'DeFi', 'Infrastructure'): 1, ('Layer 1', 'DeFi', 'Modular Layer'): 1, ('Layer 1', 'DeFi', 'Entertainment'): 1, ('Layer 1', 'n.a.', 'Entertainment'): 1, ('Layer 1', 'n.a.', 'AllwoMC'): 1, ('Exchange', 'DeFi', 'n.a.'): 1, ('Exchange', 'DeFi', 'All'): 1, ('Exchange', 'DeFi', 'AllwoMC'): 1, ('Exchange', 'n.a.', 'Entertainment'): 1, ('DeFi', '

In [181]:
mc_outmarket_1f = {}
for key, df in everything.items():
    new_df = df[df.index.isin(out_perf_weeks[key])]
    mcs[key] = new_df['Memecoin'].to_frame()
    if key[1] == 'w':
        weeks_after = pd.to_datetime(out_perf_weeks[key]) + pd.DateOffset(weeks=1)
        filtered_df = df[df.index.isin(weeks_after)]
        mc_outmarket_1f[key] = filtered_df

    elif key[1] == 'm':
        months_after = (pd.to_datetime(out_perf_weeks[key]) + pd.DateOffset(months=1)) + MonthEnd(0)
        filtered_df = df[df.index.isin(months_after)]
        mc_outmarket_1b[key] = filtered_df

#print(mc_outmarket_1f['lwa'])

In [182]:
rp_vals_1f = {'lwa': {}, 'lwm': {}, 'mwa': {}, 'mwm': {}, 'awa': {}, 'awm': {}, 'lma': {}, 'lmm': {}, 'mma': {}, 'mmm': {}, 'ama': {}, 'amm': {}}
for key, df in mc_outmarket_1f.items():
    #if key[0] == 'l': pass
    mc_array = mcs[key]['Memecoin'].values
    for col in df.columns:
        if col == 'Memecoin': continue
        cat_array = df[col].values
        if len(mc_array) != len(cat_array):
            new_df = pd.DataFrame({'Cat': cat_array, 'MC': mc_array[1:]})
        else:
            new_df = pd.DataFrame({'Cat': cat_array, 'MC': mc_array})
        result = sm.ols(formula='MC ~ Cat', data=new_df).fit()
        rp_vals_1f[key][col] = result.pvalues[-1]

sig_rp_valsf = {}
sig_categoriesf = {}
for cat, vals in rp_vals_1f.items():
    sig_rp_valsf[cat] = {}
    sig_categoriesf[cat] = []
    for sector, rp in vals.items():
        if rp <= 0.05:
            sig_rp_valsf[cat][sector] = rp
            sig_categoriesf[cat].append(sector)

sig_categories_listf = sum(sig_categoriesf.values(), [])
print(Counter(sig_categories_listf))

Counter({'DeFi': 4, 'Currency': 3, 'Layer 1': 3, 'Exchange': 3, 'Infrastructure': 2, 'All': 2, 'AllwoMC': 2, 'Modular Layer': 1, 'Entertainment': 1, 'Other': 1, 'n.a.': 1})


In [197]:
rp_vals_2f = {'lwa': {}, 'lwm': {}, 'mwa': {}, 'mwm': {}, 'awa': {}, 'awm': {}, 'lma': {}, 'lmm': {}, 'mma': {}, 'mmm': {}, 'ama': {}, 'amm': {}}
for key, df in mc_outmarket_1f.items():
    #if key[0] == 'l': pass
    mc_array = mcs[key]['Memecoin'].values
    for i in range(len(df.columns)):
        if df.columns[i] == 'Memecoin': continue
        for j in range(i+1, len(df.columns)):
            if df.columns[j] == 'Memecoin' or i == j: continue

            cat1_array = df[df.columns[i]].values
            cat2_array = df[df.columns[j]].values

            if len(mc_array) != len(cat2_array) or len(mc_array) != len(cat1_array):
                new_df = pd.DataFrame({'Cat1': cat1_array, 'Cat2': cat2_array, 'MC': mc_array[1:]})
            else:
                new_df = pd.DataFrame({'Cat1': cat1_array, 'Cat2': cat2_array, 'MC': mc_array})
            result = sm.ols(formula='MC ~ Cat1 + Cat2', data=new_df).fit()
            rp_vals_2f[key][df.columns[i] + ',' + df.columns[j]] = result.pvalues[1:3]

sig_rp_vals_2f = {}
sig_categories_2f = {}
for cat, vals in rp_vals_2f.items():
    sig_rp_vals_2f[cat] = {}
    sig_categories_2f[cat] = []
    for sector, rp in vals.items():
        if rp[0] <= 0.05 and rp[1] <= 0.05:
            sig_rp_vals_2f[cat][sector] = rp
            sig_categories_2f[cat].append(sector)

sig_categories_2_listf = sum(sig_categories_2f.values(), [])
tups2f = [tuple(pair.split(',')) for pair in sig_categories_2_listf]
print(Counter(tups2f))

Counter({('DeFi', 'Infrastructure'): 4, ('DeFi', 'n.a.'): 3, ('DeFi', 'Entertainment'): 3, ('DeFi', 'All'): 3, ('DeFi', 'AllwoMC'): 3, ('Currency', 'Infrastructure'): 2, ('Currency', 'Other'): 2, ('Layer 1', 'Other'): 2, ('n.a.', 'All'): 2, ('All', 'AllwoMC'): 2, ('DeFi', 'Other'): 2, ('Currency', 'DeFi'): 2, ('Layer 1', 'DeFi'): 2, ('DeFi', 'Modular Layer'): 2, ('Currency', 'Layer 1'): 1, ('Currency', 'Exchange'): 1, ('Layer 1', 'Exchange'): 1, ('Layer 1', 'Infrastructure'): 1, ('Exchange', 'Modular Layer'): 1, ('Infrastructure', 'Modular Layer'): 1, ('Modular Layer', 'Other'): 1, ('n.a.', 'AllwoMC'): 1, ('Other', 'All'): 1, ('Other', 'AllwoMC'): 1, ('Infrastructure', 'All'): 1, ('Infrastructure', 'AllwoMC'): 1})


In [184]:
rp_vals_3f = {'lwa': {}, 'lwm': {}, 'mwa': {}, 'mwm': {}, 'awa': {}, 'awm': {}, 'lma': {}, 'lmm': {}, 'mma': {}, 'mmm': {}, 'ama': {}, 'amm': {}}
for key, df in mc_outmarket_1f.items():
    #if key[0] == 'l': pass
    mc_array = mcs[key]['Memecoin'].values
    for i in range(len(df.columns)):
        if df.columns[i] == 'Memecoin': continue
        for j in range(i+1, len(df.columns)):
            if df.columns[j] == 'Memecoin' or i == j: continue
            for k in range(j+1, len(df.columns)):
                if df.columns[k] == 'Memecoin' or j == k: continue

                cat1_array = df[df.columns[i]].values
                cat2_array = df[df.columns[j]].values
                cat3_array = df[df.columns[k]].values

                if len(mc_array) != len(cat2_array) or len(mc_array) != len(cat1_array) or len(mc_array) != len(cat3_array):
                    new_df = pd.DataFrame({'Cat1': cat1_array, 'Cat2': cat2_array, 'Cat3': cat3_array, 'MC': mc_array[1:]})
                else:
                    new_df = pd.DataFrame({'Cat1': cat1_array, 'Cat2': cat2_array, 'Cat3': cat3_array, 'MC': mc_array})
                result = sm.ols(formula='MC ~ Cat1 + Cat2 + Cat3', data=new_df).fit()
                rp_vals_3f[key][df.columns[i] + ',' + df.columns[j] + ',' + df.columns[k]] = result.pvalues

sig_rp_vals_3f = {}
sig_categories_3f = {}
for cat, vals in rp_vals_3f.items():
    sig_rp_vals_3f[cat] = {}
    sig_categories_3f[cat] = []
    for sector, rp in vals.items():
        if rp[0] <= 0.05 and rp[1] <= 0.05 and rp[2] <= 0.05:
            sig_rp_vals_3f[cat][sector] = rp
            sig_categories_3f[cat].append(sector)

sig_categories_3_listf = sum(sig_categories_3f.values(), [])
tups = [tuple(pair.split(',')) for pair in sig_categories_3_listf]
print(Counter(tups))

Counter({('Layer 1', 'DeFi', 'Infrastructure'): 2, ('Currency', 'Layer 1', 'DeFi'): 1, ('Currency', 'Layer 1', 'Modular Layer'): 1, ('Currency', 'Layer 1', 'n.a.'): 1, ('Currency', 'Layer 1', 'All'): 1, ('Currency', 'Layer 1', 'AllwoMC'): 1, ('Currency', 'Exchange', 'DeFi'): 1, ('Currency', 'Exchange', 'Modular Layer'): 1, ('Currency', 'Exchange', 'n.a.'): 1, ('Currency', 'Exchange', 'Entertainment'): 1, ('Currency', 'Exchange', 'Other'): 1, ('Currency', 'Exchange', 'All'): 1, ('Currency', 'Exchange', 'AllwoMC'): 1, ('Currency', 'Infrastructure', 'Modular Layer'): 1, ('Currency', 'Infrastructure', 'n.a.'): 1, ('Currency', 'Infrastructure', 'Entertainment'): 1, ('Currency', 'Infrastructure', 'All'): 1, ('Currency', 'Infrastructure', 'AllwoMC'): 1, ('Currency', 'Modular Layer', 'Other'): 1, ('Currency', 'n.a.', 'AllwoMC'): 1, ('Currency', 'Other', 'All'): 1, ('Currency', 'Other', 'AllwoMC'): 1, ('Layer 1', 'Infrastructure', 'Modular Layer'): 1, ('Layer 1', 'Infrastructure', 'n.a.'): 1, (

In [226]:
#Get expected returns for each category

all_exp_ret = {}
for method, df in everything.items():
    cat_exp_ret = {}
    for col in df.columns:
        cat_exp_ret[col] = df[col].mean()
    all_exp_ret[method] = cat_exp_ret

diff_exp_ret = {}
for method, df in everything.items():
    new_df = df.copy()
    for col in new_df.columns:
        exp = all_exp_ret[method][col]
        new_df[col] = (new_df[col] - exp)/abs(exp) * 100
    diff_exp_ret[method] = new_df




In [230]:
diff_exp_ret_b = {}
mcs = {}
for key, df in diff_exp_ret.items():
    new_list = []
    new_df = df.copy()
    mcs[key] = new_df['Memecoin'].values[1:]
    new_df = new_df.drop('Memecoin', axis=1)

    for col in new_df.columns:
        new_list.append(pd.DataFrame(new_df[col].values[:-1], columns=[col]))
        temp_df = pd.concat(new_list, axis=1)
    diff_exp_ret_b[key] = temp_df

#print(diff_exp_ret_b['lwa'])
#print(mcs['lwa'])

In [235]:
rp_vals_all_1b = {}
for method, df in diff_exp_ret_b.items():
    df_dict = {}
    mc_array = mcs[method]
    for i in range(len(df.columns)):

        cat_array = df[df.columns[i]].values

        new_df = pd.DataFrame({'MC': mc_array, 'Cat': cat_array})
        result = sm.ols(formula='MC ~ Cat', data=new_df).fit()
        df_dict[df.columns[i]] = result.pvalues
    rp_vals_all_1b[method] = df_dict

sig_vals_all_1b = {}
sig_cat_all_1b = {}
for cat, vals in rp_vals_all_1b.items():
    sig_vals_all_1b[cat] = {}
    sig_cat_all_1b[cat] = []
    for sector, rp in vals.items():
        if rp[1] <= 0.05:
            sig_vals_all_1b[cat][sector] = rp
            sig_cat_all_1b[cat].append(sector)

sig_cat_all_1b_list = sum(sig_cat_all_1b.values(), [])
print(Counter(sig_cat_all_1b_list))


Counter({'Entertainment': 8, 'Exchange': 6, 'Layer 1': 4, 'Currency': 4, 'DeFi': 3, 'All': 3, 'AllwoMC': 3, 'n.a.': 3, 'Infrastructure': 1})


In [237]:
rp_vals_all_2b = {}
for method, df in diff_exp_ret_b.items():
    df_dict = {}
    mc_array = mcs[method]
    for i in range(len(df.columns)):
        if df.columns[i] == 'Memecoin': continue
        for j in range(i+1, len(df.columns)):
            if df.columns[j] == 'Memecoin': continue

            cat_array1 = df[df.columns[i]].values
            cat_array2 = df[df.columns[j]].values


            new_df = pd.DataFrame({'MC': mc_array, 'Cat1': cat_array1, 'Cat2': cat_array2})
            result = sm.ols(formula='MC ~ Cat1 + Cat2', data=new_df).fit()
            df_dict[df.columns[i] + ',' + df.columns[j]] = result.pvalues
    rp_vals_all_2b[method] = df_dict

sig_vals_all_2b = {}
sig_cat_all_2b = {}
for cat, vals in rp_vals_all_2b.items():
    sig_vals_all_2b[cat] = {}
    sig_cat_all_2b[cat] = []
    for sector, rp in vals.items():
        if rp[1] <= 0.05 and rp[2] <= 0.05:
            sig_vals_all_2b[cat][sector] = rp
            sig_cat_all_2b[cat].append(sector)

sig_cat_all_2b_list = sum(sig_cat_all_2b.values(), [])
tups2b_all = [tuple(pair.split(',')) for pair in sig_cat_all_2b_list]
print(Counter(tups2b_all))

Counter({('DeFi', 'Other'): 4, ('DeFi', 'n.a.'): 3, ('Currency', 'Layer 1'): 2, ('Currency', 'Exchange'): 2, ('Currency', 'Entertainment'): 2, ('n.a.', 'Entertainment'): 2, ('Entertainment', 'Other'): 2, ('Entertainment', 'All'): 2, ('Entertainment', 'AllwoMC'): 2, ('All', 'AllwoMC'): 2, ('n.a.', 'AllwoMC'): 2, ('Layer 1', 'DeFi'): 2, ('Layer 1', 'Other'): 2, ('DeFi', 'Entertainment'): 1, ('Currency', 'DeFi'): 1, ('Layer 1', 'Entertainment'): 1, ('DeFi', 'All'): 1, ('DeFi', 'AllwoMC'): 1, ('Other', 'All'): 1, ('n.a.', 'All'): 1, ('Currency', 'Infrastructure'): 1, ('Currency', 'n.a.'): 1, ('Currency', 'All'): 1, ('Currency', 'AllwoMC'): 1, ('Layer 1', 'Infrastructure'): 1})


In [238]:
rp_vals_all_3b = {}
for method, df in diff_exp_ret_b.items():
    df_dict = {}
    mc_array = mcs[method]
    for i in range(len(df.columns)):
        if df.columns[i] == 'Memecoin': continue
        for j in range(i+1, len(df.columns)):
            if df.columns[j] == 'Memecoin': continue
            for k in range(j+1, len(df.columns)):
                if df.columns[k] == 'Memecoin': continue

                cat_array1 = df[df.columns[i]].values
                cat_array2 = df[df.columns[j]].values
                cat_array3 = df[df.columns[k]].values
                new_df = pd.DataFrame({'MC': mc_array, 'Cat1': cat_array1, 'Cat2': cat_array2, 'Cat3': cat_array3})
                result = sm.ols(formula='MC ~ Cat1 + Cat2 + Cat3', data=new_df).fit()
                df_dict[df.columns[i] + ',' + df.columns[j] + ',' + df.columns[k]] = result.pvalues
    rp_vals_all_3b[method] = df_dict

sig_vals_all_3b = {}
sig_cat_all_3b = {}
for cat, vals in rp_vals_all_3b.items():
    sig_vals_all_3b[cat] = {}
    sig_cat_all_3b[cat] = []
    for sector, rp in vals.items():
        if rp[1] <= 0.05 and rp[2] <= 0.05 and rp[3] <= 0.05:
            sig_vals_all_3b[cat][sector] = rp
            sig_cat_all_3b[cat].append(sector)

sig_cat_all_3b_list = sum(sig_cat_all_3b.values(), [])
tups3b_all = [tuple(pair.split(',')) for pair in sig_cat_all_3b_list]
print(Counter(tups3b_all))

Counter({('Currency', 'n.a.', 'All'): 3, ('Layer 1', 'n.a.', 'AllwoMC'): 3, ('Currency', 'n.a.', 'AllwoMC'): 2, ('Layer 1', 'Exchange', 'Entertainment'): 2, ('Exchange', 'Entertainment', 'Other'): 2, ('Exchange', 'n.a.', 'Entertainment'): 2, ('Layer 1', 'n.a.', 'All'): 2, ('DeFi', 'Entertainment', 'All'): 2, ('DeFi', 'Entertainment', 'AllwoMC'): 2, ('Currency', 'Exchange', 'DeFi'): 2, ('Exchange', 'DeFi', 'Other'): 2, ('Exchange', 'DeFi', 'Entertainment'): 1, ('Currency', 'Exchange', 'Entertainment'): 1, ('Currency', 'All', 'AllwoMC'): 1, ('Exchange', 'Entertainment', 'All'): 1, ('Exchange', 'Entertainment', 'AllwoMC'): 1, ('Exchange', 'All', 'AllwoMC'): 1, ('Entertainment', 'All', 'AllwoMC'): 1, ('Layer 1', 'DeFi', 'Entertainment'): 1, ('DeFi', 'n.a.', 'Entertainment'): 1, ('DeFi', 'Entertainment', 'Other'): 1, ('Exchange', 'Other', 'All'): 1, ('n.a.', 'Other', 'All'): 1, ('n.a.', 'Other', 'AllwoMC'): 1, ('Entertainment', 'Other', 'All'): 1, ('Entertainment', 'Other', 'AllwoMC'): 1, (

In [239]:
diff_exp_ret_a = {}
mcs = {}
for key, df in diff_exp_ret.items():
    new_list = []
    new_df = df.copy()
    mcs[key] = new_df['Memecoin'].values[:-1]
    new_df = new_df.drop('Memecoin', axis=1)

    for col in new_df.columns:
        new_list.append(pd.DataFrame(new_df[col].values[1:], columns=[col]))
        temp_df = pd.concat(new_list, axis=1)
    diff_exp_ret_a[key] = temp_df

#print(diff_exp_ret_b['lwa'])
#print(mcs['lwa'])

In [240]:
rp_vals_all_1a = {}
for method, df in diff_exp_ret_a.items():
    df_dict = {}
    mc_array = mcs[method]
    for i in range(len(df.columns)):

        cat_array = df[df.columns[i]].values

        new_df = pd.DataFrame({'MC': mc_array, 'Cat': cat_array})
        result = sm.ols(formula='MC ~ Cat', data=new_df).fit()
        df_dict[df.columns[i]] = result.pvalues
    rp_vals_all_1a[method] = df_dict

sig_vals_all_1a = {}
sig_cat_all_1a = {}
for cat, vals in rp_vals_all_1a.items():
    sig_vals_all_1a[cat] = {}
    sig_cat_all_1a[cat] = []
    for sector, rp in vals.items():
        if rp[1] <= 0.05:
            sig_vals_all_1a[cat][sector] = rp
            sig_cat_all_1a[cat].append(sector)

sig_cat_all_1a_list = sum(sig_cat_all_1a.values(), [])
print(Counter(sig_cat_all_1a_list))


Counter({'Exchange': 11, 'Entertainment': 9, 'Layer 1': 7, 'Other': 6, 'Currency': 5, 'All': 5, 'AllwoMC': 5, 'DeFi': 4, 'Infrastructure': 4, 'n.a.': 4})


In [241]:
rp_vals_all_2a = {}
for method, df in diff_exp_ret_a.items():
    df_dict = {}
    mc_array = mcs[method]
    for i in range(len(df.columns)):
        if df.columns[i] == 'Memecoin': continue
        for j in range(i+1, len(df.columns)):
            if df.columns[j] == 'Memecoin': continue

            cat_array1 = df[df.columns[i]].values
            cat_array2 = df[df.columns[j]].values


            new_df = pd.DataFrame({'MC': mc_array, 'Cat1': cat_array1, 'Cat2': cat_array2})
            result = sm.ols(formula='MC ~ Cat1 + Cat2', data=new_df).fit()
            df_dict[df.columns[i] + ',' + df.columns[j]] = result.pvalues
    rp_vals_all_2a[method] = df_dict

sig_vals_all_2a = {}
sig_cat_all_2a = {}
for cat, vals in rp_vals_all_2a.items():
    sig_vals_all_2a[cat] = {}
    sig_cat_all_2a[cat] = []
    for sector, rp in vals.items():
        if rp[1] <= 0.05 and rp[2] <= 0.05:
            sig_vals_all_2a[cat][sector] = rp
            sig_cat_all_2a[cat].append(sector)

sig_cat_all_2a_list = sum(sig_cat_all_2a.values(), [])
tups2a_all = [tuple(pair.split(',')) for pair in sig_cat_all_2a_list]
print(Counter(tups2a_all))

Counter({('DeFi', 'AllwoMC'): 8, ('Layer 1', 'DeFi'): 7, ('DeFi', 'All'): 7, ('Currency', 'Exchange'): 6, ('Exchange', 'DeFi'): 6, ('DeFi', 'n.a.'): 6, ('Layer 1', 'Exchange'): 5, ('Exchange', 'All'): 5, ('Exchange', 'AllwoMC'): 5, ('Exchange', 'Modular Layer'): 4, ('Exchange', 'Infrastructure'): 4, ('Exchange', 'Other'): 4, ('Currency', 'All'): 4, ('Exchange', 'Entertainment'): 4, ('Currency', 'Entertainment'): 4, ('DeFi', 'Entertainment'): 4, ('Exchange', 'n.a.'): 3, ('Currency', 'Layer 1'): 3, ('Layer 1', 'AllwoMC'): 3, ('Currency', 'AllwoMC'): 3, ('Entertainment', 'Other'): 3, ('DeFi', 'Infrastructure'): 3, ('Layer 1', 'Infrastructure'): 2, ('Infrastructure', 'AllwoMC'): 2, ('n.a.', 'All'): 2, ('Infrastructure', 'n.a.'): 2, ('n.a.', 'Other'): 2, ('Currency', 'n.a.'): 2, ('Layer 1', 'Other'): 2, ('Layer 1', 'Entertainment'): 2, ('Infrastructure', 'Entertainment'): 2, ('Modular Layer', 'Entertainment'): 2, ('n.a.', 'Entertainment'): 2, ('Entertainment', 'All'): 2, ('Entertainment', '

In [242]:
rp_vals_all_3a = {}
for method, df in diff_exp_ret_a.items():
    df_dict = {}
    mc_array = mcs[method]
    for i in range(len(df.columns)):
        if df.columns[i] == 'Memecoin': continue
        for j in range(i+1, len(df.columns)):
            if df.columns[j] == 'Memecoin': continue
            for k in range(j+1, len(df.columns)):
                if df.columns[k] == 'Memecoin': continue

                cat_array1 = df[df.columns[i]].values
                cat_array2 = df[df.columns[j]].values
                cat_array3 = df[df.columns[k]].values
                new_df = pd.DataFrame({'MC': mc_array, 'Cat1': cat_array1, 'Cat2': cat_array2, 'Cat3': cat_array3})
                result = sm.ols(formula='MC ~ Cat1 + Cat2 + Cat3', data=new_df).fit()
                df_dict[df.columns[i] + ',' + df.columns[j] + ',' + df.columns[k]] = result.pvalues
    rp_vals_all_3a[method] = df_dict

sig_vals_all_3a = {}
sig_cat_all_3a = {}
for cat, vals in rp_vals_all_3a.items():
    sig_vals_all_3a[cat] = {}
    sig_cat_all_3a[cat] = []
    for sector, rp in vals.items():
        if rp[1] <= 0.05 and rp[2] <= 0.05 and rp[3] <= 0.05:
            sig_vals_all_3a[cat][sector] = rp
            sig_cat_all_3a[cat].append(sector)

sig_cat_all_3a_list = sum(sig_cat_all_3a.values(), [])
tups3a_all = [tuple(pair.split(',')) for pair in sig_cat_all_3a_list]
print(Counter(tups3a_all))

Counter({('Layer 1', 'n.a.', 'AllwoMC'): 5, ('Exchange', 'Modular Layer', 'Other'): 4, ('Currency', 'Layer 1', 'DeFi'): 4, ('Currency', 'Layer 1', 'Infrastructure'): 3, ('Currency', 'DeFi', 'AllwoMC'): 3, ('Exchange', 'Other', 'All'): 3, ('Exchange', 'Other', 'AllwoMC'): 3, ('Modular Layer', 'Entertainment', 'Other'): 3, ('Currency', 'Infrastructure', 'AllwoMC'): 2, ('DeFi', 'Infrastructure', 'AllwoMC'): 2, ('Currency', 'DeFi', 'All'): 2, ('DeFi', 'n.a.', 'All'): 2, ('Other', 'All', 'AllwoMC'): 2, ('Currency', 'Layer 1', 'Other'): 2, ('Layer 1', 'Entertainment', 'AllwoMC'): 2, ('Modular Layer', 'n.a.', 'Entertainment'): 2, ('Modular Layer', 'Entertainment', 'All'): 2, ('Modular Layer', 'Entertainment', 'AllwoMC'): 2, ('Exchange', 'n.a.', 'AllwoMC'): 1, ('Currency', 'Layer 1', 'Exchange'): 1, ('Currency', 'Exchange', 'n.a.'): 1, ('Layer 1', 'Exchange', 'DeFi'): 1, ('Layer 1', 'Exchange', 'Modular Layer'): 1, ('Layer 1', 'DeFi', 'Modular Layer'): 1, ('Exchange', 'DeFi', 'n.a.'): 1, ('DeF