In [23]:
import pandas as pd
import numpy as np
from datetime import date
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
!pip3 install seaborn
import seaborn as sns
import random

You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [3]:
# Load the full market-history table; later cells filter this frame per coin (by "symbol" or "name")
df = pd.read_csv("crypto-markets.csv")

In [4]:
# Position-aligned inputs: entry i of each *_list belongs to currencies_of_interest[i]
currencies_of_interest = ["BTC", "XRP"]
starting_balances_list = [5, 20]
crypto_usd_fx_list = [65000, 1.1]

# One independent history frame per ticker symbol, re-indexed from 0
curr_dic = {}
for currency in currencies_of_interest:
    symbol_mask = df["symbol"] == currency
    curr_dic[currency] = df.loc[symbol_mask, :].reset_index(drop=True)
    print(curr_dic[currency].head())

      slug symbol     name        date  ranknow    open    high     low  \
0  bitcoin    BTC  Bitcoin  2013-04-28        1  135.30  135.98  132.10   
1  bitcoin    BTC  Bitcoin  2013-04-29        1  134.44  147.49  134.00   
2  bitcoin    BTC  Bitcoin  2013-04-30        1  144.00  146.93  134.05   
3  bitcoin    BTC  Bitcoin  2013-05-01        1  139.00  139.89  107.72   
4  bitcoin    BTC  Bitcoin  2013-05-02        1  116.38  125.60   92.28   

    close  volume        market  close_ratio  spread  
0  134.21     0.0  1.488567e+09       0.5438    3.88  
1  144.54     0.0  1.603769e+09       0.7813   13.49  
2  139.00     0.0  1.542813e+09       0.3843   12.88  
3  116.99     0.0  1.298955e+09       0.2882   32.17  
4  105.21     0.0  1.168517e+09       0.3881   33.32  
     slug symbol name        date  ranknow      open      high       low  \
0  ripple    XRP  XRP  2013-08-04        2  0.005874  0.005927  0.005874   
1  ripple    XRP  XRP  2013-08-05        2  0.005875  0.005980  0.0

In [5]:
# Columns whose previous-day value is used as a predictor of the current day's close
lagged_columns = ["high", "low", "market"]

for currency in currencies_of_interest:
    # Restructure data so that date, open, past market cap(s), past high/low(s) are used to predict close price
    history = curr_dic[currency].copy()
    # shift(1) moves every value down one row, so row i carries day i-1's high / low / market cap
    history[lagged_columns] = history[lagged_columns].shift(1)
    # The first row has no previous day to borrow from, so it is discarded
    history = (
        history.iloc[1:]
        .drop(columns=["slug", "symbol", "name", "ranknow", "close_ratio", "spread", "volume"])
        .rename(columns={"high": "Prev High", "low": "Prev Low", "market": "Prev Marketcap", "close": "Close"})
    )

    # Change date column into an integer for number of days since start date.
    # Parsing with pandas keeps the imported `date` name bound to the datetime.date class.
    parsed_dates = pd.to_datetime(history["date"])
    history["date"] = (parsed_dates - parsed_dates.iloc[0]).dt.days

    curr_dic[currency] = history
    print(curr_dic[currency].head())

  date    open  Prev High  Prev Low   Close  Prev Marketcap
1    0  134.44     135.98    132.10  144.54    1.488567e+09
2    1  144.00     147.49    134.00  139.00    1.603769e+09
3    2  139.00     146.93    134.05  116.99    1.542813e+09
4    3  116.38     139.89    107.72  105.21    1.298955e+09
5    4  106.25     125.60     92.28   97.75    1.168517e+09
  date      open  Prev High  Prev Low     Close  Prev Marketcap
1    0  0.005875   0.005927  0.005874  0.005613      45983577.0
2    1  0.005637   0.005980  0.005613  0.004680      43879157.0
3    2  0.004669   0.005661  0.004629  0.004417      36591008.0
4    3  0.004397   0.004682  0.004333  0.004254      34534121.0
5    4  0.004257   0.004424  0.004175  0.004291      33258632.0


In [6]:
# Partition each coin's prepared frame into training and validation parts,
# then separate the predictor columns from the "Close" target.
set_dic = {}
predictor_cols = ["date", "Prev High", "Prev Low", "open", "Prev Marketcap"]
for currency in currencies_of_interest:
    modelling_frame = curr_dic[currency][predictor_cols + ["Close"]]
    # Fixed random_state keeps the shuffle (and therefore the fitted model) repeatable
    train_part, val_part = train_test_split(modelling_frame, test_size=0.33, random_state=1234)
    set_dic["train_"+currency] = train_part
    set_dic["val_"+currency] = val_part
    set_dic["X_train_"+currency] = train_part[predictor_cols]
    set_dic["X_test_"+currency] = val_part[predictor_cols]
    set_dic["Y_train_"+currency] = train_part["Close"]
    set_dic["Y_test_"+currency] = val_part["Close"]

In [7]:
# Fit one linear regression per coin and report its RMSE on the held-out rows.
# Author's note: plain linear regression gave the lower RMSE, which was unexpected
# (the model it was compared against is not shown in this notebook section).
# RMSE is used instead of accuracy because the target, Close, is a continuous variable.
model_dic = {}
for currency in currencies_of_interest:
    regressor = LinearRegression()
    model_dic[currency] = regressor
    # Both keys end up referring to the estimator fitted on this coin's training rows
    model_dic["reg_"+currency] = regressor.fit(set_dic["X_train_"+currency], set_dic["Y_train_"+currency])
    # Despite the name, y_pred_btc holds the predictions for whichever coin is current
    y_pred_btc = model_dic["reg_"+currency].predict(set_dic["X_test_"+currency])
    rmse = mean_squared_error(set_dic["Y_test_"+currency], y_pred_btc) ** 0.5
    print(f"RMSE {currency}: {rmse!s}")

RMSE BTC: 292.3605500168556
RMSE XRP: 0.0251012486948646


In [8]:
# Per-coin lookups keyed by ticker; the *_list inputs are read by position
starting_balances = {}
crypto_usd_fx = {}
starting_indicators = {}
predictions = {}

for position, currency in enumerate(currencies_of_interest):
    starting_balances[currency] = starting_balances_list[position]
    crypto_usd_fx[currency] = crypto_usd_fx_list[position]

    # First prepared row of the coin, turned back into a one-row frame so predict() accepts it
    first_row = curr_dic[currency].iloc[0, :]
    starting_indicators[currency] = first_row[["date", "Prev High", "Prev Low", "open", "Prev Marketcap"]].to_frame().T
    predictions[currency] = model_dic["reg_"+currency].predict(starting_indicators[currency])

# NOTE(review): revenue multiplies the FX rate by the predicted close while cost multiplies it
# by the starting balance — confirm this is the intended profit definition.
revenue = sum(crypto_usd_fx[currency]*predictions[currency] for currency in currencies_of_interest)
cost = sum(crypto_usd_fx[currency]*starting_balances[currency] for currency in currencies_of_interest)
profit = revenue - cost
print("Profit: " + str(profit[0]))

# Notebook-level name kept with the value the cell has always left behind
counter = 0

In [9]:
# Re-read the raw market data and keep only 2018 rows, excluding 2018-11-30
crypto_df = pd.read_csv("crypto-markets.csv")
crypto_df = crypto_df[crypto_df['date'].str.startswith("2018")]
# .copy() makes the filtered frame independent, so adding a column below does not
# write onto a slice of the CSV frame (which triggers SettingWithCopyWarning)
crypto_df = crypto_df[crypto_df['date'] != "2018-11-30"].copy()
# Open-to-close move of each day, as a percentage of the open
crypto_df['daily_returns'] = (crypto_df['close'] - crypto_df['open']) / (crypto_df['open']) * 100

# Bitcoin rows serve as the benchmark series for the beta calculation
bt_df = crypto_df[crypto_df['name'] == "Bitcoin"]

four_two_df = crypto_df[crypto_df['name'] == "Project-X"]

names = crypto_df['name'].unique()

# np.std is kept as-is so the benchmark deviation is computed exactly as before
benchmark_std = np.std(bt_df['daily_returns'])
# Filled by the loop over names further down this cell: lst collects betas, lst1 correlations
lst = list()
lst1 = list()
def calculate_volatility(crypto_name):
  """Return (beta, correlation) of one coin's daily returns against the Bitcoin benchmark.

  Reads the notebook-level frames ``crypto_df`` and ``bt_df`` and the scalar
  ``benchmark_std``. The coin is matched on the "name" column, case-insensitively.

  Returns:
    A tuple ``(beta, correlation)`` where ``correlation`` is the correlation
    coefficient between the coin's and the benchmark's ``daily_returns`` and
    ``beta = correlation * (coin_std / benchmark_std)``.
    ``None`` when the coin does not have the same number of rows as the
    benchmark, because the two series could not be paired up.
  """
  b1_df = crypto_df[crypto_df['name'].str.lower() == crypto_name.lower()]
  # np.corrcoef needs equal-length inputs, so compare against the benchmark's own
  # row count instead of a hard-coded 333
  if len(b1_df) != len(bt_df):
    return None
  b1_std = np.std(b1_df['daily_returns'])
  correlation = np.corrcoef(b1_df['daily_returns'], bt_df['daily_returns'])[0][1]
  return correlation * (b1_std / benchmark_std), correlation

for name in names:
  temp = calculate_volatility(name)
  # None marks a coin that calculate_volatility skipped; a placeholder keeps
  # lst / lst1 aligned one-to-one with names
  beta, corr = temp if temp is not None else (None, None)
  lst.append(beta)
  lst1.append(corr)

names = [x.lower() for x in names]
df_betas = pd.DataFrame({"Name": names, "Beta": lst, "Correlation": lst1})
# Drop the skipped coins (their Beta is missing)
df_cleaned = df_betas[df_betas['Beta'].notna()]
# sort_values returns a new frame; keep it so the highest-beta-first ordering reaches the CSV
df_cleaned = df_cleaned.sort_values("Beta", ascending = False)

df_cleaned.to_csv("crypto_betas")

In [10]:
# Reload the beta table written to "crypto_betas"; calculate_risk below looks coins up in it by "Name"
df_betas = pd.read_csv("crypto_betas")
# Not the last expression of the cell, so this preview is not displayed
df_betas.head()

def calculate_risk(names, percentages):
  """Weighted sum of the betas of the named coins.

  Each percentage is converted with int() before use. When the converted
  percentages do not total 100, an explanatory string is returned instead of
  a number. Otherwise every name is lower-cased, looked up in the
  notebook-level ``df_betas`` frame, and its "Beta" is scaled by
  percentage / 100 before the scaled values are summed.
  """
  weights = [int(percentage) for percentage in percentages]
  if sum(weights) != 100:
    return "The Percentages Do Not Add Up To 100!"

  weighted_betas = []
  for idx, coin in enumerate(names):
    coin_rows = df_betas[df_betas['Name'] == coin.lower()]
    # First matching row supplies the beta for this coin
    weighted_betas.append(coin_rows['Beta'].iloc[0] * (weights[idx] / 100))
  return np.sum(weighted_betas)

In [39]:
# Draw 20 random two-coin portfolios and bucket them by risk score rounded to one decimal
currencies = df_betas["Name"].unique().tolist()
portfolio_size = 2
risk_portfolios = {}
for trial in range(20):
    curr_temp = random.sample(currencies, portfolio_size)
    first_coin, second_coin = curr_temp
    # Whole-number weight from 1 to 100 for the first coin; the second coin gets the remainder
    weight = random.choice(range(1, 101))
    risk_lvl = calculate_risk([first_coin, second_coin], [weight, 100-weight])
    risk_bucket = round(risk_lvl, 1)
    risk_portfolios.setdefault(risk_bucket, []).append((first_coin, second_coin, weight, 100-weight))
print(risk_portfolios)

{1.3: [('ubiq', 'rchain', 37, 63), ('lunyr', 'ark', 26, 74), ('feathercoin', 'spreadcoin', 3, 97), ('incakoin', 'elementrem', 45, 55)], 0.9: [('golfcoin', 'feathercoin', 35, 65), ('stellar', 'filecoin [futures]', 37, 63)], 1.2: [('stronghands', 'ark', 7, 93), ('alqo', 'storj', 72, 28), ('iconomi', 'hush', 73, 27), ('qlc chain', 'unobtanium', 85, 15), ('putincoin', 'cloakcoin', 20, 80)], 1.4: [('cappasity', 'trollcoin', 46, 54), ('nekonium', 'wavesgo', 73, 27)], 1.1: [('ethereum classic', 'etheroll', 37, 63), ('databits', 'neumark', 81, 19), ('solarcoin', 'maidsafecoin', 71, 29)], 0.6: [('xenon', 'crave', 88, 12)], 1.0: [('life', 'edgeless', 23, 77)], 0.8: [('namecoin', 'leocoin', 87, 13), ('petrodollar', 'autonio', 99, 1)]}


In [29]:
def _lagged_feature_frame(history):
    """Turn one coin's raw daily rows into the modelling frame used by profit_predictor.

    The high / low / market columns are replaced by the previous row's values,
    the first row (which has no previous row) is dropped, unused columns are
    removed, and "date" becomes the number of days since the first remaining row.
    """
    lagged_columns = ["high", "low", "market"]
    frame = history.reset_index(drop=True)
    # shift(1) moves every value down one row, so row i carries day i-1's values
    frame[lagged_columns] = frame[lagged_columns].shift(1)
    frame = (
        frame.iloc[1:]
        .drop(columns=["slug", "symbol", "name", "ranknow", "close_ratio", "spread", "volume"])
        .rename(columns={"high": "Prev High", "low": "Prev Low", "market": "Prev Marketcap", "close": "Close"})
    )
    parsed_dates = pd.to_datetime(frame["date"])
    frame["date"] = (parsed_dates - parsed_dates.iloc[0]).dt.days
    return frame


def profit_predictor(currencies_of_interest, starting_balances_list, crypto_usd_fx_list):
    """Predict the profit figure for a set of coins from per-coin linear regressions.

    For every coin a linear regression is fitted on the training part of a
    fixed-seed split of that coin's lagged-feature frame, and the close of the
    coin's first prepared row is predicted.

    Args:
        currencies_of_interest: lower-case coin names, matched against the
            lower-cased "name" column of the notebook-level ``df`` frame.
        starting_balances_list: one balance per coin, in the same order.
        crypto_usd_fx_list: one FX multiplier per coin, in the same order.

    Returns:
        ``sum(fx * predicted_close) - sum(fx * starting_balance)``. Because each
        prediction is a length-1 array, the result is a length-1 array whenever
        at least one coin is given.

    Raises:
        KeyError: if a requested coin has no rows in ``df``.
    """
    feature_cols = ["date", "Prev High", "Prev Low", "open", "Prev Marketcap"]
    lowered_names = df["name"].str.lower()

    starting_balances = {}
    crypto_usd_fx = {}
    predictions = {}

    for position, currency in enumerate(currencies_of_interest):
        starting_balances[currency] = starting_balances_list[position]
        crypto_usd_fx[currency] = crypto_usd_fx_list[position]

        history = df.loc[lowered_names == currency, :].copy()
        if history.empty:
            raise KeyError(f"no rows found for currency {currency!r}")
        prepared = _lagged_feature_frame(history)

        # Only the training part is used for fitting; the fixed random_state keeps
        # the split (and therefore the fitted model) repeatable
        train_part, _ = train_test_split(
            prepared[feature_cols + ["Close"]], test_size=0.33, random_state=1234)
        regressor = LinearRegression().fit(train_part[feature_cols], train_part["Close"])

        # Predict the close of the coin's first prepared row (kept as a one-row frame)
        predictions[currency] = regressor.predict(prepared.iloc[[0]][feature_cols])

    # NOTE(review): revenue multiplies the FX rate by the predicted close while cost multiplies
    # it by the starting balance — confirm this is the intended profit definition.
    revenue = sum([crypto_usd_fx[currency]*predictions[currency] for currency in currencies_of_interest])
    cost = sum([crypto_usd_fx[currency]*starting_balances[currency] for currency in currencies_of_interest])
    return revenue - cost

In [45]:
# For every risk bucket, evaluate its candidate portfolios and remember the one
# with the highest predicted profit. A row is recorded each time a candidate
# beats the best seen so far in its bucket.
final_portfolios = {}
viz_columns = ["asset_a", "asset_b", "weight_a", "weight_b", "risk", "profit_pred"]
viz_records = []
crypto_usd_fx_list = [1,1]
initial_portfolio_worth = 100

for risk, portfolio_options in risk_portfolios.items():
    # Start below any possible prediction so the first candidate is always accepted
    profit_est = float("-inf")
    for portfolio in portfolio_options:
        asset_a, asset_b, weight_a, weight_b = portfolio
        profit_pred = profit_predictor(
            [asset_a, asset_b],
            [weight_a*.01*initial_portfolio_worth, weight_b*.01*initial_portfolio_worth],
            crypto_usd_fx_list)
        if profit_pred > profit_est:
            viz_records.append({"asset_a":asset_a, "asset_b":asset_b, "weight_a":weight_a, "weight_b":weight_b, "risk":risk, "profit_pred":profit_pred[0]})
            final_portfolios[risk] = portfolio
            profit_est = profit_pred

# Build the frame once from the collected rows (DataFrame.append no longer exists in pandas 2.x)
data_viz_df = pd.DataFrame(viz_records, columns=viz_columns)
data_viz_df.to_csv("data_viz_df.csv", index=False)
print(data_viz_df)
print(final_portfolios)

             asset_a             asset_b weight_a weight_b  risk  profit_pred
0               ubiq              rchain       37       63   1.3   -99.760569
1              lunyr                 ark       26       74   1.3   -96.961219
2           golfcoin         feathercoin       35       65   0.9   -99.552496
3            stellar  filecoin [futures]       37       63   0.9   -87.895059
4        stronghands                 ark        7       93   1.2   -99.941410
5               alqo               storj       72       28   1.2   -99.297954
6          qlc chain          unobtanium       85       15   1.2   -93.931667
7          cappasity           trollcoin       46       54   1.4   -99.959303
8           nekonium             wavesgo       73       27   1.4   -99.667413
9   ethereum classic            etheroll       37       63   1.1   -97.600783
10             xenon               crave       88       12   0.6   -99.909427
11              life            edgeless       23       77   1.0