In [14]:
import pandas as pd
import numpy as np
from datetime import date
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
!pip3 install seaborn
import seaborn as sns

You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m


In [35]:
df = pd.read_csv("crypto-markets.csv")

In [36]:
currencies_of_interest = ["BTC", "XRP"]
starting_balances_list = [5, 20]
crypto_usd_fx_list = [65000, 1.1]

curr_dic = {}
for currency in currencies_of_interest:
    curr_dic[currency] = df.loc[df["symbol"]==currency,:].copy()
    curr_dic[currency].reset_index(inplace=True, drop=True)
    print(curr_dic[currency].head())

      slug symbol     name        date  ranknow    open    high     low  \
0  bitcoin    BTC  Bitcoin  2013-04-28        1  135.30  135.98  132.10   
1  bitcoin    BTC  Bitcoin  2013-04-29        1  134.44  147.49  134.00   
2  bitcoin    BTC  Bitcoin  2013-04-30        1  144.00  146.93  134.05   
3  bitcoin    BTC  Bitcoin  2013-05-01        1  139.00  139.89  107.72   
4  bitcoin    BTC  Bitcoin  2013-05-02        1  116.38  125.60   92.28   

    close  volume        market  close_ratio  spread  
0  134.21     0.0  1.488567e+09       0.5438    3.88  
1  144.54     0.0  1.603769e+09       0.7813   13.49  
2  139.00     0.0  1.542813e+09       0.3843   12.88  
3  116.99     0.0  1.298955e+09       0.2882   32.17  
4  105.21     0.0  1.168517e+09       0.3881   33.32  
     slug symbol name        date  ranknow      open      high       low  \
0  ripple    XRP  XRP  2013-08-04        2  0.005874  0.005927  0.005874   
1  ripple    XRP  XRP  2013-08-05        2  0.005875  0.005980  0.0

In [37]:
once = False
for currency in currencies_of_interest:
    # Restructure data so that date, open, past market cap(s), past high/low(s) are used to predict close price
    no_rows = curr_dic[currency].shape[0]
    for row in range(no_rows - 1):
        curr_dic[currency].iloc[no_rows - 1 - row, 7] = curr_dic[currency].iloc[no_rows - 1 - row - 1, 7]
        if once == False and currency=="XRP": 
            print(curr_dic[currency].iloc[no_rows - 1 - row, 7])
            once=True
        curr_dic[currency].iloc[no_rows - 1 - row, 8] = curr_dic[currency].iloc[no_rows - 1 - row - 1, 8]
        curr_dic[currency].iloc[no_rows - 1 - row, 11] = curr_dic[currency].iloc[no_rows - 1 - row - 1, 11]
        #curr_dic[currency].iloc[no_rows - 1 - row, 4] = curr_dic[currency].iloc[no_rows - 1 - row - 1, 4]
    curr_dic[currency].drop(0, axis=0, inplace=True)
    curr_dic[currency].drop(columns=["slug", "symbol", "name", "ranknow", "close_ratio", "spread", "volume"], inplace=True)
    curr_dic[currency].rename(columns={"high": "Prev High", "low": "Prev Low", "market": "Prev Marketcap", "close": "Close"}, inplace=True)

    # Change date column into an integer for number of days since start date
    date_null = date.fromisoformat(curr_dic[currency].iloc[0, 0])
    for row in range(no_rows - 1):
        date = date.fromisoformat(curr_dic[currency].iloc[row, 0])
        curr_dic[currency].iloc[row, 0] = (date - date_null).days
        
    print(curr_dic[currency].head())

  date    open  Prev High  Prev Low   Close  Prev Marketcap
1    0  134.44     147.49    132.10  134.21    1.603769e+09
2    1  144.00     146.93    134.00  144.54    1.542813e+09
3    2  139.00     139.89    134.05  139.00    1.298955e+09
4    3  116.38     125.60    107.72  116.99    1.168517e+09
5    4  106.25     108.13     92.28  105.21    1.085995e+09
0.360313
  date      open  Prev High  Prev Low     Close  Prev Marketcap
1    0  0.005875   0.005980  0.005874  0.005882      43879157.0
2    1  0.005637   0.005661  0.005613  0.005613      36591008.0
3    2  0.004669   0.004682  0.004629  0.004680      34534121.0
4    3  0.004397   0.004424  0.004333  0.004417      33258632.0
5    4  0.004257   0.004367  0.004175  0.004254      33547503.0


In [22]:
# Split into training and test sets
set_dic = {}
for currency in currencies_of_interest:
    set_dic["train_"+currency], set_dic["val_"+currency] = train_test_split(
        curr_dic[currency][["date", "Prev High", "Prev Low", "open", "Prev Marketcap", "Close"]], 
        test_size=0.33, random_state=1234)
    set_dic["X_train_"+currency] = set_dic["train_"+currency][["date", "Prev High", "Prev Low", "open", "Prev Marketcap"]]
    set_dic["X_test_"+currency] = set_dic["val_"+currency][["date", "Prev High", "Prev Low", "open", "Prev Marketcap"]]
    set_dic["Y_train_"+currency] = set_dic["train_"+currency]["Close"]
    set_dic["Y_test_"+currency] = set_dic["val_"+currency]["Close"]

In [23]:
# Basic linear regression seems to have lower RMSE which is interesting and unexpected
# Not using accuracy as a performance measure because I am predicting a continuous variable
model_dic = {}
for currency in currencies_of_interest:
    model_dic[currency] = LinearRegression()
    model_dic["reg_"+currency] = model_dic[currency].fit(set_dic["X_train_"+currency], set_dic["Y_train_"+currency])
    y_pred_btc = model_dic["reg_"+currency].predict(set_dic["X_test_"+currency])
    print("RMSE " + currency + ": " + str(mean_squared_error(set_dic["Y_test_"+currency], y_pred_btc) ** 0.5))

RMSE BTC: 11.097708184905635
RMSE XRP: 0.0019308521474138089


In [28]:
starting_balances = {}
crypto_usd_fx = {}
starting_indicators = {}
predictions = {}
counter = 0

for currency in currencies_of_interest:
    starting_balances[currency] = starting_balances_list[counter]
    crypto_usd_fx[currency] = crypto_usd_fx_list[counter]
    counter += 1
    
    starting_indicators[currency] = curr_dic[currency].iloc[0,:][["date", "Prev High", "Prev Low", "open", "Prev Marketcap"]].to_frame().T
    predictions[currency] = model_dic["reg_"+currency].predict(starting_indicators[currency])
    
revenue = sum([crypto_usd_fx[currency]*predictions[currency] for currency in currencies_of_interest])
cost = sum([crypto_usd_fx[currency]*starting_balances[currency] for currency in currencies_of_interest])
profit = revenue - cost
print("Profit: " + str(profit[0]))

counter = 0 

Profit: 8414005.413680386
