In [1]:
# Import Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from datetime import datetime, timedelta
from calendar import monthrange

import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import tensorflow as tf

import sklearn as skl

from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

In [2]:
# Constants
future_days_to_predict = 30

In [3]:
def find_targets(df):
    # Create empty list for target price
    target_price_list = []
    print('Finding Target Values...\n')
    
    # Iterate through the table
    for current, row in df.iterrows():
        
        # Find target time based on constant set
        target_time = current + timedelta(days=future_days_to_predict)
        
        # Skip times not found in table
        if target_time not in df.index.values:
            continue
        
        # Find target price and append to list
        target_price = df.loc[df.index == target_time, 'Price'].values[0]
        
        target_price_list.append([current, target_price])
        
    # Convert into dataframe
    
    target_price_df = pd.DataFrame(target_price_list, columns = ['date', 'target_price'])
        
    new_df = df.merge(target_price_df, how='right', on='date')
    
    new_df.dropna(inplace=True)
    
    # Find price change
    
    new_df[f'{future_days_to_predict}Day-PriceChange'] = [(f-c)/c for c, f in list(zip(new_df['Price'], new_df['target_price']))]
    new_df['target'] = [1 if x > 0 else 0 for x in new_df[f'{future_days_to_predict}Day-PriceChange']]
    
    # Finalize dataframe after adding targets
    
    new_df.drop(columns = ['Price', 'target_price', f'{future_days_to_predict}Day-PriceChange'], inplace=True)
    
    new_df.set_index('date', inplace=True)
    
    return new_df

In [4]:
def prepare_table(df):
    
    # Features to have: PriceChange, VolumeChange, Supply, 3-Day H/L Ratio, 7-Day H/L Ratio, 30-Day H/L Ratio, 90-Day H/L Ratio, 
    # 365-Day H/L Ratio, 30-Day Volatility, 90-Day Volatility, Days from 90-Day High, Days From 90-Day Low, Days From 365-Day High,
    # Days From 365-Day Low, 30-Day Average Volume, 365-Day Average Volume, Day of Week, Month of Year, 
    # Day of Month
    
    # Copy a new df too keep old one
    new_df = df.copy().drop(columns = ['prices', 'market_caps', 'total_volumes', 'coin_id'])
    
    # Add basic columns
    new_df['Price'] = df['prices']
    new_df['Volume'] = df['total_volumes']
    new_df[f'{currency}-1DayPriceChange'] = df['prices'].pct_change()
    new_df[f'{currency}-1DayVolumeChange'] = df['total_volumes'].pct_change()
    
    new_df[f'{currency}-MarketCap'] = df['market_caps']
    
    # Remove nan and inf values
    new_df.replace([np.inf, -np.inf], np.nan, inplace=True)
    new_df.dropna(inplace=True)
    
    # Set index to datetime
    new_df.index = pd.to_datetime(new_df.index)
    
    day_list = [3, 7, 30, 90, 365]
    
    # Iterate through the day ranges
    for day in day_list:
        
        print(f'Going through {day} day range features...\n')
        
        min_price_list = []
        max_price_list = []
        volatility_list = []
        volume_list = []
        dt_list = []
        
        # Iterate through new df
        for current, row in new_df.iterrows():
            if day == 3:
                month_of_year = current.month/12
                days_in_month = monthrange(current.year, current.month)[1]
                day_of_month = current.day/days_in_month
                
                day_of_week = current.isoweekday()/7
                dt_list.append([current, day_of_week, day_of_month, month_of_year])
            
            # Get start time to split into temp_df
            start_time = current - timedelta(days=day)
            
            if start_time not in new_df.index.values:
                
                continue
                
            temp_df = new_df[(new_df.index >= start_time) & (new_df.index <= current)]
            
            # Gather volatilities for 30 and 90 day ranges
            if day in [30, 90]:
                volatility = temp_df[f'{currency}-1DayPriceChange'].std()
                volatility_list.append([current, volatility])    
            
            # Find min and max price indexes
            min_id = temp_df['Price'].idxmin()
            max_id = temp_df['Price'].idxmax()
            
            # Find days from max and min for certain date ranges
            if day in [30, 90, 365]:
                min_delta = current - min_id
                days_from_min = min_delta.days/day
                
                max_delta = current - max_id
                days_from_max = max_delta.days/day

            # Find min and max price
            min_price = new_df.loc[new_df.index == min_id, 'Price'].values[0]
        
            max_price = new_df.loc[new_df.index == max_id, 'Price'].values[0]
            
            if day in [30, 90, 365]:
                min_price_list.append([current, min_price, min_id, days_from_min])
                max_price_list.append([current, max_price, max_id, days_from_max])
            else:
                min_price_list.append([current, min_price, min_id])
                max_price_list.append([current, max_price, max_id])
                
            if day in [30, 365]:
                volume = temp_df['Volume'].mean()
                volume_list.append([current, volume])
            
        
        # Add Prices to main dataframe
        if day in [30, 90, 365]:
            min_prices = pd.DataFrame(min_price_list, columns = ['date', f'{day}Day_Min_Price', f'{day}Day_Min_Date', f'{currency}-DaysFrom-{day}DayMin']).set_index('date')  
            max_prices = pd.DataFrame(max_price_list, columns = ['date', f'{day}Day_Max_Price', f'{day}Day_Max_Date', f'{currency}-DaysFrom-{day}DayMax']).set_index('date')
        
        else:
            min_prices = pd.DataFrame(min_price_list, columns = ['date', f'{day}Day_Min_Price', f'{day}Day_Min_Date']).set_index('date')  
            max_prices = pd.DataFrame(max_price_list, columns = ['date', f'{day}Day_Max_Price', f'{day}Day_Max_Date']).set_index('date')
        
        prices_df = pd.merge(min_prices, max_prices, on='date')
        
        # Merge Dataframes together
        new_df = new_df.merge(prices_df, on='date', how='left')
        
        # Add H/L Ratio column
        new_df[f'{currency}-{day}Day-H/L-Ratio'] = [(y-z)/(x-z) for x, y, z in list(zip(new_df[f'{day}Day_Max_Price'].values, new_df['Price'].values, new_df[f'{day}Day_Min_Price'].values))]
        
        # Add Volatility Measures to main df
        if day in [30, 90]:
            
            volatility_df = pd.DataFrame(volatility_list, columns = ['date', f'{currency}-{day}Day-Volatility']).set_index('date')  
            
            new_df = new_df.merge(volatility_df, on='date', how='left')
            
            
        # Add Volume Measures to df   
        if day in [30, 365]:
            
            volume_df = pd.DataFrame(volume_list, columns = ['date', f'{currency}-{day}Day-AvgVolume']).set_index('date')  
            
            new_df = new_df.merge(volume_df, on='date', how='left')
            
            new_df[f'{currency}-{day}Day-AvgVolumeRatio'] = [x/y if y != 0 else 0 for x, y in list(zip(new_df[f'{currency}-{day}Day-AvgVolume'], new_df[f'{currency}-MarketCap']))]
            
            
            
        # Add day counts to df    
        if day == 3:
            dt_df = pd.DataFrame(dt_list, columns = ['date', 'Day of Week', 'Day of Month', 'Month of Year']).set_index('date')
            
            new_df = new_df.merge(dt_df, on='date', how='left')
            
        new_df.drop_duplicates(inplace=True)
        
    new_df.dropna(inplace=True)
    
    # Only keep necessary columns
    new_df = new_df[['Price', 'Day of Week', 'Day of Month', 'Month of Year',
                    f'{currency}-3Day-H/L-Ratio', f'{currency}-7Day-H/L-Ratio', f'{currency}-30Day-H/L-Ratio', f'{currency}-90Day-H/L-Ratio', f'{currency}-365Day-H/L-Ratio',
                    f'{currency}-30Day-AvgVolumeRatio', f'{currency}-365Day-AvgVolumeRatio', f'{currency}-30Day-Volatility', f'{currency}-90Day-Volatility',
                    f'{currency}-DaysFrom-30DayMax', f'{currency}-DaysFrom-90DayMax', f'{currency}-DaysFrom-365DayMax',
                    f'{currency}-DaysFrom-30DayMin', f'{currency}-DaysFrom-90DayMin', f'{currency}-DaysFrom-365DayMin']]
    
    final_df = find_targets(new_df)
        
    print('Process Complete!')
    
    return final_df

In [5]:
# Retrieve Data from dB

# Store environmental variable
from getpass import getpass
db_password = 'Lincoln100!'

In [6]:
# Create engine link to db
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/CryptoAnalysisdb"
engine = create_engine(db_string)

# Import data into dataframe
df = pd.read_sql('SELECT * FROM historical_market_data;', con=engine).set_index('date')
df.tail()

Unnamed: 0_level_0,prices,market_caps,total_volumes,coin_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-10-30,0.101578,1287825000.0,97266400.0,zilliqa
2021-10-31,0.112505,1429685000.0,599784500.0,zilliqa
2021-11-01,0.112415,1423508000.0,171194000.0,zilliqa
2021-11-02,0.114672,1450237000.0,138253000.0,zilliqa
2021-11-04,0.107805,1371410000.0,115412900.0,zilliqa


In [7]:
# Retrieve List of Currencies
currency_list = df['coin_id'].unique()
print(currency_list)

['0x' '1inch' '3x-long-bitcoin-token' '3x-long-ethereum-token' 'aave'
 'aavegotchi' 'adax' 'adventure-gold' 'aelf' 'algorand' 'alibabacoin'
 'alien-worlds' 'alpaca-finance' 'alpha-finance'
 'ampleforth-governance-token' 'ankr' 'amp-token' 'aragon' 'ardor'
 'arpa-chain' 'arweave' 'asd' 'audius' 'augur' 'avalanche-2'
 'axie-infinity' 'bakerytoken' 'balancer' 'bancor' 'band-protocol'
 'barnbridge' 'basic' 'basic-attention-token' 'binancecoin' 'binance-usd'
 'bitcoin' 'bitcoin-cash' 'bitcoin-cash-sv' 'bitcoin-diamond' 'bitdao'
 'bito-coin' 'bitshares' 'bittorrent-2' 'blockstack' 'bluzelle' 'brz'
 'cardano' 'cartesi' 'celer-network' 'celo' 'chainlink' 'chia' 'chiliz'
 'chromaway' 'civic' 'clover-finance' 'coin98' 'coinmetro'
 'compound-governance-token' 'cosmos' 'coti' 'crypto-com-chain' 'dai'
 'curve-dao-token' 'dash' 'decentraland' 'decred' 'defipulse-index'
 'dego-finance' 'dent' 'dia-data' 'digibyte' 'dirham' 'dock' 'dodo'
 'dogecoin' 'dora-factory' 'dvision-network' 'dydx' 'ecash' 'ede

In [8]:
# Select a currency
currency_to_predict = 'cardano'

In [9]:
# Gather Currency Names
currency_names = pd.read_sql(f'SELECT name, coin_id FROM coins;', con=engine).set_index('coin_id')

In [10]:
# Find name of currency
currency = currency_names.loc[currency_to_predict,:].values[0]
currency

'Cardano'

In [11]:
# Prepare the table by adding features and targets
currency_df = df[df['coin_id'] == currency_to_predict]

ml_df = prepare_table(currency_df)
ml_df.sample(5)

Going through 3 day range features...

Going through 7 day range features...

Going through 30 day range features...

Going through 90 day range features...

Going through 365 day range features...

Finding Target Values...

Process Complete!


Unnamed: 0_level_0,Day of Week,Day of Month,Month of Year,Cardano-3Day-H/L-Ratio,Cardano-7Day-H/L-Ratio,Cardano-30Day-H/L-Ratio,Cardano-90Day-H/L-Ratio,Cardano-365Day-H/L-Ratio,Cardano-30Day-AvgVolumeRatio,Cardano-365Day-AvgVolumeRatio,Cardano-30Day-Volatility,Cardano-90Day-Volatility,Cardano-DaysFrom-30DayMax,Cardano-DaysFrom-90DayMax,Cardano-DaysFrom-365DayMax,Cardano-DaysFrom-30DayMin,Cardano-DaysFrom-90DayMin,Cardano-DaysFrom-365DayMin,target
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2018-11-22,0.571429,0.733333,0.916667,0.0,0.0,0.0,0.0,0.013851,0.035562,0.08573,0.050354,0.051734,0.533333,0.911111,0.884932,0.0,0.0,1.0,0
2019-01-10,0.571429,0.322581,0.083333,0.0,0.208469,0.686835,0.305798,0.017409,0.041316,0.072595,0.062072,0.056904,0.033333,0.722222,0.99726,0.866667,0.288889,0.071233,0
2019-03-17,1.0,0.548387,0.25,0.572791,0.729375,0.860162,0.9116,0.063899,0.029379,0.057999,0.043489,0.049462,0.033333,0.744444,0.871233,0.433333,1.0,0.252055,1
2020-07-06,0.142857,0.193548,0.583333,1.0,1.0,1.0,1.0,1.0,0.098966,0.048824,0.044871,0.046874,0.0,0.0,0.0,0.833333,0.911111,0.317808,1
2020-08-23,1.0,0.741935,0.666667,0.0,0.0,0.0,0.718146,0.785405,0.132347,0.054473,0.050638,0.051614,0.933333,0.311111,0.076712,0.0,1.0,0.449315,0


In [12]:
# use describe to check if it is normalized and standardized
ml_df.describe()

Unnamed: 0,Day of Week,Day of Month,Month of Year,Cardano-3Day-H/L-Ratio,Cardano-7Day-H/L-Ratio,Cardano-30Day-H/L-Ratio,Cardano-90Day-H/L-Ratio,Cardano-365Day-H/L-Ratio,Cardano-30Day-AvgVolumeRatio,Cardano-365Day-AvgVolumeRatio,Cardano-30Day-Volatility,Cardano-90Day-Volatility,Cardano-DaysFrom-30DayMax,Cardano-DaysFrom-90DayMax,Cardano-DaysFrom-365DayMax,Cardano-DaysFrom-30DayMin,Cardano-DaysFrom-90DayMin,Cardano-DaysFrom-365DayMin,target
count,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0,1083.0
mean,0.571956,0.518393,0.540012,0.519251,0.539665,0.549631,0.527027,0.457125,0.100787,0.070633,0.05459,0.056254,0.451924,0.448661,0.479915,0.572238,0.576259,0.558841,0.570637
std,0.28578,0.289492,0.287498,0.426874,0.385791,0.35118,0.346273,0.341856,0.051799,0.030438,0.018673,0.013531,0.35858,0.37008,0.394936,0.35389,0.35065,0.337859,0.495214
min,0.142857,0.032258,0.083333,0.0,0.0,0.0,0.0,0.0,0.016686,0.026621,0.026293,0.03128,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.285714,0.266667,0.291667,0.0,0.120456,0.203066,0.205073,0.145491,0.065519,0.045529,0.041095,0.048095,0.1,0.077778,0.060274,0.233333,0.222222,0.227397,0.0
50%,0.571429,0.516129,0.5,0.556094,0.594517,0.597256,0.521155,0.388887,0.092593,0.066945,0.050683,0.05367,0.4,0.377778,0.408219,0.633333,0.644444,0.594521,1.0
75%,0.857143,0.774194,0.75,1.0,0.945178,0.885705,0.870786,0.796278,0.134765,0.087117,0.063164,0.066776,0.8,0.833333,0.906849,0.933333,0.922222,0.884932,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.297902,0.209916,0.110306,0.088882,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [13]:
# Separate X and Y values
X = ml_df.drop(columns = ['target', 'Day of Week', 'Day of Month', 'Month of Year']).values
Y = ml_df['target'].values

print(X[0], Y[0])

[0.02773204 0.6175407  0.31667738 0.11080933 0.04704383 0.03088965
 0.04843954 0.05575036 0.05424163 0.83333333 0.95555556 0.7890411
 1.         0.34444444 0.96164384] 0


In [14]:
# Use sklearn to split dataset

X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=23)

In [72]:
# Create the Keras Sequential model
crypto_model = Sequential()

In [73]:
# Add layers to the model
crypto_model.add(Dense(32, activation= 'tanh', input_dim=X_train.shape[1]))
# crypto_model.add(Dropout(0.2))
crypto_model.add(BatchNormalization())

crypto_model.add(Dense(64, activation= 'relu'))
# crypto_model.add(Dropout(0.2))
crypto_model.add(BatchNormalization())

crypto_model.add(Dense(2, activation='softmax'))

crypto_model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_38 (Dense)             (None, 32)                512       
_________________________________________________________________
batch_normalization_25 (Batc (None, 32)                128       
_________________________________________________________________
dense_39 (Dense)             (None, 64)                2112      
_________________________________________________________________
batch_normalization_26 (Batc (None, 64)                256       
_________________________________________________________________
dense_40 (Dense)             (None, 2)                 130       
Total params: 3,138
Trainable params: 2,946
Non-trainable params: 192
_________________________________________________________________


In [74]:
# Compile the Sequential model together and customize metrics
crypto_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=['accuracy'])

In [75]:
# Fit the model to the training data
fit_model = crypto_model.fit(X_train, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [76]:
# Evaluate the model using the test data
model_loss, model_accuracy = crypto_model.evaluate(X_test, y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

9/9 - 0s - loss: 0.2462 - accuracy: 0.9188
Loss: 0.2461835891008377, Accuracy: 0.9188191890716553


In [66]:
y_test

array([0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 0], dtype=int64)