In [1]:
# Import Dependencies
import os
from datetime import datetime, timedelta
from getpass import getpass
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine

# Kept ahead of the TensorFlow import, as in the original cell ordering
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

In [2]:
# Constants
# Prediction horizon in days: find_targets pairs each row with the price this many days later
future_days_to_predict = 30

In [3]:
def find_targets(df, days=None):
    """Attach the forward price change used as the prediction target.

    For every row, the price observed exactly ``days`` days later is looked up.
    Rows whose target date is absent from the index (or that contain any NaN)
    are dropped, and the relative change ``(future - current) / current`` is
    stored in a column named ``'{days}Day-PriceChange'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a datetime-like index and a 'Price' column.
    days : int, optional
        Look-ahead horizon in days. Defaults to the module-level
        ``future_days_to_predict`` so existing callers are unaffected.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by 'date' holding the input columns minus 'Price',
        plus the price-change column. The input frame is not modified.
    """
    if days is None:
        days = future_days_to_predict

    print('Finding Target Values...\n')

    horizon = pd.Timedelta(days=days)
    prices = df['Price']

    # Keep the first price per date (the original lookup also took the first match)
    # so the lookup index is unique and can be reindexed.
    first_price = prices[~prices.index.duplicated(keep='first')]

    # One vectorised lookup replaces the per-row membership scan; dates with no
    # observation `days` ahead come back as NaN and are removed by dropna below.
    target_price = first_price.reindex(df.index + horizon).to_numpy()

    new_df = df.assign(target_price=target_price).dropna()

    # Find price change
    price_change = (new_df['target_price'] - new_df['Price']) / new_df['Price']

    # Finalize dataframe after adding targets
    new_df = (
        new_df
        .drop(columns=['Price', 'target_price'])
        .assign(**{f'{days}Day-PriceChange': price_change})
        .rename_axis('date')
    )

    return new_df

In [4]:
def prepare_table(df):
    """Build the per-coin feature table and attach the prediction target.

    Parameters
    ----------
    df : pandas.DataFrame
        Market data for one coin. Must contain the columns 'prices',
        'market_caps', 'total_volumes' and 'coin_id'. The index is converted
        with ``pd.to_datetime`` and every merge below is keyed on 'date', so the
        index is expected to be named 'date'.

    Returns
    -------
    pandas.DataFrame
        The value returned by ``find_targets`` for the selected feature columns.

    Notes
    -----
    Feature column names are prefixed with the module-level variable
    ``currency``, which is not a parameter and must be defined before the call.
    Progress messages are printed while the function runs.
    """
    
    # Features to have: PriceChange, VolumeChange, Supply, 3-Day H/L Ratio, 7-Day H/L Ratio, 30-Day H/L Ratio, 90-Day H/L Ratio, 
    # 365-Day H/L Ratio, 30-Day Volatility, 90-Day Volatility, Days from 90-Day High, Days From 90-Day Low, Days From 365-Day High,
    # Days From 365-Day Low, 30-Day Average Volume, 365-Day Average Volume, Day of Week, Month of Year, 
    # Day of Month
    # NOTE(review): the 1-day price change, 1-day volume change and supply columns are computed
    # below but are not part of the final column selection at the end of this function.
    
    # Copy a new df to keep the old one untouched
    new_df = df.copy().drop(columns = ['prices', 'market_caps', 'total_volumes', 'coin_id'])
    
    # Add basic columns
    new_df['Price'] = df['prices']
    new_df['Volume'] = df['total_volumes']
    new_df[f'{currency}-1DayPriceChange'] = df['prices'].pct_change()
    new_df[f'{currency}-1DayVolumeChange'] = df['total_volumes'].pct_change()
    
    # Supply is market cap divided by price, row by row
    new_df[f'{currency}-Supply'] = [x/y for x, y in list(zip(df['market_caps'].values, df['prices']))]
    
    # Remove nan and inf values
    new_df.replace([np.inf, -np.inf], np.nan, inplace=True)
    new_df.dropna(inplace=True)
    
    # Set index to datetime
    new_df.index = pd.to_datetime(new_df.index)
    
    # Look-back window lengths, in days
    day_list = [3, 7, 30, 90, 365]
    
    # Iterate through the day ranges
    for day in day_list:
        
        print(f'Going through {day} day range features...\n')
        
        # Per-window accumulators; each entry starts with the row's date so it can be merged back on 'date'
        min_price_list = []
        max_price_list = []
        volatility_list = []
        volume_list = []
        dt_list = []
        
        # Iterate through new df
        for current, row in new_df.iterrows():
            # Calendar fields are collected once, on the first (3-day) pass, and for every row
            # because this happens before the window-availability check below
            if day == 3:
                day_of_week = current.isoweekday()
                day_of_month = current.day
                month_of_year = current.month
                dt_list.append([current, day_of_week, day_of_month, month_of_year])
            
            # Get start time to split into temp_df
            start_time = current - timedelta(days=day)
            
            # Rows whose exact window start date is missing from the index get no window features
            if start_time not in new_df.index.values:
                
                continue
                
            # Window is inclusive on both ends: start_time <= date <= current
            temp_df = new_df[(new_df.index >= start_time) & (new_df.index <= current)]
            
            # Gather volatilities for 30 and 90 day ranges
            # (standard deviation of the 1-day price change inside the window)
            if day in [30, 90]:
                volatility = temp_df[f'{currency}-1DayPriceChange'].std()
                volatility_list.append([current, volatility])    
            
            # Find min and max price indexes
            min_id = temp_df['Price'].idxmin()
            max_id = temp_df['Price'].idxmax()
            
            # Find days from max and min for certain date ranges
            # (elapsed days divided by the window length)
            if day in [30, 90, 365]:
                min_delta = current - min_id
                days_from_min = min_delta.days/day
                
                max_delta = current - max_id
                days_from_max = max_delta.days/day

            # Find min and max price
            min_price = new_df.loc[new_df.index == min_id, 'Price'].values[0]
        
            max_price = new_df.loc[new_df.index == max_id, 'Price'].values[0]
            
            # The longer windows carry an extra "days from extreme" field per row
            if day in [30, 90, 365]:
                min_price_list.append([current, min_price, min_id, days_from_min])
                max_price_list.append([current, max_price, max_id, days_from_max])
            else:
                min_price_list.append([current, min_price, min_id])
                max_price_list.append([current, max_price, max_id])
                
            # Average volume inside the window, for the 30 and 365 day ranges only
            if day in [30, 365]:
                volume = temp_df['Volume'].mean()
                volume_list.append([current, volume])
            
        
        # Add Prices to main dataframe
        if day in [30, 90, 365]:
            min_prices = pd.DataFrame(min_price_list, columns = ['date', f'{day}Day_Min_Price', f'{day}Day_Min_Date', f'{currency}-DaysFrom-{day}DayMin']).set_index('date')  
            max_prices = pd.DataFrame(max_price_list, columns = ['date', f'{day}Day_Max_Price', f'{day}Day_Max_Date', f'{currency}-DaysFrom-{day}DayMax']).set_index('date')
        
        else:
            min_prices = pd.DataFrame(min_price_list, columns = ['date', f'{day}Day_Min_Price', f'{day}Day_Min_Date']).set_index('date')  
            max_prices = pd.DataFrame(max_price_list, columns = ['date', f'{day}Day_Max_Price', f'{day}Day_Max_Date']).set_index('date')
        
        prices_df = pd.merge(min_prices, max_prices, on='date')
        
        # Merge Dataframes together
        # (left merge: rows skipped in the loop above keep NaN in the window columns)
        new_df = new_df.merge(prices_df, on='date', how='left')
        
        # Add H/L Ratio column
        # (Price - window min) / (window max - window min)
        # NOTE(review): the denominator is zero when the window max equals the window min;
        # presumably this yields NaN that the final dropna removes - confirm.
        new_df[f'{currency}-{day}Day-H/L-Ratio'] = [(y-z)/(x-z) for x, y, z in list(zip(new_df[f'{day}Day_Max_Price'].values, new_df['Price'].values, new_df[f'{day}Day_Min_Price'].values))]
        
        # Add Volatility Measures to main df
        if day in [30, 90]:
            
            volatility_df = pd.DataFrame(volatility_list, columns = ['date', f'{currency}-{day}Day-Volatility']).set_index('date')  
            
            new_df = new_df.merge(volatility_df, on='date', how='left')
            
            
        # Add Volume Measures to df   
        if day in [30, 365]:
            
            volume_df = pd.DataFrame(volume_list, columns = ['date', f'{currency}-{day}Day-AvgVolume']).set_index('date')  
            
            new_df = new_df.merge(volume_df, on='date', how='left')
            
            # Average volume divided by supply; 0 is used when supply is 0
            new_df[f'{currency}-{day}Day-AvgVolumeRatio'] = [x/y if y != 0 else 0 for x, y in list(zip(new_df[f'{currency}-{day}Day-AvgVolume'], new_df[f'{currency}-Supply']))]
            
            
            
        # Add day counts to df    
        if day == 3:
            dt_df = pd.DataFrame(dt_list, columns = ['date', 'Day of Week', 'Day of Month', 'Month of Year']).set_index('date')
            
            new_df = new_df.merge(dt_df, on='date', how='left')
            
        # Remove rows that are exact duplicates across all columns
        new_df.drop_duplicates(inplace=True)
        
    # Drop every row still missing a value in any column
    new_df.dropna(inplace=True)
    
    # Only keep necessary columns
    # ('Price' is kept here because find_targets reads it to compute the target)
    final_df = new_df[['Price', 'Day of Week', 'Day of Month', 'Month of Year',
                      f'{currency}-3Day-H/L-Ratio', f'{currency}-7Day-H/L-Ratio', f'{currency}-30Day-H/L-Ratio', f'{currency}-90Day-H/L-Ratio', f'{currency}-365Day-H/L-Ratio',
                      f'{currency}-30Day-AvgVolumeRatio', f'{currency}-365Day-AvgVolumeRatio', f'{currency}-30Day-Volatility', f'{currency}-90Day-Volatility',
                      f'{currency}-DaysFrom-30DayMax', f'{currency}-DaysFrom-90DayMax', f'{currency}-DaysFrom-365DayMax',
                      f'{currency}-DaysFrom-30DayMin', f'{currency}-DaysFrom-90DayMin', f'{currency}-DaysFrom-365DayMin']]
    
    final_df = find_targets(final_df)
        
    print('Process Complete!')
    
    return final_df

In [5]:
# Retrieve Data from dB

# Read the database password from the DB_PASSWORD environment variable, falling back to an
# interactive prompt, so the credential is never stored in the notebook source or its outputs.
db_password = os.environ.get('DB_PASSWORD') or getpass('PostgreSQL password: ')

In [6]:
# Create engine link to db
# The password is URL-encoded so reserved characters (e.g. '@', ':' or '/') cannot corrupt the connection URL
db_string = f"postgresql://postgres:{quote_plus(db_password)}@127.0.0.1:5432/CryptoAnalysisdb"
engine = create_engine(db_string)

# Import data into dataframe, indexed by the 'date' column
df = pd.read_sql('SELECT * FROM historical_market_data;', con=engine).set_index('date')
df.tail()

Unnamed: 0_level_0,prices,market_caps,total_volumes,coin_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-10-30,0.101578,1287825000.0,97266400.0,zilliqa
2021-10-31,0.112505,1429685000.0,599784500.0,zilliqa
2021-11-01,0.112415,1423508000.0,171194000.0,zilliqa
2021-11-02,0.114672,1450237000.0,138253000.0,zilliqa
2021-11-04,0.107805,1371410000.0,115412900.0,zilliqa


In [7]:
# Collect the distinct coin ids present in the market data and show them
coin_id_column = df['coin_id']
currency_list = coin_id_column.unique()
print(currency_list)

['0x' '1inch' '3x-long-bitcoin-token' '3x-long-ethereum-token' 'aave'
 'aavegotchi' 'adax' 'adventure-gold' 'aelf' 'algorand' 'alibabacoin'
 'alien-worlds' 'alpaca-finance' 'alpha-finance'
 'ampleforth-governance-token' 'ankr' 'amp-token' 'aragon' 'ardor'
 'arpa-chain' 'arweave' 'asd' 'audius' 'augur' 'avalanche-2'
 'axie-infinity' 'bakerytoken' 'balancer' 'bancor' 'band-protocol'
 'barnbridge' 'basic' 'basic-attention-token' 'binancecoin' 'binance-usd'
 'bitcoin' 'bitcoin-cash' 'bitcoin-cash-sv' 'bitcoin-diamond' 'bitdao'
 'bito-coin' 'bitshares' 'bittorrent-2' 'blockstack' 'bluzelle' 'brz'
 'cardano' 'cartesi' 'celer-network' 'celo' 'chainlink' 'chia' 'chiliz'
 'chromaway' 'civic' 'clover-finance' 'coin98' 'coinmetro'
 'compound-governance-token' 'cosmos' 'coti' 'crypto-com-chain' 'dai'
 'curve-dao-token' 'dash' 'decentraland' 'decred' 'defipulse-index'
 'dego-finance' 'dent' 'dia-data' 'digibyte' 'dirham' 'dock' 'dodo'
 'dogecoin' 'dora-factory' 'dvision-network' 'dydx' 'ecash' 'ede

In [8]:
# Select a currency (coin_id value used to filter the market data and to look up the display name)
currency_to_predict = 'cardano'

In [9]:
# Load the coin_id -> name lookup table, keyed by coin_id
name_query = 'SELECT name, coin_id FROM coins;'
coins_table = pd.read_sql(name_query, con=engine)
currency_names = coins_table.set_index('coin_id')

In [10]:
# Resolve the display name of the selected coin (first value of its lookup row)
selected_row = currency_names.loc[currency_to_predict, :]
currency = selected_row.values[0]
currency

'Cardano'

In [11]:
# Keep only the rows that belong to the selected coin
is_selected_coin = df['coin_id'] == currency_to_predict
currency_df = df[is_selected_coin]

# Build the feature/target table for that coin
ml_df = prepare_table(currency_df)

Going through 3 day range features...

Going through 7 day range features...

Going through 30 day range features...

Going through 90 day range features...

Going through 365 day range features...

Finding Target Values...

Process Complete!


In [12]:
# Preview a random sample of the prepared table. The seed makes the preview reproducible on
# re-run, and the size is capped at the table length so a short table does not raise.
ml_df.sample(n=min(20, len(ml_df)), random_state=42)

Unnamed: 0_level_0,Day of Week,Day of Month,Month of Year,Cardano-3Day-H/L-Ratio,Cardano-7Day-H/L-Ratio,Cardano-30Day-H/L-Ratio,Cardano-90Day-H/L-Ratio,Cardano-365Day-H/L-Ratio,Cardano-30Day-AvgVolumeRatio,Cardano-365Day-AvgVolumeRatio,Cardano-30Day-Volatility,Cardano-90Day-Volatility,Cardano-DaysFrom-30DayMax,Cardano-DaysFrom-90DayMax,Cardano-DaysFrom-365DayMax,Cardano-DaysFrom-30DayMin,Cardano-DaysFrom-90DayMin,Cardano-DaysFrom-365DayMin,30Day-PriceChange
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2019-08-09,5,9,8,0.0,0.0,0.0,0.0,0.208534,0.004936,0.003759,0.055216,0.058769,1.0,0.488889,1.0,0.0,0.0,0.649315,-0.022741
2020-11-25,3,25,11,0.483793,0.81782,0.850533,0.876765,0.922336,0.02014,0.010212,0.053393,0.052177,0.033333,0.011111,0.00274,0.766667,0.7,0.706849,0.010659
2021-06-20,7,20,6,0.456943,0.241267,0.206634,0.304727,0.614615,0.145892,0.084901,0.071058,0.07353,0.566667,0.388889,0.09589,0.933333,0.977778,0.739726,-0.261109
2019-12-05,4,5,12,0.386813,0.085157,0.240991,0.135075,0.140087,0.003157,0.004106,0.030978,0.035057,0.6,0.866667,0.443836,0.366667,0.122222,0.972603,-0.08592
2018-12-19,3,19,12,1.0,1.0,0.285476,0.115839,0.006258,0.001475,0.003573,0.07034,0.054715,1.0,0.966667,0.958904,0.133333,0.044444,0.010959,0.253237
2019-06-20,4,20,6,0.0,0.0,0.560474,0.792252,0.395633,0.008148,0.003199,0.047274,0.05775,0.6,0.2,0.920548,0.966667,1.0,0.512329,-0.286717
2021-09-14,2,14,9,0.0,0.094822,0.45673,0.702345,0.803618,0.163318,0.107384,0.061172,0.054861,0.366667,0.122222,0.030137,0.933333,0.622222,0.975342,-0.093754
2020-11-27,5,27,11,0.108329,0.467896,0.648129,0.709883,0.817167,0.021985,0.010407,0.057869,0.053205,0.1,0.033333,0.008219,0.833333,0.722222,0.712329,0.101896
2021-09-27,1,27,9,0.0,0.470556,0.167553,0.56571,0.711894,0.138983,0.110641,0.050873,0.052369,0.8,0.266667,0.065753,0.2,0.766667,0.90137,-0.100509
2021-02-23,2,23,2,0.0,0.323311,0.791115,0.828075,0.845821,0.158481,0.031877,0.091958,0.086255,0.1,0.033333,0.008219,0.9,0.988889,0.953425,0.147758
