In [1]:
# Import Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from datetime import datetime, timedelta

import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

In [2]:
# Constants
# Number of days ahead to predict -- presumably the forecast horizon for the
# model built later in the notebook (it is not used in the cells shown here;
# TODO confirm against the modelling cells).
future_days_to_predict = 7

In [157]:
def _rows_to_date_indexed_frame(rows, columns):
    """Build a DataFrame from per-date records and index it by its ``date`` column.

    ``rows`` may be empty (no date had a complete look-back window). The ``date``
    column is coerced to datetime either way so the result always merges cleanly
    onto a datetime-indexed frame.
    """
    frame = pd.DataFrame(rows, columns=columns)
    frame['date'] = pd.to_datetime(frame['date'])
    return frame.set_index('date')


def add_features(df, currency=None):
    """Engineer the model feature table for one coin's daily market history.

    Features produced (``N`` is a look-back window in days):
      * 1-day price change and 1-day volume change (``pct_change``),
      * Day of Week (ISO, Monday=1), Day of Month, Month of Year,
      * N-day H/L ratio for N in 3, 7, 30, 90, 365:
        ``(price - window_min) / (window_max - window_min)``,
      * N-day volatility for N in 30, 90: standard deviation of the 1-day
        price change over the window,
      * days since the window max / min for N in 30, 90, 365, expressed as a
        fraction of the window length,
      * N-day average-volume ratio for N in 30, 365: mean volume over the
        window divided by the row's supply (market cap / price).

    A window for date ``d`` covers every row dated from ``d - N days`` through
    ``d`` inclusive. A date only receives N-day features when a row dated
    exactly ``d - N days`` exists; otherwise those features are NaN and the row
    is removed by the final ``dropna``. In practice this means roughly the
    first 365 days of history (and days following a gap) are not returned.

    Parameters
    ----------
    df : pandas.DataFrame
        History for a single coin. Must have an index named ``date`` (anything
        ``pd.to_datetime`` accepts) and the columns ``prices``,
        ``market_caps``, ``total_volumes`` and ``coin_id``.
    currency : str, optional
        Label used as the prefix of the generated column names. When omitted,
        the function falls back to a module-level ``currency`` variable, which
        is how earlier versions of this notebook supplied it.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date``; columns are ``Price`` followed by the 20 features
        listed above. Empty when the history is too short for a 365-day window.

    Raises
    ------
    NameError
        If ``currency`` is not passed and no module-level ``currency`` exists.

    Notes
    -----
    Prints one progress line per window size and a completion message.
    """
    if currency is None:
        # Backward compatibility with callers that set a notebook-level global.
        currency = globals().get('currency')
        if currency is None:
            raise NameError(
                "add_features() needs a currency label: pass currency=... "
                "or define a module-level `currency` variable"
            )

    price_change_col = f'{currency}-1DayPriceChange'
    volume_change_col = f'{currency}-1DayVolumeChange'
    supply_col = f'{currency}-Supply'

    new_df = df.copy().drop(columns=['prices', 'market_caps', 'total_volumes', 'coin_id'])

    new_df['Price'] = df['prices']
    new_df['Volume'] = df['total_volumes']
    new_df[price_change_col] = df['prices'].pct_change()
    new_df[volume_change_col] = df['total_volumes'].pct_change()

    # Supply implied by market cap and price; a zero price yields inf/NaN,
    # which is removed together with the first row's NaN changes just below.
    new_df[supply_col] = df['market_caps'] / df['prices']

    new_df = new_df.replace([np.inf, -np.inf], np.nan).dropna()
    new_df.index = pd.to_datetime(new_df.index)

    # Calendar features depend only on the date, so compute them once up front.
    new_df['Day of Week'] = [stamp.isoweekday() for stamp in new_df.index]
    new_df['Day of Month'] = [stamp.day for stamp in new_df.index]
    new_df['Month of Year'] = [stamp.month for stamp in new_df.index]

    # Iterate through the day ranges
    for day in (3, 7, 30, 90, 365):

        print(f'Going through {day} day range features...\n')

        with_offsets = day in (30, 90, 365)    # days since window high / low
        with_volatility = day in (30, 90)
        with_volume = day in (30, 365)

        min_rows = []
        max_rows = []
        volatility_rows = []
        volume_rows = []

        # new_df is not modified inside the date loop, so these can be hoisted.
        index = new_df.index
        prices = new_df['Price']
        price_changes = new_df[price_change_col]
        volumes = new_df['Volume']

        for current in index:
            start_time = current - timedelta(days=day)

            # Only dates with a row exactly `day` days earlier get a window.
            if start_time not in index:
                continue

            in_window = (index >= start_time) & (index <= current)
            window_prices = prices[in_window]

            if with_volatility:
                volatility_rows.append([current, price_changes[in_window].std()])

            min_id = window_prices.idxmin()
            max_id = window_prices.idxmax()
            min_price = window_prices.min()
            max_price = window_prices.max()

            if with_offsets:
                # Age of the extreme, scaled by the window length.
                days_from_min = (current - min_id).days / day
                days_from_max = (current - max_id).days / day
                min_rows.append([current, min_price, min_id, days_from_min])
                max_rows.append([current, max_price, max_id, days_from_max])
            else:
                min_rows.append([current, min_price, min_id])
                max_rows.append([current, max_price, max_id])

            if with_volume:
                volume_rows.append([current, volumes[in_window].mean()])

        # Add Prices to main dataframe
        min_columns = ['date', f'{day}Day_Min_Price', f'{day}Day_Min_Date']
        max_columns = ['date', f'{day}Day_Max_Price', f'{day}Day_Max_Date']
        if with_offsets:
            min_columns.append(f'{currency}-DaysFrom-{day}DayMin')
            max_columns.append(f'{currency}-DaysFrom-{day}DayMax')

        prices_df = pd.merge(
            _rows_to_date_indexed_frame(min_rows, min_columns),
            _rows_to_date_indexed_frame(max_rows, max_columns),
            on='date',
        )
        new_df = new_df.merge(prices_df, on='date', how='left')

        price = new_df['Price'].to_numpy(dtype=float)
        low = new_df[f'{day}Day_Min_Price'].to_numpy(dtype=float)
        high = new_df[f'{day}Day_Max_Price'].to_numpy(dtype=float)
        # A flat window (high == low) gives 0/0 -> NaN; that row is dropped by
        # the final dropna, so silence the floating-point warning here.
        with np.errstate(divide='ignore', invalid='ignore'):
            new_df[f'{currency}-{day}Day-H/L-Ratio'] = (price - low) / (high - low)

        # Add Volatility Measures to main df
        if with_volatility:
            volatility_df = _rows_to_date_indexed_frame(
                volatility_rows, ['date', f'{currency}-{day}Day-Volatility']
            )
            new_df = new_df.merge(volatility_df, on='date', how='left')

        # Add Volume Measures to df
        if with_volume:
            avg_volume_col = f'{currency}-{day}Day-AvgVolume'
            volume_df = _rows_to_date_indexed_frame(volume_rows, ['date', avg_volume_col])
            new_df = new_df.merge(volume_df, on='date', how='left')

            avg_volume = new_df[avg_volume_col].to_numpy(dtype=float)
            supply = new_df[supply_col].to_numpy(dtype=float)
            # Ratio is defined as 0 where supply is 0.
            with np.errstate(divide='ignore', invalid='ignore'):
                new_df[f'{currency}-{day}Day-AvgVolumeRatio'] = np.where(
                    supply != 0, avg_volume / supply, 0
                )

        new_df = new_df.drop_duplicates()

    # Rows missing any window feature (incomplete look-back) are removed here.
    new_df = new_df.dropna()

    # Only keep necessary columns
    final_columns = [
        'Price', price_change_col, volume_change_col,
        'Day of Week', 'Day of Month', 'Month of Year',
        f'{currency}-3Day-H/L-Ratio', f'{currency}-7Day-H/L-Ratio', f'{currency}-30Day-H/L-Ratio',
        f'{currency}-90Day-H/L-Ratio', f'{currency}-365Day-H/L-Ratio',
        f'{currency}-30Day-AvgVolumeRatio', f'{currency}-365Day-AvgVolumeRatio',
        f'{currency}-30Day-Volatility', f'{currency}-90Day-Volatility',
        f'{currency}-DaysFrom-30DayMax', f'{currency}-DaysFrom-90DayMax', f'{currency}-DaysFrom-365DayMax',
        f'{currency}-DaysFrom-30DayMin', f'{currency}-DaysFrom-90DayMin', f'{currency}-DaysFrom-365DayMin',
    ]
    final_df = new_df[final_columns]

    print('Process Complete!')

    return final_df

In [4]:
def normalize_data(df):
    """Placeholder for feature normalisation; not implemented yet.

    Accepts ``df`` but currently ignores it and returns ``None``.
    """
    return None
    
def find_targets(df):
    """Placeholder for building the prediction targets; not implemented yet.

    Accepts ``df`` but currently ignores it and returns ``None``.
    """
    return None

In [5]:
# Retrieve Data from dB

# Never store the database password in the notebook: read it from the
# DB_PASSWORD environment variable, or prompt for it (input is not echoed)
# when the variable is unset or empty.
# NOTE: a password was previously committed in this cell and should be rotated.
from getpass import getpass
db_password = os.environ.get('DB_PASSWORD') or getpass('Database password: ')

In [6]:
# Connect to the PostgreSQL database on localhost that holds the market data
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/CryptoAnalysisdb"
engine = create_engine(db_string)

# Load the full historical table, then key it by its date column
df = pd.read_sql('SELECT * FROM historical_market_data;', con=engine)
df = df.set_index('date')
df.tail()

Unnamed: 0_level_0,prices,market_caps,total_volumes,coin_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-10-30,0.101578,1287825000.0,97266400.0,zilliqa
2021-10-31,0.112505,1429685000.0,599784500.0,zilliqa
2021-11-01,0.112415,1423508000.0,171194000.0,zilliqa
2021-11-02,0.114672,1450237000.0,138253000.0,zilliqa
2021-11-04,0.107805,1371410000.0,115412900.0,zilliqa


In [7]:
# Retrieve List of Currencies
# Distinct coin_id values found in the historical market data; these are the
# identifiers that can be chosen as the currency to predict.
currency_list = df['coin_id'].unique()
print(currency_list)

['0x' '1inch' '3x-long-bitcoin-token' '3x-long-ethereum-token' 'aave'
 'aavegotchi' 'adax' 'adventure-gold' 'aelf' 'algorand' 'alibabacoin'
 'alien-worlds' 'alpaca-finance' 'alpha-finance'
 'ampleforth-governance-token' 'ankr' 'amp-token' 'aragon' 'ardor'
 'arpa-chain' 'arweave' 'asd' 'audius' 'augur' 'avalanche-2'
 'axie-infinity' 'bakerytoken' 'balancer' 'bancor' 'band-protocol'
 'barnbridge' 'basic' 'basic-attention-token' 'binancecoin' 'binance-usd'
 'bitcoin' 'bitcoin-cash' 'bitcoin-cash-sv' 'bitcoin-diamond' 'bitdao'
 'bito-coin' 'bitshares' 'bittorrent-2' 'blockstack' 'bluzelle' 'brz'
 'cardano' 'cartesi' 'celer-network' 'celo' 'chainlink' 'chia' 'chiliz'
 'chromaway' 'civic' 'clover-finance' 'coin98' 'coinmetro'
 'compound-governance-token' 'cosmos' 'coti' 'crypto-com-chain' 'dai'
 'curve-dao-token' 'dash' 'decentraland' 'decred' 'defipulse-index'
 'dego-finance' 'dent' 'dia-data' 'digibyte' 'dirham' 'dock' 'dodo'
 'dogecoin' 'dora-factory' 'dvision-network' 'dydx' 'ecash' 'ede

In [169]:
# Select a currency
# This is a coin_id (not a display name): it is used below both to look up the
# coin's name and to filter the price history down to this coin.
currency_to_predict = 'vechain'

In [170]:
# Gather Currency Names
# Lookup table from coin_id to display name (plain string: the query has no
# placeholders, so no f-string is needed).
currency_names = pd.read_sql('SELECT name, coin_id FROM coins;', con=engine).set_index('coin_id')

In [171]:
# Find name of currency
# Select the 'name' column explicitly rather than taking the first value of
# the row, so the lookup does not depend on column order.
currency = currency_names.loc[currency_to_predict, 'name']
currency

'VeChain'

In [172]:
# Keep only the selected coin's rows, then engineer its features
currency_df = df.loc[df['coin_id'] == currency_to_predict]

ml_df = add_features(currency_df)

Going through 3 day range features...

Going through 7 day range features...

Going through 30 day range features...

Going through 90 day range features...

Going through 365 day range features...

Process Complete!


In [173]:
# Show the engineered feature table for the selected currency
ml_df

Unnamed: 0_level_0,Price,VeChain-1DayPriceChange,VeChain-1DayVolumeChange,Day of Week,Day of Month,Month of Year,VeChain-3Day-H/L-Ratio,VeChain-7Day-H/L-Ratio,VeChain-30Day-H/L-Ratio,VeChain-90Day-H/L-Ratio,...,VeChain-30Day-AvgVolumeRatio,VeChain-365Day-AvgVolumeRatio,VeChain-30Day-Volatility,VeChain-90Day-Volatility,VeChain-DaysFrom-30DayMax,VeChain-DaysFrom-90DayMax,VeChain-DaysFrom-365DayMax,VeChain-DaysFrom-30DayMin,VeChain-DaysFrom-90DayMin,VeChain-DaysFrom-365DayMin
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-07-27,0.005677,-0.044680,-0.024284,6,27,7,0.000000,0.000000,0.115663,0.115663,...,0.000872,0.000378,0.062239,0.059435,0.933333,0.311111,0.994521,0.366667,0.122222,0.613699
2019-07-28,0.005652,-0.004313,-0.028960,7,28,7,0.000000,0.000000,0.109557,0.109557,...,0.000818,0.000376,0.062087,0.059419,0.966667,0.322222,0.997260,0.400000,0.133333,0.616438
2019-07-29,0.005600,-0.009280,-0.051512,1,29,7,0.000000,0.000000,0.096475,0.096475,...,0.000790,0.000378,0.060624,0.059280,1.000000,0.333333,1.000000,0.433333,0.144444,0.619178
2019-07-30,0.005519,-0.014393,-0.128730,2,30,7,0.000000,0.000000,0.103508,0.076374,...,0.000754,0.000377,0.056066,0.058888,1.000000,0.344444,1.000000,0.466667,0.155556,0.621918
2019-07-31,0.005595,0.013639,0.143668,3,31,7,0.565768,0.177947,0.131648,0.095148,...,0.000721,0.000378,0.053158,0.058680,1.000000,0.355556,1.000000,0.500000,0.166667,0.624658
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-30,0.130259,-0.049712,-0.129561,6,30,10,0.435893,0.303346,0.743904,0.682723,...,0.008373,0.013771,0.052032,0.057614,0.166667,0.611111,0.528767,1.000000,0.355556,0.989041
2021-10-31,0.133021,0.021201,0.120820,7,31,10,0.405267,0.462442,0.736744,0.722015,...,0.008369,0.013768,0.047948,0.057407,0.200000,0.622222,0.531507,0.633333,0.366667,0.991781
2021-11-01,0.137127,0.030873,0.070722,1,1,11,1.000000,0.699037,0.852611,0.780448,...,0.008269,0.013868,0.041645,0.057464,0.233333,0.633333,0.534247,0.666667,0.377778,0.994521
2021-11-02,0.140461,0.024311,0.062235,2,2,11,1.000000,0.965695,0.946667,0.827882,...,0.008292,0.013889,0.041581,0.057464,0.266667,0.644444,0.536986,0.700000,0.388889,0.997260
