In [112]:
# Import Dependencies
from datetime import datetime, timedelta
from getpass import getpass
from urllib.parse import quote

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sqlalchemy import create_engine
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential

In [41]:
# Constants
# Forecast horizon in days. Not referenced by any other visible cell --
# presumably consumed by target/model code still to be written (TODO confirm).
future_days_to_predict = 7

In [277]:
# Features Function

# Look-back horizons (in days) for which trailing price extremes are computed.
_LOOKBACK_WINDOWS_DAYS = (3, 7, 30, 90, 365)


def _price_extremes(prices, window_days):
    """Find the lowest and highest price in a trailing window for every date.

    For each date ``d`` in ``prices.index`` the window covers every row whose
    date lies in ``[d - window_days, d]`` (both ends inclusive).  A date is
    only reported when ``d - window_days`` itself is present in the index,
    i.e. when a full look-back period is available.

    Parameters
    ----------
    prices : pandas.Series
        Prices indexed by a ``DatetimeIndex``.
    window_days : int
        Length of the look-back period in days.

    Returns
    -------
    pandas.DataFrame
        Indexed by date (same index name as ``prices``) with the columns
        ``{N}Day_Min_Price``, ``{N}Day_Min_Date``, ``{N}Day_Max_Price`` and
        ``{N}Day_Max_Date`` where ``N`` is ``window_days``.
    """
    prefix = f'{window_days}Day'
    columns = [
        'date',
        f'{prefix}_Min_Price',
        f'{prefix}_Min_Date',
        f'{prefix}_Max_Price',
        f'{prefix}_Max_Date',
    ]

    dates = prices.index
    known_dates = set(dates)  # O(1) membership test instead of scanning the index
    span = timedelta(days=window_days)

    records = []
    for current in dates:
        window_start = current - span
        if window_start not in known_dates:
            # Not enough history for a full window ending on this date.
            continue

        window = prices[(dates >= window_start) & (dates <= current)]
        records.append(
            (current, window.min(), window.idxmin(), window.max(), window.idxmax())
        )

    extremes = pd.DataFrame(records, columns=columns)
    # Force a datetime dtype so that an empty result still joins cleanly
    # against the datetime-indexed feature frame.
    extremes['date'] = pd.to_datetime(extremes['date'])
    return extremes.set_index('date').rename_axis(prices.index.name)


def add_features(df, currency_name=None):
    """Build the per-day feature table for a single currency.

    Computed columns (in order, after any columns of ``df`` that are not
    consumed here):

    * ``Price`` and ``Volume`` -- copies of ``prices`` / ``total_volumes``.
    * ``{name}-1DayPriceChange`` and ``{name}-1DayVolumeChange`` -- day-over-day
      fractional change (``pct_change``).
    * ``{name}-MarketCap`` -- copy of ``market_caps``.
    * For each horizon N in 3, 7, 30, 90 and 365 days: ``{N}Day_Min_Price``,
      ``{N}Day_Min_Date``, ``{N}Day_Max_Price``, ``{N}Day_Max_Date`` over the
      window ``[date - N days, date]`` (inclusive at both ends).

    Rows containing NaN or +/-inf are dropped, and only dates for which every
    horizon has a full window are kept, so the result starts 365 days after
    the first usable row.

    TODO: the H/L ratios, volatility, "days from high/low", average-volume and
    calendar features mentioned in the original planning notes are not
    implemented yet.

    Parameters
    ----------
    df : pandas.DataFrame
        Market data for ONE currency, indexed by date, with the columns
        ``prices``, ``market_caps``, ``total_volumes`` and ``CoinID``.
        The frame is not modified.
    currency_name : str, optional
        Display name used as the prefix of the change / market-cap columns.
        When omitted, the notebook-level variable ``currency`` is used, which
        must then already be defined.

    Returns
    -------
    pandas.DataFrame
        Feature table indexed by date (``DatetimeIndex``).

    Raises
    ------
    KeyError
        If one of the required columns is missing from ``df``.
    NameError
        If ``currency_name`` is omitted and no global ``currency`` exists.
    """
    # Keep backward compatibility with callers that rely on the global.
    label = currency if currency_name is None else currency_name

    features = df.drop(
        columns=['prices', 'market_caps', 'total_volumes', 'CoinID']
    ).assign(**{
        'Price': df['prices'],
        'Volume': df['total_volumes'],
        f'{label}-1DayPriceChange': df['prices'].pct_change(),
        f'{label}-1DayVolumeChange': df['total_volumes'].pct_change(),
        f'{label}-MarketCap': df['market_caps'].to_numpy(),
    })

    # A change computed from a zero base is +/-inf; treat it as missing.
    features = features.replace([np.inf, -np.inf], np.nan).dropna()
    features.index = pd.to_datetime(features.index)

    # Every horizon is evaluated on the same cleaned price series, captured
    # BEFORE any join shrinks the frame, so shorter horizons are not
    # influenced by rows that only the longer horizons discard.
    prices = features['Price']
    for window_days in _LOOKBACK_WINDOWS_DAYS:
        features = features.join(_price_extremes(prices, window_days), how='inner')

    return features.dropna()

def normalize_data(df):
    """Placeholder: not implemented yet.

    Currently ignores ``df`` and returns ``None``.
    """
    
def find_targets(df):
    """Placeholder: not implemented yet.

    Currently ignores ``df`` and returns ``None``.
    """


In [3]:
# Retrieve Data from dB

# Prompt for the database password at run time so it is never stored in the
# notebook. `getpass` is imported with the other dependencies in the first cell.
db_password = getpass('Enter database password')

Enter database password········


In [6]:
# Create engine link to db
# Percent-encode the password so characters such as '@', ':', '/' or '%'
# cannot be mistaken for URL delimiters when the connection string is parsed.
db_string = f"postgresql://postgres:{quote(db_password, safe='')}@127.0.0.1:5432/CryptoAnalysisdb"
engine = create_engine(db_string)

# Import data into dataframe
df = pd.read_sql('SELECT * FROM historical_market_data;', con=engine).set_index('date')
df.head()

Unnamed: 0_level_0,prices,market_caps,total_volumes,CoinID
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-10-16,0.220802,110401100.0,2463741.0,0x
2017-10-17,0.219013,109506400.0,1846631.0,0x
2017-10-18,0.224353,112176500.0,1780158.0,0x
2017-10-19,0.211617,105808400.0,1787991.0,0x
2017-10-20,0.204243,102121700.0,831761.1,0x


In [13]:
# Retrieve List of Currencies
# Distinct CoinID values present in the historical market data.
currency_list = df['CoinID'].unique()
print(currency_list)

['0x' '1inch' 'aave' 'algorand' 'alien-worlds' 'ankr' 'aragon'
 'arpa-chain' 'arweave' 'audius' 'avalanche-2' 'axie-infinity'
 'badger-dao' 'bakerytoken' 'balancer' 'band-protocol'
 'basic-attention-token' 'binancecoin' 'binance-usd' 'bitcoin'
 'bitcoin-cash' 'bitcoin-cash-sv' 'bittorrent-2' 'cardano' 'celer-network'
 'celo' 'chainlink' 'chiliz' 'chromaway' 'coin98'
 'compound-governance-token' 'cosmos' 'dai' 'crypto-com-chain'
 'curve-dao-token' 'dash' 'decentraland' 'dego-finance' 'dodo' 'dogecoin'
 'dydx' 'ecash' 'elrond-erd-2' 'enjincoin' 'eos' 'ethereum'
 'ethereum-classic' 'fantom' 'filecoin' 'flow' 'ftx-token' 'harmony'
 'havven' 'hedera-hashgraph' 'iotex' 'holotoken' 'huobi-token'
 'internet-computer' 'iostoken' 'just' 'kava' 'kusama' 'kyber-network'
 'litecoin' 'livepeer' 'loopring' 'maker' 'mask-network' 'matic-network'
 'mina-protocol' 'mirror-protocol' 'monero' 'my-neighbor-alice' 'near'
 'nem' 'neo' 'ocean-protocol' 'okb' 'omisego' 'ontology' 'orchid-protocol'
 'origin-pro

In [21]:
# Set Constants
# CoinID of the currency to model; used below to filter the market data
# and to look up the currency's display name.
currency_to_predict = 'bitcoin'

In [26]:
# Gather Currency Names
# Lookup table: CoinID (index) -> display Name.
currency_names = pd.read_sql(
    'SELECT "Name", "CoinID" FROM coins;',
    con=engine,
).set_index('CoinID')

Unnamed: 0_level_0,Name
CoinID,Unnamed: 1_level_1
0x,0x
1inch,1inch
aave,Aave
algorand,Algorand
alien-worlds,Alien Worlds
...,...
yfii-finance,DFI.money
yield-guild-games,Yield Guild Games
zcash,Zcash
zencash,Horizen


In [38]:
# Find name of currency
# Take the first value of the row stored under the selected CoinID.
currency = currency_names.loc[currency_to_predict, :].to_numpy()[0]
currency

'Bitcoin'

In [278]:
# Keep only the rows of the selected currency, then derive its feature table.
currency_df = df.loc[df['CoinID'].eq(currency_to_predict)]

ml_df = add_features(currency_df)

In [279]:
# Display the engineered feature table for a visual sanity check.
ml_df

Unnamed: 0_level_0,Price,Volume,Bitcoin-1DayPriceChange,Bitcoin-1DayVolumeChange,Bitcoin-MarketCap,3Day_Min_Price,3Day_Min_Date,3Day_Max_Price,3Day_Max_Date,7Day_Min_Price,...,30Day_Max_Price,30Day_Max_Date,90Day_Min_Price,90Day_Min_Date,90Day_Max_Price,90Day_Max_Date,365Day_Min_Price,365Day_Min_Date,365Day_Max_Price,365Day_Max_Date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-12-27,316.491000,6.705162e+07,0.006113,-0.540311,4.322769e+09,314.568000,2014-12-26,327.716971,2014-12-25,314.568000,...,381.37680,2014-12-01,309.765000,2014-12-17,418.57830,2014-11-11,309.765000,2014-12-17,936.38000,2014-01-05
2014-12-28,312.633000,9.947118e+07,-0.012190,0.483502,4.271645e+09,312.633000,2014-12-28,327.716971,2014-12-25,312.633000,...,381.37680,2014-12-01,309.765000,2014-12-17,418.57830,2014-11-11,309.765000,2014-12-17,936.38000,2014-01-05
2014-12-29,310.079000,7.160976e+07,-0.008169,-0.280095,4.237997e+09,310.079000,2014-12-29,316.491000,2014-12-27,310.079000,...,381.37680,2014-12-01,309.765000,2014-12-17,418.57830,2014-11-11,309.765000,2014-12-17,936.38000,2014-01-05
2014-12-30,317.379000,7.103611e+07,0.023542,-0.008011,4.338944e+09,310.079000,2014-12-29,317.379000,2014-12-30,310.079000,...,381.37680,2014-12-01,309.765000,2014-12-17,418.57830,2014-11-11,309.765000,2014-12-17,936.38000,2014-01-05
2014-12-31,313.992000,4.699936e+07,-0.010672,-0.338374,4.293958e+09,310.079000,2014-12-29,317.379000,2014-12-30,310.079000,...,381.37680,2014-12-01,309.765000,2014-12-17,418.57830,2014-11-11,309.765000,2014-12-17,936.38000,2014-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-30,61837.257003,3.181022e+10,-0.007160,-0.176945,1.166424e+12,58641.001474,2021-10-27,62283.198056,2021-10-29,58641.001474,...,66237.51755,2021-10-20,38368.354012,2021-08-03,66237.51755,2021-10-20,13537.174272,2020-10-30,66237.51755,2021-10-20
2021-10-31,61471.868463,3.249495e+10,-0.005909,0.021526,1.159842e+12,60767.555852,2021-10-28,62283.198056,2021-10-29,58641.001474,...,66237.51755,2021-10-20,38368.354012,2021-08-03,66237.51755,2021-10-20,13558.361796,2020-11-02,66237.51755,2021-10-20
2021-11-01,61121.831201,3.583474e+10,-0.005694,0.102779,1.152831e+12,61121.831201,2021-11-01,62283.198056,2021-10-29,58641.001474,...,66237.51755,2021-10-20,38368.354012,2021-08-03,66237.51755,2021-10-20,13558.361796,2020-11-02,66237.51755,2021-10-20
2021-11-02,63247.208961,3.733507e+10,0.034773,0.041868,1.186952e+12,61121.831201,2021-11-01,63247.208961,2021-11-02,58641.001474,...,66237.51755,2021-10-20,39751.584575,2021-08-04,66237.51755,2021-10-20,13558.361796,2020-11-02,66237.51755,2021-10-20
