In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from matplotlib import pyplot
from collections import deque
from sklearn import preprocessing
import random

In [2]:
# Start from an empty frame; the loading loop replaces it with the first
# currency's data and joins every later currency onto it.
main_dataframe = pd.DataFrame()

In [3]:
# List of cryptocurrencies; each one is read from data/<name>.csv
currencies = ['BCH-USD', 'BTC-USD', 'ETH-USD', 'LTC-USD']

# Start from an empty frame inside this cell so that re-running it rebuilds the
# result from scratch; joining onto a frame left over from an earlier run would
# fail on the overlapping column names.
main_dataframe = pd.DataFrame()

# Load data for each cryptocurrency
for c in currencies:
    dataset = 'data/' + c + '.csv'  # File path
    # Column names are passed explicitly, so read_csv treats the first line of
    # the file as data rather than as a header.
    df = (
        pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])
        .rename(columns={'close': c + '_close', 'volume': c + '_volume'})
        .set_index('time')
        .loc[:, [c + '_close', c + '_volume']]  # keep only close and volume
    )
    if len(main_dataframe) == 0:
        # First currency becomes the base frame
        main_dataframe = df
    else:
        # Default (left) join: only index values already in the base frame are kept
        main_dataframe = main_dataframe.join(df)

In [4]:
# Preview the first rows of the combined close/volume frame
main_dataframe.head()

In [5]:
# Count missing values per column; index values absent from a later currency's
# file show up as NaN after the join
main_dataframe.isnull().sum()

In [6]:
# Forward-fill gaps: every missing value takes the last known value above it.
# (DataFrame.ffill replaces the deprecated fillna(method='ffill') spelling.)
main_dataframe = main_dataframe.ffill()

In [7]:
# Re-count missing values after the forward fill; values at the very start of a
# column have nothing above them to copy, so they can still be NaN
main_dataframe.isnull().sum()

In [8]:
# Backward-fill what is still missing: each remaining gap takes the next known
# value below it.
# (DataFrame.bfill replaces the deprecated fillna(method='bfill') spelling.)
main_dataframe = main_dataframe.bfill()

In [9]:
# Re-count missing values after the backward fill
main_dataframe.isnull().sum()

In [10]:
# Add a 'future' column holding the LTC close price 3 rows ahead.
# LTC is the asset being predicted: the label (buy or sell) is derived from
# whether this look-ahead price is above the current one.
# The last 3 rows have no look-ahead value, so shift leaves them as NaN.
# NOTE(review): earlier notes call the horizon "3 days" -- confirm the row
# interval of the CSV data before relying on that unit.
main_dataframe['future'] = main_dataframe['LTC-USD_close'].shift(-3)

In [11]:
# Preview the frame again, now including the 'future' column
main_dataframe.head()

In [12]:
# Labelling rule used to build the 'target' column: buy (1) or sell (0).
def compare(current, future):
    """Return 1 (buy) when the future price is above the current one, else 0 (sell).

    Equal prices, and any comparison that evaluates to False (for example when
    either value is NaN), give 0.
    """
    return 1 if future > current else 0

# Label every row by applying compare to its current and look-ahead LTC close:
# 1 = buy, 0 = sell. Rows whose 'future' is NaN compare as "not greater" and
# therefore receive 0.
main_dataframe['target'] = [
    compare(now, later)
    for now, later in zip(main_dataframe['LTC-USD_close'], main_dataframe['future'])
]

In [13]:
# Preview the frame again, now including the 'target' column
main_dataframe.head()

In [14]:
# Collect the index values of the dataframe and put them in ascending order
times = list(main_dataframe.index.values)
times.sort()

In [15]:
# Index value where the held-out tail starts: the final 10% of the sorted index.
# Reuses the already-sorted `times` instead of sorting the index a second time.
# At least one row is always held out: a count of 0 would index times[-0], which
# is times[0], and would put every row into the test set.
last_10pct = times[-max(1, int(0.1 * len(times)))]

In [16]:
# Show the boundary index value; rows at or after it form the test set
last_10pct

In [17]:
# Split by index value: rows before the boundary are used for training,
# rows at or after it are held out for testing
main_dataframe_train = main_dataframe.loc[main_dataframe.index < last_10pct]
main_dataframe_test = main_dataframe.loc[main_dataframe.index >= last_10pct]

In [18]:
# Number of rows in the test set
len(main_dataframe_test)

In [19]:
# Number of rows in the train set
len(main_dataframe_train)

In [20]:
# Inspect the raw 'BTC-USD_close' column
main_dataframe['BTC-USD_close']

In [21]:
# Preview the transformation applied to every feature during preprocessing:
# each value becomes its relative change from the previous row (first row is NaN)
main_dataframe['BTC-USD_close'].pct_change()

In [22]:
# Define a function to preprocess the main dataframe
def preprocess_main_dataframe(df, seq_len=30):
    """Build balanced, shuffled fixed-length sequences for the LSTM.

    Steps:
      1. Discard rows that have no look-ahead price ('future' is NaN); the
         'target' stored on those rows is not a real label.
      2. Replace every feature column with its percentage change and drop the
         rows where any change is undefined (NaN or infinite). This is done
         once for all columns, so no extra rows are lost per column and every
         column is scaled on exactly the rows that are kept.
      3. Standardize each feature column with ``preprocessing.scale``
         (zero mean, unit variance -- not a 0..1 range).
      4. Slide a window of ``seq_len`` consecutive rows over the data; each
         window is labelled with the target of its last row.
      5. Keep equally many buy (1) and sell (0) windows and shuffle them.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'future' and 'target' columns; every other column is
        treated as a numeric feature, in column order. The frame itself is not
        modified.
    seq_len : int, default 30
        Number of consecutive rows in each sequence.

    Returns
    -------
    X : numpy.ndarray of shape (n_samples, seq_len, n_features)
    y : numpy.ndarray of shape (n_samples,)
        Labels as floats (0.0 or 1.0). Both arrays have zero samples when there
        are too few rows or when one of the two classes is absent.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be at least 1")

    # Rows without a known future price carry a label produced by comparing
    # against NaN, so they must not reach the model.
    labelled = df.dropna(subset=['future'])
    feature_cols = [col for col in labelled.columns if col not in ('future', 'target')]
    n_features = len(feature_cols)

    # A previous value of 0 yields an infinite change; treat it like a missing one.
    changes = labelled[feature_cols].pct_change().replace([np.inf, -np.inf], np.nan)
    usable = (changes.notna().all(axis=1) & labelled['target'].notna()).to_numpy()
    changes = changes[usable]
    targets = labelled['target'].to_numpy(dtype=float)[usable]

    if len(changes) < seq_len:
        # Not enough rows for even one window
        return np.empty((0, seq_len, n_features)), np.empty((0,))

    scaled = np.empty((len(changes), n_features))
    for position, col in enumerate(feature_cols):
        scaled[:, position] = preprocessing.scale(changes[col].to_numpy())

    # Window ending at row `end - 1` (inclusive) is labelled with that row's target.
    buys = []
    sells = []
    for end in range(seq_len, len(scaled) + 1):
        label = targets[end - 1]
        window = scaled[end - seq_len:end]
        if label == 1:
            buys.append((window, label))
        elif label == 0:
            sells.append((window, label))

    # Balance the classes by randomly keeping as many of each as the rarer one has.
    random.shuffle(buys)
    random.shuffle(sells)
    lower = min(len(buys), len(sells))
    sequential_data = buys[:lower] + sells[:lower]
    random.shuffle(sequential_data)

    if not sequential_data:
        return np.empty((0, seq_len, n_features)), np.empty((0,))

    X = np.array([window for window, _ in sequential_data])
    y = np.array([label for _, label in sequential_data])
    return X, y

In [23]:
# Build the balanced, shuffled training sequences and labels from the train split
train_X, train_y = preprocess_main_dataframe(main_dataframe_train)

In [24]:
# Shape of the training sequences
train_X.shape

In [25]:
# Shape of the training labels
train_y.shape

In [26]:
# Label values and how many sequences carry each one (the classes are balanced
# inside preprocess_main_dataframe, so the counts should match)
np.unique(train_y, return_counts=True)

In [27]:
# Build the sequences and labels for the held-out test split the same way
test_X, test_y = preprocess_main_dataframe(main_dataframe_test)

In [28]:
# Shape of the testing sequences
test_X.shape

In [29]:
# Import necessary libraries for building the model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization

In [30]:
# Define the model architecture: one LSTM layer over each input sequence,
# two 32-unit ReLU dense layers, and a 2-unit softmax output (sell / buy)
model = Sequential([
    LSTM(64, input_shape=(train_X.shape[1:])),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
])

In [31]:
# Compile the model; the sparse loss takes the class label (0 or 1) directly
# rather than a one-hot vector
model.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [32]:
# Train the model, using the held-out test sequences as validation data
history = model.fit(
    train_X,
    train_y,
    batch_size=100,
    epochs=100,
    validation_data=(test_X, test_y),
)