In [53]:
# Attach Google Drive to the Colab filesystem; the CSV paths used in this
# notebook live under this mount point.
from google.colab import drive

drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [75]:
import pandas as pd

# Read the LTC-USD candle file, supplying the column names explicitly via
# `names`, then show the first rows as a sanity check.
df = pd.read_csv(
    "/content/gdrive/My Drive/crypto_data/LTC-USD.csv",
    names=["time", "low", "high", "open", "close", "volume"],
)

print(df.head())

         time        low       high       open      close      volume
0  1528968660  96.580002  96.589996  96.589996  96.580002    9.647200
1  1528968720  96.449997  96.669998  96.589996  96.660004  314.387024
2  1528968780  96.470001  96.570000  96.570000  96.570000   77.129799
3  1528968840  96.449997  96.570000  96.570000  96.500000    7.216067
4  1528968900  96.279999  96.540001  96.500000  96.389999  524.539978


In [81]:
import os
from sklearn import preprocessing
from collections import deque
import random
import numpy as np

# Number of consecutive rows in each sequence built by preprocess_df
# (used there as the deque's maxlen).
SEQ_LEN = 60
# How many rows ahead the "future" close is taken from (passed negated to
# shift()). NOTE(review): rows look like 1-minute candles, so this is
# presumably 3 minutes - confirm against the data source.
FUTURE_PERIOD_PREDICT = 3
# Pair whose "<pair>_close" column is used to build "future" and "target".
RATIO_TO_PREDICT = "LTC-USD"

def classify(current, future):
    """Return 1 when ``future`` is strictly greater than ``current``, else 0.

    Both arguments are converted with ``float()`` before the comparison, so
    numeric strings are accepted. A NaN on either side compares as False and
    therefore yields 0.
    """
    rises = float(future) > float(current)
    return int(rises)

def preprocess_df(df):
    """Scale the feature columns and cut the frame into shuffled sequences.

    Every column except ``"target"`` is replaced by its percent change and
    then standardised with ``preprocessing.scale``. The rows are then walked
    in order with a sliding window of ``SEQ_LEN`` rows; each full window is
    paired with the label of its last row, and the pairs are shuffled.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. A ``"future"`` column, if present, is discarded. The
        last remaining column is used as the label of each row (in this
        notebook that is ``"target"``, which is added last).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` stacks the windows (one entry per sequence,
        each ``SEQ_LEN`` rows of feature values) and ``y`` holds the
        matching labels. Both are empty when fewer than ``SEQ_LEN`` usable
        rows remain.
    """
    # drop() always returns a new frame, so the in-place edits below never
    # touch the caller's data, even when there is no "future" column.
    df = df.drop(columns=["future"], errors="ignore")

    for col in df.columns:
        if col == "target":
            continue
        # A zero followed by a non-zero value gives +/-inf, which dropna()
        # keeps and scale() rejects; turn those into NaN so they are dropped.
        df[col] = df[col].pct_change().replace([np.inf, -np.inf], np.nan)
        df.dropna(inplace=True)
        df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    sequential_data = []
    window = deque(maxlen=SEQ_LEN)  # oldest row falls out automatically

    for row in df.values:
        window.append(list(row[:-1]))  # features only; the label is row[-1]
        if len(window) == SEQ_LEN:
            sequential_data.append([np.array(window), row[-1]])
    random.shuffle(sequential_data)

    # Hand the result back to the caller instead of discarding it.
    X = [sequence for sequence, _ in sequential_data]
    y = [label for _, label in sequential_data]
    return np.array(X), np.array(y)


In [77]:
# Empty frame that the per-pair close/volume columns are joined into.
main_df = pd.DataFrame()

In [78]:
ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]

# Read each pair's CSV in turn and print its first rows as a quick check.
for ratio in ratios:
    dataset = f"/content/gdrive/My Drive/crypto_data/{ratio}.csv"
    df = pd.read_csv(
        dataset,
        names=["time", "low", "high", "open", "close", "volume"],
    )
    print(df.head())



         time          low         high         open        close    volume
0  1528968660  6489.549805  6489.560059  6489.560059  6489.549805  0.587100
1  1528968720  6487.370117  6489.560059  6489.549805  6487.379883  7.706374
2  1528968780  6479.410156  6487.370117  6487.370117  6479.410156  3.088252
3  1528968840  6479.410156  6479.419922  6479.419922  6479.410156  1.404100
4  1528968900  6475.930176  6479.979980  6479.410156  6479.979980  0.753000
         time        low       high       open      close      volume
0  1528968660  96.580002  96.589996  96.589996  96.580002    9.647200
1  1528968720  96.449997  96.669998  96.589996  96.660004  314.387024
2  1528968780  96.470001  96.570000  96.570000  96.570000   77.129799
3  1528968840  96.449997  96.570000  96.570000  96.500000    7.216067
4  1528968900  96.279999  96.540001  96.500000  96.389999  524.539978
         time        low   high        open      close     volume
0  1528968720  485.98999  486.5  486.019989  486.01001  26

In [None]:
  df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)
  df.set_index("time", inplace=True)
  df = df[[f"{ratio}_close", f"{ratio}_volume"]]

  if len(main_df) == 0:
    main_df = df
  else:
    main_df = main_df.join(df, how='outer', lsuffix='_left', rsuffix='_right')

main_df["future"] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(-FUTURE_PERIOD_PREDICT)

main_df["target"] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"]))

#print(main_df[[f"{RATIO_TO_PREDICT}_close", "future", "target"]].head(10))

In [71]:
# Order the index values and take the one 5% from the end as the
# boundary between training and validation rows.
times = list(main_df.index.values)
times.sort()
holdout_size = int(0.05 * len(times))
last_5pct = times[-holdout_size]
print(last_5pct)


1534571100


In [82]:
# Rows at or after the cutoff become the validation set; the rest stay in
# main_df as training data.
validation_main_df = main_df.loc[main_df.index >= last_5pct]
main_df = main_df.loc[main_df.index < last_5pct]

# The result of this call is not captured; the intended unpacking for both
# splits is kept below for reference.
preprocess_df(main_df)
#train_x, train_y = preprocess_df(main_df)
#validation_x, validation_y = preprocess_df(validation_main_df)