In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import os
from datetime import datetime
from datetime import timezone

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# --- Batch, window and hold-out sizes ---
batch_size = 32
window_size = 256  # must be a multiple of batch_size
validation_size = batch_size * 8192  # must be a multiple of batch_size
test_size = batch_size * 8192  # must be a multiple of batch_size

# --- Feature engineering ---
ma_periods = 14  # length (in periods) of the Simple Moving Average

# --- Data selection ---
ticker = 'btcusd'  # data file name without its extension; prefixes the output CSV names
start_date = '2014-09-17'

# --- Reproducibility ---
seed = 42  # arbitrary fixed value so that the seed is the same on every run

# --- File locations ---
full_time_series_path = "/kaggle/input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2020-09-14.csv"
train_time_series_path = f'/kaggle/working/{ticker}-train.csv'
validate_time_series_path = f'/kaggle/working/{ticker}-validate.csv'
test_time_series_path = f'/kaggle/working/{ticker}-test.csv'

def dateparse(timeInSeconds):
    """Convert a Unix timestamp in seconds to a naive datetime expressed in UTC.

    The conversion is done explicitly in UTC so that the result does not
    depend on the time zone of the machine running the notebook (a plain
    ``datetime.fromtimestamp`` call returns local time, which also yields
    repeated or missing wall-clock minutes around DST transitions).

    Args:
        timeInSeconds: Seconds since the Unix epoch, as a number or as a
            string that ``float()`` can parse.

    Returns:
        A ``datetime`` without ``tzinfo`` holding the UTC wall-clock time.

    Raises:
        ValueError / TypeError: If ``timeInSeconds`` cannot be converted by
            ``float()``.
    """
    utc_moment = datetime.fromtimestamp(float(timeInSeconds), tz=timezone.utc)
    # Drop tzinfo so the value stays comparable with naive datetimes.
    return utc_moment.replace(tzinfo=None)

# Create the output directory up front; exist_ok=True makes this a no-op when it is already there.
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
working_dir = "/kaggle/working"
os.makedirs(working_dir, exist_ok=True)

In [None]:
# Read only the columns that are used later. The Timestamp column becomes the
# index and each value is converted to a datetime by dateparse.
# NOTE(review): `date_parser` is deprecated since pandas 2.0; it is kept here
# for compatibility with the pandas version this notebook was written for.
df_csv = pd.read_csv(full_time_series_path, usecols=['Timestamp','High','Low'],
    index_col=['Timestamp'], parse_dates=['Timestamp'], date_parser=dateparse)
# Keep the rows from start_date onward. The explicit .copy() gives `df` its own
# data, so the column assignments and deletions performed on `df` afterwards do
# not trigger pandas' SettingWithCopyWarning.
df = df_csv[df_csv.index >= pd.to_datetime(start_date)].copy()
df

In [None]:
# Mid-price of each row: the average of its High and Low.
df["HLAvg"] = (df['High'] + df['Low']) / 2
# The raw extremes are not needed once the mid-price has been derived.
del df['High'], df['Low']
# Simple Moving Average of the mid-price over ma_periods rows
df['MA'] = df['HLAvg'].rolling(window=ma_periods).mean()
# Log Returns: natural log of the ratio between each MA value and the previous one
previous_ma = df['MA'].shift(1)
df['Returns'] = np.log(df['MA'].div(previous_ma))
df.head(15)

In [None]:
# Remove every row that contains at least one NaN.
df.dropna(axis=0, how='any', inplace=True)
# Drop the leading remainder rows so that the row count is an exact multiple of batch_size.
surplus_rows = len(df) % batch_size
df = df.iloc[surplus_rows:]
df

In [None]:
# Chronological split, counted from the end of the frame:
#   train      -> everything before the last (validation_size + test_size) rows
#   validation -> the validation_size rows before the test rows, preceded by window_size extra rows
#   test       -> the last test_size rows, preceded by window_size extra rows
holdout_size = validation_size + test_size
df_train = df.iloc[:-holdout_size]
df_validation = df.iloc[-(holdout_size + window_size):-test_size]
df_test = df.iloc[-(test_size + window_size):]
print(f'df_train.shape {df_train.shape}, df_validation.shape {df_validation.shape}, df_test.shape {df_test.shape}')

In [None]:
# Write each split to its own CSV file (the index is written as the first column).
for split_frame, destination_path in (
    (df_train, train_time_series_path),
    (df_validation, validate_time_series_path),
    (df_test, test_time_series_path),
):
    split_frame.to_csv(destination_path)