# 1. 学習、検証、テストデータの概要

In [1]:
import numpy as np
import pandas as pd

# Load the training set: 30 days of 1-minute bars. The CSV has no header row,
# so the column names are supplied explicitly. NaN rows occur when no trade
# happened in that minute or while the exchange was under maintenance.
train_data = pd.read_csv(
    'train_min_BTCJPY.csv',
    header=None,
    names=['time', 'open', 'high', 'low', 'close', 'amount'],
)
train_data['time'] = pd.to_datetime(train_data['time'])

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
train_data.info()
print(train_data.head())
print(train_data.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43200 entries, 0 to 43199
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   time    43200 non-null  datetime64[ns]
 1   open    41643 non-null  float64       
 2   high    41643 non-null  float64       
 3   low     41643 non-null  float64       
 4   close   41643 non-null  float64       
 5   amount  41643 non-null  float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 2.0 MB
None
                 time      open      high       low     close     amount
0 2017-07-04 17:01:00  296016.0  296558.0  296016.0  296540.0   1.158600
1 2017-07-04 17:02:00  296539.0  296769.0  296060.0  296679.0  11.115507
2 2017-07-04 17:03:00  296060.0  296090.0  296060.0  296060.0   5.527494
3 2017-07-04 17:04:00  296060.0  296260.0  296015.0  296015.0   8.414064
4 2017-07-04 17:05:00  296361.0  296540.0  296155.0  296155.0   3.993010
                     time      open   

In [2]:
# Load the validation set: one week of 1-minute bars. The CSV has no header
# row, so the column names are supplied explicitly.
val_data = pd.read_csv(
    'val_min_BTCJPY.csv',
    header=None,
    names=['time', 'open', 'high', 'low', 'close', 'amount'],
)
val_data['time'] = pd.to_datetime(val_data['time'])

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
val_data.info()
print(val_data.head())
print(val_data.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10080 entries, 0 to 10079
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   time    10080 non-null  datetime64[ns]
 1   open    9809 non-null   float64       
 2   high    9809 non-null   float64       
 3   low     9809 non-null   float64       
 4   close   9809 non-null   float64       
 5   amount  9809 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 472.6 KB
None
                 time      open      high       low     close    amount
0 2017-08-03 17:01:00  302589.0  302851.0  302589.0  302760.0  9.631020
1 2017-08-03 17:02:00  302780.0  302852.0  302323.0  302852.0  0.259996
2 2017-08-03 17:03:00  302852.0  302852.0  302852.0  302852.0  0.023996
3 2017-08-03 17:04:00  302501.0  302852.0  302500.0  302852.0  6.577205
4 2017-08-03 17:05:00  302865.0  302865.0  302500.0  302500.0  2.017794
                     time      open      h

In [3]:
# Load the test set: roughly 12 days of 1-minute bars covering the most recent
# price crash. The CSV has no header row, so the column names are supplied
# explicitly.
test_data = pd.read_csv(
    'test_min_BTCJPY.csv',
    header=None,
    names=['time', 'open', 'high', 'low', 'close', 'amount'],
)
test_data['time'] = pd.to_datetime(test_data['time'])

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
test_data.info()
print(test_data.head())
print(test_data.tail())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16561 entries, 0 to 16560
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   time    16561 non-null  datetime64[ns]
 1   open    16264 non-null  float64       
 2   high    16264 non-null  float64       
 3   low     16264 non-null  float64       
 4   close   16264 non-null  float64       
 5   amount  16264 non-null  float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 776.4 KB
None
                 time       open       high        low      close     amount
0 2021-05-12 21:00:00  6151000.0  6151589.0  6144740.0  6146269.0  14.478422
1 2021-05-12 21:01:00  6146269.0  6146996.0  6140762.0  6143139.0   2.861733
2 2021-05-12 21:02:00  6142300.0  6147069.0  6142300.0  6145280.0   1.609866
3 2021-05-12 21:03:00  6147072.0  6147075.0  6137814.0  6142453.0   1.097219
4 2021-05-12 21:04:00  6139713.0  6139713.0  6134116.0  6136047.0   3.400958
            

# 2. 価格変化率への変換

In [4]:
# Percentage change of the open price relative to the value 720 rows earlier
# (720 one-minute bars = 12 hours when no bars are missing).
# NOTE(review): NaN rows are dropped *before* the lag is applied, so wherever
# the raw data has gaps a 720-row lag spans more than 12 hours of wall-clock
# time -- confirm this is the intended definition.
open_val = val_data['open'].dropna().reset_index(drop=True)

# The first 720 rows have no reference price and come out as NaN; drop them
# and renumber so that row 0 is the first valid change rate.
open_val_f = (
    open_val
    .pct_change(periods=720)
    .dropna()
    .reset_index(drop=True)
)

open_val_f.head()

0    0.013031
1    0.012716
2    0.012541
3    0.013716
4    0.012497
Name: open, dtype: float64

In [5]:
# Sanity check: recompute the first five 720-row change rates by hand.
# The printed values should match open_val_f.head().
for offset in range(5):
    print(open_val[offset + 720] / open_val[offset] - 1)

0.013030876865979923
0.012715503005482631
0.012540778994360346
0.013715657138323456
0.012497317286579879


# 3. DataLoader化

In [9]:
import mydlmodules

# Lags 1..30: the change-rate series is sampled over 30 consecutive minutes to
# form one model input.
time_diff_li = list(range(1, 31))

# Binary label: 1 where the change rate is >= 0, 0 where it is negative.
# open_val_f contains no NaN (they were dropped upstream), so a plain boolean
# comparison yields the same integers as masking the two cases separately.
pos_neg = (open_val_f >= 0).astype(int).rename('pos_neg')
series_f = pd.concat([open_val_f, pos_neg], axis=1)

# NOTE(review): despite its name, this loader is built from the validation
# series (open_val_f is derived from val_data).
# The positional arguments 50 and False are presumably batch size and a
# shuffle flag -- TODO confirm against mydlmodules.tde_generator_class.
train_loader = mydlmodules.tde_generator_class(
    series_f['open'],
    series_f['pos_neg'],
    time_diff_li,
    50,
    False,
)

# Pull a single batch and check the tensor shapes.
batch = next(iter(train_loader))
x, t = batch
print(x.shape, t.shape)

torch.Size([50, 30, 1]) torch.Size([50])


In [10]:
# Inspect the first sample of the batch.
print(x[0, 0])   # first value of the input window
print(x[0, 29])  # last value of the input window
# Target label. Per the original author's note: 1 if the change rate between
# the price one minute later and the price 12 hours before it is positive,
# 0 if it is negative.
print(t[0])

tensor([0.0130])
tensor([0.0140])
tensor(1)


In [8]:
import torch

# Cross-check the batch against values recomputed from the raw series:
# rows 0 and 29 correspond to the first and last entries of the input window.
for start in (0, 29):
    print(torch.tensor(open_val[start + 720] / open_val[start] - 1))

# Change rate one step after the window; its sign is what the label encodes.
print(open_val[750] / open_val[30] - 1)

tensor(0.0130, dtype=torch.float64)
tensor(0.0140, dtype=torch.float64)
0.01404628837970856
