In [27]:
import requests
import pandas as pd
import os
import numpy as np

def fetch_cryptocompare_ohlcv_data(fsym, tsym, limit=2000, aggregate=1, interval='day'):
    """
    Cryptocompare APIを使用して、過去のOHLCVデータを取得する関数
    """
    base_url = 'https://min-api.cryptocompare.com/data/v2/histo'
    intervals = {'minute': 'minute', 'hour': 'hour', 'day': 'day'}
    
    if interval not in intervals:
        raise ValueError(f"Invalid interval: {interval}")

    url = f"{base_url}{intervals[interval]}"
    params = {
        'fsym': fsym.upper(),
        'tsym': tsym.upper(),
        'limit': limit,
        'aggregate': aggregate
    }
    response = requests.get(url, params=params)
    data = response.json()['Data']['Data']
    
    # データをpandas DataFrameに変換
    df = pd.DataFrame(data)
    df['timestamp'] = pd.to_datetime(df['time'], unit='s')
    df = df[['timestamp', 'open', 'high', 'low', 'close', 'volumeto']]
    df.columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    
    return df

def save_to_csv(df, filename):
    df.to_csv(filename, index=False)

def load_from_csv(filename):
    return pd.read_csv(filename)

filename = 'ohlcv_data.csv'

if os.path.exists(filename):
    # ローカルにファイルが存在する場合、データをローカルから読み込む
    ohlcv_data = load_from_csv(filename)
else:
    # ローカルにファイルが存在しない場合、データを取得し、ローカルに保存する
    fsym = 'BTC'
    tsym = 'JPY'
    ohlcv_data = fetch_cryptocompare_ohlcv_data(fsym, tsym)
    save_to_csv(ohlcv_data, filename)

print(ohlcv_data)

       timestamp        open        high         low       close        volume
0     2017-10-31   679910.34   727288.22   674553.25   723579.09  4.728300e+10
1     2017-11-01   723751.37   766552.45   713575.88   764334.90  5.844858e+10
2     2017-11-02   764247.87   857528.51   761617.11   805165.37  9.242606e+10
3     2017-11-03   805246.62   853429.62   786480.90   823854.18  8.196366e+10
4     2017-11-04   823851.31   845389.80   800200.82   839556.37  6.597304e+10
...          ...         ...         ...         ...         ...           ...
1996  2023-04-19  4069137.27  4076795.31  3872362.29  3893088.33  2.048354e+10
1997  2023-04-20  3893088.33  3918645.83  3765607.66  3786412.95  1.844144e+10
1998  2023-04-21  3786412.95  3802188.72  3644461.28  3654750.87  1.702854e+10
1999  2023-04-22  3654750.87  3754880.86  3641006.51  3747411.86  8.716559e+09
2000  2023-04-23  3747411.86  3747981.50  3700333.12  3709590.27  3.535152e+09

[2001 rows x 6 columns]


In [45]:
import math

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer("positional_encodings", pe.unsqueeze(0))

    def forward(self, x):
        return x + self.positional_encodings[:, :x.size(1), :]


In [53]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


class TransformerModel(nn.Module):
    def __init__(self, input_dim, seq_length, n_heads, n_encoder_layers, n_decoder_layers, d_model, d_ff):
        super(TransformerModel, self).__init__()
        self.d_model = d_model
        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoding = PositionalEncoding(d_model, seq_length)
        self.transformer = nn.Transformer(d_model, n_heads, n_encoder_layers, n_decoder_layers, d_ff)
        self.fc = nn.Linear(d_model, input_dim)

    def forward(self, src, tgt):
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoding(src)
        tgt = self.embedding(tgt) * math.sqrt(self.d_model)
        tgt = self.pos_encoding(tgt)
        output = self.transformer(src, tgt)
        return self.fc(output)

def load_data(df):
    feature_cols = ["open", "high", "low", "close", "volume"]
    input_data = df[feature_cols].values

    scaler = MinMaxScaler()
    input_data = scaler.fit_transform(input_data)

    return input_data, scaler

def create_dataset(data, seq_length):
    x = []
    y = []
    for i in range(len(data) - seq_length - 1):
        x.append(data[i : i + seq_length, :])
        y.append(data[i + 1 : i + seq_length + 1, :])
    return np.array(x), np.array(y)

def main():
    window_size = 10
    batch_size = 32
    n_epochs = 10

    # データの取得と前処理
    input_data, scaler = load_data(ohlcv_data)
    X, y = create_dataset(input_data, window_size)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    # テンソルに変換
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
    
    train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # モデルの作成
    input_dim = 5
    seq_length = window_size
    n_heads = 2
    n_encoder_layers = 3
    n_decoder_layers = 3
    d_model = 128
    d_ff = 256
    model = TransformerModel(input_dim, seq_length, n_heads, n_encoder_layers, n_decoder_layers, d_model, d_ff)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # 学習
    for epoch in range(n_epochs):
        for batch_x, batch_y in train_dataloader:
            optimizer.zero_grad()
        
            # トランスフォーマーモデル用の入力とターゲットシーケンスを準備
            src = batch_x[:, :-1, :]
            tgt = batch_y[:, :-1, :]
        
            # 入力とターゲットのシーケンスをモデルに渡す
            output = model(src, tgt)

            loss = criterion(output, tgt)
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch}/{n_epochs}, Loss: {loss:.6f}")

    # 予測
    predictions = []

    for i in range(X_test.shape[0]):
        src = X_test_tensor[i:i + 1, :-1, :]
        tgt = X_test_tensor[i:i + 1, 1:, :]
        prediction = model(src, tgt).detach().numpy()
        predictions.append(prediction)

    predictions = np.concatenate(predictions, axis=0)

    # スケーラを使って価格に戻す
    close_col_index = 3
    predictions_rescaled = scaler.inverse_transform(np.hstack([np.zeros((len(predictions), close_col_index)), predictions.reshape(-1, 1), np.zeros((len(predictions), input_data.shape[1] - close_col_index - 1))]))[:, close_col_index]

    # 予測結果の表示
    for i, pred in enumerate(predictions_rescaled):
        print(f"Predicted close price for day {i + 1}: {pred}, Actual close price: {scaler.inverse_transform(y_test[i].reshape(1, -1))[0][3]}")

if __name__ == "__main__":
    main()

Epoch 0/10, Loss: 0.067143
Epoch 1/10, Loss: 0.015088
Epoch 2/10, Loss: 0.009877
Epoch 3/10, Loss: 0.005192
Epoch 4/10, Loss: 0.003849
Epoch 5/10, Loss: 0.002649
Epoch 6/10, Loss: 0.002885
Epoch 7/10, Loss: 0.002646
Epoch 8/10, Loss: 0.002242
Epoch 9/10, Loss: 0.001800


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 398 and the array at index 1 has size 17910