In [1]:
!pip install pyts
import numpy as np
import pandas as pd
from pyts.image import GramianAngularField
from matplotlib import image
import csv
from datetime import datetime
import os
import h5py
import matplotlib.pyplot as plt

def normalize(array):
    """Rescale ``array`` linearly so its minimum maps to 0 and its maximum to 1.

    Args:
        array: Numeric array-like. Must be non-empty, otherwise ``np.min``
            raises ``ValueError``.

    Returns:
        A float ``numpy.ndarray`` with the same shape as the input. When every
        element is equal (zero range) an all-zeros array is returned instead
        of the NaNs that a 0/0 division would produce.
    """
    values = np.asarray(array)
    min_val = np.min(values)
    span = np.max(values) - min_val
    if span == 0:
        # A constant input has no range to scale by; dividing would yield NaN
        # for every element and emit a runtime warning.
        return np.zeros(values.shape, dtype=float)
    return (values - min_val) / span

def save_gasf(df, bars, start_time):
    """Write sliding-window GASF images of BTC/ETH spread features to HDF5.

    For every window of ``bars`` consecutive rows, starting at the row whose
    ``Open_time`` equals ``start_time``, four spread series (close, upper
    shadow, lower shadow, body) are min-max normalized and turned into
    Gramian Angular Summation Field images. The four images are stored as one
    ``(4, bars, bars)`` dataset in
    ``/kaggle/working/gaf-btc-eth-10bar.hdf5`` under a running integer name
    ("0", "1", ...). For each stored window a row
    ``[end Open_time, last Close_BTC, last Close_ETH]`` is appended to
    ``/kaggle/working/price-btc-eth.csv``.

    Windows whose first and last ``Open_time`` are not exactly
    ``900000 * (bars - 1)`` apart are skipped.

    Side effects:
        * Adds the shadow/body/spread feature columns to ``df`` in place.
        * The HDF5 file is truncated on every call, while the CSV is opened
          in append mode. NOTE(review): re-running therefore leaves CSV rows
          from earlier runs in front of the new ones - confirm this is
          intended.

    Args:
        df: DataFrame with ``Open_time`` and ``Open``/``High``/``Low``/
            ``Close`` columns suffixed ``_BTC`` and ``_ETH``.
        bars: Number of rows per window; also the side length of each image.
        start_time: ``Open_time`` value of the first window's first row.

    Returns:
        0 on success, 1 if ``start_time`` does not occur in ``Open_time``.

    Raises:
        ValueError: If ``bars`` is smaller than 1.
    """
    if bars < 1:
        raise ValueError("bars must be a positive integer")

    # Expected difference between consecutive Open_time values.
    # NOTE(review): presumably 15 minutes in milliseconds - confirm.
    bar_interval = 900000

    # Calculate the shadow and body lengths for BTC and ETH
    df['upper_shadow_BTC'] = df['High_BTC'] - df[['Open_BTC', 'Close_BTC']].max(axis=1)
    df['lower_shadow_BTC'] = df[['Open_BTC', 'Close_BTC']].min(axis=1) - df['Low_BTC']
    df['body_BTC'] = (df['Close_BTC'] - df['Open_BTC']).abs()

    df['upper_shadow_ETH'] = df['High_ETH'] - df[['Open_ETH', 'Close_ETH']].max(axis=1)
    df['lower_shadow_ETH'] = df[['Open_ETH', 'Close_ETH']].min(axis=1) - df['Low_ETH']
    df['body_ETH'] = (df['Close_ETH'] - df['Open_ETH']).abs()

    # Absolute BTC-vs-ETH differences; these are the series that get imaged.
    df['close_spread'] = (df['Close_BTC'] - df['Close_ETH']).abs()
    df['upper_shadow_spread'] = (df['upper_shadow_BTC'] - df['upper_shadow_ETH']).abs()
    df['lower_shadow_spread'] = (df['lower_shadow_BTC'] - df['lower_shadow_ETH']).abs()
    df['body_spread'] = (df['body_BTC'] - df['body_ETH']).abs()

    columns = ["close_spread", "upper_shadow_spread", "lower_shadow_spread", "body_spread"]

    hdf5_path = "/kaggle/working/gaf-btc-eth-10bar.hdf5"
    price_file = '/kaggle/working/price-btc-eth.csv'

    # Locate the start row by position (not by index label) because the
    # windows below are sliced with iloc.
    matches = np.flatnonzero((df["Open_time"] == start_time).to_numpy())
    if matches.size == 0:
        print("Start time not found in the dataframe")
        return 1
    start_index = int(matches[0])

    # A window of `bars` rows spans `bars - 1` intervals.
    expected_span = bar_interval * (bars - 1)

    # The transformer holds no per-window state, so build it once.
    gasf = GramianAngularField(image_size=bars, method='summation')

    hdf5index = 0
    # Open both outputs once for the whole run instead of reopening the CSV
    # for every window.
    with h5py.File(hdf5_path, "w") as hdf5_file, \
            open(price_file, mode='a', newline='') as csvfile:
        price_writer = csv.writer(csvfile)

        for window_start in range(start_index, len(df) - bars + 1):
            window_df = df.iloc[window_start:window_start + bars]
            start_timestamp = window_df["Open_time"].iloc[0]
            end_timestamp = window_df["Open_time"].iloc[-1]

            # Skip windows whose endpoints are not the expected distance apart.
            if end_timestamp - start_timestamp != expected_span:
                continue

            # One normalized row per feature -> shape (len(columns), bars).
            normalized = np.array(
                [normalize(window_df[col].to_numpy()) for col in columns]
            )
            # Rows are transformed independently, giving one image per
            # feature: shape (len(columns), bars, bars).
            images = gasf.fit_transform(normalized)

            hdf5_file.create_dataset(f"{hdf5index}", data=images)
            hdf5index += 1

            price_writer.writerow([
                end_timestamp,
                window_df["Close_BTC"].iloc[-1],
                window_df["Close_ETH"].iloc[-1],
            ])

    return 0

# Load the BTC/ETH table from the Kaggle input dataset.
df = pd.read_csv(
    '/kaggle/input/data-btc-eth-20230401/data_BTC_ETH_20230401.csv'
)
# Build 10-row windows, starting at the row whose Open_time is one 900000
# step before 1654531200000. The bare call is kept as the final expression so
# the notebook echoes the return code.
save_gasf(df, 10, 1654531200000 - 900000)

Collecting pyts
  Downloading pyts-0.12.0-py3-none-any.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: pyts
Successfully installed pyts-0.12.0
[0m



0