In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load data from the text file
data = []
dates = []
with open('data/IBM.txt', 'r') as file:
    lines = file.readlines()

# The first line is skipped (presumably a header row -- confirm against the file).
# Dates are compared as strings, which is a valid range check only if the file
# stores them as zero-padded YYYY-MM-DD like the two bounds below.
for line in lines[1:]:
    parts = line.strip().split(',')
    date = parts[0]
    if '1980-12-12' <= date <= '2022-07-22':
        dates.append(date)
        data.append(float(parts[4]))  # 'Close' column

# Create a DataFrame from the loaded data, indexed by date
df = pd.DataFrame({'Date': pd.to_datetime(dates), 'Value': data}).set_index('Date')

# Extract the values from the DataFrame
signal = df['Value'].values

# Define the training and testing split: the first 80% of rows train, the rest test
train_size = int(len(signal) * 0.8)
if train_size == 0:
    raise ValueError(
        "Not enough rows loaded from data/IBM.txt to build a training split"
    )

# Fit the scaler on the training rows only, so that the min/max of the test
# period do not leak into the training features. The whole series is then
# transformed with those training statistics; test values outside the training
# range will therefore fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(signal[:train_size].reshape(-1, 1))
df_scaled = scaler.transform(signal.reshape(-1, 1))

train_data = df_scaled[:train_size, :]
test_data = df_scaled[train_size:, :]

def create_groups(dataset, window_size_1, window_size_2, window_size_3, timeslice, step):
    """Build moving-average features at three window sizes, with next-row labels.

    For every sample start ``index`` (0, step, 2*step, ...) and every offset
    ``i`` in ``range(0, timeslice, step)``, the mean of column 0 over
    ``dataset[index + i : index + i + window_size]`` is one feature, and the
    row that immediately follows that window, ``dataset[index + i + window_size]``,
    is the matching label. Features (and labels) are laid out as all offsets
    for ``window_size_1``, then ``window_size_2``, then ``window_size_3``.

    Parameters
    ----------
    dataset : 2-D array-like supporting ``dataset[a:b, 0]`` and ``dataset[k]``
        Input series; only column 0 is averaged, labels are whole rows.
    window_size_1, window_size_2, window_size_3 : int
        Lengths of the three averaging windows.
    timeslice : int
        Exclusive upper bound of the offsets visited inside one sample.
    step : int
        Stride used both between offsets and between sample starts.
        Must be positive.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X[k]`` is the concatenated window means of sample ``k``; ``y[k]`` is
        the concatenated label rows of sample ``k``. Both are empty arrays
        when ``dataset`` is too short to produce a single sample.

    Raises
    ------
    ValueError
        If ``step`` is not positive (the loops could never advance).

    Notes
    -----
    A window whose values are all NaN is skipped (no feature, no label), so
    rows can differ in length when the data contains NaN runs. Windows that
    are only partly NaN still yield a NaN mean.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer, got {!r}".format(step))

    window_sizes = (window_size_1, window_size_2, window_size_3)
    offsets = range(0, timeslice, step)
    n_rows = len(dataset)

    # Sample starts must satisfy index + timeslice * window_size_3 < n_rows.
    # This bound is kept as-is so sample counts do not change for existing calls.
    stop = n_rows - timeslice * window_size_3
    if offsets:
        # Also keep the farthest label row in range, whichever window is the
        # largest; otherwise a window_size_1/_2 bigger than window_size_3
        # can index past the end of the dataset.
        stop = min(stop, n_rows - offsets[-1] - max(window_sizes))

    X_data, y_data = [], []
    for index in range(0, stop, step):
        features, labels = [], []
        for window_size in window_sizes:
            window_means, window_labels = [], []
            for i in offsets:
                start = index + i
                current_slice = dataset[start:start + window_size, 0]
                if not np.isnan(current_slice).all():
                    window_means.append(np.mean(current_slice))
                    window_labels.append(dataset[start + window_size])
            features.append(window_means)
            labels.append(window_labels)
        X_data.append(np.concatenate(features))
        y_data.append(np.concatenate(labels))

    return np.array(X_data), np.array(y_data)

# Lengths of the three averaging windows passed to create_groups.
window_size_1, window_size_2, window_size_3 = 1, 3, 9
# Offset bound within one sample, and the stride between offsets / samples.
timeslice, step = 4, 1

# Calls on the real train/test splits (left commented out):
# x_train_data, y_train_data = create_groups(train_data, window_size_1, window_size_2, window_size_3, timeslice, step)
# x_test_data, y_test_data = create_groups(test_data, window_size_1, window_size_2, window_size_3, timeslice, step)


In [5]:
# Single-column ramp 0..99: every window mean can be checked by hand.
mock_data = np.arange(100).reshape(-1, 1)
x_test_data, y_test_data = create_groups(
    mock_data,
    window_size_1,
    window_size_2,
    window_size_3,
    timeslice,
    step,
)

In [7]:
# Show the first 20 rows of the mock input for reference.
mock_data[:20]

array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19]])

In [6]:
# Display the full feature matrix returned by create_groups for the mock input.
x_test_data

array([[ 0.,  1.,  2.,  3.,  1.,  2.,  3.,  4.,  4.,  5.,  6.,  7.],
       [ 1.,  2.,  3.,  4.,  2.,  3.,  4.,  5.,  5.,  6.,  7.,  8.],
       [ 2.,  3.,  4.,  5.,  3.,  4.,  5.,  6.,  6.,  7.,  8.,  9.],
       [ 3.,  4.,  5.,  6.,  4.,  5.,  6.,  7.,  7.,  8.,  9., 10.],
       [ 4.,  5.,  6.,  7.,  5.,  6.,  7.,  8.,  8.,  9., 10., 11.],
       [ 5.,  6.,  7.,  8.,  6.,  7.,  8.,  9.,  9., 10., 11., 12.],
       [ 6.,  7.,  8.,  9.,  7.,  8.,  9., 10., 10., 11., 12., 13.],
       [ 7.,  8.,  9., 10.,  8.,  9., 10., 11., 11., 12., 13., 14.],
       [ 8.,  9., 10., 11.,  9., 10., 11., 12., 12., 13., 14., 15.],
       [ 9., 10., 11., 12., 10., 11., 12., 13., 13., 14., 15., 16.],
       [10., 11., 12., 13., 11., 12., 13., 14., 14., 15., 16., 17.],
       [11., 12., 13., 14., 12., 13., 14., 15., 15., 16., 17., 18.],
       [12., 13., 14., 15., 13., 14., 15., 16., 16., 17., 18., 19.],
       [13., 14., 15., 16., 14., 15., 16., 17., 17., 18., 19., 20.],
       [14., 15., 16., 17., 15., 1