In [25]:
import pandas as pd
from utils.matrix_builder import create_feature_matrix

# Load the price/TA-metrics CSV and keep only the timestamp and price columns.
csv_hour_file = '../data/ta_metrics/final_price_ta_metrics.csv'
simple_df = pd.read_csv(csv_hour_file, parse_dates=['Datetime'])[['Datetime', 'MarginalES']]

# Training window bounds; both ends are compared inclusively against 'Datetime'.
# NOTE(review): the end bound is a bare date string, so it is compared as
# 2019-01-07 00:00:00 and any row stamped later on that day is left out.
# The recorded output shows 6 daily rows (2019-01-01 .. 2019-01-06).
train_start_date = '2019-01-01'
train_end_date = '2019-01-07'

simple_subset = simple_df.loc[
    simple_df['Datetime'].between(train_start_date, train_end_date)
]

# Lag window handed to create_feature_matrix; in the recorded output X_simple
# has one column per lag (3 columns for a window of 3).
lag_price_window = 3

X_simple, y_simple = create_feature_matrix(simple_subset, lag_price_window)

In [24]:
# Dump the feature matrix followed by the target vector, one after the other.
print(X_simple, y_simple, sep='\n')

       0      1      2
0  65.88  63.16  66.70
1  63.16  66.70  69.17
2  66.70  69.17  64.00
0    69.17
1    64.00
2    64.86
dtype: float64


In [26]:
import pandas as pd
from utils.matrix_builder import create_feature_matrix, create_expanded_feature_matrix

# Load the full price/TA-metrics table; simple_df is its two-column
# (timestamp, price) projection.
csv_hour_file = '../data/ta_metrics/final_price_ta_metrics.csv'

features_df = pd.read_csv(csv_hour_file, parse_dates=['Datetime'])
simple_df = features_df.loc[:, ['Datetime', 'MarginalES']]

# Training window bounds (inclusive comparison on both ends).
# NOTE(review): with these bounds the recorded output shows 6 daily rows
# (2019-01-01 .. 2019-01-06), not the 9 days an earlier note mentioned --
# confirm the intended window length.
train_start_date = '2019-01-01'
train_end_date = '2019-01-07'

# Apply the same date filter to the price-only frame and to the full feature frame.
simple_subset = simple_df.loc[
    simple_df['Datetime'].between(train_start_date, train_end_date)
]
feature_subset = features_df.loc[
    features_df['Datetime'].between(train_start_date, train_end_date)
]

lag_price_window = 3

# Show the rows selected for the price-only matrix.
print(simple_subset)

               Datetime  MarginalES
187 2019-01-01 14:00:00       65.88
188 2019-01-02 14:00:00       63.16
189 2019-01-03 14:00:00       66.70
190 2019-01-04 14:00:00       69.17
191 2019-01-05 14:00:00       64.00
192 2019-01-06 14:00:00       64.86


In [10]:
# Diagnostic wrapper used to sanity-check create_feature_matrix.
def debug_create_feature_matrix(data, lag_price_window):
    """Print what create_feature_matrix is about to receive, then call it.

    Parameters
    ----------
    data : pandas.DataFrame
        Prices are read from the 'MarginalES' column when it exists,
        otherwise from the column at position 1.
    lag_price_window : int
        Window size forwarded unchanged to create_feature_matrix.

    Returns
    -------
    The return value of ``create_feature_matrix(data, lag_price_window)``,
    passed through untouched (the caller unpacks it as ``X, y``).

    Notes
    -----
    The "Expected samples" figure is simply ``len(prices) - lag_price_window``;
    it presumably mirrors how create_feature_matrix counts rows -- verify
    against that function.
    """
    price_series = data['MarginalES'] if 'MarginalES' in data.columns else data.iloc[:, 1]
    prices = price_series.values
    n_prices = len(prices)

    print(f"Prices array: {prices}")
    print(f"Length of prices: {n_prices}")
    print(f"Window size: {lag_price_window}")
    # Computed only after the lines above so the print order matches even if
    # the subtraction fails for an unexpected window type.
    n_samples = n_prices - lag_price_window
    print(f"Expected samples: {n_samples}")
    print(f"Range: {list(range(n_samples))}")

    return create_feature_matrix(data, lag_price_window)

# Run the diagnostic wrapper on the current training subset.
# NOTE(review): the recorded output for this cell lists 9 prices, while
# simple_subset printed elsewhere in the notebook has 6 rows -- the output
# looks stale; re-run on a fresh kernel.
X_debug, y_debug = debug_create_feature_matrix(
    data=simple_subset,
    lag_price_window=lag_price_window,
)

Prices array: [65.88 63.16 66.7  69.17 64.   64.86 68.74 67.97 64.25]
Length of prices: 9
Window size: 3
Expected samples: 6
Range: [0, 1, 2, 3, 4, 5]


In [27]:
# Rebuild the price-only lag matrix and print it next to the rows it came from.
X_simple, y_simple = create_feature_matrix(simple_subset, lag_price_window)

# Dimensions of the feature matrix and the target vector.
print("Shapes:", X_simple.shape, y_simple.shape)

# Full contents, each preceded by its label on its own line.
print("X_simple:", X_simple, "y_simple:", y_simple, sep="\n")
print("Length of y_simple:", len(y_simple))

# Source rows, for comparison with the matrix above.
print("\nLength of simple_subset:", len(simple_subset))
print("simple_subset:", simple_subset, sep="\n")

Shapes: (3, 3) (3,)
X_simple:
       0      1      2
0  65.88  63.16  66.70
1  63.16  66.70  69.17
2  66.70  69.17  64.00
y_simple:
0    69.17
1    64.00
2    64.86
dtype: float64
Length of y_simple: 3

Length of simple_subset: 6
simple_subset:
               Datetime  MarginalES
187 2019-01-01 14:00:00       65.88
188 2019-01-02 14:00:00       63.16
189 2019-01-03 14:00:00       66.70
190 2019-01-04 14:00:00       69.17
191 2019-01-05 14:00:00       64.00
192 2019-01-06 14:00:00       64.86


In [28]:
# Recompute the price-only matrix from the current subset and window.
X_simple, y_simple = create_feature_matrix(simple_subset, lag_price_window)

# Shapes first, then the feature matrix and target vector themselves.
print(X_simple.shape, y_simple.shape)
print(X_simple, y_simple, sep="\n")

(3, 3) (3,)
       0      1      2
0  65.88  63.16  66.70
1  63.16  66.70  69.17
2  66.70  69.17  64.00
0    69.17
1    64.00
2    64.86
dtype: float64


In [None]:
#! Expand this to include new metrics (more columns)
# df_prices = df[['Datetime', 'MarginalES']]

# # Select all relevant columns except 'Datetime'
# feature_columns = df.columns[1:]  # Exclude 'Datetime'
# df = df[['Datetime'] + list(feature_columns)]

# print(df)

In [29]:
# Build the expanded matrix from the full feature subset; in the recorded
# output it carries the lagged prices plus the extra metric columns
# (33 columns for a window of 3).
X_extended, y_extended = create_expanded_feature_matrix(feature_subset, lag_price_window)

# Shapes first, then the feature matrix and target vector themselves.
print(X_extended.shape, y_extended.shape)
print(X_extended, y_extended, sep="\n")

(3, 33) (3,)
   price_t-3  price_t-2  price_t-1      SMA_3   SMA_5      SMA_7     SMA_14  \
0      65.88      63.16      66.70  65.246667  64.974  63.842857  64.975000   
1      63.16      66.70      69.17  66.343333  66.026  64.490000  65.401429   
2      66.70      69.17      64.00  66.623333  65.782  65.434286  65.330000   

      SMA_30     SMA_90    SMA_180  ...     RSI_30  BB_Width_7  BB_Width_14  \
0  64.886000  65.044333  67.227333  ...  50.823486   17.871701    15.638302   
1  64.988333  65.038667  67.272889  ...  52.153121   21.198596    16.528083   
2  64.947333  65.184222  67.267611  ...  49.268675   11.634263    16.685668   

      STD_7    STD_14    STD_30    STD_90  month  day_of_week  is_weekend  
0  3.080999  2.636139  2.620006  4.626188    1.0          3.0         0.0  
1  3.691585  2.804414  2.726835  4.620755    1.0          4.0         0.0  
2  2.055690  2.828060  2.732317  4.369895    1.0          5.0         1.0  

[3 rows x 33 columns]
0    69.17
1    64.00
2   