In [2]:
import pickle
import pandas as pd
from vmdpy import VMD

# Load the BTC price/indicator table from the path relative to this notebook.
df = pd.read_csv(filepath_or_buffer="../data/Final_Btc_Data_with_Indicators.csv")

In [2]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,EPU,GPRD,Gold,MA50,EMA50,OBV,MACD,MACD Signal,PSAR
0,2014-04-01,463.5,444.7,487.4,438.0,3970.0,0.0423,111.578388,120.928154,1279.599976,463.5,463.5,3970.0,0.0,0.0,463.5
1,2014-04-02,424.4,463.5,480.3,409.3,4380.0,-0.0843,111.578388,94.748909,1290.5,443.95,461.966667,-410.0,-3.119088,-0.623818,424.4
2,2014-04-03,436.3,424.4,448.7,386.7,4020.0,0.0281,111.578388,63.297474,1284.400024,441.4,460.960131,3610.0,-4.577986,-1.414651,487.4
3,2014-04-04,444.4,436.3,456.5,415.5,2570.0,0.0185,111.578388,59.551781,1303.199951,442.15,460.310714,6180.0,-5.022672,-2.136255,485.386
4,2014-04-05,456.6,444.4,461.2,439.4,1160.0,0.0276,111.578388,60.448311,1303.199951,445.04,460.165196,7340.0,-4.340615,-2.577127,483.41228


In [3]:
# Column to predict; every remaining column except the date stamp is a model input.
target_col = "Price"
features = [name for name in df.columns if name not in (target_col, "Date")]

In [4]:
# VMD hyper-parameters, passed positionally to VMD(...) in the decomposition loop.
# Meanings follow the vmdpy parameter descriptions — confirm against the installed version.
K = 5          # number of modes extracted per signal
alpha = 2000   # bandwidth constraint (moderate)
tau = 0.0      # noise tolerance; 0 means no strict fidelity enforcement
DC = 0         # do not impose a DC mode
init = 1       # initialise the centre frequencies (omegas) uniformly
tol = 1e-7     # convergence tolerance

In [5]:
# Decompose every feature column with VMD and keep all outputs, keyed by feature.
# NOTE(review): each column is decomposed over its full length here, i.e. before
# the train/test split performed further down — confirm this look-ahead is intended.
# NOTE(review): mode_df columns are spelled "<feature>_Mode_<n>" while the columns
# later added to `df` are spelled "<feature>_mode_<n>" — confirm which is wanted.
vmd_results = {}

for feature in features:
    print(f"Processing {feature}...")

    # Raw values of this column, then its K modes plus spectra and centre frequencies.
    signal = df[feature].values
    u, u_hat, omega = VMD(signal, alpha, tau, K, DC, init, tol)

    # Transposed so that each mode becomes one column of the per-feature frame.
    mode_df = pd.DataFrame(
        u.T,
        columns=[f'{feature}_Mode_{n}' for n in range(1, K + 1)],
    )

    vmd_results[feature] = {
        'modes': u,
        'freq_domain': u_hat,
        'frequencies': omega,
        'mode_df': mode_df,
    }

Processing Open...
Processing High...
Processing Low...
Processing Vol....
Processing Change %...
Processing EPU...
Processing GPRD...
Processing Gold...
Processing MA50...
Processing EMA50...
Processing OBV...
Processing MACD...
Processing MACD Signal...
Processing PSAR...


In [6]:
# Copy each decomposed mode into `df` as its own column, named <feature>_mode_<n>.
for feature in features:
    modes = vmd_results[feature]['modes']
    for i, column_name in enumerate(f"{feature}_mode_{n}" for n in range(1, K + 1)):
        df[column_name] = modes[i]

In [7]:
# Rebuild the input list so it also picks up the mode columns now present in `df`.
features = [name for name in df.columns if name not in (target_col, "Date")]

In [8]:
# Display the refreshed feature list: the original columns followed by the VMD mode columns.
features

['Open',
 'High',
 'Low',
 'Vol.',
 'Change %',
 'EPU',
 'GPRD',
 'Gold',
 'MA50',
 'EMA50',
 'OBV',
 'MACD',
 'MACD Signal',
 'PSAR',
 'Open_mode_1',
 'Open_mode_2',
 'Open_mode_3',
 'Open_mode_4',
 'Open_mode_5',
 'High_mode_1',
 'High_mode_2',
 'High_mode_3',
 'High_mode_4',
 'High_mode_5',
 'Low_mode_1',
 'Low_mode_2',
 'Low_mode_3',
 'Low_mode_4',
 'Low_mode_5',
 'Vol._mode_1',
 'Vol._mode_2',
 'Vol._mode_3',
 'Vol._mode_4',
 'Vol._mode_5',
 'Change %_mode_1',
 'Change %_mode_2',
 'Change %_mode_3',
 'Change %_mode_4',
 'Change %_mode_5',
 'EPU_mode_1',
 'EPU_mode_2',
 'EPU_mode_3',
 'EPU_mode_4',
 'EPU_mode_5',
 'GPRD_mode_1',
 'GPRD_mode_2',
 'GPRD_mode_3',
 'GPRD_mode_4',
 'GPRD_mode_5',
 'Gold_mode_1',
 'Gold_mode_2',
 'Gold_mode_3',
 'Gold_mode_4',
 'Gold_mode_5',
 'MA50_mode_1',
 'MA50_mode_2',
 'MA50_mode_3',
 'MA50_mode_4',
 'MA50_mode_5',
 'EMA50_mode_1',
 'EMA50_mode_2',
 'EMA50_mode_3',
 'EMA50_mode_4',
 'EMA50_mode_5',
 'OBV_mode_1',
 'OBV_mode_2',
 'OBV_mode_3',
 'OBV

In [9]:
from sklearn.preprocessing import StandardScaler

# Re-declare the target and the model inputs (everything except the target and the date).
target_col = "Price"
features = [name for name in df.columns if name not in (target_col, "Date")]

In [10]:
# Train-test split
train_size = int(len(df) * 0.7)
train_df = df[:train_size]

# Initialize scaler
feature_vmd_scaler = StandardScaler()
target_vmd_scaler = StandardScaler()

In [11]:
# Learn the scaling statistics from the training rows only.
# NOTE(review): scaled_data_vmd later unpickles scalers from ../models/scaler/;
# no cell visible here writes those files — confirm they are saved from these objects.
feature_vmd_scaler.fit(train_df.loc[:, features])
target_vmd_scaler.fit(train_df.loc[:, [target_col]])

0,1,2
,copy,True
,with_mean,True
,with_std,True


In [3]:
def scaled_data_vmd(
    df,
    alpha=2000,
    tau=0.0,
    K=5,
    DC=0,
    init=1,
    tol=1e-7,
    feature_scaler_path="../models/scaler/feature_vmd_scaler.pkl",
    target_scaler_path="../models/scaler/target_vmd_scaler.pkl",
):
    """Append VMD mode columns to ``df`` and standardise it with the saved scalers.

    Every column other than "Price" and "Date" is decomposed into ``K`` modes
    with ``VMD``; mode ``i`` (1-based) of a feature is stored in the column
    ``"<feature>_mode_<i>"``. All columns except "Price" and "Date" are then
    transformed with the pickled feature scaler, and "Price" with the pickled
    target scaler.

    On success ``df`` is modified in place (new columns added, values
    overwritten) and the same object is returned. All checks, the
    decomposition and the scaling are done on scratch data first, so a
    failure leaves ``df`` untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding "Price", "Date" and the feature columns.
    alpha, tau, K, DC, init, tol
        Passed positionally, in this order, to ``VMD``. Defaults are the
        values this notebook uses everywhere else.
    feature_scaler_path, target_scaler_path : str
        Pickle files holding the fitted feature and target scalers.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with mode columns added and all values scaled.

    Raises
    ------
    ValueError
        If a mode column this call would create already exists (for example
        because the function was already run on this frame), or if ``VMD``
        returns modes whose length differs from the number of rows.
    """
    target_col = "Price"
    excluded = (target_col, "Date")
    base_features = [col for col in df.columns if col not in excluded]

    # Fail fast on a repeated call: otherwise the existing mode columns would
    # themselves be decomposed again and the frame would be mutated before the
    # scaler finally rejects the unexpected columns.
    planned = [f"{feature}_mode_{i+1}" for feature in base_features for i in range(K)]
    clashes = [name for name in planned if name in df.columns]
    if clashes:
        raise ValueError(
            f"df already contains {len(clashes)} VMD mode column(s), e.g. "
            f"{clashes[:5]}; pass the original, un-decomposed frame."
        )

    # SECURITY: pickle.load can execute arbitrary code stored in the file.
    # Only point these paths at scaler files produced by this project.
    with open(feature_scaler_path, "rb") as f:
        feature_scaler = pickle.load(f)
    with open(target_scaler_path, "rb") as f:
        target_scaler = pickle.load(f)

    n_rows = len(df)
    mode_columns = {}
    for feature in base_features:
        # Only the time-domain modes are needed; the spectra and centre
        # frequencies returned alongside them are discarded.
        modes, _, _ = VMD(df[feature].values, alpha, tau, K, DC, init, tol)

        # NOTE(review): vmdpy is understood to drop the last sample of
        # odd-length signals — confirm. Either way, a mode of a different
        # length cannot be stored as a column of df.
        if modes.shape[1] != n_rows:
            raise ValueError(
                f"VMD returned modes of length {modes.shape[1]} for {feature!r}, "
                f"but df has {n_rows} rows."
            )

        for i in range(K):
            mode_columns[f"{feature}_mode_{i+1}"] = modes[i]

    # Scale on a scratch frame so that a scaler error cannot leave df half-updated.
    augmented = pd.concat([df, pd.DataFrame(mode_columns, index=df.index)], axis=1)
    features = [col for col in augmented.columns if col not in excluded]
    scaled_features = feature_scaler.transform(augmented[features])
    scaled_target = target_scaler.transform(augmented[[target_col]]).flatten()

    # Commit: create the mode columns first so the block assignment below
    # only ever targets existing columns.
    for name, values in mode_columns.items():
        df[name] = values
    df[features] = scaled_features
    df[target_col] = scaled_target

    return df

In [4]:
# Run the VMD + scaling pipeline on the loaded frame and display the result.
# The function writes into `df` itself, so `df` holds the scaled values after this cell.
scaled_data_vmd(df)

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %,EPU,GPRD,Gold,...,MACD Signal_mode_1,MACD Signal_mode_2,MACD Signal_mode_3,MACD Signal_mode_4,MACD Signal_mode_5,PSAR_mode_1,PSAR_mode_2,PSAR_mode_3,PSAR_mode_4,PSAR_mode_5
0,2014-04-01,-0.619852,-0.623591,-0.616800,-0.626223,-0.771881,1.013036,-1.079547,0.583055,-0.343253,...,-0.276752,-0.154765,0.044300,-0.040699,0.030280,-0.696135,-0.048689,0.040386,0.007594,0.107091
1,2014-04-02,-0.624188,-0.621492,-0.617565,-0.629553,-0.770160,-2.224378,-1.079547,-0.020825,-0.293792,...,-0.276787,-0.154727,0.043887,-0.040304,0.027886,-0.696098,-0.048508,0.039214,0.004382,-0.271356
2,2014-04-03,-0.622868,-0.625858,-0.620967,-0.632176,-0.771671,0.649914,-1.079547,-0.746320,-0.321472,...,-0.276840,-0.154620,0.043119,-0.039441,0.023379,-0.695987,-0.048031,0.037147,-0.000247,0.313429
3,2014-04-04,-0.621970,-0.624529,-0.620127,-0.628834,-0.777755,0.404422,-1.079547,-0.832722,-0.236164,...,-0.276888,-0.154408,0.042073,-0.038012,0.017200,-0.695883,-0.047536,0.033247,-0.009925,-0.237013
4,2014-04-05,-0.620617,-0.623625,-0.619621,-0.626060,-0.783671,0.637128,-1.079547,-0.812042,-0.236164,...,-0.276920,-0.154070,0.040796,-0.035964,0.009812,-0.695846,-0.047228,0.026853,-0.025289,0.086560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3649,2024-03-28,7.175890,7.080750,7.033470,7.324872,-0.484386,0.417208,-0.123370,0.091678,3.912169,...,5.515014,6.309277,-5.242803,-3.743532,-2.682603,5.810635,6.814432,-4.145919,-4.778192,-2.496023
3650,2024-03-29,7.077149,7.227825,6.965047,7.340422,-0.541029,-0.390866,-0.123370,0.704501,3.912169,...,5.527146,6.370162,-5.470872,-4.102451,-2.781103,5.813636,6.850084,-5.056913,0.343194,4.208619
3651,2024-03-30,7.048295,7.127966,6.901976,7.395426,-0.663210,-0.163276,-0.123370,1.520976,3.912169,...,5.536409,6.416392,-5.642382,-4.375028,-2.826326,5.816099,6.877671,-5.767509,5.456568,-4.315179
3652,2024-03-31,7.239089,7.098513,7.014628,7.396784,-0.610427,0.562969,-0.123370,0.560577,3.912169,...,5.542431,6.447088,-5.757679,-4.559300,-2.843818,5.817504,6.895344,-6.257835,9.536974,3.158650
