<h2 style="Color:Yellow">Percentage change</h2>

In [2]:
import numpy as np
# Load your data
data= np.load("Train_Data/option_train_labeled.npz", allow_pickle=True)
X_all = data['X']
Y_all = data['Y']

cuda

In [13]:
import torch

def engineer_features_torch(X_all_np, device='cuda'):
    X = torch.tensor(X_all_np, dtype=torch.float32, device=device)  # (N, 15, 10)
    eps = 1e-6

    # Extract columns
    N_open = X[:, :, 0]
    N_close = X[:, :, 1]
    opt_type = X[:, :, 4]
    open_ = X[:, :, 5]
    close = X[:, :, 6]
    oi = X[:, :, 7]
    strike = X[:, :, 8]
    tte = X[:, :, 9]

    return_pct = (close - open_) / (open_ + eps)
    candle_dir = torch.sign(close - open_)
    candle_body_pct = torch.abs(close - open_) / (open_ + eps)

    nifty_return_pct = (N_close - N_open) / (N_open + eps)
    nifty_candle_dir = torch.sign(nifty_return_pct)
    nifty_vs_option_corr = torch.sign(nifty_return_pct * return_pct)

    # OI change
    oi_prev = torch.cat([oi[:, :1], oi[:, :-1]], dim=1)
    oi_change_pct = (oi - oi_prev) / (oi_prev + eps)
    oi_price_corr = torch.sign(oi_change_pct * return_pct)

    strike_distance = torch.abs(N_close - strike) / (N_close + eps)

    # Rolling features (window=5)
    def rolling(x, window=5):
        # shape: (N, 15-window+1)
        unfold = x.unfold(dimension=1, size=window, step=1)  # (N, T-window+1, window)
        mean = unfold.mean(dim=-1)
        std = unfold.std(dim=-1)
        # pad to match original sequence length
        pad_left = window - 1
        mean = torch.cat([mean[:, :1].repeat(1, pad_left), mean], dim=1)
        std = torch.cat([std[:, :1].repeat(1, pad_left), std], dim=1)
        return mean, std

    rolling_mean_ret, rolling_std_ret = rolling(return_pct)
    rolling_mean_oi, _ = rolling(oi_change_pct)

    # Bullish ratio
    bullish_ratio = (candle_dir > 0).float().sum(dim=1, keepdim=True) / 15.0
    bullish_ratio = bullish_ratio.repeat(1, 15)

    # Stack all features: (N, 15, F)
    X_feat = torch.stack([
        return_pct,
        candle_dir,
        candle_body_pct,
        oi_change_pct,
        oi_price_corr,
        nifty_return_pct,
        nifty_candle_dir,
        nifty_vs_option_corr,
        tte,
        strike_distance,
        opt_type,
        rolling_mean_ret,
        rolling_std_ret,
        rolling_mean_oi,
        bullish_ratio
    ], dim=-1)

    return X_feat  # (N, 15, 15)

def create_labels_torch(X_all_np, Y_all_np, device='cuda'):
    X = torch.tensor(X_all_np, dtype=torch.float32, device=device)
    Y = torch.tensor(Y_all_np[:, :3], dtype=torch.float32, device=device)  # use only y1, y2, y3

    last_close = X[:, -1, 6]  # last close from input

    y1, y2, y3 = Y[:, 0], Y[:, 1], Y[:, 2]

    # Strictly increasing
    inc_1 = y1 > last_close
    inc_2 = y2 > y1
    inc_3 = y3 > y2

    label = (inc_1 & inc_2 & inc_3).long()  # 1 = BUY, 0 = NO BUY
    return label



In [10]:
import time
import numpy as np

# Load your data (already in RAM as NumPy)
X_all = X_all.astype(np.float64)

start = time.time()
X_feat_torch = engineer_features_torch(X_all, device='cuda')
torch.cuda.synchronize()
print("✅ Done. Feature tensor shape:", X_feat_torch.shape)
print("⏱️ Time taken (GPU):", round(time.time() - start, 2), "sec")


✅ Done. Feature tensor shape: torch.Size([2383014, 15, 15])
⏱️ Time taken (GPU): 10.68 sec


In [14]:
labels_torch = create_labels_torch(X_all, Y_all, device='cuda')
print("✅ Label tensor shape:", labels_torch.shape)
print("✅ BUY samples:", torch.sum(labels_torch).item())


✅ Label tensor shape: torch.Size([2383014])
✅ BUY samples: 123602


In [15]:
torch.save({
    'X': X_feat_torch,       # shape: (2383014, 15, 15)
    'Y': torch.tensor(Y_all, dtype=torch.float32, device='cuda'),  # shape: (2383014, 5)
    'labels': labels_torch   # shape: (2383014,)
}, 'Train_Data/options_buy_model_data.pt')

print("✅ Saved to options_buy_model_data.pt")

✅ Saved to options_buy_model_data.pt


In [None]:
| Index | Feature Name               | Description                                     |
| ----- | -------------------------- | ----------------------------------------------- |
| 0     | `return_pct`               | (close - open) / open                           |
| 1     | `candle_dir`               | Direction of candle (+1, 0, -1)                 |
| 2     | `candle_body_pct`          | abs(close - open) / open                        |
| 3     | `oi_change_pct`            | % change in open interest from previous bar     |
| 4     | `oi_price_corr`            | Sign of oi\_change × return\_pct                |
| 5     | `nifty_return_pct`         | NIFTY return % over that minute                 |
| 6     | `nifty_candle_dir`         | NIFTY direction sign                            |
| 7     | `nifty_vs_option_corr`     | Sign of (nifty\_return × option\_return)        |
| 8     | `time_to_expiry`           | Time (in days) to expiry                        |
| 9     | `strike_distance`          | abs(Nifty\_close - strike) / Nifty\_close       |
| 10    | `option_type`              | 1 for CE, -1 for PE                             |
| 11    | `rolling_mean_return_5`    | Rolling mean of return\_pct (window=5)          |
| 12    | `rolling_std_return_5`     | Rolling std deviation of return\_pct (window=5) |
| 13    | `rolling_mean_oi_change_5` | Rolling mean of oi\_change\_pct (window=5)      |
| 14    | `bullish_candle_ratio`     | Ratio of positive candles in the 15-step window |


In [3]:
import os
print(os.cpu_count())


16


<h3>5min labelled</h3>

In [None]:
import pandas as pd
import numpy as np
import os
from glob import glob

root_dir = "Data3"
sequence_length = 15
lookahead = 5

features = [
    'Nifty_open', 'Nifty_close', 'Nifty_high', 'Nifty_low','type',
    'open', 'close', 'open_interest',
    'strike', 'time_to_expiry'
]

X_all = []
Y_all = []

# Get all matching CSV files in subfolders
# all_csvs = glob(os.path.join(root_dir, "exp_date_2025-06-26*", "*_NIFTY_nearby_5strikeprices.csv"))
all_csvs = glob(os.path.join(root_dir, "exp_date_*", "*_NIFTY_nearby_5strikeprices.csv"))
for file_path in all_csvs:
    try:
        df= pd.read_csv(file_path)

        # Keep NIFTY OHLC columns and datetime separately
        id_vars = ['datetime', 'open', 'high', 'low', 'close']

        # Melt all other option columns
        value_vars = [col for col in df.columns if col not in id_vars]
        long_df = df.melt(
            id_vars=id_vars,
            value_vars=value_vars,
            var_name='option_meta',  # will hold the original column names
            value_name='value'       # will hold the actual numbers
        )
        # Extract expiry, strike, type, field from option_meta
        option_parts = long_df['option_meta'].str.extract(
            r'NIFTY(?P<expiry>\d{4}-\d{2}-\d{2})\|(?P<strike>\d+)(?P<type>CE|PE)_(?P<field>\w+)'
        )
        # Combine with main DataFrame
        df_clean = pd.concat([long_df, option_parts],axis=1)
        df_clean.rename(columns={
            'open': 'Nifty_open',
            'close': 'Nifty_close',
            'high': 'Nifty_high',
            'low': 'Nifty_low'
        }, inplace=True)
        # Pivot the 'field' values into columns (open, open_interest, etc.)
        df_pivoted = df_clean.pivot_table(
            index=['datetime', 'expiry', 'strike', 'type','Nifty_open','Nifty_close','Nifty_high','Nifty_low'],
            columns='field',
            values='value',
            aggfunc='first'  # in case of duplicates
        ).reset_index()

        df_pivoted['datetime'] = pd.to_datetime(df_pivoted['datetime'])
        df_pivoted['expiry'] = pd.to_datetime(df_pivoted['expiry'])
        df_pivoted['strike'] = df_pivoted['strike'].astype(int)
        # Time to expiry in fractional days (can include hours/minutes)
        df_pivoted['time_to_expiry'] = (df_pivoted['expiry']- pd.Timedelta("-1 days +08:30:01") - df_pivoted['datetime']).dt.total_seconds() / (60 * 60 * 24)
        df_pivoted['type'] = df_pivoted['type'].map({'CE': 1, 'PE': -1})

        sequence_length = 15
        lookahead = 5
        features = [
            'Nifty_open', 'Nifty_close', 'Nifty_high', 'Nifty_low','type',
            'open', 'close', 'open_interest',
            'strike',  'time_to_expiry'
        ]

        # Convert datetime just once
        df_pivoted['datetime'] = pd.to_datetime(df_pivoted['datetime'])

        # Group by unique combinations
        unique_combinations = df_pivoted[['strike', 'type', 'expiry']].drop_duplicates()

        for _, row in unique_combinations.iterrows():
            strike = row['strike']
            option_type = row['type']
            expiry = row['expiry']

            df_filtered = df_pivoted[
                (df_pivoted['strike'] == strike) &
                (df_pivoted['type'] == option_type) &
                (df_pivoted['expiry'] == expiry)
            ].sort_values('datetime').reset_index(drop=True)

            if len(df_filtered) < sequence_length + lookahead:
                continue  # Skip if not enough data
            
            data = df_filtered[features].values
            
            for i in range(len(data) - sequence_length - lookahead):
                x_seq = data[i:i + sequence_length]
                y_seq = data[i + sequence_length : i + sequence_length + lookahead]
                close_col = y_seq[:, 6]  # Index for 'close'

                X_all.append(x_seq)
                Y_all.append(close_col)
    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
        # Convert to arrays
X_all = np.array(X_all)
Y_all = np.array(Y_all)
print("✅ Done. X shape:", X_all.shape, "Y shape:", Y_all.shape)
    

        

✅ Done. X shape: (188860, 15, 10) Y shape: (188860, 5)


In [3]:
# Step 4: Create DataFrame
combined_df = pd.DataFrame({
    'X_all': [x.tolist() for x in X_all],
    'Y_all': [y.tolist() for y in Y_all]
})

# Step 5: Save to CSV
output_dir = 'Train_Data'
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, 'option_train_data_06_06_2025.csv')

combined_df.to_csv(output_path, index=False)
print(f"✅ CSV saved at: {output_path}")

✅ CSV saved at: Train_Data\option_train_data_06_06_2025.csv


In [4]:
import numpy as np
import os

output_dir = 'Train_Data'
os.makedirs(output_dir, exist_ok=True)

np.savez_compressed(
    os.path.join(output_dir, 'option_train_data_06_06_2025.npz'),
    X_all=np.array(X_all),
    Y_all=np.array(Y_all)
)

print("✅ Saved as compressed .npz for efficient loading.")


✅ Saved as compressed .npz for efficient loading.


<h3>Normalisation of data</h3>

In [1]:
from sklearn.preprocessing import StandardScaler
import numpy as np
import joblib

import numpy as np

# === Load Data ===
X_all, Y_all ,labels= np.load("Train_Data/option_train_labeled_3m.npz", allow_pickle=True).values()
feature_columns = [
    'Nifty_open',
    'Nifty_close', 'Nifty_high', 'Nifty_low',
    'type',
    'open', 'close', 'open_interest',
    'strike', 'time_to_expiry'
]

cols_to_normalize = [
    'Nifty_open', 'Nifty_close', 'Nifty_high', 'Nifty_low',
    'open', 'close', 'open_interest',
    'strike', 'time_to_expiry'
]

cols_idx_to_normalize = [feature_columns.index(col) for col in cols_to_normalize]

n_samples, seq_len, n_features = X_all.shape
X_reshaped = X_all.reshape(-1, n_features)

scaler_X = {}

# Standardize only the selected features
for idx in cols_idx_to_normalize:
    feature = feature_columns[idx]
    scaler = StandardScaler()
    X_reshaped[:, idx] = scaler.fit_transform(X_reshaped[:, idx].reshape(-1, 1)).flatten()
    scaler_X[feature] = scaler

# Reshape back to 3D
X_all_scaled = X_reshaped.reshape(n_samples, seq_len, n_features)

# === Standardize Y_all ===
scaler_Y = StandardScaler()
Y_all_scaled = scaler_Y.fit_transform(Y_all)

# Save the scaled data and scalers
np.savez_compressed("Train_Data/option_Xscaled_3m.npz", X_all=X_all_scaled, Y_all=Y_all_scaled, labels=labels)
joblib.dump(scaler_X, "Train_Data/scaler_X.pkl")
joblib.dump(scaler_Y, "Train_Data/scaler_Y.pkl")

print("✅ Standardization done and saved.")


✅ Standardization done and saved.


In [None]:
import pandas as pd
df= pd.read_csv("Data3/exp_date_2025-03-27/2025-03-06_NIFTY_nearby_5strikeprices.csv")
# Keep NIFTY OHLC columns and datetime separately
id_vars = ['datetime', 'open', 'high', 'low', 'close']

# Melt all other option columns
value_vars = [col for col in df.columns if col not in id_vars]
long_df = df.melt(
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='option_meta',  # will hold the original column names
    value_name='value'       # will hold the actual numbers
)
# Extract expiry, strike, type, field from option_meta
option_parts = long_df['option_meta'].str.extract(
    r'NIFTY(?P<expiry>\d{4}-\d{2}-\d{2})\|(?P<strike>\d+)(?P<type>CE|PE)_(?P<field>\w+)'
)
# Combine with main DataFrame
df_clean = pd.concat([long_df, option_parts],axis=1)
df_clean.rename(columns={
    'open': 'Nifty_open',
    'close': 'Nifty_close',
    'high': 'Nifty_high',
    'low': 'Nifty_low'
}, inplace=True)
# Pivot the 'field' values into columns (open, open_interest, etc.)
df_pivoted = df_clean.pivot_table(
    index=['datetime', 'expiry', 'strike', 'type','Nifty_open','Nifty_close','Nifty_high','Nifty_low'],
    columns='field',
    values='value',
    aggfunc='first'  # in case of duplicates
).reset_index()

df_pivoted['datetime'] = pd.to_datetime(df_pivoted['datetime'])
df_pivoted['expiry'] = pd.to_datetime(df_pivoted['expiry'])
df_pivoted['strike'] = df_pivoted['strike'].astype(int)
# Time to expiry in fractional days (can include hours/minutes)
df_pivoted['time_to_expiry'] = (df_pivoted['expiry']- pd.Timedelta("-1 days +08:30:01") - df_pivoted['datetime']).dt.total_seconds() / (60 * 60 * 24)
df_pivoted['type'] = df_pivoted['type'].map({'CE': 1, 'PE': -1})
df_pivoted

In [None]:
import numpy as np
X_all = []
Y_all = []

sequence_length = 15
lookahead = 5
features = [
    'Nifty_open', 'Nifty_close', 'Nifty_high', 'Nifty_low','type',
    'open', 'close', 'open_interest',
    'strike',  'time_to_expiry'
]

# Convert datetime just once
df_pivoted['datetime'] = pd.to_datetime(df_pivoted['datetime'])

# Group by unique combinations
unique_combinations = df_pivoted[['strike', 'type', 'expiry']].drop_duplicates()

for _, row in unique_combinations.iterrows():
    strike = row['strike']
    option_type = row['type']
    expiry = row['expiry']
    
    df_filtered = df_pivoted[
        (df_pivoted['strike'] == strike) &
        (df_pivoted['type'] == option_type) &
        (df_pivoted['expiry'] == expiry)
    ].sort_values('datetime').reset_index(drop=True)
    
    if len(df_filtered) < sequence_length + lookahead:
        continue  # Skip if not enough data

    data = df_filtered[features].values

    for i in range(len(data) - sequence_length - lookahead):
        x_seq = data[i:i + sequence_length]
        y_seq = data[i + sequence_length : i + sequence_length + lookahead]
        close_col = y_seq[:, 6]  # Index for 'close'
        
        X_all.append(x_seq)
        Y_all.append(close_col)

# Convert to arrays
X_all = np.array(X_all)
Y_all = np.array(Y_all)

print("✅ Done. X shape:", X_all.shape, "Y shape:", Y_all.shape)


<h3 style="color:pink">Strong buy , buy, don't buy training data</h3>

In [None]:
import numpy as np

# Load your data
X_all, Y_all = np.load("Train_Data/option_train_data_06_06_2025.npz", allow_pickle=True).values()
print("X_all shape:", X_all.shape)
print("Y_all shape:", Y_all.shape)
print("First X sample:", X_all[0])
print("First Y sample:", Y_all[0])

X_all shape: (2383014, 15, 10)
Y_all shape: (2383014, 5)
First X sample: [[2.24763500e+04 2.24519500e+04 2.24913000e+04 2.24174500e+04
  1.00000000e+00 6.08450000e+02 6.09850000e+02 3.94500000e+04
  2.19500000e+04 2.12604167e+01]
 [2.24523000e+04 2.24520000e+04 2.24658500e+04 2.24496500e+04
  1.00000000e+00 6.70900000e+02 6.72850000e+02 3.14250000e+04
  2.19500000e+04 2.12597222e+01]
 [2.24539000e+04 2.24474500e+04 2.24568500e+04 2.24432500e+04
  1.00000000e+00 6.73950000e+02 6.78800000e+02 3.14250000e+04
  2.19500000e+04 2.12590278e+01]
 [2.24455000e+04 2.24472500e+04 2.24494000e+04 2.24409500e+04
  1.00000000e+00 6.76000000e+02 6.70200000e+02 3.14250000e+04
  2.19500000e+04 2.12583333e+01]
 [2.24455500e+04 2.24327500e+04 2.24519000e+04 2.24318500e+04
  1.00000000e+00 6.71100000e+02 6.69700000e+02 2.77500000e+04
  2.19500000e+04 2.12576389e+01]
 [2.24331000e+04 2.24173500e+04 2.24464500e+04 2.24145000e+04
  1.00000000e+00 6.68200000e+02 6.69750000e+02 2.77500000e+04
  2.19500000e+04 2

In [None]:
import numpy as np

# Load your data
X_all, Y_all ,labels= np.load("Train_Data/option_train_labeled.npz", allow_pickle=True).values()
# Find indices where label is "STRONG BUY"
strong_buy_indices = np.where(labels == "STRONG BUY")[0]

# Print first such index and the corresponding Y_all
print("Indices with STRONG BUY:", strong_buy_indices[:10])  # first 10 indices
print("\nCorresponding Y_all values:")
for idx in strong_buy_indices[:10]:  # show first 5 examples
    print(f"Index {idx} - Y_all: {Y_all[idx]}")


Indices with STRONG BUY: [ 371  464  860  907 1211 1378 1783 1790 1921 2174]

Corresponding Y_all values:
Index 371 - Y_all: [120.   122.35 129.05 135.5  137.25]
Index 464 - Y_all: [ 92.95  93.5   93.75  94.6  103.  ]
Index 860 - Y_all: [514.45 612.5  619.45 626.   692.6 ]
Index 907 - Y_all: [638.   644.5  649.3  654.4  711.05]
Index 1211 - Y_all: [ 93.95 101.75 102.3  135.95 149.4 ]
Index 1378 - Y_all: [ 93.05  93.3   93.65 102.2  107.7 ]
Index 1783 - Y_all: [120.6  129.75 133.7  147.05 147.5 ]
Index 1790 - Y_all: [104.85 124.55 147.4  156.1  161.25]
Index 1921 - Y_all: [104.   113.55 117.2  145.65 165.6 ]
Index 2174 - Y_all: [484.25 487.75 493.2  507.35 508.  ]


In [12]:
# Find indices where label is "STRONG BUY"
strong_buy_indices = np.where(labels == "STRONG BUY")[0]

# Print first such index and the corresponding Y_all
print("Indices with STRONG BUY:", strong_buy_indices[:10])  # first 10 indices
print("\nCorresponding Y_all values:")
for idx in strong_buy_indices[:10]:  # show first 5 examples
    print(f"Index {idx} - Y_all: {Y_all[idx]}")


Indices with STRONG BUY: [ 371  464  860  907 1211 1378 1783 1790 1921 2174]

Corresponding Y_all values:
Index 371 - Y_all: [120.   122.35 129.05 135.5  137.25]
Index 464 - Y_all: [ 92.95  93.5   93.75  94.6  103.  ]
Index 860 - Y_all: [514.45 612.5  619.45 626.   692.6 ]
Index 907 - Y_all: [638.   644.5  649.3  654.4  711.05]
Index 1211 - Y_all: [ 93.95 101.75 102.3  135.95 149.4 ]
Index 1378 - Y_all: [ 93.05  93.3   93.65 102.2  107.7 ]
Index 1783 - Y_all: [120.6  129.75 133.7  147.05 147.5 ]
Index 1790 - Y_all: [104.85 124.55 147.4  156.1  161.25]
Index 1921 - Y_all: [104.   113.55 117.2  145.65 165.6 ]
Index 2174 - Y_all: [484.25 487.75 493.2  507.35 508.  ]


In [5]:
import numpy as np

# Load your data
X_all, Y_all = np.load("Train_Data/option_train_data_06_06_2025.npz", allow_pickle=True).values()

# Configuration
close_price_index = 6  

labels = []
for x_seq, y_seq in zip(X_all, Y_all):
    current_close = x_seq[-1][close_price_index]
    future_close_5 = y_seq[4]

    # Rule 1: Check for STRONG BUY
    if (future_close_5 > current_close + 10) and all(y_seq[i+1] > y_seq[i] for i in range(4)):
        labels.append("STRONG BUY")
    # Rule 2: Check for BUY
    elif (future_close_5 > current_close + 10):
        labels.append("BUY")
    # Rule 3: DON'T BUY
    else:
        labels.append("NO")

# Convert labels to a NumPy array (optional: encode as integers if needed)
labels = np.array(labels)

# Example: Save this dataset
np.savez("Train_Data/option_train_labeled_06_06_2025.npz", X=X_all, Y=Y_all, labels=labels)


In [None]:
import numpy as np

# Load your data
X_all, Y_all = np.load("Train_Data/option_train_data.npz", allow_pickle=True).values()

# Configuration
close_price_index = 6  

labels = []
for x_seq, y_seq in zip(X_all, Y_all):
    current_close = x_seq[-1][close_price_index]
    future_close_5 = y_seq[4]

    # Rule 1: Check for STRONG BUY
    if (future_close_5 > current_close + 10) and all(y_seq[i+1] > y_seq[i] for i in range(4)):
        labels.append("STRONG BUY")
    # Rule 2: Check for BUY
    elif (future_close_5 > current_close + 10):
        labels.append("BUY")
    # Rule 3: DON'T BUY
    else:
        labels.append("NO")

# Convert labels to a NumPy array (optional: encode as integers if needed)
labels = np.array(labels)

# Example: Save this dataset
np.savez("Train_Data/option_train_labeled_3m.npz", X=X_all, Y=Y_all, labels=labels)


<h3>3 Min data BUY or NO</h3>

In [9]:
import numpy as np

# Load your data
X_all, Y_all ,_= np.load("Train_Data/option_train_labeled_06_06_2025.npz", allow_pickle=True).values()

# Configuration
close_price_index = 6  # Index for the close price in your feature vector

labels = []
for x_seq, y_seq in zip(X_all, Y_all):
    current_close = x_seq[-1][close_price_index]
    future_close_3 = y_seq[2]  # 3-minute future close price

    # New BUY condition
    if (future_close_3 > current_close + 5) and (y_seq[0] < y_seq[1] < y_seq[2]):
        labels.append("BUY")
    else:
        labels.append("NO")

# Convert labels to a NumPy array
labels = np.array(labels)

# Save the updated dataset
np.savez("Train_Data/option_train__labeled_3m_06_06_2025.npz", X=X_all, Y=Y_all, labels=labels)


In [11]:
import numpy as np

# Load your data
X_all, Y_all ,labels= np.load("Train_Data/option_train__labeled_3m_06_06_2025.npz", allow_pickle=True).values()
# Find indices where label is "STRONG BUY"
strong_buy_indices = np.where(labels == "BUY")[0]
print("Indices with BUY:", strong_buy_indices.shape)  
# Print first such index and the corresponding Y_all
print("Indices with STRONG BUY:", strong_buy_indices[:10])  # first 10 indices
print("\nCorresponding Y_all values:")
for idx in strong_buy_indices[:10]:  # show first 5 examples
    print(f"Index {idx} - Y_all: {Y_all[idx]}")


Indices with BUY: (15354,)
Indices with STRONG BUY: [293 361 389 431 494 512 526 555 573 586]

Corresponding Y_all values:
Index 293 - Y_all: [653.45 680.   715.95 680.   680.  ]
Index 361 - Y_all: [167.15 170.1  176.   170.45 169.15]
Index 389 - Y_all: [163.95 164.8  175.6  164.1  156.75]
Index 431 - Y_all: [155.5  168.05 217.   151.1  163.35]
Index 494 - Y_all: [153.55 169.   169.85 169.   169.  ]
Index 512 - Y_all: [149.95 150.   169.   163.65 165.  ]
Index 526 - Y_all: [140.6 141.3 162.  141.3 141.4]
Index 555 - Y_all: [144.95 146.4  171.   145.   144.15]
Index 573 - Y_all: [147.15 147.25 174.6  149.85 148.15]
Index 586 - Y_all: [162.   168.15 179.45 204.7  164.35]


In [6]:
strong_buy_indices = np.where(labels == "BUY")[0]
no_buy_indices= np.where(labels == "NO")[0]
print("Indices with BUY:", strong_buy_indices.shape)  
print("Indices with NO:", no_buy_indices.shape)


Indices with BUY: (187617,)
Indices with NO: (2195397,)
