# SENG 474 Project - Initial Model

## Initial Model

In [56]:
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Filepath
filepath = "Kraken_OHLCVT/XBTUSD_15.csv"

# Threshold for determining if a coin went up
threshold = 0.004

#############################################
# 1) Load CSV & Rename columns
#############################################
df = pd.read_csv(filepath)
df.columns = ["Timestamp", "Open", "High", "Low", "Close", "Volume", "Trades"]

#############################################
# 2) Define feature-engineering functions
#############################################
def add_datetime_features(df):
    df["Timestamp"] = pd.to_datetime(df["Timestamp"], unit="s")  # Convert to datetime

    df["Weekday"] = df["Timestamp"].dt.weekday  # 0=Mon, 6=Sun
    df["Day"] = df["Timestamp"].dt.day
    df["Year"] = df["Timestamp"].dt.year.astype(float)

    # Time of day
    df["TOD"] = df["Timestamp"].dt.hour + df["Timestamp"].dt.minute / 60.0

    # Cyclical encoding for Month
    month = df["Timestamp"].dt.month
    df["Month_Sin"] = np.sin(2 * np.pi * month / 12.0)
    df["Month_Cos"] = np.cos(2 * np.pi * month / 12.0)

    # Cyclical encoding for TOD (24 hours in a day)
    df["TOD_Sin"] = np.sin(2 * np.pi * df["TOD"] / 24.0)
    df["TOD_Cos"] = np.cos(2 * np.pi * df["TOD"] / 24.0)

    # Drop the original columns
    df.drop(columns=["Timestamp", "TOD"], inplace=True)
    return df

def compute_rsi(series, period=14):
    delta = series.diff().dropna()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / (loss + 1e-10)  # Avoid division by zero
    return 100 - (100 / (1 + rs))

def add_features(df, threshold):
    df["SMA_10"] = df["Close"].rolling(window=10).mean()
    df["SMA_50"] = df["Close"].rolling(window=50).mean()
    df["RSI_14"] = compute_rsi(df["Close"])
    df["Return"] = df["Close"].pct_change()

    # Bollinger Bands
    df["Middle_Band"] = df["Close"].rolling(window=20).mean()
    stddev = df["Close"].rolling(window=20).std()
    df["Upper_Band"] = df["Middle_Band"] + (2 * stddev)
    df["Lower_Band"] = df["Middle_Band"] - (2 * stddev)

    # Return_Signal in {-1, 0, 1}
    df["Return_Signal"] = df["Return"].apply(
        lambda x: 1 if x > threshold else (0 if x >= 0 else -1)
    )

    # Add time-based features (which won't leak future data if done carefully)
    df = add_datetime_features(df)
    return df

#############################################
# 3) Add features to the full dataframe
#############################################
df = add_features(df, threshold)

#############################################
# 4) Split into Train/Val/Test BEFORE fitting scaler
#############################################
# e.g., 70% train, 15% val, 15% test
df = df.reset_index(drop=True)
n = len(df)
train_end = int(0.7 * n)
val_end = int(0.85 * n)

train_df = df.iloc[:train_end].copy()
val_df = df.iloc[train_end:val_end].copy()
test_df = df.iloc[val_end:].copy()

#############################################
# 5) Fill NaNs using means computed on TRAIN ONLY
#############################################
train_means = train_df.mean(numeric_only=True)  # means of train set
train_df.fillna(train_means, inplace=True)
val_df.fillna(train_means, inplace=True)
test_df.fillna(train_means, inplace=True)

#############################################
# 6) Scale using only the TRAIN set
#############################################
scaler = MinMaxScaler(feature_range=(0, 1))

# Exclude the label "Return_Signal" from scaling
exclude_cols = ["Return_Signal", "Month_Sin", "Month_Cos", "TOD_Sin", "TOD_Cos"]
features_to_scale = [col for col in train_df.columns if col not in exclude_cols]

# Fit scaler on train
scaler.fit(train_df[features_to_scale])

# Transform train, val, test
train_df[features_to_scale] = scaler.transform(train_df[features_to_scale])
val_df[features_to_scale]  = scaler.transform(val_df[features_to_scale])
test_df[features_to_scale] = scaler.transform(test_df[features_to_scale])

# Save scaler
joblib.dump(scaler, "scaler.pkl")

#############################################
# 7) Reorder columns so that Return_Signal is last
#############################################
def reorder_columns(df):
    cols = list(df.columns)
    cols.remove("Return_Signal")
    cols.append("Return_Signal")
    return df[cols]

train_df = reorder_columns(train_df)
val_df = reorder_columns(val_df)
test_df = reorder_columns(test_df)

#############################################
# 8) Convert to NumPy & Shift labels {-1,0,1} -> {0,1,2}
#############################################
def to_numpy_and_shift_labels(df):
    data = df.to_numpy()
    label_idx = df.columns.get_loc("Return_Signal")
    # -1 -> 0
    data[:, label_idx] = np.where(data[:, label_idx] == -1, 0, data[:, label_idx])
    #  1 -> 2
    data[:, label_idx] = np.where(data[:, label_idx] == 1, 2, data[:, label_idx])
    return data

train_data = to_numpy_and_shift_labels(train_df)
val_data = to_numpy_and_shift_labels(val_df)
test_data = to_numpy_and_shift_labels(test_df)

#############################################
# 9) Create sequences (X, y) from each split
#############################################
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        # X: the past seq_length rows, all columns except the label
        X.append(data[i : i + seq_length, :-1])
        # y: the label in the next row
        y.append(data[i + seq_length, -1])
    return np.array(X), np.array(y)

seq_length = 30
X_train, y_train = create_sequences(train_data, seq_length)
X_val,   y_val   = create_sequences(val_data,   seq_length)
X_test,  y_test  = create_sequences(test_data,  seq_length)

#############################################
# 10) Build LSTM Classification Model
#############################################
num_features = X_train.shape[2]

model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, num_features)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25, activation="relu"),
    Dense(3, activation="softmax")  # 3 classes: 0=down, 1=neutral, 2=up
])

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",  # for integer labels
    metrics=["accuracy"]
)

#############################################
# 11) Early Stopping & Training
#############################################
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True
)

history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1
)

#############################################
# 11) Evaluate on Test Set
#############################################
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Classification report and confusion matrix
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

print("Classification Report:")
print(classification_report(y_test, y_pred_classes))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_classes))


  super().__init__(**kwargs)


Epoch 1/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 8ms/step - accuracy: 0.8851 - loss: 0.3468 - val_accuracy: 0.9420 - val_loss: 0.2157
Epoch 2/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 8ms/step - accuracy: 0.8855 - loss: 0.3236 - val_accuracy: 0.9420 - val_loss: 0.2139
Epoch 3/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 8ms/step - accuracy: 0.8860 - loss: 0.3206 - val_accuracy: 0.9420 - val_loss: 0.2090
Epoch 4/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 8ms/step - accuracy: 0.8868 - loss: 0.3157 - val_accuracy: 0.9420 - val_loss: 0.2054
Epoch 5/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 8ms/step - accuracy: 0.8849 - loss: 0.3172 - val_accuracy: 0.9420 - val_loss: 0.2072
Epoch 6/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 8ms/step - accuracy: 0.8854 - loss: 0.3151 - val_accuracy: 0.9420 - val_loss: 0.2128
Epoch 7/50

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Report on the Methods and Performance of the LSTM Classification Model**

This report outlines the **data preparation**, **feature engineering**, and **model training** steps used to predict whether the price of a cryptocurrency (or similar asset) will go **down (0)**, remain **neutral (1)**, or go **up (2)**. The code leverages a **Long Short-Term Memory (LSTM)** architecture, a type of recurrent neural network well-suited for sequential data.

Following training, we evaluate the model’s performance and discuss key observations of the results.


## 2. Methodology

### 2.1 Data Ingestion and Setup
1. **File Loading:**  
   - The CSV file (`"Kraken_OHLCVT/XBTUSD_15.csv"`) is read into a Pandas DataFrame `df`.
   - Columns are renamed to `["Timestamp", "Open", "High", "Low", "Close", "Volume", "Trades"]`.

2. **Threshold Definition:**  
   - A `threshold` of `0.004` is chosen for determining whether the next price movement is considered an **up** movement (`return > 0.004`).


### 2.2 Feature Engineering
1. **Technical Indicators:**
   - **Simple Moving Averages (SMA_10, SMA_50):** 10- and 50-period means of `Close`.
   - **RSI_14:** 14-period Relative Strength Index, measuring momentum.
   - **Bollinger Bands (Upper, Middle, Lower):** 20-period average ± 2 standard deviations.
   - **Return:** Percentage change of `Close` from one time step to the next.

2. **Date-Time Features (`add_datetime_features`):**  
   - Convert `Timestamp` to a Python `datetime`.  
   - Extract **Weekday** (0=Monday to 6=Sunday), **Day**, **Year**, and **Time-of-Day (TOD)**.  
   - Apply **cyclical encodings** for `Month` (`Month_Sin`, `Month_Cos`) and `TOD` (`TOD_Sin`, `TOD_Cos`) so that cyclical patterns are preserved (e.g., 23:59 is close to 00:00).

3. **Return_Signal:**  
   - Categorize each time step as **-1** (down) if `return < 0`, **0** (neutral) if `0 <= return <= threshold`, or **1** (up) if `return > threshold`.  
   - Later, these values are remapped from \{-1, 0, 1\} → \{0, 1, 2\} for compatibility with Keras classification layers.


### 2.3 Splitting the Dataset
1. **Chronological Splits:**  
   - **Train set (70%)**: The earliest portion of the time series.  
   - **Validation set (15%)**: The next portion. Used for tuning hyperparameters (via early stopping) and checking generalization during training.  
   - **Test set (15%)**: The final portion. Used **after** training to report final performance.

2. **Avoiding Data Leakage:**  
   - We reset indexes and fill missing values with the **train set means** only.  
   - We also **fit the scaler** (MinMax) on the train set and **apply** it to the validation and test sets.


### 2.4 Preprocessing and Scaling
1. **Handling Missing Data:**  
   - We compute `train_means` from numerical columns in the **train set** and use these means to fill any missing values in train, val, and test sets.
2. **MinMaxScaler:**  
   - Fits to the **train set** only, then applies transforms to val/test.  
   - Columns like `Return_Signal`, `Month_Sin`, `Month_Cos`, `TOD_Sin`, `TOD_Cos` are **excluded** from scaling since they are either labels or already in the range \([-1,1]\).

3. **Reordering Columns:**  
   - Ensures `Return_Signal` is the last column for convenient indexing, letting everything else act as features.

4. **Label Shifting:**  
   - \{-1 → 0, +1 → 2\} leaves us with final integer labels \{0,1,2\}, suitable for **`sparse_categorical_crossentropy`**.


### 2.5 Sequence Creation
1. **Sliding Window (`create_sequences`)**:  
   - We use a **sequence length** of `seq_length=30`.  
   - For each index \(i\), \(X_i\) is the **past 30 rows** of features, and \(y_i\) is the **`Return_Signal`** (remapped) on the **31st row**.  
   - This forms a dataset of shape \((\text{samples}, 30, \text{num_features})\).


### 2.6 Model Definition and Training
1. **LSTM Architecture**  
   - A two-layer LSTM:
     1. **LSTM(50, return_sequences=True)** + **Dropout(0.2)**
     2. **LSTM(50, return_sequences=False)** + **Dropout(0.2)**
   - A **Dense(25, "relu")** hidden layer.
   - A final **Dense(3, "softmax")** for 3-class classification: \{down=0, neutral=1, up=2\}.

2. **Compilation**  
   - **Optimizer:** `"adam"`
   - **Loss:** `"sparse_categorical_crossentropy"` (for integer labels)
   - **Metrics:** `["accuracy"]` for classification accuracy.

3. **Early Stopping**  
   - **patience=3**, monitoring `val_loss`.
   - Automatically **restores best weights** to avoid overfitting if the validation loss stops improving.

4. **Fitting**  
   - `epochs=50, batch_size=32`
   - Validation data: `(X_val, y_val)`


## 3. Model Evaluation and Observations

### 3.1 Final Accuracy and Loss on Test Set
- After training, the model reports:  
  - **Test Loss** around **0.15**  
  - **Test Accuracy** around **0.96** (96%)

On the surface, **96% accuracy** seems impressive. However, further investigation via the **classification report** and **confusion matrix** reveals:

```
Classification Report:
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98     45837
         2.0       0.00      0.00      0.00      1798

    accuracy                           0.96     47635
   macro avg       0.48      0.50      0.49     47635
weighted avg       0.93      0.96      0.94     47635

Confusion Matrix:
[[45837     0]
 [ 1798     0]]
```

- **Class 0** (down) is predicted **45837 times** correctly and **0** times incorrectly.  
- **Class 2** (up) was **never predicted** (0 times), even though 1798 samples are truly class 2.  
- **Class 1** (neutral) doesn’t appear in the table, which suggests none of the final test rows were labeled 1 in this particular sample.  

Thus, the model **mostly predicts one class** (class 0) in the test set, ignoring the minority class(es). This can happen when:

1. The data is **highly imbalanced** (far more “down” than “up” signals).  
2. The threshold of `0.004` might be too large or too small, leading to few “up” examples.  
3. The model’s loss function with a large class imbalance may lead it to favor the majority class.


### 3.2 Conclusions and Future Work
- **High Accuracy but Class Imbalance:** A ~96% accuracy is misleading because the model rarely predicts the “up” class. It appears it learned to always predict “down” for minimal penalty.
- **Potential Remedies:**
  1. **Class Weights:** Give more weight to minority classes during training to encourage the model to learn them.  
  2. **Data Balancing:** Either oversample “up” or undersample “down,” or use SMOTE-like techniques if the dataset is highly imbalanced.  
  3. **Tune Threshold:** Adjust the threshold (0.004) if it’s causing too few “up” labels.  
  4. **More Features:** Possibly incorporate additional signals that help differentiate “down” vs. “up.”

Overall, the pipeline for data engineering and LSTM modeling is sound, but the **results highlight class imbalance**. Improving class balance or weighting could lead to more meaningful predictions across all three categories.


## 4. Summary
1. **Pipeline**: Data is **chronologically split** into train/validation/test sets, we carefully engineer features (SMA, RSI, Bollinger, cyclical time features, etc.), then form a **sequence** dataset for the LSTM.  
2. **Model**: A **two-layer LSTM** with dropout and a dense output layer for **3-class classification**.  
3. **Observations**: Although the model achieves **high accuracy**, it **ignores the minority class** in practice, revealing **class imbalance**.  
4. **Next Steps**: Adjust thresholds, apply class weighting, or resample data to ensure all labels are properly learned.

This approach demonstrates how an LSTM can handle **sequence classification** for financial time series, but also shows the critical importance of evaluating class distributions and exploring deeper metrics (confusion matrix, F1-scores) to ensure fair performance across all classes.

---

Model learned that only predicting "down" is correct. We verify that this could be an issue (and the issue is not with preprocessing):

In [58]:
import pandas as pd

# Provide your file path
filepath = "Kraken_OHLCVT/XBTUSD_15.csv"

# Threshold
threshold = 0.004

# Load dataset
df = pd.read_csv(filepath, names=["Timestamp", "Open", "High", "Low", "Close", "Volume", "Trades"], header=None)

# Add the 'Return' column (pct change of Close)
df["Return"] = df["Close"].pct_change()

# Derive the Return_Signal in {-1,0,1} using the same threshold logic
def classify_return(x, threshold=0.004):
    if pd.isna(x):
        return None  # or 0, if you want to fill NaNs differently
    elif x > threshold:
        return 1
    elif x >= 0:
        return 0
    else:
        return -1

df["Return_Signal"] = df["Return"].apply(lambda x: classify_return(x, threshold))

# Drop the first row if 'Return' was NaN after pct_change
df.dropna(subset=["Return_Signal"], inplace=True)

# Count the distribution
distribution = df["Return_Signal"].value_counts()
print("Distribution of Return_Signal:\n", distribution)

# Percentage breakdown
distribution_pct = df["Return_Signal"].value_counts(normalize=True) * 100
print("\nPercentage Distribution:\n", distribution_pct)


Distribution of Return_Signal:
 Return_Signal
-1.0    153036
 0.0    134806
 1.0     29923
Name: count, dtype: int64

Percentage Distribution:
 Return_Signal
-1.0    48.160118
 0.0    42.423174
 1.0     9.416707
Name: proportion, dtype: float64


We retry with class weights:

In [59]:
import numpy as np
from collections import Counter
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Suppose y_train contains labels in {0,1,2} with class imbalance
# e.g., y_train = np.array([0,0,2,0,1,2,0,0,0, ... ])

# 1) Count how many samples per class in the training set
counter = Counter(y_train)  # e.g., Counter({0: 153036, 1: 134806, 2: 29923})
print("Training Distribution:", counter)

# 2) Compute class weights (Inverse Frequency Example)
# total_samples = sum of all classes
total_samples = len(y_train)
num_classes = len(np.unique(y_train))

class_weight = {}
for label, count in counter.items():
    # weight_i = total_samples / (num_classes * class_count_i)
    class_weight[label] = total_samples / (num_classes * count)

print("Class Weight Dictionary:", class_weight)
# Example output: {0: 1.39, 1: 1.58, 2: 7.12}  (numbers are hypothetical)

# 3) Build a model (example: LSTM) for classification
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, num_features)),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25, activation="relu"),
    Dense(3, activation="softmax")  # 3 classes: 0,1,2
])

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",  # for integer labels
    metrics=["accuracy"]
)

# 4) Apply Early Stopping
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True
)

# 5) Train with class_weight
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
    verbose=1,
    class_weight=class_weight  # <--- here we pass the weights
)

# 6) Evaluate on test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


Training Distribution: Counter({0.0: 197063, 2.0: 25342})
Class Weight Dictionary: {0.0: 0.5642992342550351, 2.0: 4.388071186173152}


  super().__init__(**kwargs)


Epoch 1/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 8ms/step - accuracy: 0.5960 - loss: 0.6668 - val_accuracy: 0.7423 - val_loss: 0.5355
Epoch 2/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 8ms/step - accuracy: 0.6521 - loss: 0.6113 - val_accuracy: 0.8174 - val_loss: 0.4673
Epoch 3/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 8ms/step - accuracy: 0.6525 - loss: 0.6028 - val_accuracy: 0.8112 - val_loss: 0.4726
Epoch 4/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 8ms/step - accuracy: 0.6546 - loss: 0.6002 - val_accuracy: 0.7314 - val_loss: 0.5187
Epoch 5/50
[1m6951/6951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 8ms/step - accuracy: 0.6588 - loss: 0.5954 - val_accuracy: 0.8103 - val_loss: 0.4868
[1m1489/1489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9200 - loss: 0.4038
Test Loss: 0.4121701419353485
Test Accuracy: 0.9291487336158752


In [60]:
print("Classification Report:")
print(classification_report(y_test, y_pred_classes))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_classes))

Classification Report:
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98     45837
         2.0       0.00      0.00      0.00      1798

    accuracy                           0.96     47635
   macro avg       0.48      0.50      0.49     47635
weighted avg       0.93      0.96      0.94     47635

Confusion Matrix:
[[45837     0]
 [ 1798     0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Report on Using Class Weights for an Imbalanced LSTM Classification Task**


This report describes how **class weighting** was introduced into an LSTM classification pipeline to address **imbalanced training data**, where one class (“0.0”) heavily outnumbered another class (“2.0”). The goal is to enable the model to learn minority classes more effectively.

Despite using class weights, the final confusion matrix and classification report show the model **still** overwhelmingly predicts the majority class. Below, we discuss the approach and evaluate the resulting performance.


## **2. Methodology**

1. **Data**:  
   - The training labels \(\{0, 1, 2\}\) exhibit significant imbalance.  
   - Example distribution: \(\text{Counter}(\{0.0: 197063, 2.0: 25342\})\).  
   - We focus here on classes **0.0** and **2.0** (no class 1.0 in the distribution snippet).

2. **Computing Class Weights**:  
   - **Inverse Frequency Formula**:
     \[
       \text{weight}_i = \frac{\text{total\_samples}}{\text{num\_classes} \times \text{class\_count}_i}
     \]
   - This results in a higher weight for minority classes, penalizing errors for smaller classes more heavily.

3. **Model Architecture**:  
   - **Two-layer LSTM** network with 50 units each, plus dropout for regularization.  
   - Output layer: `Dense(3, activation="softmax")` for a 3-class classification.

4. **Training with `class_weight`**:  
   - Passed a `class_weight` dictionary into `model.fit(...)`.  
   - Also employed **early stopping** (`patience=3`), monitoring validation loss.


## **3. Training Results**

### **3.1 Training Distribution and Weights**

```
Training Distribution: Counter({0.0: 197063, 2.0: 25342})
Class Weight Dictionary: {0.0: 0.5643, 2.0: 4.3881}
```

- Class **0.0** (majority) got a weight of ~0.56.  
- Class **2.0** (minority) got a weight of ~4.39.  

This indicates the minority class’ errors should be penalized more.

### **3.2 Epoch Logs**

```
Epoch 1/50
...
accuracy: 0.5960 - loss: 0.6668
val_accuracy: 0.7423 - val_loss: 0.5355

Epoch 2/50
...
accuracy: 0.6521 - loss: 0.6113
val_accuracy: 0.8174 - val_loss: 0.4673

...
Epoch 5/50
...
accuracy: 0.6588 - loss: 0.5954
val_accuracy: 0.8103 - val_loss: 0.4868
```

- The model steadily **increased** accuracy on the training set (from ~59.6% to ~65.9%).  
- Validation accuracy rose to above 80% by epoch 2, then fluctuated slightly.

### **3.3 Final Test Performance**

```
Test Loss: 0.4122
Test Accuracy: 0.9291
```

- **Test accuracy** is around **92.9%**.  
- On the surface, this is high. However, a closer look at the confusion matrix reveals the issue with the minority class.


## **4. Model Evaluation**

### **4.1 Confusion Matrix and Classification Report**

```
Classification Report:
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98     45837
         2.0       0.00      0.00      0.00      1798

Confusion Matrix:
[[45837     0]
 [ 1798     0]]
```

- The model **never** predicted “2.0,” resulting in:
  - 100% correct predictions for class “0.0” (all 45,837).
  - **0** correct predictions for class “2.0.”  
- The **model simply learned to predict the majority class** every time.

### **4.2 Interpretation**

Even though we used **class weights**, the model still defaulted to predicting the majority class (“0.0”) exclusively. Potential reasons include:

1. **Imbalance is Still High**: A ratio of ~197k to ~25k is quite large, and while class weighting helps, it may not be enough.  
2. **Loss Minimization**: The model can still minimize overall loss by predicting the majority class, especially if the minority class signals are weak or not well-differentiated.  
3. **Insufficient Minority Examples**: If the minority class is only ~11% of data, the model might need additional strategies to see enough examples.


## **5. Conclusion and Recommendations**

1. **High Accuracy ≠ Good Minority Detection**  
   - A 92.9% test accuracy can be misleading when the minority class is completely unrecognized.

2. **Improving Minority Class Recognition**  
   - **Increase Class Weights Further**: The current inverse frequency approach gave a weight of ~4.39 to class “2.0.” Possibly even higher weights or manual tuning might help.  
   - **Resampling / SMOTE**: Oversample minority examples or undersample the majority to achieve a more balanced dataset.  
   - **Additional Features / Different Threshold**: If “2.0” is defined by a threshold, reconsider the threshold or provide stronger signals that differentiate “up” from “down.”  
   - **Use Other Metrics**: Track **F1** for each class, especially the minority. Then tune until you see improvement for class “2.0.”  

3. **Next Steps**  
   - **Try combining** class weights with oversampling (e.g., SMOTE).  
   - **Examine** whether features truly capture the upward price signals.  
   - **Assess** real-world cost: If missing an “up” signal is costly, weighting the minority class more aggressively can be justified.

Despite best efforts with class weighting, the model remains dominated by the majority class. By integrating more advanced imbalance-handling techniques, you can encourage predictions for minority classes and achieve a more balanced performance overall.