In [20]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np

In [21]:
try:
    df = pd.read_csv('DXY.csv')
except FileNotFoundError:
    print("Error: DXY.csv not found. Please ensure the file is in the correct directory.")
    exit()

In [22]:
required_columns = ['Open', 'High', 'Low', 'Close']
if not all(col in df.columns for col in required_columns):
    print(f"Error: Missing one or more required columns ({required_columns}) in DXY.csv.")
    exit()

In [23]:
df['Prev_Open'] = df['Open'].shift(1)
df['Prev_High'] = df['High'].shift(1)
df['Prev_Low'] = df['Low'].shift(1)
df['Prev_Close'] = df['Close'].shift(1)

In [24]:
df.dropna(inplace=True)

In [25]:
X = df[['Prev_Open', 'Prev_High', 'Prev_Low', 'Prev_Close']]
y = df[['Open', 'High', 'Low', 'Close']]

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [27]:
model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)


In [28]:
print("Training the model...")
model.fit(X_train, y_train)
print("Model training complete.")

Training the model...
Model training complete.


In [29]:
y_pred = model.predict(X_test)

In [30]:
print("\n--- Model Evaluation ---")


--- Model Evaluation ---


In [31]:
mae_open = mean_absolute_error(y_test['Open'], y_pred[:, 0])
mae_high = mean_absolute_error(y_test['High'], y_pred[:, 1])
mae_low = mean_absolute_error(y_test['Low'], y_pred[:, 2])
mae_close = mean_absolute_error(y_test['Close'], y_pred[:, 3])

In [32]:
print(f"Mean Absolute Error (Open): {mae_open:.4f}")
print(f"Mean Absolute Error (High): {mae_high:.4f}")
print(f"Mean Absolute Error (Low): {mae_low:.4f}")
print(f"Mean Absolute Error (Close): {mae_close:.4f}")

Mean Absolute Error (Open): 0.3449
Mean Absolute Error (High): 0.6613
Mean Absolute Error (Low): 0.1785
Mean Absolute Error (Close): 0.1785


In [33]:
r2_overall = r2_score(y_test, y_pred)
print(f"Overall R-squared: {r2_overall:.4f}")

Overall R-squared: -0.1376


In [34]:
r2_open = r2_score(y_test['Open'], y_pred[:, 0])
r2_high = r2_score(y_test['High'], y_pred[:, 1])
r2_low = r2_score(y_test['Low'], y_pred[:, 2])
r2_close = r2_score(y_test['Close'], y_pred[:, 3])

In [35]:
print(f"R-squared (Open): {r2_open:.4f}")
print(f"R-squared (High): {r2_high:.4f}")
print(f"R-squared (Low): {r2_low:.4f}")
print(f"R-squared (Close): {r2_close:.4f}")

R-squared (Open): -0.1785
R-squared (High): -0.1458
R-squared (Low): -0.0913
R-squared (Close): -0.1350


In [36]:
prev_close_for_direction_check = X_test['Prev_Close']
actual_direction = np.sign(y_test['Close'] - prev_close_for_direction_check)
predicted_direction = np.sign(y_pred[:, 3] - prev_close_for_direction_check)
correct_direction_predictions = (actual_direction == predicted_direction) & (actual_direction != 0)
total_directional_moves = (actual_direction != 0).sum()
win_rate = (correct_direction_predictions.sum() / total_directional_moves) * 100 \
           if total_directional_moves > 0 else 0

In [37]:
print(f"Win Rate (Close Price Direction): {win_rate:.2f}%")

Win Rate (Close Price Direction): 74.76%


In [38]:
if not df.empty:
    last_candle_data = df[['Open', 'High', 'Low', 'Close']].iloc[-1].values
    
    new_prediction_input = pd.DataFrame(
        [last_candle_data],
        columns=['Prev_Open', 'Prev_High', 'Prev_Low', 'Prev_Close']
    )

    predicted_next_candle = model.predict(new_prediction_input)
    print("\n--- Example Prediction ---")
    print(f"Using previous candle OHLC: {last_candle_data}")
    print(f"Predicted next candle OHLC (Open, High, Low, Close): {predicted_next_candle[0].round(4)}")
else:
    print("\nNo data available for example prediction after processing.")



--- Example Prediction ---
Using previous candle OHLC: [ 0.11999512  0.63999939 -0.27000427 -0.25      ]
Predicted next candle OHLC (Open, High, Low, Close): [-0.0791 -0.1521  0.0095  0.0635]
