In [1]:
"b5c2099919a2a704815b709fba4f167d-210831ef47a2e4f07292f3cefea33b5d"

'b5c2099919a2a704815b709fba4f167d-210831ef47a2e4f07292f3cefea33b5d'

In [2]:
import oandapyV20
from oandapyV20 import API


In [3]:
#!pip install oandapyV20

## Oanda API Connection , Get Data And Get the Features And Target
## And Create Features From Technical Indicators


In [7]:
import oandapyV20
from oandapyV20 import API
import oandapyV20.endpoints.instruments as instruments
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import ta  # pip install ta

# --- SETUP: OANDA API Credentials ---
account_id = "101-011-29597843-001"        # Replace with your account ID
access_token = "b5c2099919a2a704815b709fba4f167d-210831ef47a2e4f07292f3cefea33b5d"      # Replace with your OANDA API access token
client = API(access_token=access_token)

# --- Define the time range for the past 5 years ---
end_date = datetime.utcnow() - timedelta(days=2)
start_date = end_date - timedelta(days=5*365)
start_str = start_date.strftime("%Y-%m-%dT00:00:00Z")
end_str = end_date.strftime("%Y-%m-%dT00:00:00Z")

# --- Set Parameters for the Request ---
params = {
    "from": start_str,
    "to": end_str,
    "granularity": "D",  # Daily candles
    "price": "M"         # Use mid prices
}

# --- Request Historical Data for XAU/USD ---
instrument = "XAU_USD"
r = instruments.InstrumentsCandles(instrument=instrument, params=params)
client.request(r)
candles = r.response.get('candles', [])

# --- Convert Retrieved Data to a DataFrame ---
data = []
for candle in candles:
    if candle["complete"]:
        time = candle["time"]
        o = float(candle["mid"]["o"])
        h = float(candle["mid"]["h"])
        l = float(candle["mid"]["l"])
        c = float(candle["mid"]["c"])
        data.append([time, o, h, l, c])
        
df = pd.DataFrame(data, columns=["Date", "Open", "High", "Low", "Close"])
df["Date"] = pd.to_datetime(df["Date"])
df.set_index("Date", inplace=True)
print("Data Head from OANDA:")
print(df.head())

# --- Feature Engineering: Compute Technical Indicators ---
# 20-day Simple Moving Average
df['SMA20'] = df['Close'].rolling(window=20).mean()

# 14-day Relative Strength Index
df['RSI'] = ta.momentum.rsi(df['Close'], window=14)

# MACD and MACD Signal
macd = ta.trend.MACD(df['Close'])
df['MACD'] = macd.macd()
df['MACD_signal'] = macd.macd_signal()

# Bollinger Bands (20-day window, 2 std deviations)
bollinger = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
df['Bollinger_High'] = bollinger.bollinger_hband()
df['Bollinger_Low'] = bollinger.bollinger_lband()

# --- Create the Target: Next Day Return ---
df['Return'] = df['Close'].pct_change().shift(-1)

# --- Drop Rows with Missing Values ---
df.dropna(inplace=True)

# --- Prepare the Feature Set ---
features = df[['SMA20', 'RSI', 'MACD', 'MACD_signal', 'Bollinger_High', 'Bollinger_Low']]
target = df['Return']

# --- Inspect the Prepared Data ---
print("\nFeatures Head:")
print(features.head())
print("\nTarget (Next Day Return) Head:")
print(target.head())


Data Head from OANDA:
                               Open      High       Low     Close
Date                                                             
2020-03-18 21:00:00+00:00  1485.969  1501.098  1464.316  1472.795
2020-03-19 21:00:00+00:00  1470.276  1516.153  1455.320  1499.094
2020-03-22 21:00:00+00:00  1501.337  1561.228  1482.823  1553.114
2020-03-23 21:00:00+00:00  1553.232  1634.440  1552.933  1627.730
2020-03-24 21:00:00+00:00  1629.657  1640.228  1596.376  1617.351

Features Head:
                                SMA20        RSI       MACD  MACD_signal  \
Date                                                                       
2020-05-05 21:00:00+00:00  1703.09550  54.122086  25.632576    34.615684   
2020-05-06 21:00:00+00:00  1706.59600  60.795755  25.074687    32.707485   
2020-05-07 21:00:00+00:00  1707.54025  56.898674  23.301724    30.826333   
2020-05-10 21:00:00+00:00  1706.75590  55.541975  21.275525    28.916171   
2020-05-11 21:00:00+00:00  1705.52015  56.67

In [8]:
df.shape

(1256, 11)

In [15]:
features.tail()

Unnamed: 0_level_0,SMA20,RSI,MACD,MACD_signal,Bollinger_High,Bollinger_Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-03-10 21:00:00+00:00,2911.6556,57.750335,23.492875,31.566524,2957.323367,2865.987833
2025-03-11 21:00:00+00:00,2913.1381,60.74738,24.03103,30.059425,2959.627974,2866.648226
2025-03-12 21:00:00+00:00,2916.15635,68.213764,28.573048,29.76215,2972.905617,2859.407083
2025-03-13 21:00:00+00:00,2921.27775,67.266959,31.504053,30.110531,2983.198545,2859.356955
2025-03-16 21:00:00+00:00,2926.40975,69.237476,34.735308,31.035486,2996.411573,2856.407927


In [18]:
def fetch_oanda_data(instrument, start_date, end_date, granularity="D", access_token=None):
    """
    Fetch historical OHLC data from OANDA for the given instrument.
    """
    client = API(access_token=access_token)
    start_str = start_date.strftime("%Y-%m-%dT00:00:00Z")
    end_str = end_date.strftime("%Y-%m-%dT00:00:00Z")
    
    params = {
        "from": start_str,
        "to": end_str,
        "granularity": granularity,
        "price": "M"  # Use mid prices
    }
    
    r = instruments.InstrumentsCandles(instrument=instrument, params=params)
    client.request(r)
    candles = r.response.get('candles', [])
    
    data = []
    for candle in candles:
        if candle["complete"]:
            time = candle["time"]
            o = float(candle["mid"]["o"])
            h = float(candle["mid"]["h"])
            l = float(candle["mid"]["l"])
            c = float(candle["mid"]["c"])
            data.append([time, o, h, l, c])
    
    df = pd.DataFrame(data, columns=["Date", "Open", "High", "Low", "Close"])
    df["Date"] = pd.to_datetime(df["Date"])
    df.set_index("Date", inplace=True)
    return df

def compute_technical_indicators(df):
    """
    Compute key technical indicators and add them as new columns.
    """
    # 20-day Simple Moving Average
    df['SMA20'] = df['Close'].rolling(window=20).mean()

    # 14-day Relative Strength Index
    df['RSI'] = ta.momentum.rsi(df['Close'], window=14)

    # MACD and MACD Signal
    macd = ta.trend.MACD(df['Close'])
    df['MACD'] = macd.macd()
    df['MACD_signal'] = macd.macd_signal()

    # Bollinger Bands (20-day, 2 standard deviations)
    bollinger = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
    df['Bollinger_High'] = bollinger.bollinger_hband()
    df['Bollinger_Low'] = bollinger.bollinger_lband()
    
    return df

def prepare_features_and_target(df):
    """
    Create the regression target (next-day return) and prepare the feature set.
    """
    # Calculate next day's return
    df['Return'] = df['Close'].pct_change().shift(-1)
    df = df.dropna()  # Drop rows with NaN values from rolling calculations and shift
    
    features = df[['SMA20', 'RSI', 'MACD', 'MACD_signal', 'Bollinger_High', 'Bollinger_Low']]
    target = df['Return']
    return features, target

## Use Linear regression Decision Trees and RFs to do Cross Validation and Hyperparameter tuning and get the best model

In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Assume features and target are already loaded as X and y from your preprocessing steps
X = features  # e.g., technical indicators
y = target    # next day return

# Define parameter grids for each model
param_grids = {
    "LinearRegression": {
        # Linear Regression has few hyperparameters; we tune fit_intercept for illustration.
        'fit_intercept': [True, False]
    },
    "DecisionTree": {
        'max_depth': [None, 5, 10, 15, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    },
    "RandomForest": {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 5, 10, 15],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }
}

# Create a dictionary of models
models = {
    "LinearRegression": LinearRegression(),
    "DecisionTree": DecisionTreeRegressor(random_state=42),
    "RandomForest": RandomForestRegressor(random_state=42)
}

# Define KFold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

best_models = {}
results = {}

for model_name, model in models.items():
    print(f"Training and tuning {model_name}...")
    param_grid = param_grids[model_name]
    
    # Use GridSearchCV to tune hyperparameters based on negative MSE
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=kf,
        scoring='neg_mean_squared_error',  # scoring function (negative MSE)
        n_jobs=-1
    )
    
    grid_search.fit(X, y)
    
    # Retrieve the best model and parameters
    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_
    best_score = -grid_search.best_score_  # convert to positive MSE
    
    best_models[model_name] = best_model
    results[model_name] = {"Best Params": best_params, "Best MSE": best_score}
    
# Convert results to a DataFrame for better visualization
results_df = pd.DataFrame(results).T
print("\nHyperparameter Tuning Results (5-Fold):")
print(results_df)

# Select the best overall model based on lowest MSE
best_overall_model_name = min(results, key=lambda x: results[x]["Best MSE"])
print(f"\nBest overall model: {best_overall_model_name}")
print("Best model details:")
print(best_models[best_overall_model_name])


Training and tuning LinearRegression...
Training and tuning DecisionTree...
Training and tuning RandomForest...

Hyperparameter Tuning Results (5-Fold):
                                                        Best Params  Best MSE
LinearRegression                            {'fit_intercept': True}  0.000085
DecisionTree      {'max_depth': 5, 'min_samples_leaf': 2, 'min_s...   0.00009
RandomForest      {'max_depth': 5, 'min_samples_leaf': 2, 'min_s...  0.000085

Best overall model: RandomForest
Best model details:
RandomForestRegressor(max_depth=5, min_samples_leaf=2, min_samples_split=10,
                      n_estimators=200, random_state=42)


In [19]:
from datetime import datetime, timedelta

# --- Define test period: get recent data ending two days ago ---
test_end_date = datetime.utcnow() - timedelta(days=2)
test_start_date = test_end_date - timedelta(days=30)  # using last 30 days to ensure enough data for rolling calculations

# Fetch test data for the specified period
df_test_raw = fetch_oanda_data(instrument, test_start_date, test_end_date, granularity="D", access_token=access_token)
df_test_with_indicators = compute_technical_indicators(df_test_raw.copy())
features_test, target_test = prepare_features_and_target(df_test_with_indicators.copy())

# Select the most recent row (i.e. yesterday's data)
test_sample_features = features_test.iloc[-1:]
actual_return = target_test.iloc[-1]

# Predict using the best model (assumes best_overall_model is defined from tuning)
predicted_return = best_overall_model.predict(test_sample_features)[0]

# Display the date, actual return, and predicted return
test_date = features_test.index[-1].strftime("%Y-%m-%d")
print("Test Date:", test_date)
print("Actual Return:", actual_return)
print("Predicted Return:", predicted_return)


IndexError: single positional indexer is out-of-bounds

In [21]:
test_sample_features 

Unnamed: 0_level_0,SMA20,RSI,MACD,MACD_signal,Bollinger_High,Bollinger_Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [22]:
test_end_date

datetime.datetime(2025, 3, 18, 11, 54, 43, 950222)

In [23]:
test_start_date

datetime.datetime(2025, 2, 16, 11, 54, 43, 950222)

In [24]:
df_test_raw

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-02-16 22:00:00+00:00,2893.375,2906.525,2878.785,2898.49
2025-02-17 22:00:00+00:00,2898.71,2936.955,2892.07,2935.465
2025-02-18 22:00:00+00:00,2935.06,2947.08,2918.64,2933.26
2025-02-19 22:00:00+00:00,2934.175,2954.955,2924.035,2939.35
2025-02-20 22:00:00+00:00,2939.43,2949.93,2916.82,2936.05
2025-02-23 22:00:00+00:00,2939.785,2956.31,2921.475,2952.065
2025-02-24 22:00:00+00:00,2952.84,2953.425,2888.18,2915.045
2025-02-25 22:00:00+00:00,2915.835,2930.18,2890.865,2916.275
2025-02-26 22:00:00+00:00,2915.98,2920.815,2867.795,2877.195
2025-02-27 22:00:00+00:00,2876.52,2885.24,2832.72,2858.14


In [28]:
df_test_with_indicators

Unnamed: 0_level_0,Open,High,Low,Close,SMA20,RSI,MACD,MACD_signal,Bollinger_High,Bollinger_Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2025-02-16 22:00:00+00:00,2893.375,2906.525,2878.785,2898.49,,,,,,
2025-02-17 22:00:00+00:00,2898.71,2936.955,2892.07,2935.465,,,,,,
2025-02-18 22:00:00+00:00,2935.06,2947.08,2918.64,2933.26,,,,,,
2025-02-19 22:00:00+00:00,2934.175,2954.955,2924.035,2939.35,,,,,,
2025-02-20 22:00:00+00:00,2939.43,2949.93,2916.82,2936.05,,,,,,
2025-02-23 22:00:00+00:00,2939.785,2956.31,2921.475,2952.065,,,,,,
2025-02-24 22:00:00+00:00,2952.84,2953.425,2888.18,2915.045,,,,,,
2025-02-25 22:00:00+00:00,2915.835,2930.18,2890.865,2916.275,,,,,,
2025-02-26 22:00:00+00:00,2915.98,2920.815,2867.795,2877.195,,,,,,
2025-02-27 22:00:00+00:00,2876.52,2885.24,2832.72,2858.14,,,,,,


In [26]:
features_test, target_test

(Empty DataFrame
 Columns: [SMA20, RSI, MACD, MACD_signal, Bollinger_High, Bollinger_Low]
 Index: [],
 Series([], Name: Return, dtype: float64))