In [5]:
import yfinance as yf
import pandas as pd
import numpy as np
import ta  # pip install ta

# 1. Data Collection using an alternative ticker ("GLD")
ticker = "GLD"  # Gold ETF ticker on Yahoo Finance
data = yf.download(ticker, period="5y", interval="1d")

if data.empty:
    raise ValueError("No data was retrieved. Please check the ticker or your internet connection.")

# Recent yfinance releases can return two-level (field, ticker) columns even
# for a single ticker. In that case data['Close'] is a DataFrame rather than a
# Series, which the `ta` indicator helpers below do not accept. Keep only the
# field level so the rest of the cell works with either column layout.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

print("Raw Data Head:")
print(data.head())

# 2. Feature Engineering: Compute key technical indicators
close = data['Close']

# Simple Moving Average (20-day)
data['SMA20'] = close.rolling(window=20).mean()

# Relative Strength Index (RSI) with a 14-day window
data['RSI'] = ta.momentum.rsi(close, window=14)

# MACD (Moving Average Convergence Divergence) and its signal line
macd = ta.trend.MACD(close)
data['MACD'] = macd.macd()
data['MACD_signal'] = macd.macd_signal()

# Bollinger Bands (20-day window with 2 standard deviations)
bollinger = ta.volatility.BollingerBands(close, window=20, window_dev=2)
data['Bollinger_High'] = bollinger.bollinger_hband()
data['Bollinger_Low'] = bollinger.bollinger_lband()

# 3. Create the Target: Next Day Return
# pct_change() gives the return realised on each row's date; shift(-1) moves
# it one row up so that every row is labelled with the *following* day's
# return. The last row therefore has no target and is dropped below.
data['Return'] = close.pct_change().shift(-1)

# Drop rows with missing values (indicator warm-up rows and the final,
# target-less row). Rebinding instead of inplace=True keeps the cell easy to
# reason about when it is re-run.
data = data.dropna()

# 4. Prepare the Feature Set
# Only the computed technical indicators are used as features; the raw OHLC
# columns served as inputs to the indicators and the target.
features = data[['SMA20', 'RSI', 'MACD', 'MACD_signal', 'Bollinger_High', 'Bollinger_Low']]
target = data['Return']

# Optional: inspect the prepared features and target
print("\nFeatures Head:")
print(features.head())
print("\nTarget (Next Day Return) Head:")
print(target.head())

# Now, 'features' and 'target' are ready for use in a regression task to predict
# the magnitude of the next day's price change.


Failed to get ticker 'GLD' reason: Expecting value: line 1 column 1 (char 0)
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['GLD']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


ValueError: No data was retrieved. Please check the ticker or your internet connection.

In [6]:
# Removed: this cell echoed an API access token in plain text, which also
# stored it in the saved cell output. Keep secrets out of notebook cells and
# outputs (for example, read them from environment variables at run time).

In [7]:
import oandapyV20
from oandapyV20 import API


ModuleNotFoundError: No module named 'oandapyV20'

In [8]:
!pip install oandapyV20

    torch (>=1.9.*)
           ~~~~~~^

Collecting oandapyV20
  Downloading oandapyV20-0.7.2.tar.gz (51 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: oandapyV20
  Building wheel for oandapyV20 (setup.py): started
  Building wheel for oandapyV20 (setup.py): finished with status 'done'
  Created wheel for oandapyV20: filename=oandapyV20-0.7.2-py3-none-any.whl size=69848 sha256=0651cbcefff2385e95c5facd91bc3b85cb0891b8c59b3a82bcc2a08bcc56091f
  Stored in directory: c:\users\shashank\appdata\local\pip\cache\wheels\a6\a4\5b\f3a35a028b0e2fe8e5dc29f03bd9acaed72b5ae9775d14fb38
Successfully built oandapyV20
Installing collected packages: oandapyV20
Successfully installed oandapyV20-0.7.2



    torch (>=1.9.*)
           ~~~~~~^

[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import oandapyV20
from oandapyV20 import API
import oandapyV20.endpoints.instruments as instruments
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import ta  # pip install ta

# --- SETUP: OANDA API Credentials ---
# Credentials are read from the environment so that no secret is stored in the
# notebook source or its saved outputs. Export OANDA_ACCESS_TOKEN (and
# optionally OANDA_ACCOUNT_ID) before starting the kernel.
import os
from datetime import timezone

account_id = os.environ.get("OANDA_ACCOUNT_ID")  # not referenced by the candle request in this cell
access_token = os.environ.get("OANDA_ACCESS_TOKEN")
if not access_token:
    raise RuntimeError("Set the OANDA_ACCESS_TOKEN environment variable before running this cell.")
client = API(access_token=access_token)

# --- Define the time range for the past 5 years ---
# datetime.utcnow() is deprecated; an aware UTC timestamp formats to the same
# strings below.
end_date = datetime.now(timezone.utc)
start_date = end_date - timedelta(days=5*365)
start_str = start_date.strftime("%Y-%m-%dT00:00:00Z")
end_str = end_date.strftime("%Y-%m-%dT00:00:00Z")

# --- Set Parameters for the Request ---
params = {
    "from": start_str,
    "to": end_str,
    "granularity": "D",  # Daily candles
    "price": "M"         # Use mid prices
}

# --- Request Historical Data for XAU/USD ---
instrument = "XAU_USD"
r = instruments.InstrumentsCandles(instrument=instrument, params=params)
client.request(r)
candles = r.response.get('candles', [])

# --- Convert Retrieved Data to a DataFrame ---
# Keep only candles flagged as complete; each row is [time, open, high, low, close]
# taken from the candle's mid prices.
data = [
    [
        candle["time"],
        float(candle["mid"]["o"]),
        float(candle["mid"]["h"]),
        float(candle["mid"]["l"]),
        float(candle["mid"]["c"]),
    ]
    for candle in candles
    if candle["complete"]
]

# Fail early with a clear message instead of letting the indicator code run on
# an empty frame.
if not data:
    raise ValueError("No complete candles were retrieved. Please check the instrument, date range, or API credentials.")

df = pd.DataFrame(data, columns=["Date", "Open", "High", "Low", "Close"])
df["Date"] = pd.to_datetime(df["Date"])
df = df.set_index("Date")
print("Data Head from OANDA:")
print(df.head())

# --- Feature Engineering: Compute Technical Indicators ---
close = df['Close']

# 20-day Simple Moving Average
df['SMA20'] = close.rolling(window=20).mean()

# 14-day Relative Strength Index
df['RSI'] = ta.momentum.rsi(close, window=14)

# MACD and MACD Signal
macd = ta.trend.MACD(close)
df['MACD'] = macd.macd()
df['MACD_signal'] = macd.macd_signal()

# Bollinger Bands (20-day window, 2 std deviations)
bollinger = ta.volatility.BollingerBands(close, window=20, window_dev=2)
df['Bollinger_High'] = bollinger.bollinger_hband()
df['Bollinger_Low'] = bollinger.bollinger_lband()

# --- Create the Target: Next Day Return ---
# shift(-1) labels each row with the return of the following candle, so the
# final row has no target and is removed by the dropna below.
df['Return'] = close.pct_change().shift(-1)

# --- Drop Rows with Missing Values ---
# Removes indicator warm-up rows and the last (target-less) row.
df = df.dropna()

# --- Prepare the Feature Set ---
features = df[['SMA20', 'RSI', 'MACD', 'MACD_signal', 'Bollinger_High', 'Bollinger_Low']]
target = df['Return']

# --- Inspect the Prepared Data ---
print("\nFeatures Head:")
print(features.head())
print("\nTarget (Next Day Return) Head:")
print(target.head())


Data Head from OANDA:
                               Open      High       Low     Close
Date                                                             
2020-03-19 21:00:00+00:00  1470.276  1516.153  1455.320  1499.094
2020-03-22 21:00:00+00:00  1501.337  1561.228  1482.823  1553.114
2020-03-23 21:00:00+00:00  1553.232  1634.440  1552.933  1627.730
2020-03-24 21:00:00+00:00  1629.657  1640.228  1596.376  1617.351
2020-03-25 21:00:00+00:00  1615.513  1644.433  1597.794  1630.183

Features Head:
                                SMA20        RSI       MACD  MACD_signal  \
Date                                                                       
2020-05-06 21:00:00+00:00  1706.59600  60.359187  23.243456    30.064837   
2020-05-07 21:00:00+00:00  1707.54025  56.449796  21.598977    28.371665   
2020-05-10 21:00:00+00:00  1706.75590  55.090119  19.692847    26.635901   
2020-05-11 21:00:00+00:00  1705.52015  56.242222  18.352182    24.979158   
2020-05-12 21:00:00+00:00  1705.57055  59.65

In [10]:
# Display the (rows, columns) dimensions of df as a quick sanity check.
df.shape

(1255, 11)

In [11]:
# Display the last rows of the feature frame.
features.tail()

Unnamed: 0_level_0,SMA20,RSI,MACD,MACD_signal,Bollinger_High,Bollinger_Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-03-10 21:00:00+00:00,2911.6556,57.750335,23.492875,31.566524,2957.323367,2865.987833
2025-03-11 21:00:00+00:00,2913.1381,60.74738,24.03103,30.059425,2959.627974,2866.648226
2025-03-12 21:00:00+00:00,2916.15635,68.213764,28.573048,29.76215,2972.905617,2859.407083
2025-03-13 21:00:00+00:00,2921.27775,67.266959,31.504053,30.110531,2983.198545,2859.356955
2025-03-16 21:00:00+00:00,2926.40975,69.237476,34.735308,31.035486,2996.411573,2856.407927
