<a href="https://colab.research.google.com/github/sharabhsinghi/ml-crash-course/blob/main/EURUSD_predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [148]:
!pip install yfinance



In [149]:
# For data manipulation
import numpy as np
import pandas as pd

# To fetch financial data
import yfinance as yf

In [150]:
# Set the ticker as 'EURUSD=X'
forex_data = yf.download('EURUSD=X',
                        # start='2019-01-02',
                        #  end='2021-12-31'
                         )

# Set the index to a datetime object
forex_data.index = pd.to_datetime(forex_data.index)

# Display the last five rows
forex_data.head()

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,EURUSD=X,EURUSD=X,EURUSD=X,EURUSD=X,EURUSD=X
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2003-12-01,1.196501,1.204007,1.194401,1.203398,0
2003-12-02,1.208897,1.210903,1.1946,1.196101,0
2003-12-03,1.212298,1.213003,1.2077,1.209,0
2003-12-04,1.208094,1.214403,1.204398,1.212004,0
2003-12-05,1.218695,1.219096,1.206593,1.207802,0


In [151]:
# Change forex_data columns from multiindex to regular
forex_data.columns = [col[0] for col in forex_data.columns]

forex_data.drop(columns=['Volume'], inplace=True)



In [152]:
# Feature calculation functions

# Calculate Moving Average (MA)
def calculate_ma(data, period):
    return data['Close'].rolling(window=period).mean()

# Calculate Bollinger Bands (BB)
def calculate_bb(data, period, num_std):
    rolling_mean = data['Close'].rolling(window=period).mean()
    rolling_std = data['Close'].rolling(window=period).std()
    upper_band = rolling_mean + (rolling_std * num_std)
    lower_band = rolling_mean - (rolling_std * num_std)
    return upper_band, lower_band

# Calculate Average True Range (ATR)
def calculate_atr(data, period):
    high_low = data['High'] - data['Low']
    high_close_prev = abs(data['High'] - data['Close'].shift(1))
    low_close_prev = abs(data['Low'] - data['Close'].shift(1))
    tr = pd.concat([high_low, high_close_prev, low_close_prev], axis=1).max(axis=1)
    return tr.rolling(window=period).mean()

# Calculate Relative Strength Index (RSI)
def calculate_rsi(data, period):
    delta = data['Close'].diff(1)
    up = delta.clip(lower=0)
    down = -1 * delta.clip(upper=0)
    ma_up = up.rolling(window=period).mean()
    ma_down = down.rolling(window=period).mean()
    rsi = 100 - (100 / (1 + ma_up / ma_down))
    return rsi

# Calculate Stochastic Oscillator (%K and %D)
def calculate_stochastic(data, period_k, period_d):
    low_min = data['Low'].rolling(window=period_k).min()
    high_max = data['High'].rolling(window=period_k).max()
    k = 100 * (data['Close'] - low_min) / (high_max - low_min)
    d = k.rolling(window=period_d).mean()
    return k, d

In [153]:
# Add feature columns

forex_data['MA_20'] = calculate_ma(forex_data, 20)
forex_data['MA_50'] = calculate_ma(forex_data, 50)

forex_data['BB_Upper_20'], forex_data['BB_Lower_20'] = calculate_bb(forex_data, 20, 2)

forex_data['ATR_14'] = calculate_atr(forex_data, 14)

forex_data['RSI_14'] = calculate_rsi(forex_data, 14)

forex_data['Stochastic_K_14'], forex_data['Stochastic_D_3'] = calculate_stochastic(forex_data, 14, 3)

In [154]:
## IMPORTANT STEP
# Create Target variable based on Next 5 Days Average Close

forex_data['Avg_Close_Next_7_Days'] = forex_data['Close'].rolling(window=7, min_periods=1).mean().shift(-7)

forex_data['Target'] = np.where(forex_data['Avg_Close_Next_7_Days'] > forex_data['Close'], 1, 0)

# Display the last 15 rows
forex_data.tail(15)

Unnamed: 0_level_0,Close,High,Low,Open,MA_20,MA_50,BB_Upper_20,BB_Lower_20,ATR_14,RSI_14,Stochastic_K_14,Stochastic_D_3,Avg_Close_Next_7_Days,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2025-02-26,1.05241,1.052742,1.047647,1.05241,1.040968,1.038518,1.055741,1.026195,0.007443,61.481332,98.074299,82.245084,1.057038,1
2025-02-27,1.048878,1.048988,1.040485,1.048878,1.041286,1.038505,1.056467,1.026104,0.007714,59.251459,83.101737,84.216599,1.06235,1
2025-02-28,1.039512,1.042101,1.038184,1.039512,1.041275,1.038595,1.056461,1.026088,0.007862,57.242112,43.401785,74.859274,1.068697,1
2025-03-03,1.041385,1.050023,1.039047,1.041385,1.042105,1.038693,1.05516,1.02905,0.008222,59.59788,45.446717,57.316746,1.075844,1
2025-03-04,1.048504,1.055955,1.047175,1.048504,1.042893,1.038797,1.055462,1.030325,0.008591,60.12711,58.67126,49.173254,1.081576,1
2025-03-05,1.062699,1.078586,1.060288,1.062699,1.044164,1.039239,1.059235,1.029094,0.010163,66.600703,60.678467,54.932148,1.084842,1
2025-03-06,1.079494,1.085105,1.078249,1.079494,1.046135,1.040024,1.067815,1.024454,0.010894,70.671957,88.041799,69.130509,1.086076,1
2025-03-07,1.078795,1.088566,1.078214,1.078795,1.048146,1.0408,1.073944,1.022348,0.011309,69.015016,80.606868,76.442378,1.088398,1
2025-03-10,1.086059,1.087347,1.080625,1.086059,1.050891,1.041675,1.080483,1.021298,0.011588,71.960589,95.025188,87.891285,,0
2025-03-11,1.08394,1.093028,1.083682,1.08394,1.05358,1.042495,1.08496,1.022201,0.011682,73.320073,83.430567,86.354208,,0


In [155]:
# Get last 5 days
forex_data_latest = forex_data.tail(15)

forex_data = forex_data.iloc[:-15]

In [156]:
# Drop NaN values resulting from calculations
forex_data = forex_data.dropna()

# Select features and target
features = ['Close', 'MA_20', 'MA_50', 'BB_Upper_20', 'BB_Lower_20', 'ATR_14', 'RSI_14', 'Stochastic_K_14', 'Stochastic_D_3']
X = forex_data[features]
y = forex_data['Target']

In [157]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Fit the scaler to the features and transform them
X_normalized = scaler.fit_transform(X)

# Convert the normalized features back to a DataFrame (optional)
X_normalized_df = pd.DataFrame(X_normalized, columns=features)

In [158]:
from sklearn.model_selection import train_test_split

# Randomize the data and split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

In [159]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [160]:
from sklearn.metrics import classification_report

y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.83      0.80      0.81       573
           1       0.79      0.82      0.80       520

    accuracy                           0.81      1093
   macro avg       0.81      0.81      0.81      1093
weighted avg       0.81      0.81      0.81      1093



In [171]:
test_prob = model.predict_proba(X_test)

In [180]:
y_pred_thresholded = (test_prob[:, 1] >= 0.4).astype(int)
print(classification_report(y_test, y_pred_thresholded))

              precision    recall  f1-score   support

           0       0.90      0.60      0.72       573
           1       0.68      0.93      0.78       520

    accuracy                           0.75      1093
   macro avg       0.79      0.76      0.75      1093
weighted avg       0.79      0.75      0.75      1093



In [178]:
test_data = pd.DataFrame(X_test, columns=features)
test_data['Target'] = y_test.values
test_data['Predicted'] = y_pred
test_data['Predicted_Prob_0'] = test_prob[:, 0]
test_data['Predicted_Prob_1'] = test_prob[:, 1]
test_data.index = y_test.index
test_data.head()

Unnamed: 0_level_0,Close,MA_20,MA_50,BB_Upper_20,BB_Lower_20,ATR_14,RSI_14,Stochastic_K_14,Stochastic_D_3,Target,Predicted,Predicted_Prob_0,Predicted_Prob_1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-08-11,0.216756,0.214264,0.186446,0.200793,0.217033,0.040521,0.363525,0.267302,0.241554,0,0,0.88,0.12
2008-04-11,0.974038,0.982329,0.928889,0.917374,0.978738,0.114968,0.568505,0.76517,0.817906,1,1,0.39,0.61
2006-08-15,0.49983,0.493697,0.485488,0.455465,0.500634,0.045647,0.582026,0.470715,0.478465,1,1,0.49,0.51
2012-06-12,0.450887,0.465484,0.520389,0.443596,0.457118,0.077713,0.41603,0.469065,0.649316,1,1,0.42,0.58
2006-11-01,0.494112,0.472368,0.485712,0.435619,0.479424,0.025317,0.7113,0.779481,0.896489,1,1,0.34,0.66


In [162]:
train_data = pd.DataFrame(X_train, columns=features)
train_data['Target'] = y_train.values
train_data.index = y_train.index
train_data.sort_index(inplace=True)

In [163]:
train_data.tail(20)

Unnamed: 0_level_0,Close,MA_20,MA_50,BB_Upper_20,BB_Lower_20,ATR_14,RSI_14,Stochastic_K_14,Stochastic_D_3,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2025-01-28,0.132807,0.096041,0.095745,0.081174,0.109806,0.040336,0.639821,0.670676,0.752286,0
2025-01-29,0.130301,0.096231,0.095173,0.081733,0.109584,0.040464,0.661819,0.63064,0.755259,0
2025-01-30,0.129706,0.09684,0.094543,0.082961,0.109478,0.039349,0.682865,0.621126,0.7142,0
2025-01-31,0.125331,0.097911,0.094029,0.083299,0.111251,0.036432,0.726648,0.486347,0.642706,0
2025-02-03,0.101943,0.097433,0.093264,0.083912,0.109639,0.04335,0.505409,0.112307,0.441665,1
2025-02-04,0.114395,0.096944,0.092737,0.083414,0.109199,0.046464,0.542219,0.329421,0.328523,1
2025-02-05,0.121501,0.09717,0.09249,0.083663,0.109382,0.050001,0.591504,0.453328,0.315717,1
2025-02-06,0.125889,0.097884,0.092178,0.084368,0.110049,0.049283,0.607368,0.529849,0.477665,0
2025-02-07,0.123523,0.098616,0.091568,0.084683,0.111169,0.045443,0.616861,0.488593,0.539402,1
2025-02-10,0.111929,0.099173,0.0907,0.083846,0.11318,0.041381,0.416291,0.286423,0.474666,1


In [164]:
check_test_x, check_test_y = forex_data_latest[features], forex_data_latest['Target']
check_test_x_normalized = scaler.transform(check_test_x)
check_test_y_pred = model.predict(check_test_x_normalized)

In [165]:
forex_data_latest['Predicted'] = check_test_y_pred
forex_data_latest.head(15)

Unnamed: 0_level_0,Close,High,Low,Open,MA_20,MA_50,BB_Upper_20,BB_Lower_20,ATR_14,RSI_14,Stochastic_K_14,Stochastic_D_3,Avg_Close_Next_7_Days,Target,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2025-02-26,1.05241,1.052742,1.047647,1.05241,1.040968,1.038518,1.055741,1.026195,0.007443,61.481332,98.074299,82.245084,1.057038,1,0
2025-02-27,1.048878,1.048988,1.040485,1.048878,1.041286,1.038505,1.056467,1.026104,0.007714,59.251459,83.101737,84.216599,1.06235,1,1
2025-02-28,1.039512,1.042101,1.038184,1.039512,1.041275,1.038595,1.056461,1.026088,0.007862,57.242112,43.401785,74.859274,1.068697,1,1
2025-03-03,1.041385,1.050023,1.039047,1.041385,1.042105,1.038693,1.05516,1.02905,0.008222,59.59788,45.446717,57.316746,1.075844,1,1
2025-03-04,1.048504,1.055955,1.047175,1.048504,1.042893,1.038797,1.055462,1.030325,0.008591,60.12711,58.67126,49.173254,1.081576,1,1
2025-03-05,1.062699,1.078586,1.060288,1.062699,1.044164,1.039239,1.059235,1.029094,0.010163,66.600703,60.678467,54.932148,1.084842,1,1
2025-03-06,1.079494,1.085105,1.078249,1.079494,1.046135,1.040024,1.067815,1.024454,0.010894,70.671957,88.041799,69.130509,1.086076,1,0
2025-03-07,1.078795,1.088566,1.078214,1.078795,1.048146,1.0408,1.073944,1.022348,0.011309,69.015016,80.606868,76.442378,1.088398,1,1
2025-03-10,1.086059,1.087347,1.080625,1.086059,1.050891,1.041675,1.080483,1.021298,0.011588,71.960589,95.025188,87.891285,,0,0
2025-03-11,1.08394,1.093028,1.083682,1.08394,1.05358,1.042495,1.08496,1.022201,0.011682,73.320073,83.430567,86.354208,,0,0


In [167]:
import datetime as dt
new_data = yf.download('EURUSD=X',
                        start=dt.datetime.now().strftime('%Y-%m-%d'),
                        #  end='2021-12-31'
                         )

# Set the index to a datetime object
new_data.index = pd.to_datetime(new_data.index)


[*********************100%***********************]  1 of 1 completed


In [168]:
new_data

Price,Close,High,Low,Open,Volume
Ticker,EURUSD=X,EURUSD=X,EURUSD=X,EURUSD=X,EURUSD=X
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2025-03-18,1.09505,1.09577,1.089681,1.092657,0
