<a href="https://colab.research.google.com/github/treezy254/Stocks-Market-prediction/blob/master/Linear_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# Read the EUR/USD price history from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='eur_usd.csv')

In [None]:
# Discard incomplete rows in place so every remaining record is fully populated
df.dropna(inplace=True)

# Parse the 'date' strings into pandas datetimes
df['date'] = df['date'].pipe(pd.to_datetime)

# Label each row with the direction of the following close: +1 when the next
# close is higher, -1 otherwise. The final row has no following close (NaN),
# so its comparison is False and it is labelled -1.
df['direction'] = np.where(df['close'].lt(df['close'].shift(-1)), 1, -1)

In [None]:
# Chronological 80/20 split: the earliest rows train the model, the latest rows test it
train_size = int(0.8 * len(df))
train_data, test_data = df.iloc[:train_size], df.iloc[train_size:]

In [None]:
# Build the model inputs (close price as a single-column 2-D array) and the
# targets (direction labels as a 1-D array) for both partitions
X_train = train_data[['close']].to_numpy()
X_test = test_data[['close']].to_numpy()
y_train = train_data['direction'].to_numpy()
y_test = test_data['direction'].to_numpy()

In [None]:
# Least-squares fit of the direction label against the close price
model = LinearRegression().fit(X_train, y_train)

In [None]:
# Score the held-out rows; the regression output is continuous, not a ±1 label
y_pred = model.predict(X=X_test)

In [None]:
# Mean squared error between the true labels and the continuous predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print('Mean squared error:', mse)

In [None]:
# Overlay the actual labels and the model output against the test dates
test_dates = test_data['date']
for series, series_label in ((y_test, 'Actual'), (y_pred, 'Predicted')):
    plt.plot(test_dates, series, label=series_label)
plt.xlabel('Date')
plt.ylabel('Direction')
plt.title('Predicted vs. Actual Direction of EUR/USD Price Movement')
plt.legend()
plt.show()

LSTM

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [None]:
# Reload the raw EUR/USD CSV so the LSTM section starts from unmodified data
df = pd.read_csv(filepath_or_buffer='eur_usd.csv')

In [None]:
# Remove rows containing missing values (in place)
df.dropna(inplace=True)

# Turn the 'date' column into datetime values
df['date'] = pd.to_datetime(arg=df['date'])

# Binary target: 1 when the next close is above the current close, else 0.
# The last row has no next close (NaN), so it compares False and gets 0.
df['direction'] = np.where(df['close'].shift(-1).gt(df['close']), 1, 0)

In [None]:
# Normalize the input data.
# The scaler is fitted on the first 80% of rows only (the same share used for
# the training split below) so that the min/max of the held-out rows does not
# leak into training. Later rows are transformed with the training min/max and
# may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
close_values = df['close'].values.reshape(-1, 1)
scaler.fit(close_values[:int(len(df) * 0.8)])
df['close'] = scaler.transform(close_values).ravel()

In [None]:
# Keep time order: first 80% of rows for training, remaining 20% for testing
train_size = int(0.8 * len(df))
train_data, test_data = df.iloc[:train_size], df.iloc[train_size:]

In [None]:
def create_sequences(X, y, time_steps=1):
    """Turn a feature frame and label series into overlapping windows.

    Window ``k`` holds rows ``k`` .. ``k + time_steps - 1`` of ``X`` and is
    paired with the label at position ``k + time_steps`` of ``y`` (the entry
    immediately after the window).

    Args:
        X: pandas object supporting positional ``.iloc`` slicing and ``.values``.
        y: pandas Series of labels, indexed positionally via ``.iloc``.
        time_steps: number of consecutive rows per window.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays with
        ``len(X) - time_steps`` entries each (empty when that is not positive).
    """
    n_windows = len(X) - time_steps
    windows = [X.iloc[start:start + time_steps].values for start in range(n_windows)]
    labels = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(labels)

# Number of consecutive closes fed to the LSTM per sample
TIME_STEPS = 5

# Window each partition separately so no window straddles the train/test boundary
X_train, y_train = create_sequences(
    X=train_data[['close']], y=train_data['direction'], time_steps=TIME_STEPS)
X_test, y_test = create_sequences(
    X=test_data[['close']], y=test_data['direction'], time_steps=TIME_STEPS)

In [None]:
# Single LSTM layer over (TIME_STEPS, 1) windows, followed by a sigmoid unit
# that outputs the probability of an upward move
model = Sequential([
    LSTM(64, input_shape=(TIME_STEPS, 1)),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 50 passes over the training windows in mini-batches of 32
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    verbose=1,
)

In [None]:
# Sigmoid outputs for the held-out windows (one value per window)
y_pred = model.predict(x=X_test)

In [None]:
# Visualize the predicted and actual directions of the price movement.
# create_sequences pairs each window with the label TIME_STEPS rows later, so
# the first TIME_STEPS test dates have no prediction and are skipped here.
plot_dates = test_data['date'].iloc[TIME_STEPS:]
plt.plot(plot_dates, y_test, label='Actual')
# Round the sigmoid outputs to hard 0/1 labels for display
plt.plot(plot_dates, y_pred.round(), label='Predicted')
plt.xlabel('Date')
plt.ylabel('Direction')
plt.title('Predicted vs. Actual Direction of EUR/USD Price Movement')
plt.legend()
plt.show()

In [None]:
from sklearn.metrics import accuracy_score

# Threshold the sigmoid outputs at 0.5 by rounding, then score against the labels
y_pred_round = np.round(y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_round)
print("Accuracy: {:.2f}%".format(accuracy*100))

## Random forest (with the data preparation shared by both tree-based models)


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the EUR/USD OHLC data used by the tree-based models
df = pd.read_csv(filepath_or_buffer="EURUSD.csv")

In [None]:
# Feature engineering: period-over-period return of the close and a 0/1 flag
# for a positive return (the first row's return is NaN, so its flag is 0)
df["Returns"] = df["Close"].pct_change()
df["Direction"] = df["Returns"].gt(0).astype(int)

# Inputs are the raw OHLC columns. y_rf is the same-row direction; y_gb is the
# direction of the following row (its final entry is NaN after the shift).
X = df.loc[:, ["Open", "High", "Low", "Close"]]
y_rf = df["Direction"]
y_gb = y_rf.shift(-1)

# Chronological 80/20 split (no shuffling) applied to all three objects at once
(X_train, X_test,
 y_rf_train, y_rf_test,
 y_gb_train, y_gb_test) = train_test_split(X, y_rf, y_gb, test_size=0.2, shuffle=False)

In [None]:
# Fit a 100-tree, depth-5 random forest on the same-row direction labels and
# report its accuracy on the held-out rows
rf_model = RandomForestClassifier(max_depth=5, n_estimators=100).fit(X_train, y_rf_train)
y_rf_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_true=y_rf_test, y_pred=y_rf_pred)
print("Random Forests Accuracy: {:.2f}%".format(rf_accuracy*100))

## Gradient boosting


In [None]:
# Train and evaluate the Gradient Boosted Trees model.
# The target y_gb is Direction shifted by -1, i.e. row i is labelled with the
# direction of row i + 1, so prediction i already lines up with label i.
gb_model = GradientBoostingClassifier(n_estimators=100, max_depth=5)
gb_model.fit(X_train, y_gb_train)
y_gb_pred = gb_model.predict(X_test)
# The shift leaves NaN in the final label; drop that last position from BOTH
# the labels and the predictions so the remaining pairs stay aligned.
gb_accuracy = accuracy_score(y_gb_test[:-1], y_gb_pred[:-1])
print("Gradient Boosted Trees Accuracy: {:.2f}%".format(gb_accuracy*100))

online learning

This code loads the latest data for the EUR/USD currency pair from Yahoo Finance, preprocesses it by calculating the daily returns and direction, and scales the input features using the MinMaxScaler function from scikit-learn. It then defines an LSTM model architecture using the Sequential class from TensorFlow, compiles the model using the Adam optimizer and binary cross-entropy loss function, and trains the model on a sliding window of the data.

After training the model, the code makes a prediction for the direction of the next day's movement by feeding the most recent n_steps days of data into the model and using the output to predict the direction using a threshold of 0.5.

Note that this code is meant to demonstrate the basic steps involved in building an online machine learning model using LSTM, and should not be used for actual trading or investment purposes. Additionally, it's important to carefully evaluate the performance of the model using appropriate evaluation metrics and testing methodologies, and to take into account any potential biases or limitations in the data that may affect the model's performance.

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler

# Define the number of previous days to use for predicting the next day's movement
n_steps = 20

# Define the input features and output variable
features = ["Open", "High", "Low", "Close"]
target = "Direction"

# Load recent daily history from Yahoo Finance and preprocess it.
# The sliding-window loop below needs more than n_steps rows. A one-day period
# at daily resolution yields about one bar, which dropna() then removes because
# its return is NaN, so a full year of history is requested instead.
ticker = "EURUSD=X"
history_period = "1y"
df = yf.download(ticker, period=history_period, interval="1d")
if isinstance(df.columns, pd.MultiIndex):
    # Some yfinance releases return (field, ticker) column pairs even for a
    # single ticker; keep only the field level so df["Close"] is a Series.
    df.columns = df.columns.get_level_values(0)
# Work on an explicit copy so the column assignments below do not target a slice
df = df[features].copy()
df["Returns"] = df["Close"].pct_change()
df[target] = (df["Returns"] > 0).astype(int)
df.dropna(inplace=True)
if len(df) <= n_steps:
    raise ValueError(
        "Need more than {} rows of history, got {}".format(n_steps, len(df)))
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

# Define the LSTM model architecture
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(n_steps, len(features))))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation="sigmoid"))

# Compile the model and define the loss function and optimizer
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Train the model on a sliding window of the data: one gradient step per day,
# using the n_steps rows before day i as input and day i's direction as label
for i in range(n_steps, len(df)):
    X = np.array([df.iloc[i-n_steps:i][features]])
    y = np.array([df.iloc[i][target]])
    model.fit(X, y, epochs=1, verbose=0)

    # Make a prediction for the next day's movement from the window ending at day i
    X_next = np.array([df.iloc[i-n_steps+1:i+1][features]])
    y_pred = model.predict(X_next)[0][0]
    y_pred_direction = 1 if y_pred > 0.5 else 0
    print("Predicted Direction: {}".format(y_pred_direction))


another

The following is an example of an online learning model that predicts the change in the EUR/USD currency pair's movement for the next 5 minutes using a Gradient Boosting Regressor:

python
Copy code
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import MinMaxScaler

# Define the number of previous minutes to use for predicting the next 5 minutes' change
n_steps = 10

# Define the input features and output variable
features = ["Open", "High", "Low", "Close"]
target = "Change_5min"

# Load the latest data from Yahoo Finance and preprocess it
ticker = "EURUSD=X"
df = yf.download(ticker, period="1d", interval="1m")
if isinstance(df.columns, pd.MultiIndex):
    # Some yfinance releases return (field, ticker) column pairs even for a
    # single ticker; keep only the field level so df["Close"] is a Series.
    df.columns = df.columns.get_level_values(0)
# Work on an explicit copy so the column assignments below do not target a slice
df = df[features].copy()
# Relative change of the close between this row and the row 5 positions later
df[target] = df["Close"].pct_change(periods=5).shift(-5)
df.dropna(inplace=True)
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

# Initialize the Gradient Boosting Regressor. Squared error is the default
# loss; the former "ls" spelling is no longer accepted by current scikit-learn.
model = GradientBoostingRegressor(learning_rate=0.1, n_estimators=100, max_depth=3)

feature_values = df[features].to_numpy()
target_values = df[target].to_numpy()

# Train the model on a growing window of the data. scikit-learn estimators take
# 2-D input, so each (n_steps, n_features) window is flattened into one row.
# fit() retrains from scratch, so every window seen so far is passed each time;
# this is quadratic overall and can take a while on a full day of 1-minute bars.
X_seen, y_seen = [], []
for i in range(n_steps, len(df)):
    X_seen.append(feature_values[i - n_steps:i].ravel())
    y_seen.append(target_values[i])
    X = np.array(X_seen)
    y = np.array(y_seen)
    model.fit(X, y)

    # Make a prediction for the next 5 minutes' change from the window ending at row i
    X_next = feature_values[i - n_steps + 1:i + 1].reshape(1, -1)
    y_pred = model.predict(X_next)[0]
    print("Predicted Change in the Next 5 Minutes: {}".format(y_pred))
This code loads the latest data for the EUR/USD currency pair from Yahoo Finance, preprocesses it by calculating the change in the closing price for the next 5 minutes and scaling the input features using the MinMaxScaler function from scikit-learn. It then initializes a Gradient Boosting Regressor using the GradientBoostingRegressor class from scikit-learn, and trains the model on a sliding window of the data.

After training the model, the code makes a prediction for the change in the next 5 minutes' movement by feeding the most recent n_steps minutes of data into the model and using the output as the predicted change in the closing price.

Note that this code is meant to demonstrate the basic steps involved in building an online learning model using Gradient Boosting Regressor, and should not be used for actual trading or investment purposes. Additionally, it's important to carefully evaluate the performance of the model using appropriate evaluation metrics and testing methodologies, and to take into account any potential biases or limitations in the data that may affect the model's performance.

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import MinMaxScaler

# Define the number of previous minutes to use for predicting the next 5 minutes' change
n_steps = 10

# Define the input features and output variable
features = ["Open", "High", "Low", "Close"]
target = "Change_5min"

# Load the latest data from Yahoo Finance and preprocess it
ticker = "EURUSD=X"
df = yf.download(ticker, period="1d", interval="1m")
if isinstance(df.columns, pd.MultiIndex):
    # Some yfinance releases return (field, ticker) column pairs even for a
    # single ticker; keep only the field level so df["Close"] is a Series.
    df.columns = df.columns.get_level_values(0)
# Work on an explicit copy so the column assignments below do not target a slice
df = df[features].copy()
# Relative change of the close between this row and the row 5 positions later
df[target] = df["Close"].pct_change(periods=5).shift(-5)
df.dropna(inplace=True)
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

# Initialize the Gradient Boosting Regressor. Squared error is the default
# loss; the former "ls" spelling is no longer accepted by current scikit-learn.
model = GradientBoostingRegressor(learning_rate=0.1, n_estimators=100, max_depth=3)

feature_values = df[features].to_numpy()
target_values = df[target].to_numpy()

# Train the model on a growing window of the data. scikit-learn estimators take
# 2-D input, so each (n_steps, n_features) window is flattened into one row.
# fit() retrains from scratch, so every window seen so far is passed each time;
# this is quadratic overall and can take a while on a full day of 1-minute bars.
X_seen, y_seen = [], []
for i in range(n_steps, len(df)):
    X_seen.append(feature_values[i - n_steps:i].ravel())
    y_seen.append(target_values[i])
    X = np.array(X_seen)
    y = np.array(y_seen)
    model.fit(X, y)

    # Make a prediction for the next 5 minutes' change from the window ending at row i
    X_next = feature_values[i - n_steps + 1:i + 1].reshape(1, -1)
    y_pred = model.predict(X_next)[0]
    print("Predicted Change in the Next 5 Minutes: {}".format(y_pred))


## USING LSTM for 5 minute

This code first preprocesses the data in the same way as the previous example, and then splits it into a training set and a testing set. It then defines the LSTM model architecture using the Sequential class from Keras, which consists of two LSTM layers with 50 neurons each followed by a dense layer with a single output. The model is compiled using mean squared error as the loss function and the Adam optimizer.

The model is trained on the training set using batches of 32 and for 10 epochs. After training the model, the code evaluates its performance on the testing set using the mean squared error as the evaluation metric.

Finally, the code makes a prediction for the next 5 minutes' change by feeding the most recent n_steps minutes of data into the model and using the output as the predicted change in the closing price.

Note that, like the previous example, this code is meant to demonstrate the basic steps involved in building an LSTM model for online learning, and should not be used for actual trading or investment purposes. Additionally, it's important to carefully evaluate the performance of the model using appropriate evaluation metrics and testing methodologies.

In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Define the number of previous minutes to use for predicting the next 5 minutes' change
n_steps = 10

# Define the input features and output variable
features = ["Open", "High", "Low", "Close"]
target = "Change_5min"

# Load the latest data from Yahoo Finance and preprocess it
ticker = "EURUSD=X"
df = yf.download(ticker, period="1d", interval="1m")
if isinstance(df.columns, pd.MultiIndex):
    # Some yfinance releases return (field, ticker) column pairs even for a
    # single ticker; keep only the field level so df["Close"] is a Series.
    df.columns = df.columns.get_level_values(0)
# Work on an explicit copy so the column assignments below do not target a slice
df = df[features].copy()
# Relative change of the close between this row and the row 5 positions later
df[target] = df["Close"].pct_change(periods=5).shift(-5)
df.dropna(inplace=True)

# Split the data into training and testing sets (chronological 80/20).
# The scaler is fitted on the training rows only, so the min/max of the test
# rows does not leak into training; all rows are then transformed with it.
train_size = int(len(df) * 0.8)
scaler = MinMaxScaler()
scaler.fit(df[features].iloc[:train_size])
df[features] = scaler.transform(df[features])
train_data = df.iloc[:train_size]
test_data = df.iloc[train_size:]

# Define the LSTM model architecture
model = Sequential()
model.add(LSTM(50, input_shape=(n_steps, len(features)), return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))

# Compile the model using mean squared error as the loss function and Adam optimizer
model.compile(loss='mse', optimizer='adam')

# Train the model on the training set using batches of 32 and for 10 epochs.
# Each sample is the n_steps rows before row i; its target is row i's change.
train_values = train_data[features].to_numpy()
X_train = np.array([train_values[i - n_steps:i] for i in range(n_steps, len(train_data))])
y_train = train_data[target].to_numpy()[n_steps:]
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model on the testing set, windowed the same way as the training
# set so every sample has the (n_steps, n_features) shape the model expects
test_values = test_data[features].to_numpy()
X_test = np.array([test_values[i - n_steps:i] for i in range(n_steps, len(test_data))])
y_test = test_data[target].to_numpy()[n_steps:]
score = model.evaluate(X_test, y_test, batch_size=32)
print("Test set Mean Squared Error: {}".format(score))

# Make a prediction for the next 5 minutes' change
X_next = np.array([df.iloc[-n_steps:][features]])
y_pred = model.predict(X_next)[0][0]
print("Predicted Change in the Next 5 Minutes: {}".format(y_pred))

ModuleNotFoundError: ignored

visualize


This code first preprocesses the data in the same way as the previous example, and then splits it into a training set and a testing set. It then defines the LSTM model architecture using the Sequential class from Keras, which consists of two LSTM layers with 50 neurons each followed by a dense layer with a single output. The model is compiled using mean squared error as the loss function and the Adam optimizer.

The model is trained on the training set using batches of 32 and for 10 epochs. After training the model, the code evaluates its performance on the testing set using the mean squared error as the evaluation metric.

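Finally, the code predicts the next 5 minutes' change from the most recent n_steps minutes of data and plots the actual and predicted changes for the testing set.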

In [2]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

# Define the number of previous minutes to use for predicting the next 5 minutes' change
n_steps = 10

# Define the input features and output variable
features = ["Open", "High", "Low", "Close"]
target = "Change_5min"

# Load the latest data from Yahoo Finance and preprocess it
ticker = "EURUSD=X"
df = yf.download(ticker, period="1d", interval="1m")
if isinstance(df.columns, pd.MultiIndex):
    # Some yfinance releases return (field, ticker) column pairs even for a
    # single ticker; keep only the field level so df["Close"] is a Series.
    df.columns = df.columns.get_level_values(0)
# Work on an explicit copy so the column assignments below do not target a slice
df = df[features].copy()
# Relative change of the close between this row and the row 5 positions later
df[target] = df["Close"].pct_change(periods=5).shift(-5)
df.dropna(inplace=True)

# Split the data into training and testing sets (chronological 80/20).
# The scaler is fitted on the training rows only, so the min/max of the test
# rows does not leak into training; all rows are then transformed with it.
train_size = int(len(df) * 0.8)
scaler = MinMaxScaler()
scaler.fit(df[features].iloc[:train_size])
df[features] = scaler.transform(df[features])
train_data = df.iloc[:train_size]
test_data = df.iloc[train_size:]

# Define the LSTM model architecture
model = Sequential()
model.add(LSTM(50, input_shape=(n_steps, len(features)), return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))

# Compile the model using mean squared error as the loss function and Adam optimizer
model.compile(loss='mse', optimizer='adam')

# Train the model on the training set using batches of 32 and for 10 epochs.
# Each sample is the n_steps rows before row i; its target is row i's change.
train_values = train_data[features].to_numpy()
X_train = np.array([train_values[i - n_steps:i] for i in range(n_steps, len(train_data))])
y_train = train_data[target].to_numpy()[n_steps:]
model.fit(X_train, y_train, epochs=10, batch_size=32)

# Evaluate the model on the testing set, windowed the same way as the training
# set so every sample has the (n_steps, n_features) shape the model expects
test_values = test_data[features].to_numpy()
X_test = np.array([test_values[i - n_steps:i] for i in range(n_steps, len(test_data))])
y_test = test_data[target].to_numpy()[n_steps:]
score = model.evaluate(X_test, y_test, batch_size=32)
print("Test set Mean Squared Error: {}".format(score))

# Make a prediction for the next 5 minutes' change
X_next = np.array([df.iloc[-n_steps:][features]])
y_pred = model.predict(X_next)[0][0]
print("Predicted Change in the Next 5 Minutes: {}".format(y_pred))

# Visualize the performance of the model on the testing set.
# The target column is never passed through the scaler (only the four feature
# columns are), so predictions and targets are plotted as-is; calling
# scaler.inverse_transform on a single column would not match the four
# columns the scaler was fitted on.
y_pred_test = model.predict(X_test).ravel()
plt.figure(figsize=(12,6))
plt.plot(y_test, label="Actual")
plt.plot(y_pred_test, label="Predicted")
plt.title("LSTM Model Performance")
plt.xlabel("Minutes")
plt.ylabel("Change in Closing Price")
plt.legend()
plt.show()


ModuleNotFoundError: ignored