In [1]:
ls

forex.csv  forex.ipynb  requirements.txt


In [24]:
# Importing necessary libraries
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical operations
from sklearn.model_selection import train_test_split, GridSearchCV  # For splitting the data and performing hyperparameter tuning
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor  # For machine learning models
from sklearn.metrics import mean_squared_error  # For evaluating the model performance
import xgboost as xgb  # For XGBoost machine learning model
import matplotlib.pyplot as plt  # For data visualization


In [25]:
!pip install xgboost



In [30]:
# Function to load CSV data
def load_data(file_path):
    """Read the CSV file at ``file_path`` and return its contents as a DataFrame."""
    return pd.read_csv(file_path)

# Load the data
file_path = 'forex.csv'  # Relative path to the CSV file
df = load_data(file_path)

# Show the header exactly as it was read, before any clean-up
print("Original columns:", df.columns)

# Normalise the header: trim surrounding whitespace first, then drop double quotes
df.columns = [name.strip().replace('"', '') for name in df.columns]

# Show the header again so the effect of the clean-up is visible
print("Cleaned columns:", df.columns)

# Preview both ends of the dataset
print("First five rows of the dataset:", df.head(), sep="\n")
print("\nLast five rows of the dataset:", df.tail(), sep="\n")



Original columns: Index(['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %'], dtype='object')
Cleaned columns: Index(['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %'], dtype='object')
First five rows of the dataset:
         Date   Price    Open    High     Low  Vol. Change %
0  07/19/2024  171.30  171.48  171.90  170.90   NaN   -0.08%
1  07/18/2024  171.44  170.82  171.60  169.98   NaN    0.38%
2  07/17/2024  170.79  172.59  172.86  170.69   NaN   -1.01%
3  07/16/2024  172.54  172.17  172.96  172.13   NaN    0.23%
4  07/15/2024  172.14  172.20  172.57  171.57   NaN   -0.03%

Last five rows of the dataset:
            Date   Price    Open    High     Low  Vol. Change %
6397  01/07/2000  108.44  108.67  109.03  107.74   NaN   -0.20%
6398  01/06/2000  108.66  107.65  109.31  107.14   NaN    0.99%
6399  01/05/2000  107.59  106.34  107.75  105.65   NaN    1.04%
6400  01/04/2000  106.48  104.02  106.60  103.92   NaN    2.10%
6401  01/03/2000  104.29  102.66  104.39  102.0

In [31]:
# Function to preprocess the data
def preprocess_data(df):
    """Build the feature matrix ``X`` and target ``y`` from the raw price frame.

    All work happens on a copy, so the caller's ``df`` is left untouched and
    the function can be called repeatedly on the same frame.

    Steps:
      1. Parse ``Date`` (``MM/DD/YYYY``), make it the index and sort it in
         ascending order so that every ``diff`` means "this row minus the
         previous (earlier) row".
      2. Add row-over-row differences of ``Price``, ``Open``, ``High``,
         ``Low``, ``Change %`` (percent sign stripped) and ``Vol.``.
      3. Treat empty or missing ``Vol.`` values as 0 so that a volume column
         without data does not cause rows to be discarded.
      4. Drop the rows that still contain NaN (at least the earliest row,
         whose differences are undefined).

    Args:
        df: DataFrame with the columns ``Date``, ``Price``, ``Open``,
            ``High``, ``Low``, ``Vol.`` and ``Change %``.

    Returns:
        Tuple ``(X, y)``: ``X`` is a DataFrame holding the ten feature
        columns, ``y`` is the ``Price`` Series; both are indexed by date in
        ascending order.

    Raises:
        KeyError: if one of the required columns is missing.
        ValueError: if a ``Date`` value does not match ``MM/DD/YYYY`` or a
            numeric column cannot be converted to float.
    """
    print("Preprocessing data...")
    print("Before preprocessing:")
    print(df.head())

    # Copy first: mutating the argument made a second call fail with
    # KeyError('Date') because the column had already been moved to the index.
    data = df.copy()
    data['Date'] = pd.to_datetime(data['Date'], format='%m/%d/%Y')  # Correct date format

    # Sort chronologically before differencing; on a newest-first frame
    # diff() would subtract the following day instead of the previous one.
    data = data.set_index('Date').sort_index()

    for column in ('Price', 'Open', 'High', 'Low'):
        data[f'{column}_diff'] = data[column].diff()
    change_pct = data['Change %'].str.replace('%', '', regex=False).astype(float)
    data['Change_diff'] = change_pct.diff()

    # replace('') does not touch NaN, so fill it explicitly; otherwise the
    # dropna() below discards every row that has no volume value.
    data['Vol.'] = data['Vol.'].replace('', 0).astype(float).fillna(0.0)
    data['Vol_diff'] = data['Vol.'].diff()

    data = data.dropna()

    print("After preprocessing:")
    print(data.head())

    # NOTE(review): Price_diff is derived from the same row's Price, which is
    # also the target -- confirm this is intended and not target leakage.
    feature_columns = [
        'Open', 'High', 'Low', 'Vol.',
        'Price_diff', 'Open_diff', 'High_diff', 'Low_diff',
        'Vol_diff', 'Change_diff',
    ]
    X = data[feature_columns]
    y = data['Price']

    print("Data preprocessing completed.")
    return X, y
