In [2]:

import pandas as pd
import numpy as np

# Read the raw CSV export into a DataFrame
df = pd.read_csv("/content/eurusd_fx.csv")

# Rows labelled 0 and 1 are not observations; discard them and renumber from 0
df = df.drop(index=[0, 1]).reset_index(drop=True)

# The 'Price' column is relabelled 'Date' and parsed as datetimes;
# anything unparseable becomes NaT instead of raising
df = df.rename(columns={'Price': 'Date'})
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Coerce the price/volume columns to numbers in one pass;
# non-numeric entries become NaN instead of raising
cols_to_numeric = ['Close', 'High', 'Low', 'Open', 'Volume']
df[cols_to_numeric] = df[cols_to_numeric].apply(pd.to_numeric, errors='coerce')


In [3]:

# Remove exact duplicate rows, then every row that still contains a missing value
# (this includes any NaT/NaN values present in the frame at this point)
df = df.drop_duplicates().dropna()


In [4]:

# Remove outliers using IQR
def remove_outliers_iqr(dataframe, columns, multiplier=1.5):
    """Drop rows whose values fall outside the IQR fences of the given columns.

    For each column in turn, the first and third quartiles (Q1, Q3) are
    computed and only rows with a value inside
    ``[Q1 - multiplier * IQR, Q3 + multiplier * IQR]`` (inclusive) are kept.

    Note: columns are processed sequentially, so the quartiles of a later
    column are computed on the rows that survived the earlier columns. The
    result can therefore depend on the order of ``columns``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input data. It is not modified; a filtered frame is returned.
    columns : iterable of str
        Numeric columns to screen. If empty, ``dataframe`` is returned as is.
    multiplier : float, default 1.5
        Width of the fences in units of the IQR. Must be non-negative.

    Returns
    -------
    pandas.DataFrame
        The rows of ``dataframe`` that lie within the fences of every column.

    Raises
    ------
    ValueError
        If ``multiplier`` is negative.
    """
    if multiplier < 0:
        raise ValueError("multiplier must be non-negative")

    filtered = dataframe
    for col in columns:
        q1 = filtered[col].quantile(0.25)
        q3 = filtered[col].quantile(0.75)
        spread = q3 - q1
        lower = q1 - multiplier * spread
        upper = q3 + multiplier * spread
        # between() is inclusive on both ends, matching >= lower and <= upper;
        # NaN values compare False and are therefore dropped.
        filtered = filtered[filtered[col].between(lower, upper)]
    return filtered

# Screen the four price columns for outliers; 'Volume' is not part of the check
df = remove_outliers_iqr(
    dataframe=df,
    columns=['Close', 'High', 'Low', 'Open'],
)


In [5]:

# Write the cleaned frame to a CSV in the working directory, omitting the index
df.to_csv(path_or_buf="eurusd_fx_cleaned.csv", index=False)
print("Cleaned dataset saved as 'eurusd_fx_cleaned.csv'")


Cleaned dataset saved as 'eurusd_fx_cleaned.csv'


In [6]:
# Reload the cleaned file from the same relative path it was saved to, so this
# check does not depend on the working directory being /content.
cdf = pd.read_csv("eurusd_fx_cleaned.csv")
cdf.head()

Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2020-01-01,1.122083,1.122838,1.115947,1.122083,0
1,2020-01-02,1.122083,1.122712,1.116682,1.121894,0
2,2020-01-03,1.117144,1.118068,1.11257,1.117081,0
3,2020-01-06,1.116196,1.120825,1.11581,1.116246,0
4,2020-01-07,1.119799,1.119946,1.113487,1.119583,0
