In [7]:
"""
These data are downloaded from Tickstore (https://www.tickstore.com/) directly as CSV files.
""";

In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

# Build the CSV location with os.path.join so the separator always matches the
# host OS. The path is resolved against the notebook's current working directory.
import os

base_dir = os.path.abspath(os.path.join("..", "10_backtrader", "01_CDL_Patterns_Strategy"))

# The CSV is expected in the "data" sub-folder; if it lives elsewhere
# (e.g. "01_data"), change the folder name on the next line.
path = os.path.join(base_dir, "data", "USA500IDXUSD_H1_raw.csv")

# Fail fast when the file is missing: list what is actually in the base
# directory to help locate it, then raise instead of falling through to
# pd.read_csv.
if not os.path.exists(path):
    print(f"File not found at: {path}")
    print("Available directories in 10_backtrader/01_CDL_Patterns_Strategy:")
    if os.path.exists(base_dir):
        for item in sorted(os.listdir(base_dir)):
            print(f"  - {item}")
    else:
        print("Base directory doesn't exist")
    raise FileNotFoundError(f"File not found at: {path}")

print("File found")

# Read the CSV file
df = pd.read_csv(path)

# Normalise the header to lower case so every column can be addressed uniformly.
df.columns = df.columns.str.lower()

# Pull the raw date out of the frame as text; it is sliced positionally below
# (characters 0-4 -> year, 4-6 -> month, 6-8 -> day).
date_text = df.pop('date').astype(str)
for part, (start, stop) in (('year', (0, 4)), ('month', (4, 6)), ('day', (6, 8))):
    df[part] = date_text.str.slice(start, stop).astype(int)

# Assemble a datetime from the three integer parts and install it as the index
# under the name 'date'. The year/month/day columns remain on the frame.
df = df.set_index(pd.to_datetime(df[['year', 'month', 'day']]).rename('date'))

# Work on a text copy of the raw time column. The layout is detected from the
# first row only, so the column is assumed to use one format throughout.
time_text = df['timestamp'].astype(str)

if ':' in str(df['timestamp'].iloc[0]):
    # Colon-separated layout (e.g. HH:MM): the first two fields are hour and minute.
    fields = time_text.str.split(':', expand=True)
    hours = fields[0].astype(int)
    minutes = fields[1].astype(int)
else:
    # Compact HHMM layout: left-pad to four digits, then slice positionally.
    padded = time_text.str.zfill(4)
    hours = padded.str.slice(0, 2).astype(int)
    minutes = padded.str.slice(2, 4).astype(int)

# The raw time and the year/month/day helper columns are no longer needed.
df = df.drop(columns=['timestamp', 'year', 'month', 'day'])

# Build "H:M:0" strings with vectorised concatenation (no row-wise Python join
# and no scratch hour/minute columns on the frame), then parse them into
# datetime.time values; seconds are always zero.
clock_text = hours.astype(str) + ':' + minutes.astype(str) + ':0'
df['timestamp'] = pd.to_datetime(clock_text, format='%H:%M:%S').dt.time

# Move 'timestamp' to the front, keeping the other columns in their current order.
leading = 'timestamp'
ordered = [leading, *df.columns.drop(leading)]
df = df.loc[:, ordered]

# Preview the first rows of the prepared frame.
df.head()

File found


Unnamed: 0_level_0,timestamp,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-09-30,02:00:00,2969.67,2971.17,2968.17,2969.67,1.64595
2019-09-30,03:00:00,2969.87,2975.67,2969.67,2973.67,2.23279
2019-09-30,04:00:00,2973.869,2977.17,2973.17,2975.37,0.99898
2019-09-30,05:00:00,2975.67,2976.17,2974.369,2975.67,0.6973
2019-09-30,06:00:00,2975.67,2975.67,2971.77,2972.17,0.68437
