### This script concatenates the tick data and cleans it for future backtesting.

You can download the monthly tick data directly from Binance:\
https://data.binance.vision/?prefix=data/futures/um/monthly/trades/

Save downloaded data to:\
`/crypto_hedge_scalping_bot/backtest/raw_data`

In [None]:
import os
import pandas as pd 

In [None]:

# Use the current working directory as the base directory; the raw_data and
# cleaned_data paths in the following cells are built by appending to it.
# NOTE: os.getcwd() returns the process's working directory, which is not
# necessarily the directory that contains this script/notebook.
# NOTE: the name `dir` shadows the Python builtin of the same name.
dir = os.getcwd()

In [None]:
# Load every raw trade file under <dir>/raw_data, merge them into a single
# price/time series, drop consecutive repeated prices, and write the result
# to <dir>/cleaned_data/<asset>.feather.
raw_dir = os.path.join(dir, 'raw_data')
cleaned_dir = os.path.join(dir, 'cleaned_data')

# Collect the full path of every file in the tree rooted at raw_dir.
# Sorting makes the load order deterministic, because os.walk yields
# directory entries in arbitrary order.
file_paths = sorted(
    os.path.join(root, filename)
    for root, _directories, files in os.walk(raw_dir)
    for filename in files
)
if not file_paths:
    raise FileNotFoundError(f'No raw tick data files found in {raw_dir}')

# The asset name is the part of the file name before the first '-'.
# NOTE(review): every file found is merged into one series and saved under
# the asset name of the last file; presumably raw_data holds files for a
# single asset only - confirm.
asset = os.path.basename(file_paths[-1]).split('-')[0]

# Read each file, keeping only columns 1 and 4 as price and time.
frames = []
for file_path in file_paths:
    frame = pd.read_csv(file_path, usecols=[1, 4])
    frame.columns = ['price', 'time']
    frame['time'] = pd.to_datetime(frame['time'], unit='ms')
    frame['price'] = frame['price'].astype('float32')
    frames.append(frame)

# Concatenate once: calling pd.concat inside the loop would re-copy all
# previously accumulated rows on every iteration.
data = pd.concat(frames)

# Cleaning the data - delete duplicates in tick data, because we need the
# price, not the volume. A stable sort keeps rows that share a timestamp in
# their original file order, so the consecutive-duplicate filter below sees
# them in a deterministic sequence.
data = data.sort_values(by='time', kind='mergesort')
data = data.loc[data['price'].ne(data['price'].shift())]

# Feather requires a default index, so reset it before writing.
data.reset_index(drop=True, inplace=True)

# Writing to feather format (create the output directory if it is missing):
os.makedirs(cleaned_dir, exist_ok=True)
data.to_feather(os.path.join(cleaned_dir, f'{asset}.feather'))

In [None]:
# Writing to other different formats if needed:
# data.to_csv(dir+f'/cleaned_data/{asset}.csv',index=False) 
# data.to_parquet(dir+f'/cleaned_data/{asset}.parquet') 
# data.to_hdf(dir+f'/cleaned_data/{asset}.hdf',key='data')

In [None]:
# Double-check the data: reload the cleaned feather file and show its first
# rows, its last rows, and a summary of its columns.
check = pd.read_feather(dir+f'/cleaned_data/{asset}.feather')
print(check.head())
print(check.tail(),'\n')
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit an extra "None" line.
check.info()