In [None]:
# Colab-only setup: mount Google Drive at /content/drive so the project files
# stored under MyDrive can be read and written by the cells below.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm

In [None]:
ROOT_PATH = '/content/drive/MyDrive/MBAn/Sensorimotor/project'
directory = ROOT_PATH + '/data/'

# Discover the per-symbol CSV files in the data directory.
#
# A new, filtered list is built instead of calling .remove() on the list that is
# being iterated: removing during iteration shifts the remaining items left, so
# the entry right after every removed one is never examined and can slip through.
#
# Kept files must
#   * not be hidden (leading '.'),
#   * be CSV files, and
#   * contain the 'USDT' marker the symbol is parsed from. This also keeps the
#     derived df_*.csv tables, which are written into this same directory, from
#     being picked up as stocks when the notebook is re-run.
# os.listdir() returns entries in arbitrary order, hence the explicit sort.
files_tags = sorted(
    file for file in os.listdir(directory)
    if not file.startswith('.') and file.endswith('.csv') and 'USDT' in file
)

# Symbol = text before 'USDT' in the file name; `stocks` holds the file names
# themselves, in the same order as `stock_name`.
stock_name = [file.split('USDT')[0] for file in files_tags]
stocks = list(files_tags)
print(len(stock_name) == len(stocks))
print('There are {} different stocks.'.format(len(stock_name)))

True
There are 16 different stocks.


In [None]:
# Show the symbols parsed from the file names (the text before 'USDT').
stock_name

['ADA',
 'ATOM',
 'AVAX',
 'BNB',
 'BTC',
 'DOGE',
 'DOT',
 'ETH',
 'LINK',
 'LTC',
 'MATIC',
 'SHIB',
 'SOL',
 'TRX',
 'UNI',
 'XRP']

In [None]:
# Load the first discovered file as-is to inspect its raw columns.
df = pd.read_csv(directory + stocks[0])
df.head()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,open_time_ms,close_time_ms
0,1640995200000,1.308,1.318,1.307,1.314,1209224.3,1640995499999,2022-01-01 00:00:00,2022-01-01 00:04:59.999
1,1640995500000,1.315,1.32,1.313,1.316,298386.5,1640995799999,2022-01-01 00:05:00,2022-01-01 00:09:59.999
2,1640995800000,1.315,1.318,1.313,1.317,389329.2,1640996099999,2022-01-01 00:10:00,2022-01-01 00:14:59.999
3,1640996100000,1.317,1.318,1.315,1.317,221077.2,1640996399999,2022-01-01 00:15:00,2022-01-01 00:19:59.999
4,1640996400000,1.316,1.322,1.316,1.322,189448.2,1640996699999,2022-01-01 00:20:00,2022-01-01 00:24:59.999


In [None]:
def get_12_returns(df, window=12):
    """Attach a column holding, for each row, the list of the preceding returns.

    Returns are the simple percentage changes of ``df['close']``. For row ``j``
    (positional) with ``j >= window`` the stored list is the returns of rows
    ``j - window`` .. ``j - 1``; the return of row ``j`` itself is not included.
    Because the very first percentage change is NaN, the first complete window
    (row ``window``) starts with a NaN entry.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``close`` column. It is modified in place.
    window : int, default 12
        Number of past returns per row. With the default the column is named
        ``rt_12``; otherwise ``rt_<window>``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new column added. The first ``window``
        rows hold NaN; if ``df`` has fewer rows than ``window`` every row is NaN.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError('window must be a positive integer')

    returns = df['close'].pct_change().to_list()
    n_rows = len(returns)

    # Rows without a full look-back get NaN. The padding is capped at the frame
    # length so a frame shorter than `window` yields an all-NaN column instead
    # of failing with a length mismatch on assignment.
    padding = [np.nan] * min(window, n_rows)

    # range() of a negative number is empty, so short frames add no windows.
    windows = [returns[start:start + window] for start in range(n_rows - window)]

    df[f'rt_{window}'] = padding + windows
    return df

In [None]:
# Read every discovered CSV into a dict keyed by its symbol.
# `stock_name` and `stocks` are parallel lists, so they are walked together.
stocks_data = {}
for symbol, stock in zip(stock_name, stocks):
    df = (
        pd.read_csv(directory + stock)
        # parse the human-readable timestamp strings into datetimes
        .assign(
            close_time_ms=lambda frame: pd.to_datetime(frame['close_time_ms']),
            open_time_ms=lambda frame: pd.to_datetime(frame['open_time_ms']),
        )
        # the raw epoch columns are no longer needed once the datetimes exist
        .drop(columns=['open_time', 'close_time'])
    )
    stocks_data[symbol] = df

In [None]:
# Inspect the loaded BTC frame (raw open_time / close_time columns already dropped).
stocks_data['BTC']

Unnamed: 0,open,high,low,close,volume,open_time_ms,close_time_ms
0,46216.93,46391.49,46208.37,46321.34,185.67558,2022-01-01 00:00:00,2022-01-01 00:04:59.999
1,46321.34,46527.26,46280.00,46371.11,123.43577,2022-01-01 00:05:00,2022-01-01 00:09:59.999
2,46369.79,46394.00,46276.22,46332.51,77.54574,2022-01-01 00:10:00,2022-01-01 00:14:59.999
3,46332.52,46332.52,46236.27,46293.90,101.14315,2022-01-01 00:15:00,2022-01-01 00:19:59.999
4,46295.42,46421.27,46286.25,46395.53,135.32479,2022-01-01 00:20:00,2022-01-01 00:24:59.999
...,...,...,...,...,...,...,...
127995,28006.90,28081.96,27955.00,28058.54,2393.21134,2023-03-21 10:15:00,2023-03-21 10:19:59.999
127996,28059.92,28177.00,28037.79,28101.02,3460.08993,2023-03-21 10:20:00,2023-03-21 10:24:59.999
127997,28098.29,28167.54,28024.69,28044.52,2550.36149,2023-03-21 10:25:00,2023-03-21 10:29:59.999
127998,28044.52,28085.95,28018.44,28049.77,1776.41783,2023-03-21 10:30:00,2023-03-21 10:34:59.999


In [None]:
def get_features(feature):
    """Build one wide table of a single feature across all loaded symbols.

    Reads the notebook-level ``stocks_data`` dict and ``stock_name`` list.

    Parameters
    ----------
    feature : str
        Column to extract from every per-symbol frame, e.g. 'open', 'close',
        'high', 'low' or 'volume'.

    Returns
    -------
    pandas.DataFrame
        An ``open_time_ms`` column followed by one ``<symbol>_<feature>`` column
        per symbol, in ``stock_name`` order. Frames are combined with the
        default (inner) merge on ``open_time_ms``, so only timestamps present
        for every symbol are kept.
    """

    def _labelled(symbol):
        # timestamp + requested feature, with the feature column tagged by symbol
        subset = stocks_data[symbol][['open_time_ms', feature]]
        return subset.rename(columns={feature: f'{symbol}_{feature}'})

    merged = _labelled(stock_name[0])
    for symbol in stock_name[1:]:
        merged = merged.merge(_labelled(symbol), on='open_time_ms')
    return merged

In [None]:
# One wide table per feature, each with a column per symbol.
df_open, df_close, df_volume = (
    get_features(feature) for feature in ('open', 'close', 'volume')
)

In [None]:
# Add a synthetic USD column to each table: constant price 1.0 for open/close
# and zero volume (presumably the cash leg alongside the crypto columns - confirm).
df_open['USD_open'] = np.full(len(df_open), 1.0)
df_close['USD_close'] = np.full(len(df_close), 1.0)
df_volume['USD_volume'] = np.full(len(df_volume), 0.0)

In [None]:
# Persist the three wide feature tables as CSV (without the index column).
# Each table goes to its own file: df_volume.csv must be written from df_volume,
# otherwise it ends up holding a copy of the close prices.
# NOTE: the files are written into the same directory that holds the raw data.
df_open.to_csv(directory+'df_open.csv', index=False)
df_close.to_csv(directory+'df_close.csv', index=False)
df_volume.to_csv(directory+'df_volume.csv', index=False)