# <span style="color:#ff5f27;">🛠 BTC timeseries Feature Engineering</span>
---

### <span style="color:#ff5f27;"> 📝 Imports</span>

In [1]:
import pandas as pd
import numpy as np


import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import datetime

### <span style="color:#ff5f27;"> 💽 Loading Data</span>

In [2]:
# Read the raw BTC candles: the first CSV column becomes the index and
# the "date" column is parsed into datetimes.
df = pd.read_csv(
    "data/btc_raw_600.csv",
    parse_dates=["date"],
    index_col=0,
)

# Preview the first rows.
df.head()

Unnamed: 0,date,open,high,low,close,volume,quote_av,trades,tb_base_av,tb_quote_av,unix
0,2021-01-02,29331.7,33300.0,28946.53,32178.33,129993.873362,4073842000.0,2245922,67446.305246,2110335000.0,1609542000000
1,2021-01-03,32176.45,34778.11,31962.99,33000.05,120957.56675,4057598000.0,2369698,59750.332871,2004428000.0,1609628400000
2,2021-01-04,33000.05,33600.0,28130.0,31988.71,140899.88569,4429010000.0,2642408,69088.46923,2173435000.0,1609714800000
3,2021-01-05,31989.75,34360.0,29900.0,33949.53,116049.997038,3743617000.0,2526851,59691.754755,1927195000.0,1609801200000
4,2021-01-06,33949.53,36939.21,33288.0,36769.36,127139.20131,4431954000.0,2591783,63052.914652,2199632000.0,1609887600000


In [3]:
# The tweets (Kaggle dataset) only cover this time range, so BTC rows outside of it are dropped.
# The bounds are hardcoded to match the tweets' time range.
range_start = '2021-02-05 10:00:00'
range_end = '2022-06-04 23:00:00'
in_tweet_range = (df.date >= range_start) & (df.date <= range_end)

# Keep the matching rows and renumber them from 0.
df = df[in_tweet_range].reset_index(drop=True)

In [4]:
# Display the filtered frame (the rendered output elides the middle rows).
df

Unnamed: 0,date,open,high,low,close,volume,quote_av,trades,tb_base_av,tb_quote_av,unix
0,2021-02-06,38289.32,40955.51,38215.94,39186.94,98757.311183,3.922095e+09,2291646,52015.513362,2.065181e+09,1612566000000
1,2021-02-07,39181.01,39700.00,37351.00,38795.69,84363.679763,3.256521e+09,1976357,40764.388959,1.574483e+09,1612652400000
2,2021-02-08,38795.69,46794.45,37988.89,46374.87,138597.536914,5.881537e+09,3230961,72345.891568,3.069314e+09,1612738800000
3,2021-02-09,46374.86,48142.19,44961.09,46420.42,115499.861712,5.386255e+09,3119034,57429.564347,2.679451e+09,1612825200000
4,2021-02-10,46420.42,47310.00,43727.00,44807.58,97154.182200,4.431650e+09,2891592,47971.985731,2.190231e+09,1612911600000
...,...,...,...,...,...,...,...,...,...,...,...
479,2022-05-31,31734.23,32399.00,31200.01,31801.04,62433.116320,1.981259e+09,1161340,30046.955110,9.535746e+08,1653948000000
480,2022-06-01,31801.05,31982.97,29301.00,29805.83,103395.633820,3.171191e+09,1404611,47614.812050,1.460767e+09,1654034400000
481,2022-06-02,29805.84,30689.00,29594.55,30452.62,56961.429280,1.711653e+09,1086183,28555.066070,8.581935e+08,1654120800000
482,2022-06-03,30452.63,30699.00,29282.36,29700.21,54067.447270,1.615617e+09,993769,26583.251410,7.943548e+08,1654207200000


In [5]:
# (rows, columns) of the filtered frame.
df.shape

(484, 11)

In [6]:
# Column dtypes and non-null counts of the filtered frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 484 entries, 0 to 483
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         484 non-null    datetime64[ns]
 1   open         484 non-null    float64       
 2   high         484 non-null    float64       
 3   low          484 non-null    float64       
 4   close        484 non-null    float64       
 5   volume       484 non-null    float64       
 6   quote_av     484 non-null    float64       
 7   trades       484 non-null    int64         
 8   tb_base_av   484 non-null    float64       
 9   tb_quote_av  484 non-null    float64       
 10  unix         484 non-null    int64         
dtypes: datetime64[ns](1), float64(8), int64(2)
memory usage: 41.7 KB


---

# <span style="color:#ff5f27;">🛠 Feature Engineering</span>

### <span style="color:#ff5f27;"> 📌 Functions Building</span>

In [7]:
def moving_average(df, window=7):
    '''
     Add the simple moving average of the close price to `df`.

     The values are written to a new column named `mean_{window}_days`.
     Rows that do not yet have a full window of observations are NaN.
     `df` is modified in place and also returned.
    '''
    column_name = f'mean_{window}_days'
    rolling_close = df['close'].rolling(window=window)
    df[column_name] = rolling_close.mean()
    return df

def moving_std(df, window):
    '''
     Add the rolling standard deviation of the close price to `df`.

     The values are written to a new column named `std_{window}_days`.
     Rows that do not yet have a full window of observations are NaN.
     `df` is modified in place and also returned.
    '''
    column_name = f'std_{window}_days'
    rolling_close = df['close'].rolling(window=window)
    df[column_name] = rolling_close.std()
    return df

def exponential_moving_average(df, window):
    '''
     Add the exponentially weighted mean of the close price to `df`.

     `window` is used as the `span` of the exponential weighting and the
     values are written to a new column named `exp_mean_{window}_days`.
     `df` is modified in place and also returned.
    '''
    column_name = f'exp_mean_{window}_days'
    weighted_close = df['close'].ewm(span=window)
    df[column_name] = weighted_close.mean()
    return df

def exponential_moving_std(df, window):
    '''
     Add the exponentially weighted standard deviation of the close price to `df`.

     `window` is used as the `span` of the exponential weighting and the
     values are written to a new column named `exp_std_{window}_days`.
     `df` is modified in place and also returned.
    '''
    column_name = f'exp_std_{window}_days'
    weighted_close = df['close'].ewm(span=window)
    df[column_name] = weighted_close.std()
    return df

def momentum_price(df, window):
    '''
     Add the price momentum to `df`: the close price minus the close price
     `window` rows earlier, i.e. how far the price moved over that span.

     The values are written to a new column named `momentum_{window}_days`.
     `df` is modified in place and also returned.
    '''
    column_name = f'momentum_{window}_days'
    close_price = df['close']
    df[column_name] = close_price.diff(window)
    return df

def rate_of_change(df, window):
    '''
     Add the rate of change (ROC) of the close price to `df`, in percent.

     Assets with higher ROC values are considered more likely to be overbought;
     lower values suggest the asset is likely oversold.

     Note that the comparison lag is `window - 1` rows, not `window`.
     The values are written to a new column named `rate_of_change_{window}_days`.
     `df` is modified in place and also returned.
    '''
    lag = window - 1
    close_price = df['close']
    price_change = close_price.diff(lag)   # close now minus close `lag` rows ago
    base_price = close_price.shift(lag)    # close `lag` rows ago
    df[f'rate_of_change_{window}_days'] = (price_change / base_price) * 100
    return df

def strength_index(df, period):
    '''
     Add the relative strength index (RSI) of the close price to `df`.

     RSI is a momentum indicator that measures the magnitude of recent price
     changes to evaluate overbought or oversold conditions in the price of a
     stock or other asset. It ranges over [0, 100]:
       - values approaching 70 and above: asset deemed overbought;
       - values approaching 30 and below: asset deemed oversold / undervalued.

     The values are written to a new column named `strength_index_{period}_days`.
     Rows before the first full `period` of price changes are NaN. If the frame
     holds fewer than `period` price changes, the whole column is NaN (matching
     how the rolling indicators behave on insufficient history) instead of
     failing with an IndexError.
     `df` is modified in place and also returned.
    '''
    column_name = f'strength_index_{period}_days'
    delta = df.close.diff().dropna()

    # Not enough history to seed the averages: emit an all-NaN column.
    if len(delta) < period:
        df[column_name] = np.nan
        return df

    # Split the price changes into gains (u) and losses (d, as positive numbers).
    u = delta * 0
    d = u.copy()
    u[delta > 0] = delta[delta > 0]
    d[delta < 0] = -delta[delta < 0]

    # Seed each series with the plain mean of its first `period` values, stored
    # at position period-1, then discard the rows consumed by the seed.
    u[u.index[period-1]] = np.mean( u.iloc[:period] )
    u = u.drop(u.index[:(period-1)])
    d[d.index[period-1]] = np.mean( d.iloc[:period] )
    d = d.drop(d.index[:(period-1)])

    # Recursive exponential smoothing with alpha = 1 / period (com = period - 1).
    rs = u.ewm(com = period-1, adjust = False).mean() / d.ewm(com = period-1, adjust = False).mean()

    # Assignment aligns on the index, so the dropped leading rows become NaN.
    df[column_name] = 100 - 100 / (1 + rs)
    return df

In [8]:
def process_btc_data(df):
    '''
     Build the engineered BTC feature table from a frame of price candles.

     Adds, in order:
       - simple moving averages of `close` over 7, 14 and 56 rows;
       - `signal`: 1.0 where the 7-row average is above the 56-row average,
         otherwise 0.0;
       - for each window in (7, 14, 56): rolling std, exponential mean,
         exponential std, momentum, rate of change and strength index.

     Missing values (indicator warm-up rows) are replaced with 0 and the
     `date` column is converted to datetime.

     The input frame is left untouched; a new frame is returned.
    '''
    # The indicator helpers write columns into the frame they receive, so work
    # on a copy to keep the caller's frame unchanged.
    df = df.copy()

    for window in [7, 14, 56]:
        df = moving_average(df, window)

    # Compute the crossover BEFORE zero-filling. Comparing against a zero-filled
    # 56-row average would mark every warm-up row as 1.0; with NaN the comparison
    # is False, so the signal stays 0.0 until both averages exist.
    df['signal'] = np.where(df['mean_7_days'] > df['mean_56_days'], 1.0, 0.0)
    df = df.fillna(0)

    for i in [7, 14, 56]:
        for func in [moving_std, exponential_moving_average,
                     exponential_moving_std,
                     momentum_price, rate_of_change,
                     strength_index]:
            df = func(df, i).fillna(0)

    df['date'] = pd.to_datetime(df['date'])
    return df

### <span style="color:#ff5f27;">〽️ Applying Functions </span>

In [9]:
# Build the engineered feature table from the filtered BTC candles.
df_processed = process_btc_data(df)

In [10]:
# Inspect the last rows to sanity-check the engineered columns.
df_processed.tail(3)

Unnamed: 0,date,open,high,low,close,volume,quote_av,trades,tb_base_av,tb_quote_av,...,exp_std_14_days,momentum_14_days,rate_of_change_14_days,strength_index_14_days,std_56_days,exp_mean_56_days,exp_std_56_days,momentum_56_days,rate_of_change_56_days,strength_index_56_days
481,2022-06-02,29805.84,30689.0,29594.55,30452.62,56961.42928,1711653000.0,1086183,28555.06607,858193500.0,...,1679.699391,133.39,4.286187,46.348521,5039.300314,34124.984906,5597.739238,-12991.57,-27.926222,43.640162
482,2022-06-03,30452.63,30699.0,29282.36,29700.21,54067.44727,1615617000.0,993769,26583.25141,794354800.0,...,1576.588412,499.2,0.866529,43.654773,4996.379059,33969.729641,5559.688788,-12551.8,-30.532276,43.052983
483,2022-06-04,29700.21,29988.88,29485.0,29864.04,25617.90113,760874300.0,618037,12971.7246,385358200.0,...,1472.337144,418.98,-1.419096,44.412343,4925.337066,33825.670351,5514.223362,-12889.93,-29.163058,43.222349


## <span style='color:#ff5f27'> 📥 Save the results</span>

In [11]:
# Persist the engineered features. With the default index=True the row index is
# written as the first, unnamed CSV column.
# NOTE(review): readers of this file presumably need index_col=0 — confirm downstream.
df_processed.to_csv("data/btc_processed.csv")