In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import pickle as pkl

In [2]:
# Load the source DataFrame from a local pickle file and preview its first rows.
# NOTE(review): the path is absolute and machine-specific; the output recorded
# for this cell is a FileNotFoundError, so it does not run on a fresh checkout.
# Consider a configurable data directory.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files from a trusted source.
with open('/Users/Codes/Bitcoin_trader/data/btcusd_15min.pkl', 'rb') as f:
    df = pkl.load(f)
df.head()

FileNotFoundError: [Errno 2] No such file or directory: '/Users/Codes/Bitcoin_trader/data/btcusd_15min.pkl'

In [None]:
# Sanity check: (rows, columns) of the frame loaded above.
df.shape

(450018, 6)

In [None]:
def calculate_macd(df, fast=12, slow=26, signal=9):
    """Add the MACD histogram of ``df['close']`` as column ``MACD_Histogram``.

    The MACD line is the fast EMA minus the slow EMA of the close, the signal
    line is an EMA of the MACD line, and the histogram is MACD minus signal.
    Every EMA uses ``adjust=False``.

    Intermediate series are kept in local variables rather than being written
    to ``df`` and dropped afterwards, so columns the caller may already have
    named ``EMA_fast``, ``EMA_slow``, ``MACD`` or ``Signal_Line`` are neither
    overwritten nor removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``close`` column. It is modified in place.
    fast : int
        Span of the fast EMA.
    slow : int
        Span of the slow EMA.
    signal : int
        Span of the EMA applied to the MACD line.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``MACD_Histogram`` added.
    """
    close = df['close']
    ema_fast = close.ewm(span=fast, adjust=False).mean()
    ema_slow = close.ewm(span=slow, adjust=False).mean()
    macd_line = ema_fast - ema_slow
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    # Only the histogram is exposed as a feature; the lines stay local.
    df['MACD_Histogram'] = macd_line - signal_line
    return df


In [None]:
def bolinger_bands(df, window=20, std=2):
    """Add Bollinger-band columns ``SMA``, ``BB_up`` and ``BB_down`` to ``df``.

    ``SMA`` is the rolling mean of ``df['close']`` over ``window`` rows. The
    upper and lower bands sit ``std`` rolling standard deviations (pandas'
    default rolling ``std``) above and below it. The first ``window - 1`` rows
    are NaN because the rolling window is not yet full.

    ``df`` is modified in place and also returned.
    """
    rolling_close = df['close'].rolling(window=window)
    band_offset = rolling_close.std() * std
    df['SMA'] = rolling_close.mean()
    df['BB_up'] = df['SMA'] + band_offset
    df['BB_down'] = df['SMA'] - band_offset
    return df

In [None]:
def momentum_osilator(df, window=10):
    """Add column ``pmo``: percentage change of ``close`` versus ``window`` rows earlier.

    The first ``window`` rows are NaN because there is no earlier close to
    compare against. ``df`` is modified in place and also returned.
    """
    close = df['close']
    earlier_close = close.shift(window)
    relative_change = close / earlier_close - 1
    df['pmo'] = relative_change * 100
    return df

In [None]:
def calculate_rsi(df, window=14):
    """Add column ``RSI`` computed from simple rolling means of gains and losses.

    Close-to-close changes are split into gains (positive changes, else 0) and
    losses (magnitude of negative changes, else 0). Each is averaged with a
    plain rolling mean over ``window`` rows, and
    ``RSI = 100 - 100 / (1 + avg_gain / avg_loss)``.

    Notes
    -----
    * The first change is NaN and is counted as 0 in both the gain and the
      loss series.
    * When both averages are 0 (no price movement across the window) the
      ratio is 0/0 and the RSI is NaN.

    ``df`` is modified in place and also returned.
    """
    change = df['close'].diff()
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)
    avg_gain = ups.rolling(window=window).mean()
    avg_loss = downs.rolling(window=window).mean()
    relative_strength = avg_gain / avg_loss
    df['RSI'] = 100 - (100 / (1 + relative_strength))
    return df


In [None]:
def on_balance_volume(df):
    """Add column ``OBV``: the running total of signed volume.

    A row contributes ``+Volume`` when its close is above the previous close,
    ``-Volume`` when it is below, and 0 otherwise (this includes the first
    row, which has no previous close).

    ``df`` is modified in place and also returned.
    """
    close = df['close']
    previous_close = close.shift(1)
    volume = df['Volume']
    # Resolve the "down or unchanged" branch first, then overlay the "up" rows.
    down_or_flat = np.where(close < previous_close, -volume, 0)
    signed_volume = np.where(close > previous_close, volume, down_or_flat)
    df['OBV'] = signed_volume.cumsum()
    return df

### Since the price has changed many-fold, the volume has also decreased, so ...

In [None]:
def comodiity_channel_index(df, window=14):
    """Add column ``CCI``: a close-based commodity-channel-style index.

    Computed as ``(close - rolling_mean(close)) / (0.015 * rolling_std(close))``
    over ``window`` rows. The first ``window - 1`` rows are NaN.

    NOTE(review): the textbook CCI uses the typical price ``(H + L + C) / 3``
    and the mean absolute deviation; this version uses the close and the
    rolling standard deviation. Confirm that this variant is intended.

    ``df`` is modified in place and also returned.
    """
    close = df['close']
    windowed_close = close.rolling(window=window)
    deviation = close - windowed_close.mean()
    scale = 0.015 * windowed_close.std()
    df['CCI'] = deviation / scale
    return df

In [None]:
def resistance(df):
    """Add pivot-point resistance (``R1``-``R3``) and support (``S1``-``S3``) columns.

    Levels are derived from each row's own ``high``, ``low`` and ``close``:

    * pivot ``P = (high + low + close) / 3``
    * ``R1 = 2P - low``          ``S1 = 2P - high``
    * ``R2 = P + (high - low)``  ``S2 = P - (high - low)``
    * ``R3 = high + 2(P - low)`` ``S3 = low - 2(high - P)``

    The pivot is kept in a local variable rather than being written to
    ``df['Pivot']`` and dropped afterwards, so a ``Pivot`` column the caller
    already has is neither overwritten nor removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``high``, ``low`` and ``close`` columns. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the six level columns added.
    """
    high = df['high']
    low = df['low']
    pivot = (high + low + df['close']) / 3
    bar_range = high - low

    # Assignment order fixes the column order: R1, R2, R3, S1, S2, S3.
    df['R1'] = 2 * pivot - low
    df['R2'] = pivot + bar_range
    df['R3'] = high + 2 * (pivot - low)
    df['S1'] = 2 * pivot - high
    df['S2'] = pivot - bar_range
    df['S3'] = low - 2 * (high - pivot)
    return df

In [None]:
def adding_features(df):
    """Run every indicator function over ``df`` and return the enriched frame.

    The steps are applied in a fixed order, which also determines the order
    of the added columns: MACD histogram, Bollinger bands, RSI, OBV, CCI,
    momentum oscillator, then pivot resistance/support levels.
    """
    feature_steps = (
        calculate_macd,
        bolinger_bands,
        calculate_rsi,
        on_balance_volume,
        comodiity_channel_index,
        momentum_osilator,
        resistance,
    )
    for add_feature in feature_steps:
        df = add_feature(df)
    return df

In [None]:
def preprocess_data(df):
    """Clean ``df``, add all indicator columns, and drop rows left incomplete.

    Rows containing NaN are removed before the indicators are computed, and
    again afterwards to discard rows where any indicator is NaN (for example
    the warm-up rows of the rolling windows).
    """
    complete_rows = df.dropna()
    with_features = adding_features(complete_rows)
    return with_features.dropna()

In [None]:
# Build the feature frame: drop NaN rows, add the indicators, drop NaN rows again.
# Note: this rebinds `df`, so re-running the cell feeds the already-processed
# frame back through the pipeline.
df = preprocess_data(df)

In [None]:
# Spot-check ten consecutive rows (positions 50000-50009) of the feature frame.
df[50000:50010]

Unnamed: 0,Timestamp,open,close,high,low,Volume,MACD_Histogram,SMA,BB_up,BB_down,RSI,OBV,CCI,pmo,R1,R2,R3,S1,S2,S3
56922,1376645000.0,96.7,96.6,97.19,96.6,55.76,-0.130665,97.8455,99.013016,96.677984,29.956897,39740.907789,-127.485971,-1.398387,96.993333,97.386667,97.583333,96.403333,96.206667,95.813333
56923,1376646000.0,97.33,96.86,97.38,96.65,17.952505,-0.120421,97.7785,99.012275,96.544725,35.25641,39758.860294,-81.71055,-1.102716,97.276667,97.693333,98.006667,96.546667,96.233333,95.816667
56924,1376647000.0,97.35,97.38,97.38,97.35,10.297823,-0.07244,97.7245,98.926822,96.522178,42.885375,39769.158117,-9.101921,-0.571779,97.39,97.4,97.42,97.36,97.34,97.33
56925,1376648000.0,97.38,97.49,97.49,96.91,27.563372,-0.029321,97.6745,98.824831,96.524169,47.798742,39796.721489,7.925819,-0.469627,97.683333,97.876667,98.263333,97.103333,96.716667,96.523333
56926,1376649000.0,97.49,97.5,97.5,97.49,2.296323,0.002176,97.6265,98.717405,96.535595,44.789357,39799.017812,14.626674,0.20555,97.503333,97.506667,97.513333,97.493333,97.486667,97.483333
56927,1376650000.0,97.54,97.94,97.95,97.07,183.176803,0.052173,97.5995,98.626309,96.572691,50.0,39982.194615,78.077596,0.163633,98.236667,98.533333,99.116667,97.356667,96.773333,96.476667
56928,1376651000.0,97.94,97.5,97.94,97.5,6.43144,0.054867,97.5515,98.495339,96.607661,45.895522,39975.763175,20.271782,-0.286357,97.793333,98.086667,98.233333,97.353333,97.206667,96.913333
56929,1376652000.0,97.5,96.67,97.5,96.52,402.55875,0.002757,97.473,98.436679,96.509321,39.644013,39573.204425,-91.881869,-0.453094,97.273333,97.876667,98.253333,96.293333,95.916667,95.313333
56930,1376653000.0,97.14,97.14,97.14,97.14,1.980029,0.001985,97.425,98.352123,96.497877,48.666667,39575.184453,-18.793338,0.496586,97.14,97.14,97.14,97.14,97.14,97.14
56931,1376654000.0,97.14,97.4,97.4,97.0,27.82,0.020025,97.41,98.328053,96.491947,46.712803,39603.004453,25.987337,0.113064,97.533333,97.666667,97.933333,97.133333,96.866667,96.733333


In [None]:

def calculate_future_price(df, window=3, weights=None):
    """Add column ``future_price``: a weighted average of the next ``window`` closes.

    For row ``i`` the value is
    ``weights[0] * close[i+1] + weights[1] * close[i+2] + ...``, so the first
    weight applies to the nearest future row. The last ``window`` rows have no
    complete look-ahead and are NaN.

    The weights used to be hard-coded to three values, so any ``window`` other
    than 3 failed inside ``np.dot`` with an opaque shape error. They are now a
    parameter that is validated up front. The weighted sum is built from
    shifted columns instead of a per-window Python callback, which gives the
    same values up to floating-point rounding.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``close`` column. Modified in place.
    window : int
        Number of future rows to average.
    weights : sequence of float, optional
        One weight per future row, nearest first. Defaults to
        ``(0.45, 0.3, 0.25)``, which requires ``window == 3``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``future_price`` added.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1 or the number of weights differs from
        ``window``.
    """
    if weights is None:
        weights = (0.45, 0.3, 0.25)
    weights = [float(weight) for weight in weights]
    if window < 1 or len(weights) != window:
        raise ValueError(
            f"expected {window} weights for window={window}, got {len(weights)}; "
            "pass `weights` explicitly when changing `window`"
        )

    close = df['close']
    # shift(-step) aligns the close `step` rows ahead with the current row;
    # NaN from a missing or not-yet-available close propagates into the sum.
    df['future_price'] = sum(
        weight * close.shift(-step)
        for step, weight in enumerate(weights, start=1)
    )
    return df

In [None]:
# Attach the prediction target: a weighted average of the following closes.
df = calculate_future_price(df)

In [None]:
# Re-inspect the same ten rows, now including the `future_price` column.
df[50000:50010]

Unnamed: 0,Timestamp,open,close,high,low,Volume,MACD_Histogram,SMA,BB_up,BB_down,...,OBV,CCI,pmo,R1,R2,R3,S1,S2,S3,future_price
56922,1376645000.0,96.7,96.6,97.19,96.6,55.76,-0.130665,97.8455,99.013016,96.677984,...,39740.907789,-127.485971,-1.398387,96.993333,97.386667,97.583333,96.403333,96.206667,95.813333,97.282
56923,1376646000.0,97.33,96.86,97.38,96.65,17.952505,-0.120421,97.7785,99.012275,96.544725,...,39758.860294,-81.71055,-1.102716,97.276667,97.693333,98.006667,96.546667,96.233333,95.816667,97.516
56924,1376647000.0,97.35,97.38,97.38,97.35,10.297823,-0.07244,97.7245,98.926822,96.522178,...,39769.158117,-9.101921,-0.571779,97.39,97.4,97.42,97.36,97.34,97.33,97.529333
56925,1376648000.0,97.38,97.49,97.49,96.91,27.563372,-0.029321,97.6745,98.824831,96.524169,...,39796.721489,7.925819,-0.469627,97.683333,97.876667,98.263333,97.103333,96.716667,96.523333,97.482667
56926,1376649000.0,97.49,97.5,97.5,97.49,2.296323,0.002176,97.6265,98.717405,96.535595,...,39799.017812,14.626674,0.20555,97.503333,97.506667,97.513333,97.493333,97.486667,97.483333,97.426
56927,1376650000.0,97.54,97.94,97.95,97.07,183.176803,0.052173,97.5995,98.626309,96.572691,...,39982.194615,78.077596,0.163633,98.236667,98.533333,99.116667,97.356667,96.773333,96.476667,97.166667
56928,1376651000.0,97.94,97.5,97.94,97.5,6.43144,0.054867,97.5515,98.495339,96.607661,...,39975.763175,20.271782,-0.286357,97.793333,98.086667,98.233333,97.353333,97.206667,96.913333,97.046667
56929,1376652000.0,97.5,96.67,97.5,96.52,402.55875,0.002757,97.473,98.436679,96.509321,...,39573.204425,-91.881869,-0.453094,97.273333,97.876667,98.253333,96.293333,95.916667,95.313333,97.232
56930,1376653000.0,97.14,97.14,97.14,97.14,1.980029,0.001985,97.425,98.352123,96.497877,...,39575.184453,-18.793338,0.496586,97.14,97.14,97.14,97.14,97.14,97.14,97.284667
56931,1376654000.0,97.14,97.4,97.4,97.0,27.82,0.020025,97.41,98.328053,96.491947,...,39603.004453,25.987337,0.113064,97.533333,97.666667,97.933333,97.133333,96.866667,96.733333,97.234


In [None]:
# Persist the processed frame for downstream use.
# NOTE(review): the path is absolute and machine-specific; consider a
# configurable data directory.
# NOTE(review): no dropna is applied after `future_price` is added, so the
# trailing rows whose target is NaN are written as well.
df.to_parquet('/Users/Codes/Bitcoin_trader/data/btcusd_15min_preprcd.parquet')