# Kernels - Market data 

### Import

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path 
import datatable as dt

### Get data

In [4]:
# Location of the input market-data CSV, relative to the notebook's working directory.
filepath = Path('../../data/csv/crypto_market_data.csv')

In [5]:
# Read the CSV and rebuild its layout in one chain:
#   * rows 0 and 1 of the parsed table are promoted to a two-level column index
#     (the rendered header below shows ticker on top, price field underneath);
#   * the column labelled '' becomes the row index;
#   * every remaining value is cast to float.
df = (
    dt.fread(filepath)
    .to_pandas()
    .T.set_index([0, 1])
    .T
    .set_index('')
    .astype(float)
)
df.index.names = ['date']
df.head(3)

Unnamed: 0_level_0,USDT-USD,USDT-USD,USDT-USD,USDT-USD,USDT-USD,USDT-USD,BTC-USD,BTC-USD,BTC-USD,BTC-USD,...,ADA-USD,ADA-USD,ADA-USD,ADA-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD,BNT-USD
1,Adj Close,Close,High,Low,Open,Volume,Adj Close,Close,High,Low,...,High,Low,Open,Volume,Adj Close,Close,High,Low,Open,Volume
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-04-07 00:00:00+00:00,0.998488,0.998488,1.000555,0.990544,0.99349,0.0,7366.915527,7366.915527,7407.44873,7268.242188,...,0.036678,0.035576,0.035681,0.0,0.198634,0.198634,0.200798,0.196712,0.196712,0.0
2020-04-07 01:00:00+00:00,0.999708,0.999708,1.000824,0.994457,0.998814,681816100.0,7355.73291,7355.73291,7383.069824,7350.611816,...,0.036497,0.036083,0.036118,1867280.0,0.19731,0.19731,0.198583,0.197102,0.198583,5230.0
2020-04-07 02:00:00+00:00,1.000362,1.000362,1.004532,0.997544,1.00019,1158300000.0,7311.450195,7311.450195,7355.026367,7290.237793,...,0.036182,0.035433,0.036182,2287464.0,0.195936,0.195936,0.197029,0.195246,0.196872,49303.0


In [6]:
# Take the BTC-USD columns out of the two-level column index; the result is
# addressed by price field only (High, Low, Open, Close are used below).
df_btc = df['BTC-USD']

### Compute kernels

In [7]:
def kernel_01(row):
    """Return log(High / Low) for an object exposing `High` and `Low`.

    Works on a single row (Series) as well as on a whole DataFrame, in which
    case the result is computed element-wise per row.
    """
    high_over_low = row.High / row.Low
    return np.log(high_over_low)

def kernel_02(row):
    """Return log(High / Close) for an object exposing `High` and `Close`.

    Works on a single row (Series) as well as on a whole DataFrame, in which
    case the result is computed element-wise per row.
    """
    high_over_close = row.High / row.Close
    return np.log(high_over_close)

def kernel_03(row):
    """Return log(High / Open) for an object exposing `High` and `Open`.

    Works on a single row (Series) as well as on a whole DataFrame, in which
    case the result is computed element-wise per row.
    """
    high_over_open = row.High / row.Open
    return np.log(high_over_open)

def kernel_04(row):
    """Return log(Low / Open) for an object exposing `Low` and `Open`.

    Works on a single row (Series) as well as on a whole DataFrame, in which
    case the result is computed element-wise per row.
    """
    low_over_open = row.Low / row.Open
    return np.log(low_over_open)

def kernel_05(row):
    """Return log(Low / Close) for an object exposing `Low` and `Close`.

    Works on a single row (Series) as well as on a whole DataFrame, in which
    case the result is computed element-wise per row.
    """
    low_over_close = row.Low / row.Close
    return np.log(low_over_close)

def kernel_06(row):
    """Return log(Close / Open) for an object exposing `Close` and `Open`.

    Works on a single row (Series) as well as on a whole DataFrame, in which
    case the result is computed element-wise per row.
    """
    close_over_open = row.Close / row.Open
    return np.log(close_over_open)

In [8]:
# Work on a separate frame so `df_btc` itself is left untouched, and add the
# previous row's Close as `Close_lag1` (NaN on the first row, since shift()
# has nothing to pull from).
# NOTE(review): "previous row" only means "previous period" if the rows are
# sorted by date with no gaps - confirm against the input data.
df_btc_copy = df_btc.assign(Close_lag1=df_btc.Close.shift())

def kernel_07(row):
    """Return log(Open / Close_lag1) for an object exposing both fields.

    Works on a single row (Series) as well as on a whole DataFrame, in which
    case the result is computed element-wise per row. A NaN `Close_lag1`
    propagates to a NaN result.
    """
    open_over_prev_close = row.Open / row.Close_lag1
    return np.log(open_over_prev_close)

def kernel_08(row):
    """Return log(Close / Close_lag1) for an object exposing both fields.

    Works on a single row (Series) as well as on a whole DataFrame, in which
    case the result is computed element-wise per row. A NaN `Close_lag1`
    propagates to a NaN result.
    """
    close_over_prev_close = row.Close / row.Close_lag1
    return np.log(close_over_prev_close)

In [9]:
# Build the feature table in one step.
#
# Every kernel is a column ratio followed by np.log, and both operations are
# element-wise, so each kernel can be called once on the whole frame instead
# of once per row through `apply(axis=1)`. The values are the same; the
# Python-level loop over rows (and the pass-through lambda) is gone.
#
# kernel_01..06 only need the current row's prices and read from `df_btc`;
# kernel_07/08 also need `Close_lag1`, which only exists on `df_btc_copy`.
# Both frames share the same row index, so the columns line up, and the first
# row of kernel_07/08 is NaN because it has no previous close.
df_btc_features = pd.DataFrame(
    {
        'kernel_01': kernel_01(df_btc),
        'kernel_02': kernel_02(df_btc),
        'kernel_03': kernel_03(df_btc),
        'kernel_04': kernel_04(df_btc),
        'kernel_05': kernel_05(df_btc),
        'kernel_06': kernel_06(df_btc),
        'kernel_07': kernel_07(df_btc_copy),
        'kernel_08': kernel_08(df_btc_copy),
    }
)

In [10]:
# Discard every row that has a missing value in any kernel column; kernel_07
# and kernel_08 are built from a shifted Close, so at least the first row
# goes. The arguments below are dropna's defaults, spelled out.
df_btc_features = df_btc_features.dropna(axis='index', how='any')
df_btc_features.head(3)

Unnamed: 0_level_0,kernel_01,kernel_02,kernel_03,kernel_04,kernel_05,kernel_06,kernel_07,kernel_08
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-04-07 01:00:00+00:00,0.004406,0.00371,0.002898,-0.001508,-0.000696,-0.000812,-0.000707,-0.001519
2020-04-07 02:00:00+00:00,0.008848,0.005942,0.0,-0.008848,-0.002905,-0.005942,-9.6e-05,-0.006038
2020-04-07 03:00:00+00:00,0.005107,0.0,0.00102,-0.004087,-0.005107,0.00102,-0.00018,0.00084


### Save features

In [11]:
# Destination CSV for the computed kernel features.
# NOTE(review): this rebinds `filepath`, which earlier in the notebook pointed
# at the input market-data CSV - re-running the load cell after this one would
# read the features file instead.
filepath = Path('../csv/features_btc_kernels.csv')

In [12]:
# Persist the feature table; with to_csv's defaults the row index is written
# as the first column.
df_btc_features.to_csv(filepath)