In [None]:
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.naive_bayes import GaussianNB

In [None]:
data = pd.read_csv('/gdrive/MyDrive/Colab Notebooks/ML Project/AVAX-USDT_30m_22-09-2020_09-12-2022.csv')
data.describe

<bound method NDFrame.describe of                datetime_id     open     high      low    close      volume
0      2020-09-22 09:30:00   0.8500   6.0000   0.8500   4.8811  3198372.67
1      2020-09-22 10:00:00   4.8800   5.3500   4.2450   4.9312  1914636.29
2      2020-09-22 10:30:00   4.9141   5.1999   4.5662   4.9096  1256377.31
3      2020-09-22 11:00:00   4.9096   5.4499   4.8200   5.3951  1100528.85
4      2020-09-22 11:30:00   5.3917   6.9289   5.3373   6.8219  2783452.58
...                    ...      ...      ...      ...      ...         ...
38755  2022-12-09 15:30:00  13.5900  13.5900  13.4000  13.4700    80416.54
38756  2022-12-09 16:00:00  13.4600  13.4800  13.3600  13.4100    21615.24
38757  2022-12-09 16:30:00  13.4200  13.5100  13.3800  13.4900    19410.28
38758  2022-12-09 17:00:00  13.5000  13.5400  13.4500  13.5000    31878.00
38759  2022-12-09 17:30:00  13.5000  13.5300  13.5000  13.5100     4308.78

[38760 rows x 6 columns]>

In [None]:
def risefall(data: pd.DataFrame) -> pd.DataFrame:
    df = data.copy()

    for col in ['open', 'high', 'low', 'close', 'volume']:
        df[col] = 1.0 * (df[col] > df[col].shift(-1))

    return df
    
# columns: datetime_id, open, high, low, close, volume

def window_input(window_length: int, data: pd.DataFrame) -> pd.DataFrame:
    
    df = data.copy()
    
    i = 1
    while i < window_length:
        for col in ['open', 'high', 'low', 'close', 'volume']:
            df[f'{col}_{i}'] = df[col].shift(-i)
        i = i + 1
        
    if i == window_length:
        df['y'] = df['close'].shift(-i)
        #df['y'] = df['y'] > df[f'close_{i-1}']
        df['datetime_id'] = df['datetime_id'].shift(-i)
    
    # Drop rows where there is a NaN
    df = df.dropna(axis=0)
        
    return df

In [None]:
window_size = 36
risefall_df = risefall(data)
seq_df = window_input(window_size, risefall_df)

  df[f'{col}_{i}'] = df[col].shift(-i)
  df['y'] = df['close'].shift(-i)


In [None]:
X = seq_df[[f'close_{i+1}' for i in range(window_size - 1)]]
y = seq_df['y']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43)

In [None]:
from collections import defaultdict

class NGramModel:
    def __init__(self, depth):
        self.ngrams = [defaultdict(lambda: 0) for i in range(depth)]
        self.depth = depth

    def _asints(ng):
        grams = [0 for _ in ng]
        grams[0] = ng[0]
        for i in range(1, len(ng)):
            grams[i] = 2 * grams[i - 1] + ng[i]

        return grams
            
    def encode(self, ng, y):
        ints = NGramModel._asints(ng)

        # Increment
        for i, n in enumerate(ints):
            if y == 1:
                self.ngrams[i][n] += 1

            else:
                self.ngrams[i][n] -= 1

    def train(self, X, y):
        for i in range(X.shape[0]):
            self.encode(X.iloc[i].to_numpy(), y.iloc[i])

    def get(self, ng):
        ints = NGramModel._asints(ng)

        result = []

        for i, n in enumerate(ints):
            result.append(1 * (self.ngrams[i][n] > 0))

        prediction = 0
        for i in range(len(result)):
            prediction += result[i]

        prediction /= len(result)

        if prediction > 0.5:
            return 1.0

        else:
            return 0.0

    def predict(self, X):
        r = []
        for i in range(X.shape[0]):
            r.append(self.get(X.iloc[i].to_numpy()))

        return np.array(r)

In [None]:
model = NGramModel(window_size)
model.train(X_train, y_train)

In [None]:
y_pred = model.predict(X_test)
print(f"Accuracy {np.sum(y_pred == y_test) / len(y_test) * 100:.2f}%")

Accuracy 50.02%
