In [1]:
import os
import psycopg2
import pandas as pd
from dotenv import load_dotenv
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def get_db_df():
    
    logging.info('Getting data from db')
    
    load_dotenv()
    
    conn = psycopg2.connect(os.getenv('DATABASE_URL'))
    
    # Open a cursor to perform database operations
    cur = conn.cursor()
    
    # Execute a query
    cur.execute("""
    SELECT * FROM united_trades
    where 
    abs(sol_delta) > 0.01
    --abs(sol_delta) > 0.01
    --and abs(token_delta) > 0.01
    order by timestamp desc 
    limit 50000
    """)
    
    # Retrieve query results
    records = cur.fetchall()
    
    # Convert to pandas DataFrame
    df_from_db = pd.DataFrame(records, columns=[desc[0] for desc in cur.description])
    
    # Close communication with the database
    cur.close()
    conn.close()
    
    logging.info(f'Got {len(df_from_db)} records from db')
    
    return df_from_db

df_from_db = get_db_df()

2024-01-04 12:45:34,078 - INFO - Getting data from db
2024-01-04 12:45:44,172 - INFO - Got 50000 records from db


In [2]:
df_from_db

Unnamed: 0,source,signature,program_ids,timestamp,mint,token_delta,sol_delta,trader
0,sol,3tB1UKUKxxCxJvckxQsBpnekNgx7sw5zoRHq7PrhyYsBuE...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-04 12:41:30,DH9CFyU13rGKdR6ohWkS7THfSQqetSBqnXFCzt93iqr7,-4.397729e+05,1.783756,7qp8So5tkqpJm3qhQZHPAxBj5T9QWkqTSeP8fx3gKcbY
1,sol,2Lo3981tj5panLb7PCdHvqHM3nD2xZYj6BA6LNizQawZdR...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-04 12:41:29,EdNiGWbkBwmmz7uhtdY3DqKZn1n5m1c61uLVgvXYZWGS,1.382082e+08,-0.010082,A1xZ6wXqutw53EGwc2Eu88vCtPqq4D35otUoUeyZaxok
2,sol,51yWzCgzRCD9RT4JFH9hJTahw7kCzd25QhJUZUNNgmZVRG...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-04 12:41:13,94sFWT94hg6qK9VtYwGz8VxbyEMaXf9H2U3HTTbofimy,-7.755679e+08,1.833103,BhCXGfyKVvpQypvACXqkLvEmjDdmqws5TPcjExM54Cxx
3,sol,prkNojeBGimChBRHVeNLvBLbzNeC7VXDhDGZU7Fh7KG97H...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-04 12:41:13,CUk8cQBHQFz35UQnY457ebNEPMTUgGc9QwiSDWJUGqqP,7.933989e+06,-1.990007,71UUEy1d8W4m5nGtN7FmqCGTkxaoaAEEDb4aKpFcH61f
4,sol,5Y7q4hUXGHwvUukx1V8u7QHzyveBviXZCBDxm6kxWuuApa...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-04 12:41:07,3FG9DAv3CDBEh1CQJhRM9BYe5thUMvP5GmW46524Qahv,5.461599e+03,-1.165005,5pyKssZ8322YC2LexSgdbVLVXGFjm5GEGyWNMuheA8CQ
...,...,...,...,...,...,...,...,...
49995,token,5RHEih9AJcDf6ESFS2pWM5C3mwRHqnZyV2pTEngYZe3ZJR...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-03 06:53:58,HZ1JovNiVvGrGNiiYvEozEVgZ58xaU3RKwX8eACQBCt3,2.893700e+02,-0.916925,252c9fkfFAMCjJM1vaVdLnn1hYHRRZNrqZY8fbJbAzTT
49996,token,5RHEih9AJcDf6ESFS2pWM5C3mwRHqnZyV2pTEngYZe3ZJR...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-03 06:53:58,EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v,-1.000000e+02,0.904549,252c9fkfFAMCjJM1vaVdLnn1hYHRRZNrqZY8fbJbAzTT
49997,sol,5AQeBm6Y9HvBPkbWPz9VYm2JpP5xKsaPEY4Yw5jjeNVLbx...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-03 06:53:57,39dmB1koRSHpnrnqQYB3Psbbo525Rviz4utg49HqqyrY,-1.396147e+04,2.193346,FhaeUYRWk8FdSyfWTZ4LuggfX8x8VvGnVCgnyXZznCDX
49998,token,4WVYNAtYAM6z8FvJk6oaYgaopn3uLhVpfihd2X2WxtL15x...,[JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4],2024-01-03 06:53:57,Es9vMFrzaCERmJfrF4H2FYD4KCoNkY11McCe8BenwNYB,-3.000000e+02,2.821848,61gP5t3MFDbPmUacrai8PhkqSyqmdB4Pn3fmabck53uQ


In [7]:
df_from_db.sort_values(by=['mint', 'timestamp'], inplace=True)
cnt = 0
for i, row in df_from_db.iterrows():
    cnt += 1
    print(row['timestamp'], row['mint'], row['sol_delta'], row['token_delta'])
    if cnt >= 10:
        break

2024-01-03 07:04:23 12AjTG16m3oWzjVy2kP2C1bUEzZv8PayQDM9csRR1obM 0.346824274 -101158227771.94292
2024-01-03 07:52:45 12AjTG16m3oWzjVy2kP2C1bUEzZv8PayQDM9csRR1obM 0.223280192 -64991741632.80637
2024-01-03 16:53:30 12AjTG16m3oWzjVy2kP2C1bUEzZv8PayQDM9csRR1obM -0.500005003 189478272346.01355
2024-01-04 10:45:20 12AjTG16m3oWzjVy2kP2C1bUEzZv8PayQDM9csRR1obM -0.300005408 79016579696.48972
2024-01-03 15:01:51 12FvihLoAzTtaA98YVuErGftCXtoc83t3YFk4WdKK79T 0.226812216 -700000.0
2024-01-03 15:22:23 12FvihLoAzTtaA98YVuErGftCXtoc83t3YFk4WdKK79T 0.170390857 -539806.965447244
2024-01-03 17:50:23 12FvihLoAzTtaA98YVuErGftCXtoc83t3YFk4WdKK79T 0.109741257 -266930.142651348
2024-01-03 21:36:47 12FvihLoAzTtaA98YVuErGftCXtoc83t3YFk4WdKK79T 0.086776927 -220708.505947388
2024-01-04 01:42:17 12FvihLoAzTtaA98YVuErGftCXtoc83t3YFk4WdKK79T 1.006459559 -1880906.2656870717
2024-01-04 04:09:44 12FvihLoAzTtaA98YVuErGftCXtoc83t3YFk4WdKK79T -0.400082 709725.7356539108


In [192]:
df_from_db.describe()

Unnamed: 0,timestamp,token_delta,sol_delta
count,50000,50000.0,50000.0
mean,2024-01-03 09:32:24.591880192,-1645981000000.0,0.399118
min,2024-01-02 17:40:32,-7.82531e+16,-1419.814702
25%,2024-01-03 02:34:58.500000,-19252.16,-0.891305
50%,2024-01-03 10:32:33.500000,-0.829113,0.013577
75%,2024-01-03 16:36:52,12827.4,0.783524
max,2024-01-03 23:22:37,477038700000000.0,2207.287738
std,,350197800000000.0,36.405884


In [193]:
def filter_by_program_id(df_from_db):
    # The value to filter by, wrapped in a list to match the column's data type
    value_to_filter = ['JUP6LkbZbjS1jKKwapdHNy74zcZ3tLUZoi5QNyVTaV4']
    # Use the `.apply()` method with a lambda function to check for equality
    filtered_df = df_from_db[df_from_db['program_ids'].apply(lambda x: x == value_to_filter)]
    logging.info(f'Filtered df by program_id, got {len(filtered_df)} records')
    return filtered_df

df_from_db = filter_by_program_id(df_from_db)


2024-01-03 23:26:49,239 - INFO - Filtered df by program_id, got 49076 records


In [194]:
df_from_db.memory_usage(deep=True).sum()

26269710

In [195]:
trades_df = df_from_db.copy()

In [196]:
from datetime import timedelta
TIME_QUANTIZATION = '10min'
QUANT_TIMEDELTA = timedelta(minutes=10)

def add_time_quantization(trades_df):
    trades_df['time_quant'] = trades_df['timestamp'].dt.floor('10min')

add_time_quantization(trades_df)

In [199]:
def remove_rare_tokens(trades_df, min_trades=20, min_quants=5):
    # mints that appears in at least 10 trades
    mints_counts = trades_df.groupby('mint').size()
    mints_with_n_trades = mints_counts[mints_counts >= min_trades].index
    # print(mints_with_10_trades)
    # mints that appears in at least 3 pairs (mint, time_quant)
    keys_counts = trades_df.groupby(['mint', 'time_quant']).size()
    mints_with_n_quants = keys_counts[keys_counts >= min_quants].index.get_level_values(0).unique()
    # print(mints_with_3_pairs)
    # intersection of these mints
    mints_to_keep = mints_with_n_trades.intersection(mints_with_n_quants)
    # filter df by these mints
    trades_df = trades_df[trades_df.mint.isin(mints_to_keep)]
    return trades_df
    
print(len(trades_df))
print(trades_df.mint.nunique())
trades_df = remove_rare_tokens(trades_df)
print(len(trades_df))
print(trades_df.mint.nunique())


44193
265
38784
136


In [219]:
from typing import Dict
import numpy as np

class PriceTables:
    quants: pd.DatetimeIndex
    sol_prices: Dict[str, np.array]
    united_prices: Dict[str, np.array]
    
    def __init__(self, sol_prices, united_prices, quants):
        self.sol_prices = sol_prices
        self.united_prices = united_prices
        self.quants = quants
    
    def get_price(self, mint, quant, price_type):
        quant_index_float = (quant - self.quants[0]) / QUANT_TIMEDELTA
        # convert to int, but check that it's close to integer
        quant_index = round(quant_index_float)
        if abs(quant_index_float - quant_index) > 0.01:
            raise ValueError(f'Quant {quant} is not close to quantization')
        
        if quant_index < 0 or quant_index >= len(self.quants):
            return -1

        if price_type == 'sol':
            return self.sol_prices[mint][quant_index]
        elif price_type == 'united':
            return self.united_prices[mint][quant_index]
        else:
            raise ValueError(f'Unknown price type: {price_type}')
    
def get_exponential_price_average(df, alpha=0.5) -> float:
    # calculates exponential moving average of price,
    # with sol_delta / token_delta as price,
    # and sol_delta as weight
    prices = abs(df.sol_delta / df.token_delta)
    weights = abs(df.sol_delta)
    # multiply weight exponentially
    exp = (1 - alpha) ** np.arange(len(df))
    # reverse last series
    exp = exp[::-1]
    # multiply weight by exp and make sum equal to 1
    weights = weights * exp
    weights = weights / weights.sum()
    # calc weighted average price
    price = (prices * weights).sum()
    return price

def get_price_tables(trades_df) -> PriceTables:
    min_quant = trades_df.time_quant.min()
    max_quant = trades_df.time_quant.max()
    global_quants = pd.date_range(min_quant, max_quant, freq=TIME_QUANTIZATION)
    quants_len = len(global_quants)
    
    trades_df.sort_values(by=['mint', 'time_quant'], inplace=True)
    sol_prices = {}
    united_prices = {}
    for mint in trades_df.mint.unique():
        sol_prices[mint] = np.zeros(quants_len)
        united_prices[mint] = np.zeros(quants_len)
        mint_df = trades_df[trades_df.mint == mint]
        mint_df.set_index('time_quant', inplace=True)
        last_sol_price = 0.
        last_united_price = 0.
        quants_in_df = mint_df.index.unique()
        for i, quant in enumerate(global_quants):
            if quant in quants_in_df:
                # mint_quant_df = mint_df[['source', 'sol_delta', 'token_delta']].loc[[quant]]
                mint_quant_df = mint_df.loc[[quant]]
                last_united_price = get_exponential_price_average(mint_quant_df)
                # get only sol trades which means source is 'sol
                sol_df = mint_quant_df[mint_quant_df['source'] == 'sol'] 
                # if it's non-empty, calc price
                if len(sol_df) > 0:
                    last_sol_price = get_exponential_price_average(sol_df)
            sol_prices[mint][i] = last_sol_price
            united_prices[mint][i] = last_united_price
    logging.info(f'Got price tables for {len(sol_prices)} mints')
    return PriceTables(sol_prices, united_prices, global_quants)

price_tables = get_price_tables(trades_df)

2024-01-03 23:56:00,479 - INFO - Got price tables for 136 mints


In [220]:
mint = 'bSo13r4TkiE4KumL71LsHTPpL2euBYLFx6h9HP3piy1'

up = price_tables.united_prices[mint]
sp = price_tables.sol_prices[mint]
quants = price_tables.quants
# unite them in one df
pdf = pd.DataFrame({
    'quant': quants,
    'united': up, 'sol': sp,
    'united/sol': up / sp,
})
pdf

Unnamed: 0,quant,united,sol,united/sol
0,2024-01-02 17:40:00,1.101182,1.101159,1.000021
1,2024-01-02 17:50:00,1.102057,1.102188,0.999882
2,2024-01-02 18:00:00,1.102057,1.102188,0.999882
3,2024-01-02 18:10:00,1.096199,1.102188,0.994567
4,2024-01-02 18:20:00,1.102761,1.102913,0.999863
...,...,...,...,...
174,2024-01-03 22:40:00,1.100666,1.100666,1.000000
175,2024-01-03 22:50:00,1.100666,1.100666,1.000000
176,2024-01-03 23:00:00,1.100666,1.100666,1.000000
177,2024-01-03 23:10:00,1.100993,1.100993,1.000000


In [202]:
pdf.describe()

Unnamed: 0,quant,united,sol,united/sol
count,179,179.0,179.0,179.0
mean,2024-01-03 08:30:00,1.101395,1.102949,0.998649
min,2024-01-02 17:40:00,1.061562,1.086432,0.959704
25%,2024-01-03 01:05:00,1.100653,1.100852,0.999711
50%,2024-01-03 08:30:00,1.100993,1.101015,1.0
75%,2024-01-03 15:55:00,1.10133,1.101272,1.000078
max,2024-01-03 23:20:00,1.128551,1.143234,1.029346
std,,0.007073,0.008459,0.009949


In [203]:
interesting_quant = '2024-01-02 17:30:00'
# filter by quant and mint
trades_df[(trades_df.mint == mint) & (trades_df.time_quant == interesting_quant)]


Unnamed: 0,source,signature,program_ids,timestamp,mint,token_delta,sol_delta,trader,time_quant


In [231]:
def get_dataset_keys(trades_df):
    # get all pairs (mint, time_quant)
    keys = trades_df[['mint', 'time_quant']].drop_duplicates()
    # sort by mint, time_quant
    keys.sort_values(by=['mint', 'time_quant'], inplace=True)
    # reset index
    keys.reset_index(drop=True, inplace=True)
    return keys

dataset = get_dataset_keys(trades_df)
dataset

Unnamed: 0,mint,time_quant
0,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 18:10:00
1,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 19:10:00
2,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 21:50:00
3,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 22:00:00
4,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 22:10:00
...,...,...
9876,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:10:00
9877,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:20:00
9878,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:30:00
9879,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:50:00


In [232]:
def assign_labels(dataset: pd.DataFrame, price_tables, forcast_timedelta, change_magnitude):
    dataset['price'] = 0.
    dataset['future_price'] = 0.
    dataset['label'] = 'na'
    
    for i, row in dataset.iterrows():
        mint = row['mint']
        quant = row['time_quant']
        price_now = price_tables.get_price(mint, quant, 'sol')
        dataset.at[i, 'price'] = price_now
        price_future = price_tables.get_price(mint, quant + forcast_timedelta, 'sol')
        if price_future == -1:
            continue
        dataset.at[i, 'future_price'] = price_future
        if price_future > price_now * change_magnitude:
            dataset.at[i, 'label'] = 'up'
        elif price_future < price_now / change_magnitude:
            dataset.at[i, 'label'] = 'down'
        else:
            dataset.at[i, 'label'] = 'flat'
    
    logging.info(f'Assigned labels to {len(dataset)} records')
    logging.info(dataset.label.value_counts())
    
assign_labels(dataset, price_tables, timedelta(hours=2), change_magnitude=2)
dataset

2024-01-04 00:11:47,102 - INFO - Assigned labels to 9881 records
2024-01-04 00:11:47,103 - INFO - label
flat    8507
na       720
down     378
up       276
Name: count, dtype: int64


Unnamed: 0,mint,time_quant,price,future_price,label
0,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 18:10:00,5.193299e-08,1.642084e-08,down
1,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 19:10:00,1.642084e-08,1.642084e-08,flat
2,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 21:50:00,5.657210e-08,5.638049e-08,flat
3,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 22:00:00,5.657210e-08,5.638049e-08,flat
4,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 22:10:00,1.498326e-07,5.638049e-08,down
...,...,...,...,...,...
9876,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:10:00,1.389651e-04,0.000000e+00,na
9877,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:20:00,1.389651e-04,0.000000e+00,na
9878,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:30:00,1.389651e-04,0.000000e+00,na
9879,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:50:00,1.389651e-04,0.000000e+00,na


In [240]:
from dataclasses import dataclass
from datetime import datetime, timedelta


@dataclass
class FeatureAggr:
    name: str
    delta: timedelta
    num_points: int


class FeatureCalculator:
    
    def __init__(
            self, 
            dataset: pd.DataFrame, 
            price_tables: PriceTables, 
            trades_df: pd.DataFrame, 
            periods: int
    ):
        self.dataset = dataset
        self.price_tables = price_tables
        self.trades_df = trades_df
        self.periods = periods
    
        self.aggregations = [
            FeatureAggr('10m', timedelta(minutes=10), self.periods),
            FeatureAggr('1h', timedelta(hours=1), self.periods),
            FeatureAggr('4h', timedelta(hours=4), self.periods),
            FeatureAggr('1d', timedelta(hours=24), self.periods),
        ]
    
    def assign_features(self):
        dataset['day_of_week'] = dataset['time_quant'].dt.dayofweek
        dataset['hour'] = dataset['time_quant'].dt.hour

        # sol price
        for a in self.aggregations:
            dataset['s_price_sma_' + a.name] = [-1.] * len(dataset)
            dataset['s_price_ema_' + a.name] = [-1.] * len(dataset)
            dataset['s_price_rsi_' + a.name] = [-1.] * len(dataset)
            for index, row in dataset.iterrows():
                mint = row['mint']
                time_quant = row['time_quant']
                dataset.loc[index, 's_price_sma_' + a.name] = self.sma_price('sol', mint, time_quant, a.delta, a.num_points)
                dataset.loc[index, 's_price_ema_' + a.name] = self.ema_price('sol', mint, time_quant, a.delta, a.num_points)
                dataset.loc[index, 's_price_rsi_' + a.name] = self.relative_strength_index('sol', mint, time_quant, a.delta, a.num_points)

        # united price
        for a in self.aggregations:
            dataset['u_price_sma_' + a.name] = [-1.] * len(dataset)
            dataset['u_price_ema_' + a.name] = [-1.] * len(dataset)
            dataset['u_price_rsi_' + a.name] = [-1.] * len(dataset)
            for index, row in dataset.iterrows():
                mint = row['mint']
                time_quant = row['time_quant']
                dataset.loc[index, 'u_price_sma_' + a.name] = self.sma_price('united', mint, time_quant, a.delta, a.num_points)
                dataset.loc[index, 'u_price_ema_' + a.name] = self.ema_price('united', mint, time_quant, a.delta, a.num_points)
                dataset.loc[index, 'u_price_rsi_' + a.name] = self.relative_strength_index('united', mint, time_quant, a.delta, a.num_points)
        
        # volume        
        for a in self.aggregations:
            dataset['sma_vol_' + a.name] = [-1.] * len(dataset)
            dataset['ema_vol_' + a.name] = [-1.] * len(dataset)
            for index, row in dataset.iterrows():
                mint = row['mint']
                time_quant = row['time_quant']
                dataset.loc[index, 'sma_vol_' + a.name] = self.sma_volume('united', mint, time_quant, a.delta, a.num_points)
                dataset.loc[index, 'ema_vol_' + a.name] = self.ema_volume('united', mint, time_quant, a.delta, a.num_points)

    def get_base_price(self, mode, inp_mint, inp_quant):
        ans = self.price_tables.get_price(inp_mint, inp_quant, mode)
        assert(ans != -1)
        return ans    
    
    def get_price_points(self, mode, inp_mint, last_quant, interval, num_points):
        ans = []
        for i in range(num_points):
            quant = last_quant - i * interval
            if quant < self.price_tables.quants[0]:
                break
            ans.append(self.get_base_price(mode, inp_mint, quant))
        # reverse list
        ans = ans[::-1]
        return ans
    
    def sma_price(self, mode, inp_mint, last_quant, interval, num_points):
        price_points = self.get_price_points(mode, inp_mint, last_quant, interval, num_points)
        if len(price_points) == 0 or sum(price_points) == 0:
            return 0.
        return sum(price_points) / len(price_points) / self.get_base_price(mode, inp_mint, last_quant)
    
    def ema_price(self, mode, inp_mint, last_quant, interval, num_points):
        price_points = self.get_price_points(mode, inp_mint, last_quant, interval, num_points)
        if len(price_points) == 0 or sum(price_points) == 0:
            return 0.
        k = 2 / (num_points + 1)
        ans = price_points[0]
        for i in range(1, len(price_points)):
            ans = k * price_points[i] + (1 - k) * ans
        return ans / self.get_base_price(mode, inp_mint, last_quant)
    
    def relative_strength_index(self, mode, inp_mint, last_quant, interval, num_points):
        price_points = self.get_price_points(mode, inp_mint, last_quant, interval, num_points)
        if len(price_points) <= 1 or sum(price_points) == 0:
            return -1.
        gains = []
        losses = []
        for i in range(1, len(price_points)):
            diff = price_points[i] - price_points[i - 1]
            if diff > 0:
                gains.append(diff)
            elif diff < 0:
                losses.append(-diff)
        if len(gains) == 0:
            return 0.
        if len(losses) == 0:
            return 100.
        avg_gain = sum(gains) / len(gains)
        avg_loss = sum(losses) / len(losses)
        return 100 - 100 / (1 + avg_gain / avg_loss)
    
    def get_volume_points(self, mode, inp_mint, last_quant, interval, num_points):
        ans = []
        for i in range(num_points):
            quant = last_quant - i * interval
            prev_quant = quant - interval
            if prev_quant < self.price_tables.quants[0]:
                break
            price = self.get_base_price(mode, inp_mint, quant)
            prev_price = self.get_base_price(mode, inp_mint, prev_quant)
            ans.append(price - prev_price)
        # reverse list
        ans = ans[::-1]
        return ans
    
    def sma_volume(self, mode, inp_mint, last_quant, interval, num_points):
        volume_points = self.get_volume_points(mode, inp_mint, last_quant, interval, num_points)
        if len(volume_points) == 0:
            return 0.
        return sum(volume_points) / len(volume_points)
    
    def ema_volume(self, mode, inp_mint, last_quant, interval, num_points):
        volume_points = self.get_volume_points(mode, inp_mint, last_quant, interval, num_points)
        if len(volume_points) == 0:
            return 0.
        k = 2 / (num_points + 1)
        ans = volume_points[0]
        for i in range(1, len(volume_points)):
            ans = k * volume_points[i] + (1 - k) * ans
        return ans

# 
# 

feature_calculator = FeatureCalculator(dataset, price_tables, trades_df, periods=14)
feature_calculator.assign_features()

dataset

Unnamed: 0,mint,time_quant,price,future_price,label,day_of_week,hour,s_price_sma_10m,s_price_sma_1h,s_price_sma_4h,...,u_price_ema_1d,u_price_rsi_1d,sma_vol_10m,ema_vol_10m,sma_vol_1h,ema_vol_1h,sma_vol_4h,ema_vol_4h,sma_vol_1d,ema_vol_1d
0,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 18:10:00,5.193299e-08,1.642084e-08,down,1,18,0.250000,1.000000,1.000000,...,1.000000,-1.0,1.731100e-08,6.924399e-09,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000
1,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 19:10:00,1.642084e-08,1.642084e-08,flat,1,19,1.997576,2.081313,1.000000,...,1.000000,-1.0,1.824538e-09,-1.800720e-09,-3.551215e-08,-3.551215e-08,0.000000e+00,0.000000e+00,0.000000,0.000000
2,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 21:50:00,5.657210e-08,5.638049e-08,flat,1,21,0.340959,0.499705,0.500000,...,1.000000,-1.0,2.867947e-09,5.353501e-09,1.414303e-08,3.560348e-08,5.657210e-08,5.657210e-08,0.000000,0.000000
3,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 22:00:00,5.657210e-08,5.638049e-08,flat,1,22,0.391655,0.499705,0.500000,...,1.000000,-1.0,5.090411e-09,8.788301e-09,2.192165e-08,3.975208e-08,8.768660e-08,8.768660e-08,0.000000,0.000000
4,14JnYcbAooDAZVb72DgmFGVZGDon5Ko4SN6ELrpU6ood,2024-01-02 22:10:00,1.498326e-07,5.638049e-08,down,1,22,0.211477,0.335078,0.673303,...,1.000000,-1.0,9.529411e-09,1.590266e-08,2.447490e-08,-5.328861e-09,9.789960e-08,9.789960e-08,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9876,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:10:00,1.389651e-04,0.000000e+00,na,2,22,1.000897,0.969467,0.714106,...,0.644697,100.0,2.436320e-07,1.912093e-07,5.341264e-07,-6.675696e-07,1.984730e-05,3.937838e-05,0.000057,0.000057
9877,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:20:00,1.389651e-04,0.000000e+00,na,2,22,1.000828,0.969467,0.714106,...,0.639317,100.0,3.491388e-07,7.539475e-08,8.178068e-07,-1.721228e-07,2.005831e-05,3.947312e-05,0.000058,0.000058
9878,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:30:00,1.389651e-04,0.000000e+00,na,2,22,1.000759,0.965458,0.707092,...,0.639896,100.0,4.898165e-07,5.256088e-07,1.072443e-06,3.871543e-07,2.003540e-05,3.938599e-05,0.000058,0.000058
9879,zebeczgi5fSEtbpfQKVZKCJ3WgYXxjkMUkNNx7fLKAF,2024-01-03 22:50:00,1.389651e-04,0.000000e+00,na,2,22,1.000621,0.967190,0.707092,...,0.642247,100.0,1.731550e-07,1.803487e-07,1.255768e-06,9.734387e-07,1.994284e-05,3.894917e-05,0.000058,0.000058
