In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from utils.pnl import pnl
import seaborn as sns
from features_clustering import *

In [None]:
# Pool configuration for the pair analysed in this notebook.
token0, token1 = 'WETH', 'USDC'   # token symbols
decimal_0, decimal_1 = 18, 6      # presumably the decimals of token0 / token1 — verify
fee_tier = 0.0005
tickspacing = 10

## Resampling and generating features


* hourly EWMA return for momentum $\mu_t$.

* hourly EWM return volatility $\sigma_t$

We set the decay rate $\alpha = 0.05$ for the EWM calculations.

* swap events:
    1. hourly number of swaps $N^{(swap)}_t$

    2. hourly mean arrival time of swap event $\Delta_t$ 

    3. hourly total volume (buy + sell), normalized by liquidity, $\hat{V}_t$
    
    4. hourly buy/sell volume imbalance  $$\mathcal{I}_t = \frac{V_t^{buy}-V_t^{sell}}{V_t^{buy}+V_t^{sell}}$$ 

* mint/burn events:
    1. hourly number of mints+burns $N^{(mb)}_t$





# Feature OS (out-of-sample) labeling is problematic: look-ahead bias

In [None]:
def _read_event_csv(path):
    """Read an event CSV, drop its 'Unnamed: 0' column and parse 'time' as datetime."""
    frame = pd.read_csv(path).drop(columns='Unnamed: 0')
    frame['time'] = pd.to_datetime(frame['time'])
    return frame


# Swap events and mint/burn events, loaded the same way.
df_swap = _read_event_csv('swap_with_liq.csv')
df_mb = _read_event_csv('mb.csv')

# Build the feature table from both event frames (helper comes from features_clustering).
df_features = features_resample(df_swap, df_mb)
# df_features.to_csv('./data/hourly_features.csv')


In [None]:

# Arguments forwarded to the pnl helper (their meaning is defined in utils.pnl).
L = int(2.6162685701074442e+17)
gas = 5

# Run pnl on the feature table with its index turned into a column, then keep
# only rows whose 'time' is not earlier than the first feature row.
result = pnl(df_features.reset_index(), L, gas)
_first_feature_time = df_features.index[0]
result = result[result['time'] >= _first_feature_time]

# Rewards are attached by position (via .values), not by index label, so
# `result` must have exactly one row per feature row.
_reward = result['reward']
df_features['reward'] = _reward.values
# Reward of the following row; the final row has no successor and becomes NaN.
df_features['reward_next_hr'] = _reward.shift(-1).values



## Features check

In [None]:
# Last timestamp (inclusive) of the in-sample window.
IS_end = '2024-01-29 19:00'

# Columns inspected in the in-sample feature check.
_is_columns = [
    'volume_imbalance', 'scaled_total_volume', 'n_swap', 'n_mb', 'closed_price',
    'interval_swap', 'liquidity', 'tick', 'R_ewma', 'volatility_ewm',
    'ma24', 'ma168', 'bb_upper', 'bb_middle', 'bb_lower', 'adxr', 'dx',
    'reward', 'reward_next_hr',
]

# Rows with index <= IS_end, restricted to the columns above.
df_features_IS = df_features.loc[df_features.index <= IS_end, _is_columns]

### In-sample correlation with reward

We can choose features according to their relationship with the LP reward.

In [None]:
# NOTE: disabled exploratory cell, kept for reference only; nothing below executes.
# NOTE(review): `next_reward` below is built with shift(1) (the previous row), whereas
# the live cell builds `reward_next_hr` with shift(-1) — confirm the intended direction
# before re-enabling any of this.
# def last(x):
#     if x.empty:
#         return np.nan   
#     return x.ffill().iloc[-1]
# result_IS =result[ result['time']<='2024-01-29 18:00'].copy()
# df = df_swap[df_swap['time']<='2024-01-29 19:00'].set_index('time').resample('1h').apply({'price':last})
# df['R']  = np.log(df['price'].shift(1)/df['price'])
# df['ewma_R_std'] = df['R'].ewm(alpha=0.05).std()
# df['ma24'] = df['price'].rolling(24).mean()
# df['ma168'] = df['price'].rolling(168).mean()
# hourly_ewma_R,hourly_ewm_std = ewm_features(df,'1h',39)
# df['R_ewma'] = hourly_ewma_R.values

# df['ewma_price_std'] = hourly_ewm_std.values
# df['reward']= result_IS['reward'].values
# df['next_reward']= result_IS['reward'].shift(1).values
# sns.heatmap( df.dropna().corr() ,cmap='coolwarm',cbar=True,annot=True,
#                  linewidth=0.01,linecolor='k')

### In-sample feature correlation

In [None]:
# Annotated heatmap of the pairwise correlations between the in-sample columns.
fig, ax = plt.subplots(figsize=(16, 10))
sns.heatmap(
    df_features_IS.corr(),
    cmap='coolwarm',
    cbar=True,
    annot=True,
    linewidth=0.01,
    linecolor='k',
    ax=ax,
)
ax.set_title('In sample correlation')


* n_swap and total volume are highly correlated.

* n_mb is correlated with n_swap.

* interval and reward are negatively correlated.

* price is negatively correlated with n_buy, which is weird.

* price volatility is positively correlated with absolute volume and price.

* liquidity and volume imbalance seem to be unrelated to the other features.


## Clustering

In [None]:
# Full feature set used for clustering.
features_list_all = [
    'volume_imbalance',
    'scaled_total_volume',
    'n_swap',
    'interval_swap',
    'R_ewma',
    'volatility_ewm',
    'ma24',
    'ma168',
    'bb_upper',
    'bb_middle',
    'bb_lower',
    'adxr',
    'dx',
    'n_mb',
]

# Reduced set: the list above without the ma*/bb_*/adxr/dx columns.
features_list_micro = [
    'volume_imbalance',
    'scaled_total_volume',
    'n_swap',
    'interval_swap',
    'R_ewma',
    'volatility_ewm',
    'n_mb',
]

In [None]:
# Cluster on the full feature list. The third argument (3) is presumably the number of
# clusters, and the return is presumably (fitted k-means model, frame with a `km_label`
# column) — verify in features_clustering.
# NOTE(review): df_features is passed untruncated (not cut at IS_end); if `clustering`
# fits on everything it receives, the out-of-sample labels carry look-ahead — confirm.
km_all,df_all = clustering(df_features,features_list_all,3)


In [None]:
# Keep only the in-sample rows (index <= IS_end) of the clustered frame, then plot them.
_in_sample_rows_all = df_all.index <= IS_end
df_all_IS = df_all.loc[_in_sample_rows_all].copy()
plot_pca(df_all_IS, 'all')
    

In [None]:
# Presumably draws a heatmap of the cluster centroids (per the helper's name) for the
# all-features model, using the in-sample rows only — verify in features_clustering.
plot_cen_hm(df_all_IS,km_all,'all features')

In [None]:
# Cluster on the micro feature list only (same call shape as the all-features model).
km_micro, df_micro = clustering(df_features, features_list_micro, 3)

# In-sample rows (index <= IS_end) of the clustered frame.
df_micro_IS = df_micro[df_micro.index <= IS_end].copy()

# Plot the in-sample subset, matching the all-features PCA plot and the centroid
# heatmap for this model; plotting the full-sample frame here would mix
# out-of-sample points into an in-sample diagnostic.
plot_pca(df_micro_IS, 'micro only')


In [None]:
# Presumably draws a heatmap of the cluster centroids (per the helper's name) for the
# micro-only model, using the in-sample rows only — verify in features_clustering.
plot_cen_hm(df_micro_IS,km_micro,'micro only')

In [None]:
# Extra columns written to the exported CSVs alongside the clustering features.
keep_list = [
    'scaled_volume_WETH',
    'scaled_volume_USDC',
    'closed_price',
    'km_label',
]

In [None]:
# Write the all-features columns followed by the kept columns to CSV.
_export_cols_all = features_list_all + keep_list
df_all[_export_cols_all].to_csv('./data/hourly_features_all.csv')

In [None]:
# Write the micro-feature columns followed by the kept columns to CSV.
_export_cols_micro = features_list_micro + keep_list
df_micro[_export_cols_micro].to_csv('./data/hourly_features_micro.csv')