### Custom Returns Check

In [112]:
### 라이브러리
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

import datetime

### Configuration
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(4)

def trim_date(df, start, end):
    """Return the rows of ``df`` whose ``date`` falls within ``[start, end]``.

    The ``date`` column is converted to strings. If the first value is an
    8-character string (``YYYYMMDD``), every value is rewritten as
    ``YYYY-MM-DD``. The bounds are compared as strings, so ``start`` and
    ``end`` should be given in the same ``YYYY-MM-DD`` form.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column. The caller's frame is not modified.
    start, end : str
        Inclusive lower and upper bounds.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with ``date`` as normalized strings.
    """
    # Work on a copy so the caller's frame is left untouched.
    trimmed = df.copy()
    dates = trimmed['date'].astype(str)

    # Inspect the first row by position: the index need not contain the
    # label 0, and the frame may be empty.
    if not dates.empty and len(dates.iloc[0]) == 8:
        dates = dates.str[0:4] + '-' + dates.str[4:6] + '-' + dates.str[6:]

    trimmed['date'] = dates
    return trimmed[(dates >= start) & (dates <= end)]

In [113]:
### Load the base data set and index it by date
test_df = pd.read_csv("data/test/test_df.csv").set_index('date')

### Load the target series and restrict it to the date span covered by test_df
target_event_receive_df = pd.read_csv("./data/target/event_2_day_transactions_over_100_receive.csv")
target_event_receive_df.columns = ['date', 'transaction_count', 'transaction_amount', 'transaction_flag']
target_event_receive_df = trim_date(target_event_receive_df, test_df.index[0], test_df.index[-1])


### Bitcoin event flag, optionally restricted to one sentiment class
# Accepted values: 'pos', 'neg', 'neu', 'all'
event_sentiment = 'neg'
# Value of the 'classification' column selected by each sentiment key.
event_class_by_sentiment = {'pos': 1, 'neg': 0, 'neu': 2}

event_df = pd.read_csv("./data/event/bitcoin_event_details_sentiment.csv")
# 'Date' is rewritten from YYYYMMDD to YYYY-MM-DD.
# NOTE(review): assumes this matches the format of the test_df index - confirm.
event_df.index = event_df['Date'].astype(str).apply(lambda x: x[0:4]+'-'+x[4:6]+'-'+x[6:])

if event_sentiment != 'all':
    if event_sentiment not in event_class_by_sentiment:
        # A mistyped key previously fell through and silently behaved like 'all'.
        raise ValueError(f"Unknown event_sentiment: {event_sentiment!r}")
    event_df = event_df[event_df['classification'] == event_class_by_sentiment[event_sentiment]]

# Reduce to a Series of 1s keyed by event date. assign() builds a new frame,
# so the filtered slice above is never written to (no SettingWithCopyWarning).
event_df = event_df.assign(classification=1)['classification']
# Keep one entry per date so the index-aligned assignment below is unambiguous.
event_df = event_df[~event_df.index.duplicated(keep='first')]
test_df['event_flag'] = event_df
# Dates without an event get flag 0.
test_df['event_flag'] = test_df['event_flag'].fillna(0)
event_flag_copy = test_df['event_flag'].values


### Select the series to analyze
target_df = target_event_receive_df.set_index('date')

### Bring the target columns into test_df (aligned on the date index)
test_df['transaction_count'] = target_df['transaction_count']
test_df['transaction_amount'] = target_df['transaction_amount']
test_df['transaction_flag'] = target_df['transaction_flag']
test_df['transaction_amount_usd'] = test_df['transaction_amount'] * test_df['close']

### Choose the target; exact zeros become 1e-10 so pct_change never divides by zero
test_df['target'] = test_df['transaction_amount_usd'].mask(test_df['transaction_amount_usd'] == 0, 1e-10)

### Target-derived features
test_df['target_delta'] = test_df['target'].diff(1)
# Recomputed after imputation at the end of this cell; created here so the
# column keeps its position in the frame.
test_df['target_returns'] = test_df['target'].pct_change(1)

### VPIN features
vpin = pd.read_csv('./data/vpin/vpin.csv')
vpin['ma_10'] = vpin['vpin'].rolling(10).mean()
# Rows without a full 10-value window are NaN; fill them with the mean of the moving average.
vpin['ma_10'] = vpin['ma_10'].fillna(vpin['ma_10'].mean())
vpin = vpin.set_index('date')
test_df['vpin'] = vpin['vpin']
test_df['vpin_ma_10'] = vpin['ma_10']

### Snapshot of the 'returns' column before imputation
returns_copy = test_df['returns'].copy()

##### Preprocessing: turn +/-inf into NaN, then impute NaN with the column mean
test_df = test_df.replace([np.inf, -np.inf], np.nan)
# numeric_only=True keeps this working if a non-numeric column is present
# (DataFrame.mean raises on such columns in pandas >= 2.0).
test_df = test_df.fillna(test_df.mean(numeric_only=True))

# Recompute from the imputed target. This statement was indented in the
# original, which is an IndentationError at the top level of a cell.
test_df['target_returns'] = test_df['target'].pct_change(1)


### Custom Testing용 파생변수 생성

In [114]:
# Trading flags from the day-over-day change in USD transaction amount.
# NaN (first row) and +/-inf (previous amount was 0) are left in place here and
# handled by the frame-wide imputation further down. The original chained
# `.fillna(test_df.mean())` onto each Series, but that aligns column-name labels
# against the date index and therefore fills nothing.
test_df['transaction_amount_returns'] = test_df['transaction_amount_usd'].pct_change(1)
threshold = 0
amount_returns = test_df['transaction_amount_returns']
# 1 above +threshold, -1 below -threshold, 0 otherwise (NaN compares False -> 0).
# +inf is already > threshold, so the original's separate `x == np.inf` branch
# could never be reached and is dropped.
test_df['transaction_amount_trading_flag'] = np.select(
    [amount_returns > threshold, amount_returns < -threshold], [1, -1], default=0
)
# Opposite sign of the change: -1 on an increase, 1 on a decrease, 0 otherwise.
test_df['transaction_amount_trading_reversion'] = np.select(
    [amount_returns > 0, amount_returns < 0], [-1, 1], default=0
)

# Lagged returns (shift(-k) aligns the return k rows later with the current row)
# and multi-day returns measured from the current close.
test_df['returns'] = test_df['close'].pct_change(1)
test_df['returns_after_1day'] = test_df['returns'].shift(-1)
test_df['returns_after_2day'] = test_df['returns'].shift(-2)
test_df['returns_after_3day'] = test_df['returns'].shift(-3)
test_df['returns_during_2day'] = test_df['close'].pct_change(2).shift(-2)
test_df['returns_during_3day'] = test_df['close'].pct_change(3).shift(-3)

## Rates of change can contain +/-inf: convert them to NaN, then impute every
## NaN with its column mean.
test_df = test_df.replace([np.inf, -np.inf], np.nan)
# numeric_only=True keeps this working if a non-numeric column is present
# (DataFrame.mean raises on such columns in pandas >= 2.0).
test_df = test_df.fillna(test_df.mean(numeric_only=True))


# Evaluation window: rows whose date index is on or after 2023-01-01.
# Copy after filtering so the column assignments below write to an independent frame.
check_test_df = test_df[test_df.index >= '2023-01-01'].copy()

# adjusted_returns holds the return booked on each day; 0.0 (float) when no trade is taken.
check_test_df['adjusted_returns'] = 0.0

# Rows where the trading flag is -1.
trading_price_df = check_test_df[check_test_df['transaction_amount_trading_flag'] == -1]

# On those rows, book the 1-day return observed three rows later.
check_test_df.loc[trading_price_df.index, 'adjusted_returns'] = trading_price_df['returns_after_3day'].astype(float)

# Disabled alternative kept for reference: on flag == 1 rows, book the negated returns_after_2day.
# trading_price_df = check_test_df[check_test_df['transaction_amount_trading_flag']==1]
# check_test_df.loc[trading_price_df.index, 'adjusted_returns'] = trading_price_df['returns_after_2day'].apply(lambda x: -x)

In [115]:
### Baseline: mean of the 'returns' column over all rows
print("The mean of returns:", test_df['returns'].mean())


### Rows where target is at or above its 99th percentile
# NOTE(review): this check reads returns_after_3day, the bottom-1% check below
# reads returns_after_2day, and the baseline reads 'returns' - confirm the
# differing horizons are intentional.
threshold = test_df['target'].quantile(0.99)
returns_check_df = test_df.loc[test_df['target'].ge(threshold)]
print("The mean of Top 1% returns:", returns_check_df['returns_after_3day'].mean())

### Rows where target is at or below its 1st percentile
threshold = test_df['target'].quantile(0.01)
returns_check_df = test_df.loc[test_df['target'].le(threshold)]
print("The mean of Bottom 1% returns:", returns_check_df['returns_after_2day'].mean())

The mean of returns: 0.001747848142226556
The mean of Top 1% returns: -0.012996536484602424
The mean of Bottom 1% returns: 0.0063179081554285196


In [116]:
### Baseline: mean of returns_during_2day over all rows
print("The mean of returns:", test_df['returns_during_2day'].mean())


### Rows where target is at or above its 99th percentile
threshold = test_df['target'].quantile(0.99)
returns_check_df = test_df.loc[test_df['target'].ge(threshold)]
print("The mean of Top 1% returns:", returns_check_df['returns_during_2day'].mean())

### Rows where target is at or below its 1st percentile
threshold = test_df['target'].quantile(0.01)
returns_check_df = test_df.loc[test_df['target'].le(threshold)]
print("The mean of Bottom 1% returns:", returns_check_df['returns_during_2day'].mean())

The mean of returns: 0.0034761099831929886
The mean of Top 1% returns: -0.01407071342656633
The mean of Bottom 1% returns: 0.010804024530239764


In [117]:
### Baseline: mean of returns_during_3day over all rows
print("The mean of returns:", test_df['returns_during_3day'].mean())



### Rows where target is at or below its 1st percentile
threshold = test_df['target'].quantile(0.01)
returns_check_df = test_df.loc[test_df['target'].le(threshold)]
print("The mean of Bottom 1% returns:", returns_check_df['returns_during_3day'].mean())

The mean of returns: 0.00521905678927385
The mean of Bottom 1% returns: 0.023830265026681487


In [118]:
### Target delta: 3-day returns conditioned on extreme day-over-day changes in target
print("The mean of returns:", test_df['returns_during_3day'].mean())

### Rows where target_delta is at or above its 99th percentile
threshold = test_df.target_delta.quantile(0.99)
returns_check_df = test_df[test_df['target_delta']>=threshold]
print("The mean of Top 1% returns:", returns_check_df['returns_during_3day'].mean())

### Rows where target_delta is at or below its 5th percentile
# The cutoff is the 0.05 quantile, so the label reads 5% (it previously said 1%).
threshold = test_df.target_delta.quantile(0.05)
returns_check_df = test_df[test_df['target_delta']<=threshold]
print("The mean of Bottom 5% returns:", returns_check_df['returns_during_3day'].mean())

The mean of returns: 0.00521905678927385
The mean of Top 1% returns: -0.010974466268708781
The mean of Bottom 1% returns: -0.0020215576386497652


In [119]:
### Target delta: 3-day returns split by the sign of the day-over-day change in target
print("The mean of returns:", test_df['returns_during_3day'].mean())

# Both subsets use the same cutoff; rows with target_delta exactly 0 fall into both.
threshold = 0

### Rows where target_delta is at or above 0
returns_check_df = test_df.loc[test_df['target_delta'].ge(threshold)]
print("The mean of over 0 returns:", returns_check_df['returns_during_3day'].mean())

### Rows where target_delta is at or below 0
returns_check_df = test_df.loc[test_df['target_delta'].le(threshold)]
print("The mean of under 0 returns:", returns_check_df['returns_during_3day'].mean())

The mean of returns: 0.00521905678927385
The mean of over 0 returns: 0.007600824838020841
The mean of under 0 returns: 0.003358909433392784
