# 7장. 멀티 팩터 전략

In [1]:
from matplotlib import rc
from collections import defaultdict
from typing import Optional, Dict

import FinanceDataReader as fdr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import seaborn as sns

from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from tqdm import tqdm

from data.data_loader import PykrxDataLoader

## 1. 거시 경기 데이터

In [2]:
# Korean font setup for matplotlib.
import platform

os_system = platform.system()
# Choose a font family by operating system; anything that is neither
# Windows nor macOS ('Darwin') falls back to Arial.
font_name = {
    'Windows': 'Malgun Gothic',
    'Darwin': 'AppleGothic',
}.get(os_system, 'Arial')
rc('font', family=font_name)

# Turn off matplotlib's Unicode minus option (commonly done so that minus
# signs still render with fonts that lack the Unicode minus glyph).
plt.rcParams['axes.unicode_minus'] = False

# English identifier -> Korean label lookup.
eng_to_kor = {
    'relative': '모멘텀',
    'per': 'PER',
    'pbr': 'PBR',
    'dividend': '배당',
    'small': '소형주',
    'lowvol': '로우볼',
    'individual': '개인 수급 주체',
    'institutional': '기관 수급 주체',
    'foreign': '외국인 수급 주체',
    'cluster': '군집',
    'factor': '전략',
    'date': '날짜',
    'real': '실제',
    'pred': '예측',
    'accuracy': '정확도',
}

### 1.1. 거시 경기 데이터 불러오기

In [3]:
# Date window (presumably the range requested when the series are loaded).
fromdate = '2012-11-01'
todate = '2021-12-30'

# Symbols of the macro series to load. Entries prefixed with 'FRED:' are
# FRED series identifiers; the others are ticker-style symbols.
# NOTE: every element needs its own trailing comma. Adjacent string literals
# are silently concatenated by Python, which would merge two symbols into a
# single invalid one.
macro_name = {
    # Major indices
    'KS200',     # KOSPI 200
    'US500',
    'SSEC',
    'VIX',

    'CL',
    'GC',
    'HG=F',

    'KRW/USD',
    'KRW/CNY',

    'US5YT',
    'US30YT',
    'FRED:T10Y3M',

    'FRED:M1SL',
    'FRED:M2',
    'FRED:HSN1F',
    'FRED:T5YIFR',
    'FRED:UNRATE',

    'FRED:MANMM101KRM189S',
    'FRED:MYAGM2KRM189S',
    'FRED:KORCPIALLMINMEI',
    'FRED:KORLOLITONOSTSAM',
    'FRED:XTEXVA01KRM664S',
    'FRED:XTIMVA01KRM667S',
}

In [4]:
def macro_data_loader(fromdate: str, todate: str,
                      date_list: list) -> pd.DataFrame:
    """Load several series and left-join them onto a daily calendar.

    Args:
        fromdate: First date of the calendar and of every request.
        todate: Last date of the calendar and of every request.
        date_list: Symbols handed one by one to ``fdr.DataReader``.

    Returns:
        A frame with one row per calendar day between ``fromdate`` and
        ``todate``, a ``date`` column, and the columns contributed by each
        loaded series. Days without an observation hold NaN.
    """
    calendar = pd.date_range(start=fromdate, end=todate)
    merged = pd.DataFrame({'DATE': calendar})

    for symbol in date_list:
        # Fetch the series for this symbol.
        loaded = fdr.DataReader(symbol, fromdate, todate)
        # For OHLCV-style frames keep only the close, named after the symbol.
        if 'Close' in loaded.columns:
            loaded = loaded[['Close']].rename(columns={'Close': symbol})
        # The loaded frame is matched on its index against the calendar.
        merged = merged.merge(loaded, how='left',
                              left_on='DATE', right_index=True)

    merged = merged.rename(columns={'DATE': 'date'})
    return merged

### 1.2. 거시 경기 데이터 전처리

In [5]:
def macro_preprocess(df: pd.DataFrame, fromdate: str,
                     todate:str) -> pd.DataFrame:
    """Forward-fill the macro frame and keep only business-day rows.

    Args:
        df: Frame with a ``date`` column and one column per series, with
            rows in chronological order (forward fill follows row order).
        fromdate: Start of the range passed to ``PykrxDataLoader``.
        todate: End of the range passed to ``PykrxDataLoader``.

    Returns:
        The rows whose ``date`` is among the dates returned by
        ``PykrxDataLoader(...).get_business_days()``, forward-filled, with
        any row that still contains NaN removed.
    """
    business_day_list = pd.to_datetime(
        PykrxDataLoader(fromdate=fromdate, todate=todate).get_business_days())

    # Forward-fill on the full calendar BEFORE selecting business days.
    # Filtering first would throw away observations stamped on a
    # non-business day (e.g. a monthly value dated the 1st when that day is
    # a weekend or holiday), leaving the previous value in place instead.
    filled = df.ffill()
    on_business_days = filled[filled['date'].isin(business_day_list)]

    # Leading rows before a series' first observation cannot be filled.
    return on_business_days.dropna()

### 1.3. 거시 경기 데이터 증강

In [1]:
def macro_direction(df: pd.DataFrame, days: int) -> pd.DataFrame:
    """Compute a normalised rolling trend slope for every non-date column.

    For each window of ``days`` consecutive rows, the ordinary least-squares
    slope of the values against 0..days-1 is divided by the mean absolute
    value of the window.

    Args:
        df: Frame with a ``date`` column plus numeric series columns.
        days: Rolling window length in rows.

    Returns:
        A frame without the ``date`` column whose columns are suffixed with
        ``_{days}``. Rows before the first full window are NaN; later gaps
        are forward-filled.
    """
    # The regressor is the same for every window, so centre it once.
    # Because it sums to zero, the OLS slope is simply (x . y) / (x . x).
    x_centered = np.arange(days, dtype=float) - (days - 1) / 2
    x_sq_sum = float(x_centered @ x_centered)

    def _feature_direction(window: np.ndarray) -> float:
        # Return a plain scalar: rolling.apply expects one number per window.
        # A single-row window has no spread in x; use a zero slope there.
        slope = float(x_centered @ window) / x_sq_sum if x_sq_sum else 0.0
        return slope / np.abs(window).mean()

    valid_columns = df.columns.drop('date')
    # raw=True passes each window as an ndarray, avoiding the cost of
    # building a Series for every window.
    feature_direction_df = df[valid_columns].rolling(days).apply(
        _feature_direction, raw=True)

    return feature_direction_df.add_suffix(f'_{days}').ffill()

NameError: name 'pd' is not defined