In [1]:
"""상대 모멘텀 전략"""
import os
import glob
import pandas as pd
import numpy as np
import datetime

# 전처리 함수
def data_preprocessing(sample, ticker, base_date):
    sample['CODE'] = ticker
    sample = sample[sample['Date'] >= base_date][['Date', 'CODE', 'Close']].copy()
    sample.reset_index(inplace=True, drop=True)  # 기존 인덱스열 삭제
    sample['STD_YM'] = sample['Date'].map(lambda x : datetime.datetime.strptime(x, '%Y-%m-%d').strftime('%Y-%m'))
    sample['1M_RET'] = 0.0  # 기준 연월, 1개월간 수익률 추가
    ym_keys = list(sample['STD_YM'].unique())  
    return sample, ym_keys

In [4]:
# 데이터 불러와 가공하기

files = glob.glob('../data2/*.csv')  # glob()함수로 주어진 경로의 모든 csv파일을 하나씩 불러올 수 있다
month_last_df = pd.DataFrame(columns=['Date', 'CODE', '1M_RET'])
stock_df = pd.DataFrame(columns=['Date', 'CODE', 'Close'])

for file in files:
    if os.path.isdir(file):
        continue
    else:
        folder, name = os.path.split(file)  # 디렉터리 부분이랑 파일 부분으로 나눈다
        head, tail = os.path.splitext(name)  # 확장자 부분이랑 그 나머지로 나눈다(tail은 '.csv'가 되겠지)
        read_df = pd.read_csv(file)
        
        price_df, ym_keys = data_preprocessing(read_df, head, base_date='2000-03-12')
        stock_df = stock_df.append(price_df.loc[:, ['Date', 'CODE', 'Close']], sort=False)
        
        for ym in ym_keys:
            m_ret = price_df.loc[price_df[price_df['STD_YM'] == ym].index[-1], 'Close'] / price_df.loc[price_df[price_df['STD_YM'] == ym].index[0], 'Close']
            price_df.loc[price_df['STD_YM'] == ym, ['1M_RET']] == m_ret
            month_last_df = month_last_df.append(price_df.loc[price_df[price_df['STD_YM'] == ym].index[-1], ['Date', 'CODE', '1M_RET']])
    
print(stock_df)  # 일자별 종목의 수정 종가
print(month_last_df)  # 과거 1개월간 수익률

           Date CODE   Close
0    2019-01-02  ABT   69.50
1    2019-01-03  ABT   66.22
2    2019-01-04  ABT   68.11
3    2019-01-07  ABT   69.13
4    2019-01-08  ABT   68.30
..          ...  ...     ...
543  2021-03-01  MMM  176.41
544  2021-03-02  MMM  177.11
545  2021-03-03  MMM  178.06
546  2021-03-04  MMM  177.63
547  2021-03-05  MMM  180.80

[3836 rows x 3 columns]
           Date CODE  1M_RET
20   2019-01-31  ABT     0.0
39   2019-02-28  ABT     0.0
60   2019-03-29  ABT     0.0
81   2019-04-30  ABT     0.0
103  2019-05-31  ABT     0.0
..          ...  ...     ...
482  2020-11-30  MMM     0.0
504  2020-12-31  MMM     0.0
523  2021-01-29  MMM     0.0
542  2021-02-26  MMM     0.0
547  2021-03-05  MMM     0.0

[189 rows x 3 columns]


In [6]:
# 상대 모멘텀 지수 계산

month_ret_df = month_last_df.pivot('Date', 'CODE', '1M_RET').copy()  # 데이터 재구조화
month_ret_df = month_ret_df.rank(axis=1, ascending=False, method='max', pct=True)
month_ret_df = month_ret_df.where(month_ret_df < 0.4, np.nan)
month_ret_df.fillna(0, inplace=True)
month_ret_df[month_ret_df != 0] = 1
stock_codes = list(stock_df['CODE'].unique())
month_ret_df

CODE,ABT,ACN,ADBE,AES,AMD,ATVI,MMM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-02-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-03-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-04-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-05-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-06-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-07-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-08-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-09-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0
