In [1]:
import pandas as pd
import numpy as np

In [2]:
kospi = pd.read_csv('kospi.csv')
columns_to_keep = ['회사명', '종목코드', '회계년도', '거래월', '종가']
kospi = kospi[columns_to_keep]
kospi

Unnamed: 0,회사명,종목코드,회계년도,거래월,종가
0,(주)DB하이텍,990,2013/12,12.0,7530.0
1,(주)DB하이텍,990,2014/12,12.0,4615.0
2,(주)DB하이텍,990,2015/12,12.0,14300.0
3,(주)DB하이텍,990,2016/12,12.0,15900.0
4,(주)DB하이텍,990,2017/12,12.0,12000.0
...,...,...,...,...,...
1550,효성티앤씨(주),298020,2018/12,12.0,185000.0
1551,효성티앤씨(주),298020,2019/12,12.0,154500.0
1552,효성티앤씨(주),298020,2020/12,12.0,211000.0
1553,효성티앤씨(주),298020,2021/12,12.0,521000.0


In [3]:
# 컬럼 값의 뒤에서 "/12"를 제거하는 함수
def remove_suffix(value):
    if isinstance(value, str) and value.endswith("/12"):
        return value[:-3]  # 마지막 세 글자 제거
    else:
        return value

# 모든 컬럼에 함수 적용
kospi = kospi.applymap(remove_suffix)
kospi

Unnamed: 0,회사명,종목코드,회계년도,거래월,종가
0,(주)DB하이텍,990,2013,12.0,7530.0
1,(주)DB하이텍,990,2014,12.0,4615.0
2,(주)DB하이텍,990,2015,12.0,14300.0
3,(주)DB하이텍,990,2016,12.0,15900.0
4,(주)DB하이텍,990,2017,12.0,12000.0
...,...,...,...,...,...
1550,효성티앤씨(주),298020,2018,12.0,185000.0
1551,효성티앤씨(주),298020,2019,12.0,154500.0
1552,효성티앤씨(주),298020,2020,12.0,211000.0
1553,효성티앤씨(주),298020,2021,12.0,521000.0


In [87]:

# '회계년도'와 '거래월'을 합쳐서 'Date' 컬럼 생성
kospi['Date'] = pd.to_datetime(kospi['회계년도'] + '/' + kospi['거래월'].astype(int).astype(str), format='%Y/%m')
kospi = kospi.drop(['회계년도', '거래월'], axis=1)  # '회계년도'와 '거래월' 컬럼 삭제

# 'Date' 컬럼을 인덱스로 설정
kospi = kospi.set_index('Date')
kospi


Unnamed: 0_level_0,회사명,종목코드,종가
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-01,(주)DB하이텍,990,6552.0
2013-02-01,(주)DB하이텍,990,6890.0
2013-03-01,(주)DB하이텍,990,6552.0
2013-04-01,(주)DB하이텍,990,6552.0
2013-05-01,(주)DB하이텍,990,8490.0
...,...,...,...
2022-08-01,효성티앤씨(주),298020,313500.0
2022-09-01,효성티앤씨(주),298020,259000.0
2022-10-01,효성티앤씨(주),298020,261500.0
2022-11-01,효성티앤씨(주),298020,357000.0


In [88]:
kospi['YearMonth'] = kospi.index.to_period('M')

In [89]:
# 회사명과 Date 컬럼을 인덱스로 설정
# kospi.set_index(['회사명', 'YearMonth'], inplace=True)

# 각 회사별로 월별 수익률 계산
kospi['수익률'] = kospi.groupby('회사명')['종가'].pct_change() * 10

# 결과 출력
kospi.iloc[200:250]

Unnamed: 0_level_0,회사명,종목코드,종가,YearMonth,수익률
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-09-01,(주)강원랜드,35250,29550.0,2019-09,0.460177
2019-10-01,(주)강원랜드,35250,31350.0,2019-10,0.609137
2019-11-01,(주)강원랜드,35250,29350.0,2019-11,-0.637959
2019-12-01,(주)강원랜드,35250,29600.0,2019-12,0.085179
2020-01-01,(주)강원랜드,35250,27550.0,2020-01,-0.692568
2020-02-01,(주)강원랜드,35250,22800.0,2020-02,-1.724138
2020-03-01,(주)강원랜드,35250,19750.0,2020-03,-1.337719
2020-04-01,(주)강원랜드,35250,25050.0,2020-04,2.683544
2020-05-01,(주)강원랜드,35250,24200.0,2020-05,-0.339321
2020-06-01,(주)강원랜드,35250,21500.0,2020-06,-1.115702


In [90]:
kospi.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 18345 entries, 2013-01-01 to 2022-12-01
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype    
---  ------     --------------  -----    
 0   회사명        18345 non-null  object   
 1   종목코드       18345 non-null  int64    
 2   종가         18345 non-null  float64  
 3   YearMonth  18345 non-null  period[M]
 4   수익률        18166 non-null  float64  
dtypes: float64(2), int64(1), object(1), period[M](1)
memory usage: 859.9+ KB


In [91]:
kospi.dropna(inplace=True)
kospi.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 18166 entries, 2013-02-01 to 2022-12-01
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype    
---  ------     --------------  -----    
 0   회사명        18166 non-null  object   
 1   종목코드       18166 non-null  int64    
 2   종가         18166 non-null  float64  
 3   YearMonth  18166 non-null  period[M]
 4   수익률        18166 non-null  float64  
dtypes: float64(2), int64(1), object(1), period[M](1)
memory usage: 851.5+ KB


In [92]:
kospi

Unnamed: 0_level_0,회사명,종목코드,종가,YearMonth,수익률
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-02-01,(주)DB하이텍,990,6890.0,2013-02,0.515873
2013-03-01,(주)DB하이텍,990,6552.0,2013-03,-0.490566
2013-04-01,(주)DB하이텍,990,6552.0,2013-04,0.000000
2013-05-01,(주)DB하이텍,990,8490.0,2013-05,2.957875
2013-06-01,(주)DB하이텍,990,6870.0,2013-06,-1.908127
...,...,...,...,...,...
2022-08-01,효성티앤씨(주),298020,313500.0,2022-08,-0.899855
2022-09-01,효성티앤씨(주),298020,259000.0,2022-09,-1.738437
2022-10-01,효성티앤씨(주),298020,261500.0,2022-10,0.096525
2022-11-01,효성티앤씨(주),298020,357000.0,2022-11,3.652008


In [93]:
kospi['전달 수익률'] = kospi['수익률'].shift()

In [94]:
kospi

Unnamed: 0_level_0,회사명,종목코드,종가,YearMonth,수익률,전달 수익률
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-02-01,(주)DB하이텍,990,6890.0,2013-02,0.515873,
2013-03-01,(주)DB하이텍,990,6552.0,2013-03,-0.490566,0.515873
2013-04-01,(주)DB하이텍,990,6552.0,2013-04,0.000000,-0.490566
2013-05-01,(주)DB하이텍,990,8490.0,2013-05,2.957875,0.000000
2013-06-01,(주)DB하이텍,990,6870.0,2013-06,-1.908127,2.957875
...,...,...,...,...,...,...
2022-08-01,효성티앤씨(주),298020,313500.0,2022-08,-0.899855,0.043732
2022-09-01,효성티앤씨(주),298020,259000.0,2022-09,-1.738437,-0.899855
2022-10-01,효성티앤씨(주),298020,261500.0,2022-10,0.096525,-1.738437
2022-11-01,효성티앤씨(주),298020,357000.0,2022-11,3.652008,0.096525


In [95]:
# 전달 수익률이 높은 순서로 정렬
kospi.sort_values(by='전달 수익률', ascending=False)


Unnamed: 0_level_0,회사명,종목코드,종가,YearMonth,수익률,전달 수익률
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-02-01,대우조선해양(주),42660,44800.0,2017-02,0.000000,58.376068
2016-06-01,KG스틸(주),16380,16600.0,2016-06,-0.828729,17.625153
2016-06-01,에이치엠엠(주),11200,14700.0,2016-06,-1.833333,17.472527
2020-08-01,신풍제약(주),19170,136500.0,2020-08,9.782609,12.847682
2015-04-01,한미사이언스(주),8930,43000.0,2015-04,2.536443,12.640264
...,...,...,...,...,...,...
2018-06-01,에이치엘만도(주),204320,38400.0,2018-06,-0.013004,-8.306167
2018-06-01,삼성전자(주),5930,46650.0,2018-06,-0.798817,-9.057270
2021-10-01,한일시멘트(주),300720,19300.0,2021-10,0.842697,-9.245763
2017-08-01,(주)오리온홀딩스,1800,27500.0,2017-08,-0.036232,-9.486798
