In [43]:
import pandas as pd
import numpy as np

In [44]:
kospi = pd.read_csv('kospi.csv')
corps_code = kospi['종목코드'].unique().tolist()
corps_name = kospi['회사명'].unique().tolist()
corps = pd.DataFrame({'corp_code':corps_code, 'corp_name':corps_name})
corps

Unnamed: 0,corp_code,corp_name
0,990,(주)DB하이텍
1,35250,(주)강원랜드
2,6280,(주)녹십자
3,5250,(주)녹십자홀딩스
4,4370,(주)농심
...,...,...
174,11210,현대위아(주)
175,5380,현대자동차(주)
176,4020,현대제철(주)
177,298050,효성첨단소재(주)


In [45]:
corps['corp_code'] = corps['corp_code'].astype(str).str.zfill(6)
corps['corp_name'] = [name.replace('(주)', '') for name in corps['corp_name']]
corps

Unnamed: 0,corp_code,corp_name
0,000990,DB하이텍
1,035250,강원랜드
2,006280,녹십자
3,005250,녹십자홀딩스
4,004370,농심
...,...,...
174,011210,현대위아
175,005380,현대자동차
176,004020,현대제철
177,298050,효성첨단소재


In [46]:
from dateutil.relativedelta import relativedelta
import requests as rq
from io import BytesIO
from datetime import date


adjust_price = pd.DataFrame(columns=['날짜', '종목', '종목코드', '종가'])

for i in range(179):

    ticker = corps['corp_code'][i]
    name = corps['corp_name'][i]
    # fr = ("20131230").strptime("%Y%m%d")
    # to = ("20221230").strptime("%Y%m%d")

    url = f'''https://fchart.stock.naver.com/siseJson.nhn?symbol={ticker}&requestType=1
    &startTime=20131101&endTime=20221231&timeframe=day'''

    data = rq.get(url).content
    data_price = pd.read_csv(BytesIO(data))


    price = data_price.iloc[:, 0:6]
    price.columns = ['날짜', '시가', '고가', '저가', '종가', '거래량']
    price = price.dropna()
    price['날짜'] = price['날짜'].str.extract('(\d+)')
    price['날짜'] = pd.to_datetime(price['날짜'])
    price['종목코드'] = ticker
    price['종목'] = name

    columns_to_keep = ['날짜', '종목', '종목코드', '종가']
    price = price[columns_to_keep]


    #4월만 추출
    price = price[price['날짜'].dt.month == 4]
    #첫 날만 추출
    price = price.sort_values('날짜').groupby(price['날짜'].dt.year).first()

    adjust_price = pd.concat([adjust_price, price], ignore_index=True)

adjust_price


Unnamed: 0,날짜,종목,종목코드,종가
0,2014-04-01,DB하이텍,000990,6640.0
1,2015-04-01,DB하이텍,000990,5880.0
2,2016-04-01,DB하이텍,000990,18100.0
3,2017-04-03,DB하이텍,000990,20350.0
4,2018-04-02,DB하이텍,000990,14150.0
...,...,...,...,...
1407,2022-04-01,효성첨단소재,298050,522000.0
1408,2019-04-01,효성티앤씨,298020,179000.0
1409,2020-04-01,효성티앤씨,298020,104000.0
1410,2021-04-01,효성티앤씨,298020,558000.0


In [None]:
adjust_price['수익률'] = adjust_price.groupby('종목')['종가'].pct_change() * 10
adjust_price

In [48]:
adjust_price.to_csv("./adjust_price.csv")

In [49]:
adjust_price['전년 수익률'] = adjust_price['수익률'].shift()
adjust_price.head(15)

Unnamed: 0,날짜,종목,종목코드,종가,수익률,전년 수익률
0,2014-04-01,DB하이텍,990,6640.0,,
1,2015-04-01,DB하이텍,990,5880.0,-1.144578,
2,2016-04-01,DB하이텍,990,18100.0,20.782313,-1.144578
3,2017-04-03,DB하이텍,990,20350.0,1.243094,20.782313
4,2018-04-02,DB하이텍,990,14150.0,-3.046683,1.243094
5,2019-04-01,DB하이텍,990,13000.0,-0.812721,-3.046683
6,2020-04-01,DB하이텍,990,20900.0,6.076923,-0.812721
7,2021-04-01,DB하이텍,990,57700.0,17.607656,6.076923
8,2022-04-01,DB하이텍,990,73400.0,2.720971,17.607656
9,2014-04-01,강원랜드,35250,30850.0,,2.720971


In [50]:
adjust_price = adjust_price.dropna()
adjust_price.head(15)

Unnamed: 0,날짜,종목,종목코드,종가,수익률,전년 수익률
2,2016-04-01,DB하이텍,990,18100.0,20.782313,-1.144578
3,2017-04-03,DB하이텍,990,20350.0,1.243094,20.782313
4,2018-04-02,DB하이텍,990,14150.0,-3.046683,1.243094
5,2019-04-01,DB하이텍,990,13000.0,-0.812721,-3.046683
6,2020-04-01,DB하이텍,990,20900.0,6.076923,-0.812721
7,2021-04-01,DB하이텍,990,57700.0,17.607656,6.076923
8,2022-04-01,DB하이텍,990,73400.0,2.720971,17.607656
11,2016-04-01,강원랜드,35250,41100.0,1.827338,1.264182
12,2017-04-03,강원랜드,35250,37400.0,-0.900243,1.827338
13,2018-04-02,강원랜드,35250,28500.0,-2.379679,-0.900243


In [51]:
# 날짜를 년도로 변환
adjust_price['날짜'] = pd.to_datetime(adjust_price['날짜'])
adjust_price['년도'] = adjust_price['날짜'].dt.year
adjust_price

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adjust_price['날짜'] = pd.to_datetime(adjust_price['날짜'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adjust_price['년도'] = adjust_price['날짜'].dt.year


Unnamed: 0,날짜,종목,종목코드,종가,수익률,전년 수익률,년도
2,2016-04-01,DB하이텍,000990,18100.0,20.782313,-1.144578,2016
3,2017-04-03,DB하이텍,000990,20350.0,1.243094,20.782313,2017
4,2018-04-02,DB하이텍,000990,14150.0,-3.046683,1.243094,2018
5,2019-04-01,DB하이텍,000990,13000.0,-0.812721,-3.046683,2019
6,2020-04-01,DB하이텍,000990,20900.0,6.076923,-0.812721,2020
...,...,...,...,...,...,...,...
1403,2022-04-01,현대제철,004020,40750.0,-1.208198,17.426036,2022
1406,2021-04-01,효성첨단소재,298050,371500.0,51.001642,-5.297297,2021
1407,2022-04-01,효성첨단소재,298050,522000.0,4.051144,51.001642,2022
1410,2021-04-01,효성티앤씨,298020,558000.0,43.653846,-4.189944,2021


In [52]:
momentum_list = [pd.DataFrame() for _ in range(7)]

for i, (year, group_df) in enumerate(adjust_price.groupby('년도')):
    sorted_df = group_df.sort_values('전년 수익률', ascending=False)
    momentum_list[i] = sorted_df  # sorted_df를 momentum_list에 추가
    print(f"---- {year} ----")
    print(sorted_df)

---- 2016 ----
             날짜         종목    종목코드        종가        수익률     전년 수익률    년도
701  2016-04-01     동원시스템즈  014820   74900.0   9.867374  30.537634  2016
947  2016-04-01     아이에스동서  010780   46550.0  -3.683853  20.901468  2016
158  2016-04-01   아모레퍼시픽그룹  002790  146163.0   0.054896  18.908798  2016
1255 2016-04-01     한미사이언스  008930  128671.0  33.447915  18.613527  2016
149  2016-04-01     아모레퍼시픽  090430  390000.0   1.684133  16.379363  2016
...         ...        ...     ...       ...        ...        ...   ...
683  2016-04-01       동국제강  001230    6970.0   1.793570  -4.744798  2016
840  2016-04-01    삼성엔지니어링  028050   10550.0  -5.540432  -4.751054  2016
460  2016-04-01     현대미포조선  010620   31985.0  -1.447632  -5.155884  2016
1325 2016-04-01  한화에어로스페이스  012450   39950.0   7.369565  -5.986038  2016
543  2016-04-01       KG스틸  016380   97087.0  -5.689677  -6.317760  2016

[140 rows x 7 columns]
---- 2017 ----
             날짜       종목    종목코드        종가       수익률     전년 수익률    년도


In [53]:
return_mean = [pd.DataFrame() for _ in range(7)]

for i in range(7):
    momentum_list[i]['quantile'] = pd.qcut(momentum_list[i]['전년 수익률'], 5, labels=False)
    quantile_means = momentum_list[i].groupby('quantile')['수익률'].mean()
    return_mean[i] = quantile_means
    print(quantile_means)

quantile
0   -0.302716
1    1.240096
2   -0.325351
3    1.812264
4    2.425580
Name: 수익률, dtype: float64
quantile
0    0.347587
1    1.106275
2    0.575415
3   -0.543599
4   -0.639684
Name: 수익률, dtype: float64
quantile
0    2.558393
1    2.272341
2    1.151040
3    0.300808
4    2.349232
Name: 수익률, dtype: float64
quantile
0   -0.435595
1   -0.406907
2   -0.490291
3   -1.447201
4    0.589336
Name: 수익률, dtype: float64
quantile
0   -3.222173
1   -2.900000
2   -2.975705
3   -3.109897
4   -2.879008
Name: 수익률, dtype: float64
quantile
0    12.200156
1    12.325975
2     7.825903
3     8.600144
4    13.076486
Name: 수익률, dtype: float64
quantile
0   -0.271050
1    0.850626
2   -0.353365
3   -0.055903
4   -0.863771
Name: 수익률, dtype: float64


In [54]:
re = pd.concat([return_mean[0],return_mean[1],return_mean[2],return_mean[3],return_mean[4],return_mean[5],return_mean[6]], axis=1)
re.columns = ['2016', '2017', '2018', '2019', '2020', '2021', '2022']
re


Unnamed: 0_level_0,2016,2017,2018,2019,2020,2021,2022
quantile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,-0.302716,0.347587,2.558393,-0.435595,-3.222173,12.200156,-0.27105
1,1.240096,1.106275,2.272341,-0.406907,-2.9,12.325975,0.850626
2,-0.325351,0.575415,1.15104,-0.490291,-2.975705,7.825903,-0.353365
3,1.812264,-0.543599,0.300808,-1.447201,-3.109897,8.600144,-0.055903
4,2.42558,-0.639684,2.349232,0.589336,-2.879008,13.076486,-0.863771


In [55]:
re.mean(axis=1)

quantile
0    1.553515
1    2.069772
2    0.772521
3    0.793802
4    2.008310
dtype: float64