<a href="https://colab.research.google.com/github/shinjangwoon/TIL/blob/master/Small_cap_low_pbr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 소형주 + 저PBR 전략

|레벨|스타일|기대CAGR|종목개수|매수전략|
|:--:|:--:|:--:|:--:|:--:|
|초,중급|밸류|20% 이상|20 ~ 30개|- 소형주(시가총액 하위 20%) <br/><br/> - PBR 하위부터<br/>(PBR > 0.2)|

# Load Dataset

In [None]:
import pandas as pd
from urllib import request as rq
from bs4 import BeautifulSoup
from tqdm import tqdm

## Get All Stock Codes

In [None]:
# Load the listed-company table. The stock-code column is forced to text so
# that codes such as '000155' can never lose their leading zeros through
# numeric dtype inference (it only stayed text before because some codes,
# e.g. '00088K', contain letters).
company = pd.read_csv('company.csv', dtype={'종목코드': str})
company.head()

Unnamed: 0,분류,회사명,종목코드,업종,주요제품,상장일,결산월,대표자명,홈페이지,지역,구글코드
0,코스피,두산우,000155,회사 본부 및 경영 컨설팅 서비스업,"청주,소주,인쇄회로기판(인쇄회로용동박적층판),관련전자부품,기계 제조,판매/출판사업,...",1973-06-29,12월,"박정원, 이재경",http://www.doosancorp.co.kr,서울특별시,KRX:000155
1,코스피,한화3우B,00088K,기타 화학제품 제조업,"화약,화공품,자동선반류,공작기계 제조,전자교환기,건설,정보통신,종합무역/임직원 및 ...",1976-06-24,12월,"최양수, 이태종, 김연철, 이민석",http://www.hanwhacorp.co.kr,서울특별시,KRX:00088K
2,코스피,S-Oil우,010955,석유 정제품 제조업,"석유제품,가스,윤활기유,윤활유,그리스,석유화학제품 제조,도매",1987-05-27,12월,Othman Al-Ghamdi,http://www.s-oil.com,서울특별시,KRX:010955
3,코스피,LG화학우,051915,기초 화학물질 제조업,"유화/기능/합성수지,재생섬유소,산업재,리튬이온전지,평광판,PVC 제조,도매",2001-04-25,12월,박진수,http://www.lgchem.com,서울특별시,KRX:051915
4,코스피,한국금융지주우,071055,기타 금융업,금융지주회사,2003-07-21,12월,김남구,http://www.koreaholdings.com,서울특별시,KRX:071055


In [None]:
# Every stock code in the company table, with missing entries removed.
code_list = company.loc[:, '종목코드'].dropna()
code_list.head(10)

0    000155
1    00088K
2    010955
3    051915
4    071055
5    078935
6    096775
7    120115
8    000815
9    005387
Name: 종목코드, dtype: object

In [None]:
# Stock codes split by the market recorded in the '분류' column
# ('코스피' = KOSPI, '코스닥' = KOSDAQ); row filter and column pick in one .loc call.
kospi_code = company.loc[company['분류'] == '코스피', '종목코드']
kosdaq_code = company.loc[company['분류'] == '코스닥', '종목코드']

## Get Market Value

In [None]:
# Sample stock code used to prototype the parsing on one FnGuide snapshot page.
code = '005930'

snap_url = 'http://comp.fnguide.com/SVO2/ASP/SVD_Main.asp?pGB=1&gicode=A%s&cID=&MenuYn=Y&ReportGB=&NewMenuID=101&stkGb=701' % code

# Close the HTTP response as soon as the body is read instead of leaving the
# socket open until garbage collection.
with rq.urlopen(snap_url) as response:
    snap = response.read()
snap_soup = BeautifulSoup(snap, 'html.parser')

In [None]:
# All <td class="r"> cells inside the 'svdMainGrid1' block; the cell at
# position 6 is the one parsed as the market value.
mv_cells = snap_soup.find('div', id='svdMainGrid1').find_all('td', class_='r')

# Strip thousands separators before converting to a number.
float(mv_cells[6].string.replace(',', ''))

5455577.0

## Get PBR

In [None]:
# All <dd> entries inside the 'corp_group2' block; the entry at position 7
# is the one parsed as the PBR.
pbr_cells = snap_soup.find('div', id='corp_group2').find_all('dd')

# Strip thousands separators before converting to a number.
float(pbr_cells[7].string.replace(',', ''))

2.06

# Create Index DataFrame

In [None]:
def filteringDf(li):
    """Scrape market value and PBR for each stock code and tabulate them.

    For every code in ``li`` the FnGuide snapshot page is downloaded and two
    figures are parsed out of it. A code is skipped when its page lacks the
    expected elements, when a figure is not numeric, or when the code has no
    entry in ``company``; one summary warning reports how many were skipped.

    Parameters
    ----------
    li : iterable of str
        Stock codes as they appear in ``company['종목코드']``.

    Returns
    -------
    pandas.DataFrame
        Indexed by company name, with columns ``id`` (the stock code),
        ``Market_Value`` and ``PBR`` (both float64). When nothing could be
        parsed the frame is empty but still carries these three columns.

    Notes
    -----
    * Relies on the notebook-level ``company`` frame for the code -> name
      lookup.
    * Network failures (``URLError`` / ``HTTPError``) are not caught here and
      propagate to the caller.
    * Rows are keyed by company name, so if two codes resolve to the same
      name only the last one is kept.
    """
    import warnings  # local import keeps this cell self-contained

    column_name = ['id', 'Market_Value', 'PBR']
    rows = {}
    skipped = []

    for i in tqdm(li):
        try:
            snap_url = 'http://comp.fnguide.com/SVO2/ASP/SVD_Main.asp?pGB=1&gicode=A%s&cID=&MenuYn=Y&ReportGB=&NewMenuID=101&stkGb=701' % i

            # Close the HTTP response as soon as the body has been read.
            with rq.urlopen(snap_url) as response:
                snap = response.read()
            snap_soup = BeautifulSoup(snap, 'html.parser')

            # get market value
            mv_cells = snap_soup.find('div', {'id':'svdMainGrid1'}).find_all('td', {'class':'r'})
            mv = float(mv_cells[6].string.replace(',',''))

            # get pbr
            pbr_cells = snap_soup.find('div', {'id':'corp_group2'}).find_all('dd')
            pbr = float(pbr_cells[7].string.replace(',',''))

            # get company name
            name = company[company['종목코드'] == i]['회사명'].values[0]

            rows[name] = [i, mv, pbr]

        except (TypeError, IndexError, AttributeError, ValueError):
            # Best effort: a page without the expected cells or with a
            # non-numeric figure is skipped, but remembered so the skip is
            # reported instead of vanishing silently.
            skipped.append(i)

    if skipped:
        warnings.warn(
            'filteringDf: skipped %d code(s) whose snapshot page could not be parsed'
            % len(skipped),
            RuntimeWarning,
            stacklevel=2,
        )

    # Build the frame row-wise with the column labels supplied up front, so an
    # empty result is still a well-formed three-column frame (assigning the
    # labels after a transpose raised a length-mismatch ValueError in that case).
    filtering = pd.DataFrame(list(rows.values()), index=list(rows), columns=column_name)

    # Numeric dtypes make the later comparisons and sorts operate on floats
    # rather than on generic Python objects.
    return filtering.astype({'Market_Value': 'float64', 'PBR': 'float64'})

In [None]:
# Trial run on a 30-code positional slice of the code list before scraping everything.
filtering = filteringDf(code_list.iloc[1000:1030])
filtering.head(20)


100%|██████████| 30/30 [00:08<00:00,  3.52it/s]


Unnamed: 0,id,Market_Value,PBR
퓨쳐스트림네트웍스,214270,1619,2.18
포시에스,189690,799,1.41
덕산네오룩스,213420,13518,7.01
국일신동,60480,599,1.27
바디텍메드,206640,5273,4.2
썸에이지,208640,4391,18.33
와이제이엠게임즈,193250,1427,2.15
화이브라더스코리아,204630,997,3.11
하이셈,200470,1260,1.87
휴메딕스,200670,3595,2.53


In [None]:
# Scrape every code in parallel: the code list is cut into chunks and each
# chunk is handed to filteringDf in a separate worker process.

from multiprocessing import Pool
import numpy as np

num_partitions = 12
num_cores = 4

# Split positions rather than the Series itself: every chunk stays a pandas
# Series and the split does not go through the deprecated Series.swapaxes
# path that np.array_split triggers on pandas objects. Chunk sizes are the
# same as np.array_split(code_list, num_partitions) would give.
splitted_code_list = [
    code_list.iloc[positions]
    for positions in np.array_split(np.arange(len(code_list)), num_partitions)
]

# The context manager tears the pool down even when a worker raises, so no
# worker processes are left behind in the kernel. map() blocks until every
# chunk is done, so all results are collected before the pool exits.
with Pool(num_cores) as pool:
    filtering = pd.concat(pool.map(filteringDf, splitted_code_list))

filtering.head(20)

100%|██████████| 169/169 [08:08<00:00,  2.89s/it]
100%|██████████| 169/169 [08:37<00:00,  3.06s/it]
100%|██████████| 169/169 [08:41<00:00,  3.09s/it]
100%|██████████| 169/169 [08:50<00:00,  3.14s/it]
100%|██████████| 169/169 [04:41<00:00,  1.67s/it]
100%|██████████| 168/168 [04:24<00:00,  1.58s/it]
100%|██████████| 168/168 [04:21<00:00,  1.56s/it]
100%|██████████| 168/168 [04:19<00:00,  1.54s/it]
100%|██████████| 168/168 [05:21<00:00,  1.91s/it]
100%|██████████| 168/168 [05:15<00:00,  1.88s/it]
100%|██████████| 168/168 [05:18<00:00,  1.89s/it]
100%|██████████| 168/168 [05:14<00:00,  1.87s/it]


Unnamed: 0,id,Market_Value,PBR
AJ네트웍스,95570,2772,0.96
AJ렌터카,68400,2503,1.01
BGF리테일,27410,10586,0.51
BNK금융지주,138930,31648,0.45
CJ헬로비전,37560,8016,0.81
DGB금융지주,139130,18268,0.47
DSR,155660,840,0.6
GKL,114090,16608,3.12
GS리테일,7070,32109,1.58
JB금융지주,175330,9202,0.38


# Searching Stocks

In [None]:
# Keep only the rows whose PBR is strictly greater than 0.2.
filtering = filtering.loc[filtering['PBR'].gt(0.2)]
filtering.head()

Unnamed: 0,id,Market_Value,PBR
국일신동,60480,599,1.27
셀바스헬스케어,208370,610,2.43
지란지교시큐리티,208350,624,1.51
포시에스,189690,799,1.41
디티앤씨,187220,838,1.03


In [None]:
# Order ascending by market value so the smallest caps come first.
filtering = filtering.sort_values('Market_Value', ascending=True)
filtering.head(20)

Unnamed: 0,id,Market_Value,PBR
국일신동,60480,599,1.27
셀바스헬스케어,208370,610,2.43
지란지교시큐리티,208350,624,1.51
포시에스,189690,799,1.41
디티앤씨,187220,838,1.03
서전기전,189860,862,2.15
하이로닉,149980,939,1.69
화이브라더스코리아,204630,997,3.11
하이셈,200470,1260,1.87
아이티센,124500,1264,2.24


In [None]:
# Continue on an independent deep copy; `filtering` itself is left as it is.
copy_df = filtering.copy(deep=True)
copy_df.head()

Unnamed: 0,id,Market_Value,PBR
국일신동,60480,599,1.27
셀바스헬스케어,208370,610,2.43
지란지교시큐리티,208350,624,1.51
포시에스,189690,799,1.41
디티앤씨,187220,838,1.03


In [None]:
# Drop SPAC listings: rows whose index label contains '스팩' or 'SPAC'.
copy_df = copy_df.loc[~copy_df.index.str.contains('스팩|SPAC', regex=True)]
print(copy_df.shape)
copy_df.head(20)

(28, 3)


Unnamed: 0,id,Market_Value,PBR
국일신동,60480,599,1.27
셀바스헬스케어,208370,610,2.43
지란지교시큐리티,208350,624,1.51
포시에스,189690,799,1.41
디티앤씨,187220,838,1.03
서전기전,189860,862,2.15
하이로닉,149980,939,1.69
화이브라더스코리아,204630,997,3.11
하이셈,200470,1260,1.87
아이티센,124500,1264,2.24


In [None]:
# 20% small cap
# Row count of the bottom 20%, rounded up with exact integer arithmetic
# (equivalent to ceil(len / 5)). The earlier int(len * 0.2) + 1 picked one
# row too many whenever the row count was a multiple of 5 (e.g. 3 of 10).
num_quantile = (len(copy_df) + 4) // 5

# Explicit positional slice of the first rows; this is the small-cap end only
# if copy_df is still ordered ascending by Market_Value.
copy_df = copy_df.iloc[:num_quantile]
copy_df.head(20)

Unnamed: 0,id,Market_Value,PBR
국일신동,60480,599,1.27
셀바스헬스케어,208370,610,2.43
지란지교시큐리티,208350,624,1.51
포시에스,189690,799,1.41
디티앤씨,187220,838,1.03
서전기전,189860,862,2.15


In [None]:
# Low PBR: rank ascending by PBR and keep at most the first 30 rows.
copy_df = copy_df.sort_values('PBR').iloc[:30]
copy_df

Unnamed: 0,id,Market_Value,PBR
디티앤씨,187220,805,0.99
국일신동,60480,571,1.21
지란지교시큐리티,208350,618,1.5
포시에스,189690,847,1.5
서전기전,189860,623,1.56
셀바스헬스케어,208370,599,2.39
