# KOSDAQ Analysis.

### KOSDAQ index uses yfinance lib. 
### KOSDAQ stock price information uses finaceDataReader

In [13]:
import pandas as pd
import matplotlib.pyplot as plt

import yfinance as yf
import FinanceDataReader as fdr

### Get KOSDAQ index since 2000

In [14]:
kosdaq_ticker = '^KQ11'
kosdaq_data = yf.Ticker(kosdaq_ticker)
kosdaq_hist = kosdaq_data.history(period="max")

kosdaq_hist.reset_index(drop=False, inplace=True)

kosdaq_hist['Date'] = pd.to_datetime(kosdaq_hist['Date']).dt.strftime('%Y-%m-%d')
kosdaq_hist['Date'] = pd.to_datetime(kosdaq_hist['Date'])

print(kosdaq_hist.head())

        Date        Open        High         Low       Close  Volume  \
0 2000-10-16  858.500000  873.900024  851.299988  867.099976  193900   
1 2000-10-17  849.599976  860.099976  791.099976  804.000000  218200   
2 2000-10-18  788.500000  811.400024  758.200012  802.599976  241000   
3 2000-10-19  793.599976  820.400024  786.900024  798.400024  204500   
4 2000-10-20  834.700012  840.900024  822.500000  826.200012  221900   

   Dividends  Stock Splits  
0        0.0           0.0  
1        0.0           0.0  
2        0.0           0.0  
3        0.0           0.0  
4        0.0           0.0  


### Check imported data columns

In [3]:
# 특정 날짜의 데이터만 필터링하여 출력
specific_date = '2024-04-02'
filtered_data = kosdaq_hist.loc[kosdaq_hist.index == specific_date]
print(filtered_data)

Empty DataFrame
Columns: [Date, Open, High, Low, Close, Volume, Dividends, Stock Splits]
Index: []


### Get interest rate information from the Bank of Korea site
- 한국 금리 가져오기.

In [4]:
#https://www.bok.or.kr/portal/main/contents.do?menuNo=200096
fund = pd.read_csv('./data/kbf.csv')

fund

Unnamed: 0,통계표,계정항목,단위,변환,2000/01/04,2000/01/05,2000/01/06,2000/01/07,2000/01/08,2000/01/10,...,2024/04/18,2024/04/19,2024/04/22,2024/04/23,2024/04/24,2024/04/25,2024/04/26,2024/04/29,2024/04/30,2024/05/02
0,1.3.1. 한국은행 기준금리 및 여수신금리,한국은행 기준금리,연%,원자료,4.75,4.75,4.75,4.75,4.75,4.75,...,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5,3.5


### Reorder interest rate data

In [5]:
kbf_fund = fund.melt(id_vars=['통계표', '계정항목', '단위', '변환'], var_name='Date', value_name='KBF')
kbf_fund = kbf_fund.drop(columns=['통계표', '계정항목', '단위', '변환'])
kbf_fund['Date'] = kbf_fund['Date'].str.replace('/', '-').astype('datetime64[ns]')
kbf_fund['Date'] = pd.to_datetime(kbf_fund['Date'])

kbf_fund


Unnamed: 0,Date,KBF
0,2000-01-04,4.75
1,2000-01-05,4.75
2,2000-01-06,4.75
3,2000-01-07,4.75
4,2000-01-08,4.75
...,...,...
6073,2024-04-25,3.50
6074,2024-04-26,3.50
6075,2024-04-29,3.50
6076,2024-04-30,3.50


### Merge interest rate data into KOSDAQ index data

In [6]:
kosdak_fund_data = pd.merge(kosdaq_hist, kbf_fund, on='Date', how='left')
kosdak_fund_data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,KBF
0,2000-10-16,858.500000,873.900024,851.299988,867.099976,193900,0.0,0.0,5.25
1,2000-10-17,849.599976,860.099976,791.099976,804.000000,218200,0.0,0.0,5.25
2,2000-10-18,788.500000,811.400024,758.200012,802.599976,241000,0.0,0.0,5.25
3,2000-10-19,793.599976,820.400024,786.900024,798.400024,204500,0.0,0.0,5.25
4,2000-10-20,834.700012,840.900024,822.500000,826.200012,221900,0.0,0.0,5.25
...,...,...,...,...,...,...,...,...,...
5808,2024-04-30,872.080017,875.640015,867.320007,868.929993,800,0.0,0.0,3.50
5809,2024-05-02,865.780029,870.330017,864.979980,867.479980,900,0.0,0.0,3.50
5810,2024-05-03,872.270020,875.109985,865.210022,865.590027,800,0.0,0.0,
5811,2024-05-07,874.690002,876.940002,869.750000,871.260010,800,0.0,0.0,


### KOSDAQ index and interest rate data plotting
- 금리 : 파란색 라인 그래프
- 녹색/적색 : 코스닥 지수 그래프
- 그리드의 회색 배경 : 지수 폭락 구간 (1. 벤처 거품 붕괴 구간, 2. 미국 금융위기 구간, 3. 코로나 위기 구간)

### Chart
- plotly 의 캔들 챠트 활용
- adjusted_kbf 값은 금리와 지수를 한 챠트에 그리기 위한 vaule 보정

In [7]:
import plotly.graph_objects as go


# 캔들스틱 차트 생성
candlestick = go.Candlestick(x=kosdak_fund_data['Date'],
                             open=kosdak_fund_data['Open'],
                             high=kosdak_fund_data['High'],
                             low=kosdak_fund_data['Low'],
                             close=kosdak_fund_data['Close'],
                             name='Kosdak')


ratio = kosdak_fund_data['Close'] / kosdak_fund_data['KBF']
adjusted_kbf = kosdak_fund_data['KBF'] * ratio.mean() / kosdak_fund_data['Close'].mean()

adjusted_line_chart = go.Scatter(x=kosdak_fund_data['Date'], y=adjusted_kbf*kosdak_fund_data['Close'].mean()/ratio.mean(), mode='lines', line=dict(color='darkblue'), yaxis='y2', name = 'korea bank fund')



# 그래프 레이아웃 설정
layout = go.Layout(title='KOSDAQ Candlestick Chart with Korea Bank Fund', title_x=0.5,
                   xaxis=dict(title='Date'),
                   yaxis=dict(title='Price', side='left'),
                   yaxis2=dict(title='Korea Bank Fund', side='right', overlaying='y', showgrid=False),
                   legend=dict(x=0.85, y=1.1),
                   width=1200, 
                   height=800)



fig = go.Figure(data=[candlestick, adjusted_line_chart], layout=layout)


periods = [('2002-03-25', '2004-08-05'), ('2007-10-10', '2008-11-05'), ('2020-02-27', '2020-04-01')]

# 각 기간에 대해 투명한 회색 박스를 추가
for start, end in periods:
    fig.add_vrect(x0=start, x1=end, 
                  fillcolor='rgba(128,128,128,0.35)',  # 회색에 20% 투명도
                  line=dict(color='rgba(0,0,0,0)', width=0))

fig.show()

  v = v.dt.to_pydatetime()


### Get list of current KOSDAQ listed companies

In [8]:
# 코스닥 상장 종목 목록 가져오기
kosdaq_list = fdr.StockListing('KOSDAQ')
#print(kosdaq_list.head())
kosdaq_list.info

<bound method DataFrame.info of         Code        ISU_CD          Name         Market         Dept   Close  \
0     247540  KR7247540008        에코프로비엠  KOSDAQ GLOBAL        우량기업부  223000   
1     028300  KR7028300002           HLB         KOSDAQ        중견기업부  106900   
2     086520  KR7086520004          에코프로         KOSDAQ        우량기업부  101100   
3     196170  KR7196170005          알테오젠  KOSDAQ GLOBAL      기술성장기업부  174100   
4     348370  KR7348370008            엔켐         KOSDAQ        벤처기업부  268500   
...      ...           ...           ...            ...          ...     ...   
1724  475240  KR7475240008       하나32호스팩         KOSDAQ  SPAC(소속부없음)    2220   
1725  438580  KR7438580003     엔에이치스팩25호         KOSDAQ  SPAC(소속부없음)    2320   
1726  473000  KR7473000008  에스케이증권제12호스팩         KOSDAQ  SPAC(소속부없음)    2100   
1727  032685  KR7032681009         소프트센우         KOSDAQ        중견기업부    9820   
1728  021045  KR7021041009        대호특수강우         KOSDAQ        중견기업부    7970   

     Ch

### Find stocks that rose during the KOSDAQ decline period
- 검색된 종목은 './data/rising_stocks_by_periods.csv' 로 저장
- 전체 종목을 검색하므로 시간이 오래 걸립니다.

In [9]:
import pandas as pd
import FinanceDataReader as fdr

periods = [('2002-03-25', '2004-08-05'), ('2007-10-10', '2008-11-05'), ('2020-02-27', '2020-04-01')]

rising_stocks_total = pd.DataFrame()

# kosdaq_list는 코스닥 종목 리스트를 포함하는 DataFrame 예상
for start_date, end_date in periods:
    rising_stocks = pd.DataFrame()  # 각 기간별로 분석할 빈 데이터프레임
    for idx, row in kosdaq_list.iterrows():
        try:
            # 종목 코드로 주가 데이터 가져오기
            stock_data = fdr.DataReader(row['Code'], start_date, end_date)
            if not stock_data.empty:
                # 시작일과 종료일의 가격 비교
                opening_price = stock_data.iloc[0]['Close']
                closing_price = stock_data.iloc[-1]['Close']
                if closing_price > opening_price:
                    # 가격이 상승한 경우 리스트에 추가
                    rising_stocks = pd.concat([rising_stocks, pd.DataFrame({
                        'Code': [str(row['Code'])],
                        'Name': [row['Name']],
                        'Start Price': [opening_price],
                        'End Price': [closing_price],
                        'Change': [closing_price - opening_price]
                    })], axis=0)
        except Exception as e:
            print(f"Error loading data for {row['Code']} ({row['Name']}): {e}")

    # 결과 데이터프레임에 해당 기간의 데이터 추가
    rising_stocks['Period'] = f"{start_date} to {end_date}"
    rising_stocks_total = pd.concat([rising_stocks_total, rising_stocks], axis=0)

# 최종 결과 출력
print(rising_stocks_total)

# 결과를 CSV 파일로 저장



      Code       Name  Start Price  End Price  Change  \
0   036930    주성엔지니어링       6770.0     7336.0   566.0   
0   074600      원익QnC       1123.0     1343.0   220.0   
0   036810      에프에스티       1806.0     2000.0   194.0   
0   044490         태웅       2224.0     2680.0   456.0   
0   033640        네패스       2592.0     6676.0  4084.0   
..     ...        ...          ...        ...     ...   
0   070300       엑스큐어       7730.0    11133.0  3403.0   
0   016670        디모아       1300.0     1360.0    60.0   
0   070590     한솔인티큐브       3615.0     4560.0   945.0   
0   204630  스튜디오산타클로스       1695.0     1831.0   136.0   
0   106520      노블엠앤비       7342.0     8779.0  1437.0   

                      Period  
0   2002-03-25 to 2004-08-05  
0   2002-03-25 to 2004-08-05  
0   2002-03-25 to 2004-08-05  
0   2002-03-25 to 2004-08-05  
0   2002-03-25 to 2004-08-05  
..                       ...  
0   2020-02-27 to 2020-04-01  
0   2020-02-27 to 2020-04-01  
0   2020-02-27 to 2020-04-01  
0   2

### Stocks that have risen are grouped by stock using Naver stock information
- 시간이 오래 걸립니다.(네이버 스탁에서 종목 및 테마 크롤링)

In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Crawling Function for Industry Classification
def get_industry_name(stock_code:str):
    url = f"https://finance.naver.com/item/main.naver?code={stock_code}"
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        section = soup.find('div', class_='section trade_compare')
        if section:
            industry_name_tag = section.find('em')
            if industry_name_tag:
                industry_text = industry_name_tag.get_text()
                industry_name = industry_text.split(':')[1].split('｜')[0].strip()
                return industry_name
    return "Industry Unknown"

# 업종명을 크롤링하여 새 열에 추가
rising_stocks_total['Industry'] = rising_stocks_total['Code'].apply(get_industry_name)

rising_stocks['Code'] = "'" + rising_stocks['Code'].astype(str)
rising_stocks_total.to_csv("./data/rising_stocks_by_periods.csv")
rising_stocks_total

Unnamed: 0,Code,Name,Start Price,End Price,Change,Period,Industry
0,036930,주성엔지니어링,6770.0,7336.0,566.0,2002-03-25 to 2004-08-05,반도체와반도체장비
0,074600,원익QnC,1123.0,1343.0,220.0,2002-03-25 to 2004-08-05,반도체와반도체장비
0,036810,에프에스티,1806.0,2000.0,194.0,2002-03-25 to 2004-08-05,반도체와반도체장비
0,044490,태웅,2224.0,2680.0,456.0,2002-03-25 to 2004-08-05,에너지장비및서비스
0,033640,네패스,2592.0,6676.0,4084.0,2002-03-25 to 2004-08-05,반도체와반도체장비
...,...,...,...,...,...,...,...
0,070300,엑스큐어,7730.0,11133.0,3403.0,2020-02-27 to 2020-04-01,핸드셋
0,016670,디모아,1300.0,1360.0,60.0,2020-02-27 to 2020-04-01,소프트웨어
0,070590,한솔인티큐브,3615.0,4560.0,945.0,2020-02-27 to 2020-04-01,IT서비스
0,204630,스튜디오산타클로스,1695.0,1831.0,136.0,2020-02-27 to 2020-04-01,방송과엔터테인먼트


### Drawing pie charts by industry by period

In [11]:
import plotly.express as px

for period, group in rising_stocks_total.groupby('Period'):
    industry_count = group['Industry'].value_counts().reset_index()
    industry_count.columns = ['Industry', 'Count']
    total_count = industry_count['Count'].sum()
    industry_count['Percentage'] = (industry_count['Count'] / total_count) * 100

    # Pie Chart
    fig = px.pie(industry_count, values='Percentage', names='Industry', 
                 title=f'Industry Share by Count for {period}')
    fig.update_traces(textposition='inside', textinfo='percent+label')
    fig.show()

### Industries rise during overall decline
- 하락한 모든 구간에서의 산업별 비교
- 코로나 시기에 상승한 제약사들이 너무 많아서 비교하기는 적당하지 않음.

In [12]:
industry_count = rising_stocks_total['Industry'].value_counts().reset_index()
industry_count.columns = ['Industry', 'Count']
total_count = industry_count['Count'].sum()
industry_count['Percentage'] = (industry_count['Count'] / total_count) * 100

# Pie Chart
fig = px.pie(industry_count, values='Percentage', names='Industry', 
             title='Industry Share by Count Across All Periods')
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [31]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots


bbs_trend = pd.read_csv('./data/code_032800.csv')

# 날짜 형식 변환
bbs_trend['create_date'] = pd.to_datetime(bbs_trend['create_date'])
kosdaq_hist['Date'] = pd.to_datetime(kosdaq_hist['Date'])

# 게시글 시작 날짜 찾기
start_date = bbs_trend['create_date'].min()

# 코스닥 데이터에서 게시글 시작 날짜 이후 데이터만 필터링
kosdaq_hist_filtered = kosdaq_hist[kosdaq_hist['Date'] >= start_date]

# RSI 계산
delta = kosdaq_hist_filtered['Close'].diff()
gain = (delta.where(delta > 0, 0)).ewm(span=14, adjust=False).mean()
loss = (-delta.where(delta < 0, 0)).ewm(span=14, adjust=False).mean()
rsi = 100 - (100 / (1 + gain / loss))
kosdaq_hist_filtered['RSI'] = rsi

# 게시글 수 계산
posts_per_day = bbs_trend.groupby(bbs_trend['create_date'].dt.date).size().reset_index(name='count')

# 다중 서브플롯 생성
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, 
                    vertical_spacing=0.1, subplot_titles=('Kosdaq and Post Count', 'RSI'),
                    specs=[[{"secondary_y": True}], [{}]])

# 코스닥 캔들스틱 차트
candlestick = go.Candlestick(x=kosdaq_hist_filtered['Date'],
                             open=kosdaq_hist_filtered['Open'],
                             high=kosdaq_hist_filtered['High'],
                             low=kosdaq_hist_filtered['Low'],
                             close=kosdaq_hist_filtered['Close'],
                             name='Kosdaq')

# 게시글 수를 일자별로 나타내는 라인 차트
bbs_line_chart = go.Scatter(x=posts_per_day['create_date'], y=posts_per_day['count'],
                            mode='lines', line=dict(color='darkblue'), name='Post Count')

# RSI 지표 그래프
rsi_chart = go.Scatter(x=kosdaq_hist_filtered['Date'], y=kosdaq_hist_filtered['RSI'],
                       mode='lines', line=dict(color='red'), name='RSI')

# 상단 차트에 코스닥 캔들스틱과 게시글 수 라인 차트 추가
fig.add_trace(candlestick, row=1, col=1, secondary_y=False)
fig.add_trace(bbs_line_chart, row=1, col=1, secondary_y=True)

# 하단 차트에 RSI 그래프 추가
fig.add_trace(rsi_chart, row=2, col=1)

# 레이아웃 설정
fig.update_layout(xaxis_rangeslider_visible=False, height=1200, showlegend=False,
                  yaxis=dict(title='Kosdaq Price'),
                  yaxis2=dict(title='Post Count', overlaying='y', side='right'),
                  yaxis3=dict(title='RSI'))

# 서브플롯 각각에 대한 축 속성 설정
fig.update_yaxes(title_text="Kosdaq Price", row=1, col=1)
fig.update_yaxes(title_text="Post Count", secondary_y=True, row=1, col=1)
fig.update_yaxes(title_text="RSI", row=2, col=1)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

