In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape the KOSPI 200 constituents table from Wikipedia into a DataFrame
# with one row per company: name, ticker symbol, and GICS sector.
url = "https://en.wikipedia.org/wiki/KOSPI_200"

# Send an identifying User-Agent (Wikimedia's User-Agent policy asks for one)
# and a timeout so a stalled connection cannot hang the notebook.
headers = {"User-Agent": "kospi200-gics-notebook/1.0 (python-requests)"}
response = requests.get(url, headers=headers, timeout=30)
# Fail here on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Extract the table
table = soup.find("table", {"id": "constituents"})
if table is None:
    # soup.find returns None when nothing matches; raise a readable error
    # rather than an AttributeError on the next line.
    raise ValueError(f"No <table id='constituents'> found at {url}")
rows = table.find_all("tr")[1:]  # skip the header row

data = []
for row in rows:
    cols = row.find_all("td")
    if len(cols) < 3:
        # Rows without at least three <td> cells cannot supply all fields.
        continue
    company = cols[0].text.strip()
    symbol = cols[1].text.strip()  # kept as text, so leading zeros survive
    gics_sector = cols[2].text.strip()
    data.append([company, symbol, gics_sector])

# Build the DataFrame
df = pd.DataFrame(data, columns=["Company", "Symbol", "GICS Sector"])
print(df.head())  # preview the first 5 rows

# Save to CSV (optional)
df.to_csv("kospi200_gics.csv", index=False)


              Company  Symbol       GICS Sector
0        Amorepacific  090430  Consumer Staples
1  Amorepacific Group  002790  Consumer Staples
2                 APR  278470  Consumer Staples
3          BGF Retail  282330  Consumer Staples
4       BNK Financial  138930        Financials


In [19]:
# Attach KRX Korean/English names to the scraped KOSPI 200 + GICS table.

# Read the short code ('단축코드') as text: codes such as '090430' start with
# a zero, which would be lost if pandas inferred an integer dtype, and the
# join below would then match nothing.
krx = pd.read_csv('csv_files/KRX_종목명.csv', encoding='cp949', dtype={'단축코드': str})
krx = krx[['단축코드', '한글 종목약명', '영문 종목명']]

# Rename first and filter on the renamed column. rename() ignores a missing
# 'Symbol' key, so re-running this cell no longer raises KeyError on
# df['Symbol'] after the first run has already renamed it.
df = df.rename(columns={'Symbol': '단축코드'})
filtered_krx = krx[krx['단축코드'].isin(df['단축코드'])]

# Merge: df (KOSPI200 + GICS) + filtered_krx (Korean/English names).
# how='left' keeps every scraped row, even one with no KRX match.
kospi200_sector_df = pd.merge(df, filtered_krx, on='단축코드', how='left')
kospi200_sector_df = kospi200_sector_df[['단축코드', 'GICS Sector', '한글 종목약명', '영문 종목명']]
kospi200_sector_df


Unnamed: 0,단축코드,GICS Sector,한글 종목약명,영문 종목명
0,090430,Consumer Staples,아모레퍼시픽,AMOREPACIFIC CORPORATION
1,002790,Consumer Staples,아모레퍼시픽홀딩스,AMOREPACIFIC Holdings
2,278470,Consumer Staples,에이피알,APR
3,282330,Consumer Staples,BGF리테일,BGF Retail
4,138930,Financials,BNK금융지주,BNK Financial Group Inc.
...,...,...,...,...
195,316140,Financials,우리금융지주,WooriFinancialGroup
196,008730,Steels & Materials,율촌화학,YoulchonChemical
197,111770,Consumer Discretionary,영원무역,Youngone Corporation
198,009970,Consumer Discretionary,영원무역홀딩스,YoungoneHoldings
