In [27]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

import json
from google.cloud import storage
from google.colab import auth

# Run Colab's user authentication before the storage client is created.
auth.authenticate_user()
# NOTE(review): the project id is spelled 'sayonzone-ai' while the bucket below
# is 'sayouzone-ai-stocks' — confirm the project id is not a typo.
storage_client = storage.Client(project='sayonzone-ai')
# Bucket name is also reused by later cells to build gs:// paths for CSV output.
bucket_name = 'sayouzone-ai-stocks'
# Bucket handle reused by the JSON upload cells via bucket.blob(...).
bucket = storage_client.get_bucket(bucket_name)

import io
import re

In [28]:
# FnGuide page to scrape; the gicode query parameter (A005930) matches the
# 005930 prefix used in the upload paths of the later cells.
url = 'https://comp.fnguide.com/SVO2/ASP/SVD_main.asp?pGB=1&gicode=A005930'

# Bound the wait so a stalled connection cannot hang the notebook, and stop
# here on an HTTP error status instead of parsing an error page as data.
request = requests.get(url, timeout=30)
request.raise_for_status()
soup = BeautifulSoup(request.text, 'html.parser')

In [29]:
# Every <table> element found on the page; later cells pick tables by position.
tables = soup.find_all('table')

# An empty result would otherwise surface as a bare "list index out of range";
# keep the same exception type but say what actually went wrong.
if not tables:
    raise IndexError('no <table> elements found in the fetched FnGuide page')

market_conditions = tables[0]

In [30]:
# Flatten the value cells of the first page table into a key -> value mapping
# and upload it to the bucket as a JSON document.

# Output keys, listed in the order their values appear after flattening.
# NOTE(review): "거래량(주)" was previously spelled with a stray second ')';
# confirm no consumer of the uploaded JSON relies on the misspelled key.
MARKET_STATUS_KEYS = [
    "종가(원)",
    "전일대비(원)",
    "수익률(%)",
    "거래량(주)",
    "최고가(52주)",
    "최저가(52주)",
    "거래대금(억원)",
    "수익률(1M)",
    "수익률(3M)",
    "수익률(6M)",
    "수익률(1Y)",
    "외국인지분율(%)",
    "시가총액(상장예정포함,억원)",
    "베타(1년)",
    "시가총액(보통주,억원)",
    "액면가(원)",
    "발행주식수(보통주)",
    "발행주식수(우선주)",
    "종가(NXT)",
    "유동주식수(주)",
    "유동비율(%)",
    "거래량(NXT, 주)",
    "거래대금(NXT,억원)",
]


def _flatten_value_cells(rows):
    """Return one flat list built from every second cell of each row.

    For each row the cells at positions 1, 3, 5, ... are kept (the table
    presumably alternates label and value cells — confirm against the page).
    A string cell containing '/' has its spaces removed and is split on '/',
    contributing one item per part; any other cell is kept unchanged.
    """
    flat = []
    for row in rows:
        for cell in row[1::2]:
            # Only strings can hold an 'a/b' pair; testing `'/' in cell` on a
            # non-string cell (e.g. a parsed number) would raise TypeError.
            if isinstance(cell, str) and '/' in cell:
                flat.extend(cell.replace(' ', '').split('/'))
            else:
                flat.append(cell)
    return flat


# read_html returns a list of frames; rows containing any NaN are discarded.
df = pd.read_html(io.StringIO(str(market_conditions)))
values = _flatten_value_cells(df[0].dropna().to_dict(orient='tight')['data'])

# Too few values means the table layout changed; report that explicitly
# instead of failing on an anonymous positional lookup.
if len(values) < len(MARKET_STATUS_KEYS):
    raise IndexError(
        f'expected at least {len(MARKET_STATUS_KEYS)} market status values, '
        f'got {len(values)}'
    )

# zip stops at the last key, so any surplus trailing values are ignored.
json_dict = dict(zip(MARKET_STATUS_KEYS, values))

# NOTE(review): the date in the object name is hard-coded — confirm it should
# not follow the actual run date.
blob = bucket.blob('FnGuide/005930/005930_market_status_20250915.json')
market_status = json.dumps(json_dict, ensure_ascii=False, indent=4)
blob.upload_from_string(market_status, content_type='application/json')

In [31]:
# Second page table -> JSON array with one record per row, uploaded to the bucket.
earning_issue_html = io.StringIO(str(tables[1]))
df = pd.read_html(earning_issue_html)[0]
earning_issue = df.to_json(orient='records', force_ascii=False, indent=4)

# NOTE(review): the date in the object name is hard-coded.
blob = bucket.blob('FnGuide/005930/005930_earning_issue_20250915.json')
blob.upload_from_string(earning_issue, content_type='application/json')

In [32]:
# tables[2] -> holdings status CSV in the bucket, keyed by '운용사명'.
df = pd.read_html(io.StringIO(str(tables[2])))[0]
df = df.set_index('운용사명')
file_name = '005930_holdings_status.csv'
gcs_path = f'gs://{bucket_name}/FnGuide/005930/{file_name}'
# '운용사명' now lives in the index, so the index has to be written out;
# with index=False the CSV would lose the column that labels each row.
df.to_csv(gcs_path, index=True, encoding='utf-8-sig')

In [33]:
# tables[3] -> governance CSV in the bucket, keyed by '항목'.
df = pd.read_html(io.StringIO(str(tables[3])))[0]
# Discard rows that contain any missing cell before indexing.
df = df.dropna()
df = df.set_index('항목')
file_name = '005930_governance.csv'
gcs_path = f'gs://{bucket_name}/FnGuide/005930/{file_name}'
# '항목' now lives in the index, so the index has to be written out;
# with index=False the CSV would lose the column that labels each row.
df.to_csv(gcs_path, index=True, encoding='utf-8-sig')

In [34]:
# tables[4] -> shareholders CSV in the bucket, keyed by '주주구분'.
df = pd.read_html(io.StringIO(str(tables[4])))[0]
# Missing cells are written as 0.
df = df.fillna(0)
df = df.set_index('주주구분')
file_name = '005930_shareholders.csv'
gcs_path = f'gs://{bucket_name}/FnGuide/005930/{file_name}'
# '주주구분' now lives in the index, so the index has to be written out;
# with index=False the CSV would lose the column that labels each row.
df.to_csv(gcs_path, index=True, encoding='utf-8-sig')

In [35]:
# tables[5] (commercial paper) is skipped because it has too little data.
# tables[6] is transposed, so its original column headers become the row index
# and the former first row becomes column 0.
df = pd.read_html(io.StringIO(str(tables[6])))[0].T
# Column 0 carries rating and date in one string: the first three characters
# are taken as the rating, the digits of the remainder as the date.
# NOTE(review): the fixed 3-character slice assumes every rating code is
# exactly three characters long — confirm against the page.
df['bond_rating'] = df[0].str[:3]
# Vectorized digit extraction; unlike apply(re.sub) it leaves a missing cell
# as NaN instead of raising TypeError.
df['rating_date'] = df[0].str[3:].str.replace(r'[^0-9]', '', regex=True)
df = df.drop(columns=[0])
file_name = '005930_bond_rating.csv'
gcs_path = f'gs://{bucket_name}/FnGuide/005930/{file_name}'
# The index holds the transposed table's original column headers; write it so
# each rating row stays identifiable (index=False would discard those labels).
df.to_csv(gcs_path, index=True, encoding='utf-8-sig')

In [36]:
# Eighth page table -> JSON array with one record per row, uploaded to the bucket.
analysis_html = io.StringIO(str(tables[7]))
df = pd.read_html(analysis_html)[0]
analysis = df.to_json(orient='records', force_ascii=False, indent=4)

# NOTE(review): the date in the object name is hard-coded.
blob = bucket.blob('FnGuide/005930/005930_analysis_20250915.json')
blob.upload_from_string(analysis, content_type='application/json')

In [37]:
# tables[8] -> industry comparison CSV in the bucket, keyed by '구분'.
df = pd.read_html(io.StringIO(str(tables[8])))[0]
df = df.set_index('구분')
file_name = '005930_industry_comparison.csv'
gcs_path = f'gs://{bucket_name}/FnGuide/005930/{file_name}'
# '구분' now lives in the index, so the index has to be written out;
# with index=False the CSV would lose the column that labels each row.
df.to_csv(gcs_path, index=True, encoding='utf-8-sig')

In [38]:
# tables[9] is the same table as tables[8], so it is skipped.
df = pd.read_html(io.StringIO(str(tables[10])))[0]
# Replace missing cells with the literal string '없음'.
df = df.fillna('없음')
# Copy the 'IFRS(연결)' column under the shorter name 'IFRS' and make that
# copy the row index.
df['IFRS'] = df['IFRS(연결)']
df.set_index('IFRS', inplace=True)
# NOTE(review): 'Annual' and 'Net Quarter' are presumably top-level groups of
# a multi-level column header — confirm against the parsed frame.
# NOTE(review): df_annual and df_netquarter are reassigned by the cells that
# read tables[11] and tables[12] before any visible use — confirm this cell is
# still needed.
df_annual = df['Annual']
df_netquarter = df['Net Quarter']

In [39]:
# tables[11] -> 'Annual' part of the financial highlight table, saved as CSV.
df = pd.read_html(io.StringIO(str(tables[11])))[0]
# Replace missing cells with the literal string '없음'.
df = df.fillna('없음')
# Copy the 'IFRS(연결)' column under the shorter name 'IFRS' and index by it.
df['IFRS'] = df['IFRS(연결)']
df = df.set_index('IFRS')
df_annual = df['Annual']
file_name = '005930_financialhighlight_annual.csv'
gcs_path = f'gs://{bucket_name}/FnGuide/005930/{file_name}'
# The 'IFRS' row labels live in the index; write it so each CSV row keeps its
# label (index=False would leave only unlabeled figures).
df_annual.to_csv(gcs_path, index=True, encoding='utf-8-sig')

In [40]:
# tables[12] -> 'Net Quarter' part of the financial highlight table, saved as CSV.
df = pd.read_html(io.StringIO(str(tables[12])))[0]
# Replace missing cells with the literal string '없음'.
df = df.fillna('없음')
# Copy the 'IFRS(연결)' column under the shorter name 'IFRS' and index by it.
df['IFRS'] = df['IFRS(연결)']
df = df.set_index('IFRS')
df_netquarter = df['Net Quarter']
file_name = '005930_financialhighlight_netquarter.csv'
gcs_path = f'gs://{bucket_name}/FnGuide/005930/{file_name}'
# The 'IFRS' row labels live in the index; write it so each CSV row keeps its
# label (index=False would leave only unlabeled figures).
df_netquarter.to_csv(gcs_path, index=True, encoding='utf-8-sig')