In [8]:
import  pandas as pd
import numpy as np
from sqlalchemy import create_engine, text

In [7]:
# 1. 파일 로드
path = r"C:\Users\MetaM\PycharmProjects\investment\data\DataGuide_Ratio.xlsx"
raw_df = pd.read_excel(path, sheet_name='PSR', header=None)

# 2. Symbol, Name, Indicator 추출
symbol_row = 8
name_row = 9
item_name_row = 12
start_data_row = 13

symbols = raw_df.iloc[symbol_row, 2:].values
names = raw_df.iloc[name_row, 2:].values
indicators = raw_df.iloc[item_name_row, 2:].values

# 3. 날짜 데이터 추출 + to_datetime 처리 (오류 무시)
raw_dates = raw_df.iloc[start_data_row:, 0]
parsed_dates = pd.to_datetime(raw_dates, format="%Y-%m-%d", errors='coerce')

# 4. long format으로 정리
records = []
for col_idx, symbol in enumerate(symbols):
    if pd.isna(symbol):
        continue
    name = names[col_idx]
    indicator = indicators[col_idx]
    values = raw_df.iloc[start_data_row:, col_idx + 2].values
    for date, value in zip(parsed_dates, values):
        if pd.isna(date) or pd.isna(value):
            continue
        records.append({
            'date': date,
            'symbol': symbol,
            'company_name': name,
            'indicator': indicator,
            'value': value
        })

# 5. 결과 DataFrame
clean_df = pd.DataFrame(records)
print(clean_df.head())


# [["symbol", "company_name", "date", "indicator", "value"]]

        date   symbol company_name      indicator    value
0 2004-01-31  A000660       SK하이닉스  수정PSR(연율화)(배)  0.76583
1 2004-02-29  A000660       SK하이닉스  수정PSR(연율화)(배)  0.76754
2 2004-03-31  A000660       SK하이닉스  수정PSR(연율화)(배)  1.04821
3 2004-04-30  A000660       SK하이닉스  수정PSR(연율화)(배)  0.78614
4 2004-05-31  A000660       SK하이닉스  수정PSR(연율화)(배)  0.74320


In [11]:
## valuation 지표 DB 업로드
def upload_fs_data_to_db(df: pd.DataFrame, db_info: dict, table_name: str = "korea_fs_data", chunk_size: int = 1000):
    # ✅ 날짜 변환
    df['date'] = pd.to_datetime(df['date'])

    # ✅ 결측치 및 inf 처리
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.where(pd.notnull(df), None)

    # ✅ DB 연결
    engine = create_engine(
        f"mysql+pymysql://{db_info['user']}:{db_info['password']}@{db_info['host']}:{db_info['port']}/{db_info['database']}"
    )
    conn = engine.raw_connection()
    cursor = conn.cursor()

    # ✅ 테이블 생성 쿼리
    create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS `{table_name}` (
        `date` DATE,
        `symbol` VARCHAR(20),
        `company_name` VARCHAR(50),
        `indicator` VARCHAR(100),
        `value` FLOAT,
        PRIMARY KEY (`date`, `symbol`, `indicator`)
    );
    """
    cursor.execute(create_table_sql)
    conn.commit()

    # ✅ 데이터 삽입 쿼리
    insert_sql = f"""
    INSERT INTO `{table_name}` (`date`, `symbol`, `ccmpany_name`, `indicator`, `value`)
    VALUES (%s, %s, %s, %s, %s)
    ON DUPLICATE KEY UPDATE
        `name` = VALUES(`name`),
        `value` = VALUES(`value`);
    """

    data_tuples = df[['date', 'symbol', 'company_name', 'indicator', 'value']].values.tolist()

    # ✅ Chunk 단위로 업로드
    for i in range(0, len(data_tuples), chunk_size):
        chunk = data_tuples[i:i+chunk_size]
        cursor.executemany(insert_sql, chunk)
        conn.commit()

    cursor.close()
    conn.close()
    print(f"✅ {len(df)} rows uploaded to `{table_name}`.")


# DB 정보
db_info = {
    'host': 'hystox74.synology.me',
    # 'host': '192.168.0.230',
    'port': 3307,
    'user': 'stox7412',
    'password': 'Apt106503!~',
    'database': 'investar'
}

upload_fs_data_to_db(clean_df, db_info)

OperationalError: (1054, "Unknown column 'ccmpany_name' in 'field list'")