In [None]:
from flask import Flask, jsonify
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

def get_recruitment_analysis_data():
    """Run the recruitment-fraud aggregate queries and package the results.

    Five aggregate queries are executed against the ``temp_raw_data`` table
    (per-country counts, telecommuting/logo feature counts, education x
    industry counts, employment-type counts, and the top fraudulent
    industries). Each result frame is converted with
    ``DataFrame.to_dict(orient="dict")``, i.e. ``{column: {row_index: value}}``.

    Returns:
        ``jsonify({"data": {...}})`` when a Flask application context is
        active; the plain ``{"data": {...}}`` dict otherwise, because
        ``jsonify`` needs an application context. ``None`` if any step
        fails (the error is printed).
    """
    # Local import: `has_app_context` is not part of the file-level
    # `from flask import ...` line.
    from flask import has_app_context

    # NOTE(review): credentials are hard-coded; consider loading them from
    # configuration or the environment before deploying.
    db_config = {
        "host": "localhost",
        "user": "root",
        "password": "password",
        "database": "project_db",
        "port": 3306,
        "charset": "utf8mb4",
    }

    # URL.create avoids connection-string parsing problems when the password
    # contains special characters.
    url = URL.create(
        drivername="mysql+pymysql",
        username=db_config["user"],
        password=db_config["password"],
        host=db_config["host"],
        port=db_config["port"],
        database=db_config["database"],
        query={"charset": db_config["charset"]},
    )

    engine = create_engine(
        url,
        pool_pre_ping=True,  # guard against connections MySQL dropped while idle
        future=True,
    )

    # 1. Legitimate vs. fraudulent postings per country (top 10 by volume).
    sql_country = """
    SELECT country,
           SUM(CASE WHEN fraudulent = 0 THEN 1 ELSE 0 END) AS legitimate,
           SUM(CASE WHEN fraudulent = 1 THEN 1 ELSE 0 END) AS fraudulent,
           COUNT(*) AS total
    FROM temp_raw_data
    WHERE country IS NOT NULL AND country != 'Unknown'
    GROUP BY country
    ORDER BY total DESC
    LIMIT 10;
    """

    # 2. Legitimate vs. fraudulent counts split by telecommuting flag and by
    #    presence of a company logo.
    sql_features = """
    SELECT
        SUM(CASE WHEN telecommuting = 1 AND fraudulent = 0 THEN 1 ELSE 0 END) as tele_legit,
        SUM(CASE WHEN telecommuting = 1 AND fraudulent = 1 THEN 1 ELSE 0 END) as tele_fraud,
        SUM(CASE WHEN telecommuting = 0 AND fraudulent = 0 THEN 1 ELSE 0 END) as office_legit,
        SUM(CASE WHEN telecommuting = 0 AND fraudulent = 1 THEN 1 ELSE 0 END) as office_fraud,
        SUM(CASE WHEN has_company_logo = 1 AND fraudulent = 0 THEN 1 ELSE 0 END) as logo_legit,
        SUM(CASE WHEN has_company_logo = 1 AND fraudulent = 1 THEN 1 ELSE 0 END) as logo_fraud,
        SUM(CASE WHEN has_company_logo = 0 AND fraudulent = 0 THEN 1 ELSE 0 END) as no_logo_legit,
        SUM(CASE WHEN has_company_logo = 0 AND fraudulent = 1 THEN 1 ELSE 0 END) as no_logo_fraud
    FROM temp_raw_data;
    """

    # 3. Legitimate vs. fraudulent counts per (education level, industry).
    sql_edu_ind = """
    SELECT edu_level, industry_group,
           SUM(CASE WHEN fraudulent = 0 THEN 1 ELSE 0 END) AS legitimate_count,
           SUM(CASE WHEN fraudulent = 1 THEN 1 ELSE 0 END) AS fraudulent_count
    FROM temp_raw_data
    WHERE edu_level != 'Unknown' AND industry_group != 'Unknown'
    GROUP BY edu_level, industry_group;
    """

    # 4. Legitimate vs. fraudulent counts per employment type.
    sql_emp_type = """
    SELECT employment_type,
           SUM(CASE WHEN fraudulent = 0 THEN 1 ELSE 0 END) AS legitimate,
           SUM(CASE WHEN fraudulent = 1 THEN 1 ELSE 0 END) AS fraudulent
    FROM temp_raw_data
    WHERE employment_type != 'Unknown'
    GROUP BY employment_type;
    """

    # 5. Top five industries by number of fraudulent postings.
    sql_top_industries = """
    SELECT industry_group, COUNT(*) as fraud_count
    FROM temp_raw_data
    WHERE fraudulent = 1 AND industry_group != 'Unknown'
    GROUP BY industry_group
    ORDER BY fraud_count DESC
    LIMIT 5;
    """

    # Output key -> query; insertion order fixes the key order of the payload.
    queries = {
        "country": sql_country,
        "features": sql_features,
        "education_industry": sql_edu_ind,
        "employment_type": sql_emp_type,
        "top_industries": sql_top_industries,
    }

    try:
        # One explicit connection for all five queries instead of a separate
        # pool checkout per read.
        with engine.connect() as conn:
            data = {
                # "dict" is the to_dict orient equivalent to to_json's
                # "columns"; to_dict rejects "columns" with a ValueError.
                key: pd.read_sql_query(sql, conn).to_dict(orient="dict")
                for key, sql in queries.items()
            }

        result = {"data": data}

        # jsonify raises RuntimeError outside an application context (e.g.
        # when this function is called directly from a script or notebook),
        # so hand back the plain dict in that case.
        if has_app_context():
            return jsonify(result)
        return result

    except Exception as e:
        # Top-level boundary: report the failure and signal it with None.
        print(f"錯誤: {e}")
        return None

    finally:
        engine.dispose()  # close the connection pool (recommended for scripts)

# Smoke test: run the analysis once, keep the result, and show it.
print(data_to_json := get_recruitment_analysis_data())



錯誤: (pymysql.err.OperationalError) (2003, "Can't connect to MySQL server on 'localhost' ([Errno 111] Connection refused)")
(Background on this error at: https://sqlalche.me/e/20/e3q8)
None
None
None
None
None
