In [4]:
# PostgreSQLからさまざまなデータを取得する

import os
import pandas as pd
import psycopg2
from psycopg2.extras import RealDictCursor
from sqlalchemy import create_engine
from dotenv import load_dotenv
import json
from datetime import datetime

# 環境変数読み込み
load_dotenv()



True

In [5]:
# =============================================================================
# 1. 接続設定
# =============================================================================

print("🔌 データベース接続を設定中...")

# PostgreSQL接続設定
pg_config = {
    'host': os.getenv('POSTGRES_HOST', 'localhost'),
    'database': os.getenv('POSTGRES_DB', 'stallion_db'),
    'user': os.getenv('POSTGRES_USER', 'stallion_user'),
    'password': os.getenv('POSTGRES_PASSWORD'),
    'port': int(os.getenv('POSTGRES_PORT', '5432'))
}

# SQLAlchemy エンジン作成（pandasで使用）
DATABASE_URL = f"postgresql://{pg_config['user']}:{pg_config['password']}@{pg_config['host']}:{pg_config['port']}/{pg_config['database']}"
engine = create_engine(DATABASE_URL)

print("✅ 接続設定完了")

🔌 データベース接続を設定中...
✅ 接続設定完了


In [8]:
# =============================================================================
# pandas.read_sql
# =============================================================================

def query(sql):
    """SQLクエリを実行してDataFrameで結果を返す"""
    return pd.read_sql(sql, engine)

def show_tables():
    """全テーブル一覧を表示"""
    sql = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';"
    return query(sql)

def desc(table_name):
    """テーブル構造を表示"""
    sql = f"""
    SELECT column_name, data_type, is_nullable, column_default 
    FROM information_schema.columns 
    WHERE table_name = '{table_name}' 
    ORDER BY ordinal_position;
    """
    return query(sql)

def count_all(table_name):
    """テーブルの行数を取得"""
    sql = f"SELECT COUNT(*) as count FROM {table_name};"
    return query(sql).iloc[0, 0]

---
以下、単発のSQLを実行

In [98]:
# queryの中にSQLを書いて実行する

query ("""
      
    SELECT
        jockey_name,
        COUNT(*) AS win_count
    FROM race_results
    WHERE finish_position = 1
    GROUP BY jockey_name
    ORDER BY win_count DESC
    LIMIT 20;
      
    """)

Unnamed: 0,jockey_name,win_count
0,ルメール,46
1,川田将雅,27
2,Ｍ．デム,24
3,福永祐一,19
4,武豊,18
5,岩田康誠,16
6,池添謙一,16
7,戸崎圭太,12
8,蛯名正義,12
9,横山典弘,9


---
以下SQL練習 with claude

In [99]:
query("""
    WITH jockey_horse_wins AS (
        SELECT 
            jockey_name,
            horse_name,
            COUNT(*) as wins_together
        FROM race_results 
        WHERE finish_position = 1
        GROUP BY jockey_name, horse_name
    ),
    ranked_combinations AS (
        SELECT 
            jockey_name,
            horse_name,
            wins_together,
            ROW_NUMBER() OVER (PARTITION BY jockey_name ORDER BY wins_together DESC) as rank
        FROM jockey_horse_wins
    )
    SELECT 
        jockey_name,
        horse_name,
        wins_together
    FROM ranked_combinations
    WHERE rank = 1
    ORDER BY wins_together DESC
    LIMIT 20;
""")

Unnamed: 0,jockey_name,horse_name,wins_together
0,ルメール,アーモンドアイ,8
1,武豊,キタサンブラック,6
2,池添謙一,オルフェーヴル,6
3,福永祐一,コントレイル,5
4,蛯名正義,アパパネ,4
5,川田将雅,リバティアイランド,4
6,岩田康誠,ロードカナロア,4
7,内田博幸,ゴールドシップ,4
8,武幸四郎,メイショウマンボ,3
9,松山弘平,デアリングタクト,3


In [103]:
# 各競馬場での騎手勝利ランキング
# 各競馬場で最も勝利数の多い騎手TOP3を取得してください

query("""
    
    WITH track_jockey_wins AS (
        SELECT
            r.track_name,
            rr.jockey_name,
            COUNT(*) AS wins
        FROM races r
        JOIN race_results rr ON r.race_id = rr.race_id
        WHERE rr.finish_position = 1
        GROUP BY r.track_name, rr.jockey_name
    ),
    ranked_jockeys AS (
        SELECT
            track_name,
            jockey_name,
            wins,
            ROW_NUMBER() OVER (PARTITION BY track_name ORDER BY wins DESC) AS rank
        FROM track_jockey_wins
    )
    SELECT
        track_name,
        jockey_name,
        wins
    FROM ranked_jockeys
    WHERE rank <= 3
    ORDER BY track_name, rank;
      
""")

Unnamed: 0,track_name,jockey_name,wins
0,中京,福永祐一,3
1,中京,藤田伸二,3
2,中京,武豊,2
3,中京1,幸英明,1
4,中山,武豊,8
5,中山,福永祐一,7
6,中山,ルメール,7
7,京都,武豊,21
8,京都,ルメール,12
9,京都,横山典弘,9


In [112]:
# 17.馬の連続勝利記録
# 各馬の最新5戦の成績（着順）を時系列順で取得してください

query("""
    WITH horse_total_prize AS (
        SELECT 
            id as horse_id,
            name_ja,
            COALESCE((profile->>'total_prize_central')::bigint, 0) as total_prize
        FROM horses
        WHERE profile->>'total_prize_central' IS NOT NULL
    ),
    horse_prize_ranking AS (
        SELECT 
            horse_id,
            name_ja,
            total_prize,
            ROW_NUMBER() OVER (ORDER BY total_prize DESC) as prize_rank
        FROM horse_total_prize
        ORDER BY total_prize DESC
        LIMIT 20
    ),
    latest_5_races_per_horse AS (
        SELECT
            rr.horse_id,
            rr.horse_name,
            r.track_name,
            r.race_name,
            r.race_date,
            rr.finish_position,
            rr.popularity,
            rr.jockey_name,
            ROW_NUMBER() OVER(PARTITION BY rr.horse_id ORDER BY r.race_date DESC) AS race_rank
        FROM race_results rr
        JOIN races r ON rr.race_id = r.race_id
        WHERE rr.finish_position IS NOT NULL
    )
    SELECT
        hpr.prize_rank,
        hpr.name_ja,
        hpr.total_prize,
        lr.track_name,
        lr.race_name,
        lr.race_date,
        lr.finish_position,
        lr.popularity,
        lr.jockey_name,
        lr.race_rank
    FROM horse_prize_ranking hpr
    JOIN latest_5_races_per_horse lr ON hpr.horse_id = lr.horse_id
    WHERE lr.race_rank <= 5
    ORDER BY hpr.prize_rank, lr.race_rank;
""")

Unnamed: 0,prize_rank,name_ja,total_prize,track_name,race_name,race_date,finish_position,popularity,jockey_name,race_rank
0,1,キタサンブラック,180684,中山,第62回有馬記念(GI),2017-12-24,1,1,武豊,1
1,1,キタサンブラック,180684,東京,第37回ジャパンカップ(GI),2017-11-26,3,1,武豊,2
2,1,キタサンブラック,180684,東京,第156回天皇賞(秋)(GI),2017-10-29,1,1,武豊,3
3,1,キタサンブラック,180684,阪神,第58回宝塚記念(GI),2017-06-25,9,1,武豊,4
4,1,キタサンブラック,180684,京都,第155回天皇賞(春)(GI),2017-04-30,1,1,武豊,5
...,...,...,...,...,...,...,...,...,...,...
95,20,ダイワメジャー,100223,中山,第52回有馬記念(GI),2007-12-23,3,6,Ｍデムー,1
96,20,ダイワメジャー,100223,京都,第24回マイルチャンピオンS(GI),2007-11-18,1,1,安藤勝己,2
97,20,ダイワメジャー,100223,東京,第136回天皇賞(秋)(GI),2007-10-28,9,3,安藤勝己,3
98,20,ダイワメジャー,100223,阪神,第48回宝塚記念(GI),2007-06-24,12,5,安藤勝己,4


In [115]:

# 18. レースの人気と着順の関係
# 各レースで人気順位と着順の差を計算してください（大波乱度分析）

query ("""

    WITH upset_analysis AS (
        SELECT
            rr.race_id,
            r.race_name,
            r.race_date,
            rr.horse_name,
            rr.popularity,
            rr.finish_position, 
            rr.popularity - rr.finish_position AS upset_degree,
            ABS(rr.popularity - rr.finish_position) AS upset_magnitude,
            CASE
                WHEN rr.finish_position < rr.popularity THEN '好走'
                WHEN rr.finish_position = rr.popularity THEN '人気通り'
                ELSE '凡走'
            END AS performance_type
        FROM race_results rr
        JOIN races r ON r.race_id = rr.race_id
        WHERE rr.popularity IS NOT NULL
            AND rr.finish_position IS NOT NULL
    )
    SELECT
        *,
        CASE
            WHEN upset_magnitude >= 6 THEN '大波乱'
            WHEN upset_magnitude >= 3 THEN '波乱'
            ELSE '順当'
        END AS upset_level
    FROM upset_analysis
    WHERE upset_analysis.finish_position = 1
    ORDER BY race_date DESC
    LIMIT 50;
       


    """)

Unnamed: 0,race_id,race_name,race_date,horse_name,popularity,finish_position,upset_degree,upset_magnitude,performance_type,upset_level
0,202509030411,第66回宝塚記念(GI),2025-06-15,メイショウタバル,7,1,6,6,好走,大波乱
1,202505030211,第75回安田記念(GI),2025-06-08,ジャンタルマンタル,2,1,1,1,好走,順当
2,202505021211,第92回東京優駿(GI),2025-06-01,クロワデュノール,1,1,0,0,人気通り,順当
3,202505021011,第86回優駿牝馬(GI),2025-05-25,カムニャック,4,1,3,3,好走,波乱
4,202505020811,第20回ヴィクトリアマイル(GI),2025-05-18,アスコリピチェーノ,1,1,0,0,人気通り,順当
5,202505020611,第30回NHKマイルカップ(GI),2025-05-11,パンジャタワー,9,1,8,8,好走,大波乱
6,202508020411,第171回天皇賞(春)(GI),2025-05-04,ヘデントール,1,1,0,0,人気通り,順当
7,202506030811,第85回皐月賞(GI),2025-04-20,ミュージアムマイル,3,1,2,2,好走,順当
8,202509020611,第85回桜花賞(GI),2025-04-13,エンブロイダリー,3,1,2,2,好走,順当
9,202509020411,第69回大阪杯(GI),2025-04-06,ベラジオオペラ,2,1,1,1,好走,順当


In [185]:
# 📊 レベル8: 複雑な集計・分析
# 距離適性分析
# 各馬が最も勝率の高い距離帯（1200m以下、1201-1600m、1601-2000m、2001m以上）を判定してください

# 17.馬の連続勝利記録
# 各馬の最新5戦の成績（着順）を時系列順で取得してください

query("""
      
    WITH horse_distance_performance AS (
        SELECT
            rr.horse_id,
            rr.horse_name, 
            CASE
                WHEN r.distance::INTEGER <= 1400 THEN '短距離'
                WHEN r.distance::INTEGER <= 1800 THEN 'マイル'
                WHEN r.distance::INTEGER <= 2400 THEN '中距離'
                ELSE '長距離'
            END AS distance_category,
            COUNT (*) AS total_runs,
            COUNT (CASE WHEN rr.finish_position = 1 THEN 1 END) AS wins,
            ROUND (
                COUNT(CASE WHEN rr.finish_position = 1 THEN 1 END) * 100.0 / COUNT(*),
                2
            ) AS win_rate
        FROM race_results rr
        JOIN races r ON rr.race_id = r.race_id
        GROUP BY rr.horse_id, rr.horse_name, distance_category
        HAVING COUNT(*) >= 3
    ),
    best_distance_per_horse AS (
        SELECT
            horse_id,
            horse_name,
            distance_category,
            total_runs,
            wins,
            win_rate,
            ROW_NUMBER() OVER (PARTITION BY horse_id ORDER BY win_rate DESC, total_runs DESC) AS rank
        FROM horse_distance_performance
    )
    SELECT
        h.name_ja AS horse_name,
        bdph.distance_category AS best_distance,
        bdph.total_runs,
        bdph.wins,
        bdph.win_rate,
        CONCAT((h.profile->>'total_prize_central')::bigint, '円') AS total_prize
    FROM best_distance_per_horse bdph
    JOIN horses h ON bdph.horse_id = h.id
    WHERE bdph.rank = 1
        AND (h.profile->>'total_prize_central') IS NOT NULL
        AND (h.profile->>'total_prize_central')::bigint != '0'
    ORDER BY (h.profile->>'total_prize_central')::BIGINT DESC
    LIMIT 20;
    
""")

Unnamed: 0,horse_name,best_distance,total_runs,wins,win_rate,total_prize
0,キタサンブラック,長距離,6,4,66.67,180684円
1,テイエムオペラオー,中距離,14,8,57.14,180518円
2,イクイノックス,中距離,6,4,66.67,170655円
3,ドウデュース,中距離,9,4,44.44,170347円
4,アーモンドアイ,中距離,6,6,100.0,150956円
5,ディープインパクト,中距離,6,6,100.0,140455円
6,ゴールドシップ,長距離,11,6,54.55,130776円
7,ブエナビスタ,マイル,5,4,80.0,130643円
8,ジェンティルドンナ,マイル,4,3,75.0,130621円
9,ウオッカ,マイル,9,5,55.56,130487円


In [155]:
# 20. 騎手×馬×競馬場の最強組み合わせ
# 騎手・馬・競馬場の3要素の組み合わせで最も勝率の高いTOP10を取得してください

query ("""

    WITH jockey_horse_track_performance AS (
        SELECT
            rr.jockey_name,
            rr.horse_id,
            r.track_name,
            COUNT(*) AS total_runs,
            COUNT(CASE WHEN finish_position = 1 THEN 1 END) AS wins,
            COUNT(CASE WHEN finish_position = 1 THEN 1 END) * 100.0 / COUNT(*) AS win_rate
        FROM race_results rr
        JOIN races r ON r.race_id = rr.race_id
        WHERE jockey_name IS NOT NULL AND horse_id IS NOT NULL AND track_name IS NOT NULL
        GROUP BY rr.jockey_name, rr.horse_id, r.track_name
        HAVING COUNT(*) >= 3
        )
    SELECT
        h.name_ja AS horse_name,
        (h.profile->>'total_prize_central')::bigint AS total_prize,
        jhtp.jockey_name,
        jhtp.track_name,
        jhtp.total_runs,
        jhtp.wins,
        ROUND(jhtp.win_rate, 2) AS win_rate
    FROM jockey_horse_track_performance AS jhtp
    JOIN horses h ON jhtp.horse_id = h.id
    WHERE (h.profile->>'total_prize_central') IS NOT NULL
        AND (h.profile->>'total_prize_central')::bigint != '0'
        AND win_rate >= 30.0
    ORDER BY win_rate DESC, (h.profile->>'total_prize_central')::bigint DESC
       
""")

Unnamed: 0,horse_name,total_prize,jockey_name,track_name,total_runs,wins,win_rate
0,イクイノックス,170655,ルメール,東京,3,3,100.00
1,ディープインパクト,140455,武豊,京都,3,3,100.00
2,スペシャルウィーク,100262,武豊,東京,3,3,100.00
3,ナリタブライアン,90742,南井克巳,中山,3,3,100.00
4,フィエールマン,70926,ルメール,京都,3,3,100.00
...,...,...,...,...,...,...,...
83,エルウェーウィン,20560,南井克巳,中山,3,1,33.33
84,マイネルマックス,20279,佐藤哲三,中山,3,1,33.33
85,アカイイト,20002,幸英明,阪神,3,1,33.33
86,シャンパンカラー,10548,内田博幸,東京,3,1,33.33


In [164]:
# 🏆 重賞データ分析 - 本格難易度問題集
# 📊 問題1: 時代変遷分析
# 各年代（1990年代、2000年代、2010年代、2020年代）で最も活躍した上位3騎手を特定し、時代を通じて一貫して活躍している騎手を分析してください。
# 条件：
# G1-G3重賞での勝利数で評価
# 各年代で最低10勝以上
# 4つすべての年代にランクインした騎手がいるかチェック

query ("""

    WITH decade_jockey_in_board AS (
       SELECT
            rr.jockey_name,
            rr.horse_name,
            r.track_name,
            rr.finish_position,
            r.grade,
            r.race_date,
            EXTRACT(YEAR FROM r.race_date) AS race_year,
            CASE
                WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 1990 AND 1999 THEN '1990s'
                WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2000 AND 2009 THEN '2000s'
                WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2010 AND 2019 THEN '2010s'
                WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2020 AND 2029 THEN '2020s'
                ELSE 'その他'
            END AS decade
       FROM race_results rr
       JOIN races r ON rr.race_id = r.race_id
       WHERE rr.finish_position <= 5
    ),
    decade_jockey_points AS (
        SELECT
            jockey_name,
            decade,
            COUNT(*) AS top5_finishes,
            SUM(CASE WHEN finish_position = 1 THEN 10
                     WHEN finish_position = 2 THEN 7
                     WHEN finish_position = 3 THEN 4
                     WHEN finish_position = 4 THEN 2
                     WHEN finish_position = 5 THEN 1
                     ELSE 0 END) 
                AS finish_position_points,
            SUM(CASE WHEN grade = 'G1' THEN 5
                     WHEN grade = 'G2' THEN 2
                     WHEN grade = 'G3' THEN 1
                     ELSE 0 END) 
                AS grade_points
        FROM decade_jockey_in_board
        WHERE decade != 'その他'
        GROUP BY jockey_name, decade
    ),
    ranked_jockeys AS (
        SELECT
            jockey_name,
            decade,
            top5_finishes,
            finish_position_points,
            grade_points,
            grade_points * finish_position_points AS total_points,
            ROW_NUMBER() OVER (PARTITION BY decade ORDER BY grade_points * finish_position_points DESC) AS rank
        FROM decade_jockey_points
        WHERE top5_finishes >= 10
        ORDER BY decade, finish_position_points DESC, grade_points DESC
    )
    SELECT
        decade,
        rank,
        jockey_name,
        top5_finishes,
        finish_position_points,
        grade_points,
        total_points
    FROM ranked_jockeys
    WHERE rank <= 5
    ORDER BY decade, rank;
    
""")


Unnamed: 0,decade,rank,jockey_name,top5_finishes,finish_position_points,grade_points,total_points
0,1990s,1,武豊,308,1918,766,1469188
1,1990s,2,岡部幸雄,259,1396,594,829224
2,1990s,3,横山典弘,204,1032,499,514968
3,1990s,4,河内洋,213,1118,454,507572
4,1990s,5,南井克巳,165,905,378,342090
5,2000s,1,武豊,316,1872,800,1497600
6,2000s,2,福永祐一,261,1279,600,767400
7,2000s,3,横山典弘,239,1318,551,726218
8,2000s,4,安藤勝己,211,1175,509,598075
9,2000s,5,四位洋文,196,940,484,454960


In [169]:
# 1. 一貫性スーパースター発掘
# 4年代すべてTOP5の騎手を特定
query("""
WITH your_decade_query AS (
    WITH decade_jockey_in_board AS (
       SELECT
            rr.jockey_name,
            rr.horse_name,
            r.track_name,
            rr.finish_position,
            r.grade,
            r.race_date,
            EXTRACT(YEAR FROM r.race_date) AS race_year,
            CASE
                WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 1990 AND 1999 THEN '1990s'
                WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2000 AND 2009 THEN '2000s'
                WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2010 AND 2019 THEN '2010s'
                WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2020 AND 2029 THEN '2020s'
                ELSE 'その他'
            END AS decade
       FROM race_results rr
       JOIN races r ON rr.race_id = r.race_id
       WHERE rr.finish_position <= 5
    ),
    decade_jockey_points AS (
        SELECT
            jockey_name,
            decade,
            COUNT(*) AS top5_finishes,
            SUM(CASE WHEN finish_position = 1 THEN 10
                     WHEN finish_position = 2 THEN 7
                     WHEN finish_position = 3 THEN 4
                     WHEN finish_position = 4 THEN 2
                     WHEN finish_position = 5 THEN 1
                     ELSE 0 END) 
                AS finish_position_points,
            SUM(CASE WHEN grade = 'G1' THEN 5
                     WHEN grade = 'G2' THEN 2
                     WHEN grade = 'G3' THEN 1
                     ELSE 0 END) 
                AS grade_points
        FROM decade_jockey_in_board
        WHERE decade != 'その他'
        GROUP BY jockey_name, decade
    ),
    ranked_jockeys AS (
        SELECT
            jockey_name,
            decade,
            top5_finishes,
            finish_position_points,
            grade_points,
            grade_points * finish_position_points AS total_points,
            ROW_NUMBER() OVER (PARTITION BY decade ORDER BY grade_points * finish_position_points DESC) AS rank
        FROM decade_jockey_points
        WHERE top5_finishes >= 10
        ORDER BY decade, finish_position_points DESC, grade_points DESC
    )
    SELECT
        decade,
        rank,
        jockey_name,
        top5_finishes,
        finish_position_points,
        grade_points,
        total_points
    FROM ranked_jockeys
    WHERE rank <= 5
    ORDER BY decade, rank
), 
consistency_check AS (
    SELECT 
        jockey_name,
        COUNT(DISTINCT decade) as active_decades,
        STRING_AGG(decade || ':' || rank::text, ' → ' ORDER BY decade) as career_trajectory,
        AVG(total_points) as avg_points_per_decade
    FROM your_decade_query
    GROUP BY jockey_name
)
SELECT * FROM consistency_check 
WHERE active_decades >= 1
ORDER BY avg_points_per_decade DESC;
""")

Unnamed: 0,jockey_name,active_decades,career_trajectory,avg_points_per_decade
0,武豊,4,1990s:1 → 2000s:1 → 2010s:3 → 2020s:5,972026.25
1,岩田康誠,1,2010s:2,870276.0
2,福永祐一,2,2000s:2 → 2010s:1,846732.0
3,岡部幸雄,1,1990s:2,829224.0
4,ルメール,2,2010s:5 → 2020s:1,687179.0
5,横山典弘,2,1990s:3 → 2000s:3,620593.0
6,川田将雅,2,2010s:4 → 2020s:2,609483.0
7,安藤勝己,1,2000s:4,598075.0
8,河内洋,1,1990s:4,507572.0
9,四位洋文,1,2000s:5,454960.0


In [175]:
# 騎手キャリア曲線分析
# 各騎手のピーク年代とキャリア推移
query("""
    WITH your_results AS (
        WITH decade_jockey_in_board AS (
            SELECT
                rr.jockey_name,
                rr.horse_name,
                r.track_name,
                rr.finish_position,
                r.grade,
                r.race_date,
                EXTRACT(YEAR FROM r.race_date) AS race_year,
                CASE
                    WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 1990 AND 1999 THEN '1990s'
                    WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2000 AND 2009 THEN '2000s'
                    WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2010 AND 2019 THEN '2010s'
                    WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2020 AND 2029 THEN '2020s'
                    ELSE 'その他'
                END AS decade
            FROM race_results rr
            JOIN races r ON rr.race_id = r.race_id
            WHERE rr.finish_position <= 5
        ),
        decade_jockey_points AS (
            SELECT
                jockey_name,
                decade,
                COUNT(*) AS top5_finishes,
                SUM(CASE WHEN finish_position = 1 THEN 10
                        WHEN finish_position = 2 THEN 7
                        WHEN finish_position = 3 THEN 4
                        WHEN finish_position = 4 THEN 2
                        WHEN finish_position = 5 THEN 1
                        ELSE 0 END) 
                    AS finish_position_points,
                SUM(CASE WHEN grade = 'G1' THEN 5
                        WHEN grade = 'G2' THEN 2
                        WHEN grade = 'G3' THEN 1
                        ELSE 0 END) 
                    AS grade_points
            FROM decade_jockey_in_board
            WHERE decade != 'その他'
            GROUP BY jockey_name, decade
        ),
        ranked_jockeys AS (
            SELECT
                jockey_name,
                decade,
                top5_finishes,
                finish_position_points,
                grade_points,
                grade_points * finish_position_points AS total_points,
                ROW_NUMBER() OVER (PARTITION BY decade ORDER BY grade_points * finish_position_points DESC) AS rank
            FROM decade_jockey_points
            WHERE top5_finishes >= 10
            ORDER BY decade, finish_position_points DESC, grade_points DESC
        )
        SELECT
            decade,
            rank,
            jockey_name,
            top5_finishes,
            finish_position_points,
            grade_points,
            total_points
        FROM ranked_jockeys
        WHERE rank <= 5
        ORDER BY decade, rank
    ),
    career_analysis AS (
        SELECT 
            jockey_name,
            decade,
            total_points,
            ROW_NUMBER() OVER (PARTITION BY jockey_name ORDER BY total_points DESC) as peak_rank
        FROM your_results
    )
    SELECT 
        jockey_name,
        STRING_AGG(decade || '(' || total_points || ')', ' → ' ORDER BY decade) as career_curve,
        (SELECT decade FROM career_analysis ca2 WHERE ca2.jockey_name = ca.jockey_name AND peak_rank = 1) as peak_decade
    FROM career_analysis ca
    GROUP BY jockey_name
    HAVING COUNT(*) >= 1  -- 3年代以上活躍
    ORDER BY peak_decade, MAX(total_points) DESC;
""")

Unnamed: 0,jockey_name,career_curve,peak_decade
0,岡部幸雄,1990s(829224),1990s
1,河内洋,1990s(507572),1990s
2,南井克巳,1990s(342090),1990s
3,武豊,1990s(1469188) → 2000s(1497600) → 2010s(753279...,2000s
4,横山典弘,1990s(514968) → 2000s(726218),2000s
5,安藤勝己,2000s(598075),2000s
6,四位洋文,2000s(454960),2000s
7,福永祐一,2000s(767400) → 2010s(926064),2010s
8,岩田康誠,2010s(870276),2010s
9,川田将雅,2010s(752250) → 2020s(466716),2010s


In [180]:
# 新世代 vs ベテラン対決
# 2020年代での新世代とベテランの成績比較
query("""
    WITH your_results AS (
        WITH decade_jockey_in_board AS (
            SELECT
                rr.jockey_name,
                rr.horse_name,
                r.track_name,
                rr.finish_position,
                r.grade,
                r.race_date,
                EXTRACT(YEAR FROM r.race_date) AS race_year,
                CASE
                    WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 1990 AND 1999 THEN '1990s'
                    WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2000 AND 2009 THEN '2000s'
                    WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2010 AND 2019 THEN '2010s'
                    WHEN EXTRACT(YEAR FROM r.race_date) BETWEEN 2020 AND 2029 THEN '2020s'
                    ELSE 'その他'
                END AS decade
            FROM race_results rr
            JOIN races r ON rr.race_id = r.race_id
            WHERE rr.finish_position <= 5
        ),
        decade_jockey_points AS (
            SELECT
                jockey_name,
                decade,
                COUNT(*) AS top5_finishes,
                SUM(CASE WHEN finish_position = 1 THEN 10
                        WHEN finish_position = 2 THEN 7
                        WHEN finish_position = 3 THEN 4
                        WHEN finish_position = 4 THEN 2
                        WHEN finish_position = 5 THEN 1
                        ELSE 0 END) 
                    AS finish_position_points,
                SUM(CASE WHEN grade = 'G1' THEN 5
                        WHEN grade = 'G2' THEN 2
                        WHEN grade = 'G3' THEN 1
                        ELSE 0 END) 
                    AS grade_points
            FROM decade_jockey_in_board
            WHERE decade != 'その他'
            GROUP BY jockey_name, decade
        ),
        ranked_jockeys AS (
            SELECT
                jockey_name,
                decade,
                top5_finishes,
                finish_position_points,
                grade_points,
                grade_points * finish_position_points AS total_points,
                ROW_NUMBER() OVER (PARTITION BY decade ORDER BY grade_points * finish_position_points DESC) AS rank
            FROM decade_jockey_points
            WHERE top5_finishes >= 10
            ORDER BY decade, finish_position_points DESC, grade_points DESC
        )
        SELECT
            decade,
            rank,
            jockey_name,
            top5_finishes,
            finish_position_points,
            grade_points,
            total_points
        FROM ranked_jockeys
        WHERE rank <= 20
        ORDER BY decade, rank
    )
    SELECT 
        decade,
        jockey_name,
        total_points as points_2020s,
        CASE 
            WHEN jockey_name IN (SELECT jockey_name FROM your_results WHERE decade = '1990s') THEN 'レジェンド'
            WHEN jockey_name IN (SELECT jockey_name FROM your_results WHERE decade = '2000s') THEN 'ベテラン'
            WHEN jockey_name IN (SELECT jockey_name FROM your_results WHERE decade = '2010s') THEN '中堅'
            ELSE '新世代'
        END as generation_type
    FROM your_results 
    WHERE decade = '2020s'
    ORDER BY total_points DESC;
""")

Unnamed: 0,decade,jockey_name,points_2020s,generation_type
0,2020s,ルメール,682407,中堅
1,2020s,川田将雅,466716,中堅
2,2020s,松山弘平,211200,新世代
3,2020s,横山武史,169043,新世代
4,2020s,武豊,168038,レジェンド
5,2020s,戸崎圭太,154714,中堅
6,2020s,福永祐一,127205,ベテラン
7,2020s,Ｍ．デム,111946,中堅
8,2020s,横山典弘,79800,レジェンド
9,2020s,池添謙一,75440,ベテラン


In [196]:
"""
🚀 問題4: 黄金コンビ発掘システム
騎手×調教師の組み合わせで「隠れた黄金コンビ」を発見してください。以下の条件をすべて満たすコンビを抽出：

過去5年間で20回以上タッグを組んでいる
重賞勝率25%以上
一般的な勝率より統計的に有意に高い成績
G1での勝利経験がある

さらに、各コンビの「得意距離帯」「得意競馬場」も分析。
"""

query ("""

    WITH jockey_trainer_stats AS (
        SELECT
            rr.jockey_name,
            rr.trainer_name,
            COUNT (*) AS total_runs,
            COUNT (CASE WHEN rr.finish_position = 1 THEN 1 END) AS wins,
            COUNT (CASE WHEN rr.finish_position = 1 THEN 1 END) * 100.0 / COUNT(*) AS win_rate,
       
            -- 距離帯別勝率を計算
            COUNT(CASE WHEN r.distance::INTEGER <= 1200 AND rr.finish_position = 1 THEN 1 END) * 100.0 
                / NULLIF(COUNT(CASE WHEN r.distance::INTEGER <= 1200 THEN 1 END), 0) AS sprint_rate,
            COUNT(CASE WHEN r.distance::INTEGER BETWEEN 1201 AND 1800 AND rr.finish_position = 1 THEN 1 END) * 100.0 
                / NULLIF(COUNT(CASE WHEN r.distance::INTEGER BETWEEN 1201 AND 1800 THEN 1 END), 0) AS mile_rate,
            COUNT(CASE WHEN r.distance::INTEGER BETWEEN 1801 AND 2400 AND rr.finish_position = 1 THEN 1 END) * 100.0 
                / NULLIF(COUNT(CASE WHEN r.distance::INTEGER BETWEEN 1801 AND 2400 THEN 1 END), 0) AS middle_rate,
            COUNT(CASE WHEN r.distance::INTEGER > 2400 AND rr.finish_position = 1 THEN 1 END) * 100.0 
                / NULLIF(COUNT(CASE WHEN r.distance::INTEGER > 2400 THEN 1 END), 0) AS long_rate
       
        FROM race_results rr
        JOIN races r ON r.race_id = rr.race_id
        GROUP BY rr.jockey_name, rr.trainer_name
        HAVING COUNT(*) >= 20
            AND COUNT (CASE WHEN r.grade = 'G1' AND rr.finish_position = 1 THEN 1 END) >= 1
    )
    SELECT
        *,
        CASE 
            WHEN GREATEST(COALESCE(sprint_rate, 0), COALESCE(mile_rate, 0), COALESCE(long_rate, 0)) = COALESCE(sprint_rate, 0) THEN '短距離'
            WHEN GREATEST(COALESCE(sprint_rate, 0), COALESCE(mile_rate, 0), COALESCE(long_rate, 0)) = COALESCE(mile_rate, 0) THEN 'マイル'
            WHEN GREATEST(COALESCE(sprint_rate, 0), COALESCE(mile_rate, 0), COALESCE(long_rate, 0)) = COALESCE(middle_rate, 0) THEN '中距離'
            ELSE '長距離'
        END AS best_distance_type
    FROM jockey_trainer_stats
    WHERE win_rate >= 25.0
    ORDER BY win_rate DESC
       
""")

Unnamed: 0,jockey_name,trainer_name,total_runs,wins,win_rate,sprint_rate,mile_rate,middle_rate,long_rate,best_distance_type
0,武豊,清水久詞,26,11,42.307692,0.0,0.0,50.0,75.0,長距離
1,武豊,白井寿昭,22,9,40.909091,,25.0,46.153846,40.0,長距離
2,内田博幸,須貝尚介,20,8,40.0,,33.333333,36.363636,50.0,長距離
3,中舘英二,中野隆良,23,9,39.130435,50.0,42.857143,41.666667,0.0,短距離
4,武豊,松田博資,26,10,38.461538,,37.5,41.176471,0.0,マイル
5,北村友一,斉藤崇史,24,9,37.5,,40.0,33.333333,100.0,長距離
6,武豊,池江泰郎,69,25,36.231884,,11.764706,36.111111,62.5,長距離
7,安藤勝己,松田国英,42,15,35.714286,0.0,53.846154,31.818182,20.0,マイル
8,ルメール,角居勝彦,21,7,33.333333,0.0,20.0,50.0,0.0,マイル
9,横山典弘,音無秀孝,30,10,33.333333,,42.857143,20.0,33.333333,マイル
