In [None]:
import json
import logging
import os
import sys
from collections import Counter
from datetime import datetime, date
from urllib.parse import quote

import pandas as pd
import psycopg2
from dotenv import load_dotenv
from psycopg2.extras import RealDictCursor
from sqlalchemy import create_engine

# Add the parent directory ('..', expected to be the project root) to the
# module search path so the `src.*` imports further down can be resolved.
# NOTE(review): '..' is relative to the kernel's working directory — confirm
# the notebook is always launched from a direct subdirectory of the project.
sys.path.append('..')

# Logging configuration
logging.basicConfig(
    level=logging.WARNING, # show WARNING and above only (hides INFO and DEBUG)
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

# Load environment variables (python-dotenv)
load_dotenv()

print("=== 競馬レースデータ収集システム ===")
print(f"実行時刻: {datetime.now()}")

# Import the project modules. These must come after the sys.path change
# above, which is why they are not grouped with the top-of-cell imports.
from src.scraping.scrapers.race_scraper import RaceScraper
from src.scraping.scrapers.horse_scraper import HorseScraper
from src.scraping.scrapers.jockey_scraper import JockeyScraper
from src.scraping.scrapers.trainer_scraper import TrainerScraper
from src.scraping.scrapers.owner_scraper import OwnerScraper
from src.scraping.scrapers.breeder_scraper import BreederScraper
from src.database.schemas.race_schema import Race, RaceResult
from src.scraping.storage.postgresql_storage import PostgreSQLStorage

print("✅ システム初期化完了")



In [None]:
# =============================================================================
# 1. Connection settings
# =============================================================================

print("🔌 データベース接続を設定中...")

# PostgreSQL connection parameters, read from environment variables with
# fallbacks for host, database, user and port. The password has no fallback
# and is None when POSTGRES_PASSWORD is unset.
pg_config = {
    'host': os.getenv('POSTGRES_HOST', 'localhost'),
    'database': os.getenv('POSTGRES_DB', 'stallion_db'),
    'user': os.getenv('POSTGRES_USER', 'stallion_user'),
    'password': os.getenv('POSTGRES_PASSWORD'),
    'port': int(os.getenv('POSTGRES_PORT', '5432'))
}

# Build the SQLAlchemy engine (used by pandas).
# The user name and password are percent-encoded so that characters which are
# structural in a URL ('@', ':', '/', '%', ...) cannot corrupt it. safe=''
# makes quote() encode '/' as well, and spaces become %20 rather than '+'.
# When no password is configured the ':password' part is omitted altogether,
# instead of sending the literal string "None" as the password.
_credentials = quote(pg_config['user'], safe='')
if pg_config['password'] is not None:
    _credentials += ':' + quote(pg_config['password'], safe='')
DATABASE_URL = f"postgresql://{_credentials}@{pg_config['host']}:{pg_config['port']}/{pg_config['database']}"
engine = create_engine(DATABASE_URL)

print("✅ 接続設定完了")


# pandas helper
def query(sql, params=None):
    """Run a SQL query on the notebook's `engine` and return a DataFrame.

    Args:
        sql: The SQL to execute, passed straight to ``pd.read_sql``.
        params: Optional bind parameters forwarded to ``pd.read_sql``. Lets
            callers supply values separately instead of formatting them into
            ``sql``. Defaults to None, which is also ``pd.read_sql``'s own
            default, so existing one-argument calls behave as before.

    Returns:
        The query result as a pandas DataFrame.
    """
    return pd.read_sql(sql, engine, params=params)


---
以下、騎手

In [None]:
# Initialise the scraper and the storage backend.
scraper = JockeyScraper(delay=2.0)  # request at 2-second intervals
storage = PostgreSQLStorage()

jockeys = scraper.scrape_jockeys(limit=1000)
print(f"取得した騎手数: {len(jockeys)}")
# Show only the first few records: printing the whole result floods the cell
# output and bloats the saved notebook. The full data remains in `jockeys`.
# Assumes scrape_jockeys returns a list-like (sliceable) object — TODO confirm.
print(jockeys[:5])

In [None]:
# Pass every scraped jockey to the storage backend, printing its name and ID
# once the insert call has returned.
# NOTE(review): how insert_jockey treats an ID that is already stored is not
# visible in this notebook — confirm before re-running this cell.
for jockey in jockeys:
    storage.insert_jockey(jockey)
    print (f"Inserted jockey: {jockey.name_ja} ({jockey.jockey_id})")

---
以下、調教師

In [None]:
# Initialise the scraper and the storage backend.
trainer_scraper = TrainerScraper(delay=2.0)  # request at 2-second intervals
storage = PostgreSQLStorage()

trainers = trainer_scraper.scrape_trainers(limit=1000)
print(f"取得した調教師数: {len(trainers)}")
# Show only the first few records: printing the whole result floods the cell
# output and bloats the saved notebook. The full data remains in `trainers`.
# Assumes scrape_trainers returns a list-like (sliceable) object — TODO confirm.
print(trainers[:5])

In [None]:
# Pass every scraped trainer to the storage backend, printing a 1-based
# running count together with the trainer's name and ID after each insert call.
# NOTE(review): how insert_trainer treats an ID that is already stored is not
# visible in this notebook — confirm before re-running this cell.
for i, trainer in enumerate(trainers):
    storage.insert_trainer(trainer)
    print (f"Inserted trainer = {i+1} : {trainer.name_ja} ({trainer.trainer_id})")

In [None]:
# Sanity check on the scrape result: list all trainer IDs, the distinct IDs,
# and any ID that occurs more than once.
trainer_ids = [trainer.trainer_id for trainer in trainers]
unique_trainer_ids = set(trainer_ids)
print(f"Trainer IDs: {trainer_ids}")
print(f"Unique Trainer IDs: {unique_trainer_ids}")
print(f"Total unique trainers: {len(unique_trainer_ids)}")

# Counter tallies every ID in one pass, replacing a list.count() call per
# unique ID (quadratic in the number of trainers). Duplicates are reported in
# order of first appearance.
duplicate_trainer_ids = [trainer_id for trainer_id, occurrences in Counter(trainer_ids).items() if occurrences > 1]
if duplicate_trainer_ids:
    print(f"Duplicate Trainer IDs found: {duplicate_trainer_ids}")

---
以下、馬主

In [None]:
# Initialise the scraper and the storage backend.
owner_scraper = OwnerScraper(delay=2.0)  # request at 2-second intervals
storage = PostgreSQLStorage()

owners = owner_scraper.scrape_owners(limit=25000)
print(f"取得した馬主数: {len(owners)}")
# Show only the first few records: with a limit of 25000, printing the whole
# result floods the cell output and bloats the saved notebook. The full data
# remains in `owners`.
# Assumes scrape_owners returns a list-like (sliceable) object — TODO confirm.
print(owners[:5])

In [None]:
# Pass every scraped owner to the storage backend, printing a 1-based
# running count together with the owner's name and ID after each insert call.
# NOTE(review): how insert_owner treats an ID that is already stored is not
# visible in this notebook — confirm before re-running this cell.
for i, owner in enumerate(owners):
    storage.insert_owner(owner)
    print (f"Inserted owner = {i+1} : {owner.name_ja} ({owner.owner_id})")

---
以下、生産者

In [None]:
# Initialise the scraper and the storage backend.
breeder_scraper = BreederScraper(delay=2.0)  # request at 2-second intervals
storage = PostgreSQLStorage()

breeders = breeder_scraper.scrape_breeders(limit=11000)
print(f"取得した生産者数: {len(breeders)}")
# Show only the first few records: with a limit of 11000, printing the whole
# result floods the cell output and bloats the saved notebook. The full data
# remains in `breeders`.
# Assumes scrape_breeders returns a list-like (sliceable) object — TODO confirm.
print(breeders[:5])

In [None]:
# Pass every scraped breeder to the storage backend, printing a 1-based
# running count together with the breeder's name and ID after each insert call.
# NOTE(review): how insert_breeder treats an ID that is already stored is not
# visible in this notebook — confirm before re-running this cell.
for i, breeder in enumerate(breeders):
    storage.insert_breeder(breeder)
    print (f"Inserted breeder = {i+1} : {breeder.name_ja} ({breeder.breeder_id})")