In [23]:
# Cell 1: Import libraries
import os
import pandas as pd
import numpy as np
import json
import time as time
import logging
import sqlite3  # <-- This was missing
import requests
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional, Union

In [35]:
# Cell 2: SportMonksAPI class
class SportMonksAPI:
    """
    Client for SportMonks API v3
    """
    
    def __init__(self, api_token: str, base_url: str = "https://api.sportmonks.com/v3", rate_limit: int = 3000):
        """Initialize the SportMonks API client"""
        self.api_token = api_token
        self.base_url = base_url
        self.rate_limit = rate_limit
        # Track requests to handle rate limiting
        self.request_count = 0
        self.request_reset_time = time.time() + 3600  # Reset after an hour
        
    def _handle_rate_limit(self):
        """Handle API rate limiting"""
        current_time = time.time()
        
        # Reset counter if an hour has passed
        if current_time > self.request_reset_time:
            self.request_count = 0
            self.request_reset_time = current_time + 3600
            
        # Check if we're approaching the rate limit
        if self.request_count >= self.rate_limit:
            wait_time = self.request_reset_time - current_time
            print(f"Rate limit approaching. Waiting {wait_time:.2f} seconds...")
            time.sleep(wait_time)
            self.request_count = 0
            self.request_reset_time = time.time() + 3600
            
        # Add a small delay between requests to be nice to the API
        time.sleep(0.1)
    
    def make_request(self, endpoint: str, params: Dict = None) -> Dict:
        """Make a request to the SportMonks API"""
        self._handle_rate_limit()
        
        # Prepare the request
        url = f"{self.base_url}{endpoint}"
        
        # Ensure params is a dictionary
        if params is None:
            params = {}
            
        # Add API token to params
        params['api_token'] = self.api_token
        
        # Make the request
        try:
            response = requests.get(url, params=params)
            self.request_count += 1
            
            # Check for errors
            response.raise_for_status()
            
            # Parse the response
            data = response.json()
            return data
        except requests.exceptions.RequestException as e:
            print(f"API request error: {e}")
            if hasattr(e, 'response') and e.response is not None:
                print(f"Response content: {e.response.content}")
            raise
    
    def fetch_paginated_data(self, endpoint: str, params: Dict = None) -> List[Dict]:
        """Fetch all pages of data from a paginated endpoint"""
        if params is None:
            params = {}
            
        # Set default page size to maximum
        if 'per_page' not in params:
            params['per_page'] = 1000
            
        all_data = []
        page = 1
        total_pages = 1  # Will be updated after first request
        
        while page <= total_pages:
            params['page'] = page
            response = self.make_request(endpoint, params)
            
            # Update total pages if available
            pagination = response.get('pagination', {})
            total_pages = pagination.get('total_pages', 1)
            
            # Extract data
            data = response.get('data', [])
            if not data:
                # No more data
                break
                
            all_data.extend(data)
            page += 1
            
        return all_data

    def fetch_core_data(self, entity_type: str) -> List[Dict]:
        """Fetch all data for a core entity type"""
        return self.fetch_paginated_data(f"/core/{entity_type}")
    
    def fetch_football_data(self, entity_type: str, params: Dict = None) -> List[Dict]:
        """Fetch all data for a football entity type"""
        return self.fetch_paginated_data(f"/football/{entity_type}", params)

    def fetch_seasons_by_league(self, league_id: int) -> List[Dict]:
        """Fetch all seasons for a specific league"""
        # Try using simple string filter format based on error message
        params = {
            'filter': f'league_id:{league_id}'
        }
        
        try:
            # Try with the new filter format
            return self.fetch_football_data('seasons', params)
        except:
            # If that fails, get all seasons and filter locally
            all_seasons = self.fetch_football_data('seasons')
            return [season for season in all_seasons if str(season.get('league_id')) == str(league_id)]
    
    def fetch_teams_by_season(self, season_id: int) -> List[Dict]:
        """Fetch all teams for a specific season"""
        params = {
            'filter': f'season_id:{season_id}'
        }
        return self.fetch_football_data('teams', params)
    
    def fetch_squads(self, team_id: int, season_id: int) -> List[Dict]:
        """Fetch squad for a team in a specific season"""
        params = {
            'filter': f'team_id:{team_id},season_id:{season_id}',
            'include': 'player,statistics'
        }
        return self.fetch_football_data('squads', params)
    
    def fetch_fixtures_by_season(self, season_id: int) -> List[Dict]:
        """Fetch all fixtures for a season"""
        # Try to use the schedules endpoint first
        params = {
            'include': 'fixture'
        }
        schedule_data = self.fetch_paginated_data(f"/football/schedules/season/{season_id}", params)
        
        # If no data from schedules, try direct fixtures endpoint
        if not schedule_data:
            params = {
                'filter': f'season_id:{season_id}',
                'include': 'participants;league;venue;state'
            }
            response = self.make_request("/football/fixtures", params)
            return response.get('data', [])
            
        return schedule_data
    
    def fetch_fixture_details(self, fixture_id: int) -> Dict:
        """Fetch detailed information for a fixture"""
        params = {
            'include': 'participants;league;venue;state;scores;events.type;events.period;events.player;statistics.type;sidelined.sideline.player;sidelined.sideline.type;weatherReport'
        }
        return self.make_request(f"/football/fixtures/{fixture_id}", params)
    
    def fetch_fixtures_by_date_range(self, from_date: str, to_date: str) -> List[Dict]:
        """Fetch fixtures for a specific date range"""
        params = {
            'from': from_date,
            'to': to_date
        }
        response = self.make_request("/football/fixtures/date-range", params)
        return response.get('data', [])

    def fetch_fixture_details_batch(self, fixture_ids: List[int]) -> List[Dict]:
        """Fetch detailed information for a batch of fixtures (up to 50)"""
        if len(fixture_ids) > 50:
            raise ValueError("Cannot fetch more than 50 fixtures in one batch")
            
        params = {
            'filter': f'id:{",".join(map(str, fixture_ids))}',
            'include': 'participants;league;venue;state;scores;events.type;events.period;events.player;statistics.type;sidelined.sideline.player;sidelined.sideline.type;weatherReport'
        }
        response = self.make_request("/football/fixtures", params)
        return response.get('data', [])
        
    def fetch_standings_by_season(self, season_id: int) -> List[Dict]:
        """Fetch standings for a season"""
        params = {
            'include': 'team'
        }
        return self.make_request(f"/football/standings/season/{season_id}", params).get('data', [])
    
    def fetch_topscorers_by_season(self, season_id: int) -> List[Dict]:
        """Fetch top scorers for a season"""
        params = {
            'include': 'player'
        }
        return self.make_request(f"/football/topscorers/season/{season_id}", params).get('data', [])

In [2]:
import time
import requests
from typing import List, Dict, Any

class SportMonksAPI:
    """
    Client for SportMonks API v3
    """

    def __init__(self, api_token: str, base_url: str = "https://api.sportmonks.com/v3", rate_limit: int = 3000):
        """Initialize the SportMonks API client"""
        self.api_token = api_token
        self.base_url = base_url
        self.rate_limit = rate_limit
        self.request_count = 0
        self.request_reset_time = time.time() + 3600

    def _handle_rate_limit(self):
        """Handle API rate limiting"""
        current_time = time.time()
        if current_time > self.request_reset_time:
            self.request_count = 0
            self.request_reset_time = current_time + 3600

        if self.request_count >= self.rate_limit:
            wait_time = self.request_reset_time - current_time
            print(f"Rate limit hit. Waiting {wait_time:.1f}s...")
            time.sleep(wait_time)
            self.request_count = 0
            self.request_reset_time = time.time() + 3600

        time.sleep(0.1)

    def make_request(self, endpoint: str, params: Dict = None) -> Dict:
        """Make a request to the SportMonks API"""
        self._handle_rate_limit()
        url = f"{self.base_url}{endpoint}"
        params = params.copy() if params else {}
        params['api_token'] = self.api_token

        try:
            resp = requests.get(url, params=params)
            self.request_count += 1
            resp.raise_for_status()
            return resp.json()
        except requests.exceptions.RequestException as e:
            print(f"API request error: {e}")
            if hasattr(e, 'response') and e.response is not None:
                print(f"Response content: {e.response.content}")
            raise

    def fetch_paginated_data(self, endpoint: str, params: Dict = None) -> List[Dict]:
        """Fetch all pages of data from a paginated endpoint"""
        params = params.copy() if params else {}
        params.setdefault('per_page', 1000)

        all_data = []
        page = 1
        total_pages = 1

        while page <= total_pages:
            params['page'] = page
            result = self.make_request(endpoint, params)
            pagination = result.get('pagination', {})
            total_pages = pagination.get('total_pages', 1)
            batch = result.get('data', [])
            if not batch:
                break
            all_data.extend(batch)
            page += 1

        return all_data

    def fetch_core_data(self, entity_type: str) -> List[Dict]:
        """Fetch all data for a core entity type"""
        return self.fetch_paginated_data(f"/core/{entity_type}")

    def fetch_football_data(self, entity_type: str, params: Dict = None) -> List[Dict]:
        """Fetch all data for a football entity type"""
        return self.fetch_paginated_data(f"/football/{entity_type}", params)

    def fetch_seasons_by_league(self, league_id: int) -> List[Dict]:
        """
        Fetch all seasons for a specific league using the dynamic 'seasonLeagues' filter.
        """
        params = {
            'include': 'league',
            'filters': f'seasonLeagues:{league_id}'
        }
        return self.fetch_football_data('seasons', params)

    def fetch_teams_by_season(self, season_id: int) -> List[Dict]:
        """
        Fetch all teams for a specific season via the dedicated endpoint.
        """
        return self.fetch_paginated_data(f"/football/teams/seasons/{season_id}")

    def fetch_squads(self, team_id: int, season_id: int) -> List[Dict]:
        """
        Fetch squad for a team in a specific season via the dedicated endpoint.
        """
        return self.fetch_paginated_data(f"/football/squads/seasons/{season_id}/teams/{team_id}")

    def fetch_fixtures_by_season(self, season_id: int) -> List[Dict]:
        """
        Fetch all fixtures for a season via the schedules endpoint, with fallback.
        """
        # First try the schedules-by-season endpoint
        schedules = self.fetch_paginated_data(
            f"/football/schedules/seasons/{season_id}", 
            params={'include': 'fixture'}
        )
        if schedules:
            return schedules

        # Fallback to filtering the generic fixtures list
        params = {
            'filters': f'seasonId:{season_id}',
            'include': 'participants;league;venue;state'
        }
        resp = self.make_request("/football/fixtures", params)
        return resp.get('data', [])

    def fetch_fixture_details(self, fixture_id: int) -> Dict:
        """Fetch detailed information for a fixture"""
        params = {
            'include': 'participants;league;venue;state;'
                       'scores;events.type;events.period;events.player;'
                       'statistics.type;sidelined.sideline.player;'
                       'sidelined.sideline.type;weatherReport'
        }
        return self.make_request(f"/football/fixtures/{fixture_id}", params)

    def fetch_fixtures_by_date_range(self, from_date: str, to_date: str) -> List[Dict]:
        """Fetch fixtures for a specific date range"""
        params = {'from': from_date, 'to': to_date}
        resp = self.make_request("/football/fixtures/date-range", params)
        return resp.get('data', [])

    def fetch_fixture_details_batch(self, fixture_ids: List[int]) -> List[Dict]:
        """Fetch detailed information for a batch of fixtures (up to 50)"""
        if len(fixture_ids) > 50:
            raise ValueError("Cannot fetch more than 50 fixtures in one batch")

        params = {
            'filters': f'id:{",".join(map(str, fixture_ids))}',
            'include': 'participants;league;venue;state;'
                       'scores;events.type;events.period;events.player;'
                       'statistics.type;sidelined.sideline.player;'
                       'sidelined.sideline.type;weatherReport'
        }
        resp = self.make_request("/football/fixtures", params)
        return resp.get('data', [])

    def fetch_standings_by_season(self, season_id: int) -> List[Dict]:
        """Fetch standings for a season"""
        resp = self.make_request(f"/football/standings/season/{season_id}", {'include': 'team'})
        return resp.get('data', [])

    def fetch_topscorers_by_season(self, season_id: int) -> List[Dict]:
        """Fetch top scorers for a season"""
        resp = self.make_request(f"/football/topscorers/season/{season_id}", {'include': 'player'})
        return resp.get('data', [])


In [3]:
# Cell 3: SportMonksDB class
class SportMonksDB:
    """
    Database manager for SportMonks data
    """
    
    def __init__(self, db_path: str = "sportmonks_football.db"):
        """
        Initialize the database manager
        
        Args:
            db_path: Path to the SQLite database file
        """
        self.db_path = db_path
        self.conn = None
        self.cursor = None
        
    def connect(self):
        """Connect to the database"""
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row  # Access columns by name
        self.cursor = self.conn.cursor()
        
    def close(self):
        """Close the database connection"""
        if self.conn:
            self.conn.close()
            self.conn = None
            self.cursor = None
            
    def __enter__(self):
        """Context manager entry"""
        self.connect()
        return self
        
    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        self.close()
        
    def execute(self, query: str, params: tuple = ()):
        """Execute a query"""
        return self.cursor.execute(query, params)
        
    def executemany(self, query: str, params_list: List[tuple]):
        """Execute a query with multiple parameter sets"""
        return self.cursor.executemany(query, params_list)
        
    def commit(self):
        """Commit changes"""
        self.conn.commit()
        
    def create_tables(self):
        """Create all tables in the database"""
        # Reference data tables
        self.execute('''
        CREATE TABLE IF NOT EXISTS continents (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            code TEXT
        )
        ''')
        
        self.execute('''
        CREATE TABLE IF NOT EXISTS countries (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            code TEXT,
            continent_id INTEGER,
            FOREIGN KEY (continent_id) REFERENCES continents (id)
        )
        ''')
        
        self.execute('''
        CREATE TABLE IF NOT EXISTS regions (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            country_id INTEGER,
            FOREIGN KEY (country_id) REFERENCES countries (id)
        )
        ''')
        
        self.execute('''
        CREATE TABLE IF NOT EXISTS cities (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            region_id INTEGER,
            country_id INTEGER,
            FOREIGN KEY (region_id) REFERENCES regions (id),
            FOREIGN KEY (country_id) REFERENCES countries (id)
        )
        ''')
        
        # Venues
        self.execute('''
        CREATE TABLE IF NOT EXISTS venues (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            city_id INTEGER,
            country_id INTEGER,
            capacity INTEGER,
            address TEXT,
            latitude REAL,
            longitude REAL,
            surface TEXT,
            image_path TEXT,
            city_name TEXT,
            national_team INTEGER,
            FOREIGN KEY (city_id) REFERENCES cities (id),
            FOREIGN KEY (country_id) REFERENCES countries (id)
        )
        ''')
        
        # States (match status)
        self.execute('''
        CREATE TABLE IF NOT EXISTS states (
            id INTEGER PRIMARY KEY,
            state TEXT,
            name TEXT,
            short_name TEXT,
            developer_name TEXT
        )
        ''')
        
        # Competitions
        self.execute('''
        CREATE TABLE IF NOT EXISTS leagues (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            country_id INTEGER,
            logo_path TEXT,
            slug TEXT,
            type TEXT,
            active INTEGER,
            sub_type TEXT,
            last_played_at TEXT,
            category INTEGER,
            has_jerseys INTEGER,
            sport_id INTEGER,
            FOREIGN KEY (country_id) REFERENCES countries (id)
        )
        ''')
        
        self.execute('''
        CREATE TABLE IF NOT EXISTS seasons (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            league_id INTEGER,
            start_date TEXT,
            end_date TEXT,
            FOREIGN KEY (league_id) REFERENCES leagues (id)
        )
        ''')
        
        self.execute('''
        CREATE TABLE IF NOT EXISTS stages (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            season_id INTEGER,
            type TEXT,
            FOREIGN KEY (season_id) REFERENCES seasons (id)
        )
        ''')
        
        self.execute('''
        CREATE TABLE IF NOT EXISTS rounds (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            stage_id INTEGER,
            FOREIGN KEY (stage_id) REFERENCES stages (id)
        )
        ''')
        
        # Teams
        self.execute('''
        CREATE TABLE IF NOT EXISTS teams (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            country_id INTEGER,
            venue_id INTEGER,
            short_code TEXT,
            founded INTEGER,
            type TEXT,
            placeholder INTEGER,
            gender TEXT,
            logo_path TEXT,
            sport_id INTEGER,
            last_played_at TEXT,
            FOREIGN KEY (country_id) REFERENCES countries (id),
            FOREIGN KEY (venue_id) REFERENCES venues (id)
        )
        ''')
        
        # Players
        self.execute('''
        CREATE TABLE IF NOT EXISTS players (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            common_name TEXT,
            country_id INTEGER,
            nationality_id INTEGER,
            position_id INTEGER,
            detailed_position_id INTEGER,
            type_id INTEGER,
            date_of_birth TEXT,
            height INTEGER,
            weight INTEGER,
            image_path TEXT,
            firstname TEXT,
            lastname TEXT,
            display_name TEXT,
            gender TEXT,
            FOREIGN KEY (country_id) REFERENCES countries (id)
        )
        ''')
        
        # Team squads
        self.execute('''
        CREATE TABLE IF NOT EXISTS team_squad (
            team_id INTEGER,
            season_id INTEGER,
            player_id INTEGER,
            jersey_number INTEGER,
            on_loan INTEGER,
            PRIMARY KEY (team_id, season_id, player_id),
            FOREIGN KEY (team_id) REFERENCES teams (id),
            FOREIGN KEY (season_id) REFERENCES seasons (id),
            FOREIGN KEY (player_id) REFERENCES players (id)
        )
        ''')
        
        # Fixtures
        self.execute('''
        CREATE TABLE IF NOT EXISTS fixtures (
            id INTEGER PRIMARY KEY,
            sport_id INTEGER,
            league_id INTEGER,
            season_id INTEGER,
            stage_id INTEGER,
            group_id INTEGER,
            aggregate_id INTEGER,
            round_id INTEGER,
            state_id INTEGER,
            venue_id INTEGER,
            name TEXT,
            starting_at TEXT,
            result_info TEXT,
            leg TEXT,
            details TEXT,
            length INTEGER,
            placeholder INTEGER,
            has_odds INTEGER,
            starting_at_timestamp INTEGER,
            FOREIGN KEY (season_id) REFERENCES seasons (id),
            FOREIGN KEY (league_id) REFERENCES leagues (id),
            FOREIGN KEY (stage_id) REFERENCES stages (id),
            FOREIGN KEY (round_id) REFERENCES rounds (id),
            FOREIGN KEY (state_id) REFERENCES states (id),
            FOREIGN KEY (venue_id) REFERENCES venues (id)
        )
        ''')
        
        # Fixture participants (teams)
        self.execute('''
        CREATE TABLE IF NOT EXISTS fixture_participants (
            fixture_id INTEGER,
            team_id INTEGER,
            location TEXT,
            winner INTEGER,
            position INTEGER,
            PRIMARY KEY (fixture_id, team_id),
            FOREIGN KEY (fixture_id) REFERENCES fixtures (id),
            FOREIGN KEY (team_id) REFERENCES teams (id)
        )
        ''')
        
        # Scores
        self.execute('''
        CREATE TABLE IF NOT EXISTS scores (
            id INTEGER PRIMARY KEY,
            fixture_id INTEGER,
            type_id INTEGER,
            participant_id INTEGER,
            goals INTEGER,
            participant TEXT,
            description TEXT,
            FOREIGN KEY (fixture_id) REFERENCES fixtures (id),
            FOREIGN KEY (participant_id) REFERENCES teams (id)
        )
        ''')
        
        # Add all other table creation statements...
        # (For brevity I've included just part of them, make sure to add the rest)
        
        # Metadata table for tracking updates
        self.execute('''
        CREATE TABLE IF NOT EXISTS metadata (
            key TEXT PRIMARY KEY,
            value TEXT
        )
        ''')
        
        # Commit the changes
        self.commit()
        
    def create_metadata_table(self):
        """Create the metadata table if it doesn't exist"""
        self.execute(
            """
            CREATE TABLE IF NOT EXISTS metadata (
                key TEXT PRIMARY KEY,
                value TEXT
            )
            """
        )
        self.commit()
        
    # Add remaining methods like insert_or_update_continents, etc.
    # (For brevity I've left these out, make sure to add them all)
    
    def insert_or_update_continents(self, continents: List[Dict]):
        """Insert or update continents"""
        for continent in continents:
            self.execute(
                '''
                INSERT OR REPLACE INTO continents (id, name, code)
                VALUES (?, ?, ?)
                ''',
                (continent['id'], continent['name'], continent.get('code'))
            )
        self.commit()
    
    def insert_or_update_countries(self, countries: List[Dict]):
        """Insert or update countries"""
        for country in countries:
            self.execute(
                '''
                INSERT OR REPLACE INTO countries (id, name, code, continent_id)
                VALUES (?, ?, ?, ?)
                ''',
                (
                    country['id'], 
                    country['name'], 
                    country.get('code'),
                    country.get('continent_id')
                )
            )
        self.commit()
    
    def insert_or_update_regions(self, regions: List[Dict]):
        """Insert or update regions"""
        for region in regions:
            self.execute(
                '''
                INSERT OR REPLACE INTO regions (id, name, country_id)
                VALUES (?, ?, ?)
                ''',
                (
                    region['id'], 
                    region['name'], 
                    region.get('country_id')
                )
            )
        self.commit()
    
    def insert_or_update_cities(self, cities: List[Dict]):
        """Insert or update cities"""
        for city in cities:
            self.execute(
                '''
                INSERT OR REPLACE INTO cities (id, name, region_id, country_id)
                VALUES (?, ?, ?, ?)
                ''',
                (
                    city['id'], 
                    city['name'], 
                    city.get('region_id'),
                    city.get('country_id')
                )
            )
        self.commit()
    
    def insert_or_update_leagues(self, leagues: List[Dict]):
        """Insert or update leagues"""
        for league in leagues:
            self.execute(
                '''
                INSERT OR REPLACE INTO leagues (id, name, country_id, logo_path, slug, type)
                VALUES (?, ?, ?, ?, ?, ?)
                ''',
                (
                    league['id'], 
                    league['name'], 
                    league.get('country_id'),
                    league.get('logo_path'),
                    league.get('slug'),
                    league.get('type')
                )
            )
        self.commit()
    
    def insert_or_update_seasons(self, seasons: List[Dict]):
        """Insert or update seasons"""
        for season in seasons:
            self.execute(
                '''
                INSERT OR REPLACE INTO seasons (id, name, league_id, start_date, end_date)
                VALUES (?, ?, ?, ?, ?)
                ''',
                (
                    season['id'], 
                    season['name'], 
                    season.get('league_id'),
                    season.get('start_date'),
                    season.get('end_date')
                )
            )
        self.commit()
        
    def get_all_leagues(self):
        """Get all leagues from the database"""
        self.execute("SELECT id, name FROM leagues")
        return self.cursor.fetchall()
        
    def get_seasons_by_league(self, league_id, limit=5):
        """Get the most recent seasons for a league"""
        self.execute(
            """
            SELECT id, name, start_date, end_date
            FROM seasons
            WHERE league_id = ?
            ORDER BY start_date DESC
            LIMIT ?
            """, 
            (league_id, limit)
        )
        return self.cursor.fetchall()
        
    def get_teams_by_season(self, season_id):
        """Get all teams that participated in a season"""
        self.execute(
            """
            SELECT DISTINCT t.id, t.name
            FROM teams t
            JOIN fixture_participants fp ON t.id = fp.team_id
            JOIN fixtures f ON fp.fixture_id = f.id
            WHERE f.season_id = ?
            """,
            (season_id,)
        )
        return self.cursor.fetchall()
        
    def get_fixtures_by_season(self, season_id):
        """Get all fixtures for a season"""
        self.execute(
            """
            SELECT id, starting_at, state_id
            FROM fixtures
            WHERE season_id = ?
            """,
            (season_id,)
        )
        return self.cursor.fetchall()
        
    def set_last_update_time(self, entity_type, timestamp=None):
        """Set the last update time for an entity type"""
        if timestamp is None:
            timestamp = datetime.now().isoformat()
            
        self.execute(
            """
            INSERT OR REPLACE INTO metadata (key, value)
            VALUES (?, ?)
            """,
            (f"last_update_{entity_type}", timestamp)
        )
        self.commit()

In [4]:
# Cell 4: Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("sportsmonks_collector.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

In [14]:
# Cell 5: SportMonksDataCollector class
class SportMonksDataCollector:
    """
    Orchestrates the data collection process from SportMonks API
    """
    
    def __init__(self, api_token: str, db_path: str = "sportmonks_football.db"):
        """
        Initialize the data collector
        
        Args:
            api_token: SportMonks API token
            db_path: Path to the SQLite database file
        """
        self.api = SportMonksAPI(api_token)
        self.db = SportMonksDB(db_path)
        
    def initialize_database(self):
        """Create database tables if they don't exist"""
        with self.db:
            self.db.create_tables()
            self.db.create_metadata_table()
            
    def collect_reference_data(self):
        """Collect reference data (continents, countries, regions, cities)"""
        logger.info("Collecting reference data...")
        
        with self.db:
            # Continents
            logger.info("Collecting continents...")
            continents = self.api.fetch_core_data("continents")
            self.db.insert_or_update_continents(continents)
            logger.info(f"Collected {len(continents)} continents")
            
            # Countries
            logger.info("Collecting countries...")
            countries = self.api.fetch_core_data("countries")
            self.db.insert_or_update_countries(countries)
            logger.info(f"Collected {len(countries)} countries")
            
            # Regions
            logger.info("Collecting regions...")
            regions = self.api.fetch_core_data("regions")
            self.db.insert_or_update_regions(regions)
            logger.info(f"Collected {len(regions)} regions")
            
            # Cities
            logger.info("Collecting cities...")
            cities = self.api.fetch_core_data("cities")
            self.db.insert_or_update_cities(cities)
            logger.info(f"Collected {len(cities)} cities")
            
            # Set last update time
            self.db.set_last_update_time("reference_data")
        
        logger.info("Reference data collection completed")
    
    def collect_leagues(self):
        """Collect all leagues"""
        logger.info("Collecting leagues...")
        
        with self.db:
            leagues = self.api.fetch_football_data("leagues")
            self.db.insert_or_update_leagues(leagues)
            logger.info(f"Collected {len(leagues)} leagues")
            
            # Set last update time
            self.db.set_last_update_time("leagues")
        
        logger.info("Leagues collection completed")
        return leagues
    
    def collect_seasons_for_leagues(self, leagues: List[Dict] = None):
        """Collect seasons for all leagues or specific leagues"""
        logger.info("Collecting seasons for leagues...")
        
        if leagues is None:
            with self.db:
                leagues = self.db.get_all_leagues()
        
        total_seasons = 0
        
        with self.db:
            for league in leagues:
                league_id = league['id']
                logger.info(f"Collecting seasons for league {league_id} ({league['name']})...")
                
                seasons = self.api.fetch_seasons_by_league(league_id)
                self.db.insert_or_update_seasons(seasons)
                
                total_seasons += len(seasons)
                logger.info(f"Collected {len(seasons)} seasons for league {league_id}")
            
            # Set last update time
            self.db.set_last_update_time("seasons")
        
        logger.info(f"Seasons collection completed. Total: {total_seasons} seasons")

    def collect_states_and_venues(self):
        """Fetch all match‐states and venues, and upsert into DB."""
        logger.info("Collecting match states...")
        with self.db:
            states = self.api.fetch_football_data("states")
            self.db.insert_or_update_states(states)
            self.db.set_last_update_time("states")

            logger.info("Collecting venues...")
            venues = self.api.fetch_football_data("venues")
            self.db.insert_or_update_venues(venues)
            self.db.set_last_update_time("venues")
        logger.info("States and venues collection completed")

    def collect_teams_and_squads_for_league(self, league_id: int):
        """
        For every season in a league, fetch all teams + their squads,
        and upsert into DB.
        """
        # get seasons from DB
        seasons = self.db.get_seasons_by_league(league_id, limit=1000)
        with self.db:
            for season in seasons:
                sid = season['id']
                logger.info(f"  → League {league_id}, Season {sid}: fetching teams...")
                teams = self.api.fetch_teams_by_season(sid)
                self.db.insert_or_update_teams(teams)

                for team in teams:
                    tid = team['id']
                    logger.info(f"    • Season {sid}, Team {tid}: fetching squad...")
                    squad = self.api.fetch_squads(tid, sid)
                    self.db.insert_or_update_team_squad(tid, sid, squad)

            self.db.set_last_update_time(f"teams_squads_league_{league_id}")
        logger.info("Teams & squads collection completed")

    def collect_fixtures_for_league(self, league_id: int):
        """
        For every season in a league, fetch all fixtures (with participants/scores)
        and upsert into DB.
        """
        seasons = self.db.get_seasons_by_league(league_id, limit=1000)
        with self.db:
            for season in seasons:
                sid = season['id']
                logger.info(f"  → League {league_id}, Season {sid}: fetching fixtures...")
                fixtures = self.api.fetch_fixtures_by_season(sid)
                self.db.insert_or_update_fixtures(fixtures)

            self.db.set_last_update_time(f"fixtures_league_{league_id}")
        logger.info("Fixtures collection completed")
    
# Cell 6: Initialize the system with a single league test
# Set your API token
API_TOKEN = "oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd"

# Create collector
collector = SportMonksDataCollector(API_TOKEN)

# Initialize database
collector.initialize_database()
print("Database initialized")

# Collect reference data
collector.collect_reference_data()
print("Reference data collected")

# Get leagues and find Premier League
leagues = collector.collect_leagues()
premier_league = [l for l in leagues if l['name'] == 'Premier League']

if premier_league:
    print(f"Found Premier League with ID: {premier_league[0]['id']}")
    
    # Get seasons for Premier League only
    collector.collect_seasons_for_leagues(premier_league)
    print("Premier League seasons collected")
else:
    print("Premier League not found")

2025-04-28 20:26:55,730 - __main__ - INFO - Collecting reference data...
2025-04-28 20:26:55,730 - __main__ - INFO - Collecting continents...


Database initialized


2025-04-28 20:26:55,975 - __main__ - INFO - Collected 7 continents
2025-04-28 20:26:55,975 - __main__ - INFO - Collecting countries...
2025-04-28 20:26:56,267 - __main__ - INFO - Collected 25 countries
2025-04-28 20:26:56,268 - __main__ - INFO - Collecting regions...
2025-04-28 20:26:56,509 - __main__ - INFO - Collected 25 regions
2025-04-28 20:26:56,511 - __main__ - INFO - Collecting cities...
2025-04-28 20:26:56,746 - __main__ - INFO - Collected 25 cities
2025-04-28 20:26:56,747 - __main__ - INFO - Reference data collection completed
2025-04-28 20:26:56,748 - __main__ - INFO - Collecting leagues...


Reference data collected


2025-04-28 20:26:56,980 - __main__ - INFO - Collected 25 leagues
2025-04-28 20:26:56,982 - __main__ - INFO - Leagues collection completed
2025-04-28 20:26:56,983 - __main__ - INFO - Collecting seasons for leagues...
2025-04-28 20:26:56,984 - __main__ - INFO - Collecting seasons for league 8 (Premier League)...


Found Premier League with ID: 8


2025-04-28 20:26:57,230 - __main__ - INFO - Collected 25 seasons for league 8
2025-04-28 20:26:57,230 - __main__ - INFO - Collecting seasons for league 486 (Premier League)...
2025-04-28 20:26:57,547 - __main__ - INFO - Collected 21 seasons for league 486
2025-04-28 20:26:57,550 - __main__ - INFO - Seasons collection completed. Total: 46 seasons


Premier League seasons collected


In [6]:
# Cell 6: Initialize the system with a single league test
# Set your API token
API_TOKEN = "oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd"

# Create collector
collector = SportMonksDataCollector(API_TOKEN)

# Initialize database
collector.initialize_database()
print("Database initialized")

# Collect reference data
collector.collect_reference_data()
print("Reference data collected")

# Get leagues and find Premier League
leagues = collector.collect_leagues()
premier_league = [l for l in leagues if l['name'] == 'Premier League']

if premier_league:
    print(f"Found Premier League with ID: {premier_league[0]['id']}")
    
    # Get seasons for Premier League only
    collector.collect_seasons_for_leagues(premier_league)
    print("Premier League seasons collected")
else:
    print("Premier League not found")

2025-04-28 18:42:52,629 - __main__ - INFO - Collecting reference data...
2025-04-28 18:42:52,629 - __main__ - INFO - Collecting continents...


Database initialized


2025-04-28 18:42:52,872 - __main__ - INFO - Collected 7 continents
2025-04-28 18:42:52,872 - __main__ - INFO - Collecting countries...
2025-04-28 18:42:53,176 - __main__ - INFO - Collected 25 countries
2025-04-28 18:42:53,177 - __main__ - INFO - Collecting regions...
2025-04-28 18:42:53,422 - __main__ - INFO - Collected 25 regions
2025-04-28 18:42:53,423 - __main__ - INFO - Collecting cities...
2025-04-28 18:42:53,666 - __main__ - INFO - Collected 25 cities
2025-04-28 18:42:53,669 - __main__ - INFO - Reference data collection completed
2025-04-28 18:42:53,670 - __main__ - INFO - Collecting leagues...


Reference data collected


2025-04-28 18:42:53,924 - __main__ - INFO - Collected 25 leagues
2025-04-28 18:42:53,926 - __main__ - INFO - Leagues collection completed
2025-04-28 18:42:53,927 - __main__ - INFO - Collecting seasons for leagues...
2025-04-28 18:42:53,927 - __main__ - INFO - Collecting seasons for league 8 (Premier League)...


Found Premier League with ID: 8


2025-04-28 18:42:54,252 - __main__ - INFO - Collected 25 seasons for league 8
2025-04-28 18:42:54,253 - __main__ - INFO - Collecting seasons for league 486 (Premier League)...
2025-04-28 18:42:54,522 - __main__ - INFO - Collected 21 seasons for league 486
2025-04-28 18:42:54,523 - __main__ - INFO - Seasons collection completed. Total: 46 seasons


Premier League seasons collected


In [8]:
from sportsmonks_database import SportMonksDB
print("Has states inserter?", "insert_or_update_states" in dir(SportMonksDB))

Has states inserter? True


In [15]:
# now re-instantiate
API_TOKEN = "oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd"
collector = SportMonksDataCollector(API_TOKEN)

# Initialize database
collector.initialize_database()
print("Database initialized")

# Collect reference data
collector.collect_reference_data()
print("Reference data collected")


collector.collect_states_and_venues()  # should now work


2025-04-28 20:27:09,711 - __main__ - INFO - Collecting reference data...
2025-04-28 20:27:09,711 - __main__ - INFO - Collecting continents...


Database initialized


2025-04-28 20:27:09,944 - __main__ - INFO - Collected 7 continents
2025-04-28 20:27:09,944 - __main__ - INFO - Collecting countries...
2025-04-28 20:27:10,184 - __main__ - INFO - Collected 25 countries
2025-04-28 20:27:10,185 - __main__ - INFO - Collecting regions...
2025-04-28 20:27:10,422 - __main__ - INFO - Collected 25 regions
2025-04-28 20:27:10,423 - __main__ - INFO - Collecting cities...
2025-04-28 20:27:10,667 - __main__ - INFO - Collected 25 cities
2025-04-28 20:27:10,671 - __main__ - INFO - Reference data collection completed
2025-04-28 20:27:10,672 - __main__ - INFO - Collecting match states...


Reference data collected


2025-04-28 20:27:10,922 - __main__ - INFO - Collecting venues...
2025-04-28 20:27:11,201 - __main__ - INFO - States and venues collection completed


In [34]:
class SportMonksDataCollector:
    # …

    def collect_teams_and_squads_for_league(self, league_id: int):
        """
        For every season in a league, fetch all teams + their squads,
        and upsert into DB.
        """
        logger.info(f"Collecting teams & squads for league {league_id}…")
        with self.db:
            # now cursor is valid
            seasons = self.db.get_seasons_by_league(league_id, limit=1000)
            for season in seasons:
                sid = season['id']
                logger.info(f"  → Season {sid}: fetching teams…")
                teams = self.api.fetch_teams_by_season(sid)
                self.db.insert_or_update_teams(teams)

                for team in teams:
                    tid = team['id']
                    logger.info(f"    • Team {tid}: fetching squad…")
                    squad = self.api.fetch_squads(tid, sid)
                    self.db.insert_or_update_team_squad(tid, sid, squad)

            self.db.set_last_update_time(f"teams_squads_league_{league_id}")
        logger.info("Teams & squads collection completed")

    def collect_fixtures_for_league(self, league_id: int):
        """
        For every season in a league, fetch all fixtures (with participants/scores)
        and upsert into DB.
        """
        logger.info(f"Collecting fixtures for league {league_id}…")
        with self.db:
            seasons = self.db.get_seasons_by_league(league_id, limit=1000)
            for season in seasons:
                sid = season['id']
                logger.info(f"  → Season {sid}: fetching fixtures…")
                fixtures = self.api.fetch_fixtures_by_season(sid)
                self.db.insert_or_update_fixtures(fixtures)

            self.db.set_last_update_time(f"fixtures_league_{league_id}")
        logger.info("Fixtures collection completed")


In [35]:
# This will populate your seasons table for *every* league
collector.collect_seasons_for_leagues()
print("✅ Seasons for all leagues loaded.")


2025-04-28 20:46:35,376 - __main__ - INFO - Collecting seasons for leagues...
2025-04-28 20:46:35,378 - __main__ - INFO - Collecting seasons for league 8 (Premier League)...
2025-04-28 20:46:35,640 - __main__ - INFO - Collected 25 seasons for league 8
2025-04-28 20:46:35,641 - __main__ - INFO - Collecting seasons for league 9 (Championship)...
2025-04-28 20:46:35,893 - __main__ - INFO - Collected 20 seasons for league 9
2025-04-28 20:46:35,894 - __main__ - INFO - Collecting seasons for league 24 (FA Cup)...
2025-04-28 20:46:36,152 - __main__ - INFO - Collected 20 seasons for league 24
2025-04-28 20:46:36,153 - __main__ - INFO - Collecting seasons for league 27 (Carabao Cup)...
2025-04-28 20:46:36,407 - __main__ - INFO - Collected 20 seasons for league 27
2025-04-28 20:46:36,409 - __main__ - INFO - Collecting seasons for league 72 (Eredivisie)...
2025-04-28 20:46:36,653 - __main__ - INFO - Collected 20 seasons for league 72
2025-04-28 20:46:36,654 - __main__ - INFO - Collecting seasons 

✅ Seasons for all leagues loaded.


In [78]:
# After you've initialized the database and collected reference data
import sqlite3
import time

# Connect to the database for direct queries if needed
conn = sqlite3.connect(collector.db.db_path)
cursor = conn.cursor()

# Skip states and venues if they're causing issues
# Instead of collector.collect_states_and_venues(), let's proceed directly to fixtures
print("Skipping states and venues collection due to error")

# Get Premier League ID from the previously collected leagues
premier_league_id = premier_league[0]['id']
print(f"Collecting fixtures for Premier League (ID: {premier_league_id})...")

try:
    # Let's handle any potential errors with fixtures collection
    collector.collect_fixtures_for_league(premier_league_id)
    print("Premier League fixtures collected")
except Exception as e:
    print(f"Error collecting fixtures: {e}")
    
    # If the error is related to API rate limiting, we can add a retry with delay
    print("Waiting 10 seconds before retry...")
    time.sleep(10)
    try:
        collector.collect_fixtures_for_league(premier_league_id)
        print("Premier League fixtures collected (retry succeeded)")
    except Exception as e2:
        print(f"Retry failed: {e2}")
        print("You may need to check the SportMonksAPI.fetch_fixtures_by_season method")

# Verify the seasons we have
try:
    seasons = collector.db.get_seasons_by_league(premier_league_id)
    print(f"Found {len(seasons)} seasons for Premier League")
    
    # Print the most recent 5 seasons
    for season in seasons[:5]:
        print(f"Season ID: {season['id']}, Name: {season.get('name', 'N/A')}")
        
    # Try a direct SQL query to see if fixtures were collected
    try:
        cursor.execute("SELECT COUNT(*) FROM fixtures WHERE league_id = ?", (premier_league_id,))
        fixture_count = cursor.fetchone()[0]
        print(f"Total Premier League fixtures in database: {fixture_count}")
    except sqlite3.OperationalError:
        print("Could not query fixtures table - it may not exist yet")
except Exception as e:
    print(f"Error verifying seasons: {e}")

# Close the connection
conn.close()

2025-04-28 21:38:32,633 - __main__ - INFO - Collecting fixtures for league 8…
2025-04-28 21:38:32,636 - __main__ - INFO -   → Season 2: fetching fixtures…


Skipping states and venues collection due to error
Collecting fixtures for Premier League (ID: 8)...
API request error: 400 Client Error: Bad Request for url: https://api.sportmonks.com/v3/football/schedules/seasons/2?include=fixture&per_page=1000&page=1&api_token=oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd
Response content: b'    {"message":"You may not use more than 0 nested includes on this endpoint","link":"https:\\/\\/docs.sportmonks.com\\/football\\/api\\/response-codes\\/include-exceptions"}'
Error collecting fixtures: 400 Client Error: Bad Request for url: https://api.sportmonks.com/v3/football/schedules/seasons/2?include=fixture&per_page=1000&page=1&api_token=oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd
Waiting 10 seconds before retry...


2025-04-28 21:38:43,161 - __main__ - INFO - Collecting fixtures for league 8…
2025-04-28 21:38:43,165 - __main__ - INFO -   → Season 2: fetching fixtures…


API request error: 400 Client Error: Bad Request for url: https://api.sportmonks.com/v3/football/schedules/seasons/2?include=fixture&per_page=1000&page=1&api_token=oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd
Response content: b'    {"message":"You may not use more than 0 nested includes on this endpoint","link":"https:\\/\\/docs.sportmonks.com\\/football\\/api\\/response-codes\\/include-exceptions"}'
Retry failed: 400 Client Error: Bad Request for url: https://api.sportmonks.com/v3/football/schedules/seasons/2?include=fixture&per_page=1000&page=1&api_token=oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd
You may need to check the SportMonksAPI.fetch_fixtures_by_season method
Error verifying seasons: 'NoneType' object has no attribute 'execute'


In [36]:
conn = sqlite3.connect(collector.db.db_path)
# Show a few seasons:
print(pd.read_sql_query(
    "SELECT id, league_id, name, start_date, end_date FROM seasons LIMIT 10", conn
))
# Show distinct league IDs that have at least one season
print("\nLeagues with seasons:", 
      pd.read_sql_query("SELECT DISTINCT league_id FROM seasons", conn)['league_id'].tolist()
)
conn.close()


   id  league_id       name start_date end_date
0   2          8  2010/2011       None     None
1   3          8  2013/2014       None     None
2   6          8  2008/2009       None     None
3   7          8  2012/2013       None     None
4   8          8  2006/2007       None     None
5   9          8  2011/2012       None     None
6  10          8  2015/2016       None     None
7  11          8  2009/2010       None     None
8  12          8  2014/2015       None     None
9  13          8  2016/2017       None     None

Leagues with seasons: [8, 9, 72, 82, 181, 208, 244, 27, 24, 271, 301, 390, 387, 384, 444, 453, 462, 501, 486, 570, 573, 591, 567, 564, 600]


In [72]:
def fetch_fixtures_by_season(self, season_id: int) -> List[Dict]:
    """Fetch all fixtures for a season."""
    params = {
        'filters': f'season_ids:{season_id}',  # Updated filter name
        'include': 'participants;league;venue;state;scores'
    }

    try:
        logger.info(f"Fetching fixtures for season ID: {season_id}")
        response = self.make_request("/football/fixtures", params)
        
        # Log the raw response for debugging
        logger.debug(f"API response for season {season_id}: {response}")
        
        data = response.get('data', [])
        if data:
            logger.info(f"Successfully fetched {len(data)} fixtures for season {season_id}")
            return data

        logger.warning(f"No fixtures found for season {season_id}.")
        return []

    except Exception as e:
        logger.error(f"Error fetching fixtures for season {season_id}: {str(e)}")
        return []

In [75]:
season_id = 2
params = {
    'filters': f'seasonId:{season_id}',
    'include': 'participants;league;venue;state;scores'
}
try:
    # Try fetching fixtures using direct filters
    response = collector.api.make_request("/football/fixtures", params)
    fixtures = response.get('data', [])
    if fixtures:
        print(f"Fetched {len(fixtures)} fixtures for season {season_id}")
    else:
        print(f"No fixtures found for season {season_id}")
except Exception as e:
    print(f"Error fetching fixtures: {str(e)}")

API request error: 400 Client Error: Bad Request for url: https://api.sportmonks.com/v3/football/fixtures?filters=seasonId%3A2&include=participants%3Bleague%3Bvenue%3Bstate%3Bscores&api_token=oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd
Response content: b'    {"message":"You requested filters do not exist. Visit the documentation to see all the available filters and how to apply them."}'
Error fetching fixtures: 400 Client Error: Bad Request for url: https://api.sportmonks.com/v3/football/fixtures?filters=seasonId%3A2&include=participants%3Bleague%3Bvenue%3Bstate%3Bscores&api_token=oYeoAVFUTQpu7MfoFqbvyiYfgRRkuBWW0p2atkZnySe4X3xrHkjgGhOvI0pd


In [68]:
# Connect to the database
conn = sqlite3.connect(collector.db.db_path)

# Create query to get 10 most recent Premier League fixtures
query = """
SELECT 
    f.id,
    f.starting_at,
    f.name,
    f.state_id,
    l.name as league_name
FROM fixtures f
JOIN leagues l ON f.league_id = l.id
WHERE l.id = 8  -- Premier League ID
ORDER BY f.starting_at DESC
LIMIT 10
"""

# Execute query and display results
df_recent = pd.read_sql_query(query, conn)
print("10 Most Recent Premier League Fixtures:")
print(df_recent.to_string(index=False))

# Close connection
conn.close()

10 Most Recent Premier League Fixtures:
Empty DataFrame
Columns: [id, starting_at, name, state_id, league_name]
Index: []


In [66]:
# Connect to the database and check fixtures table
conn = sqlite3.connect(collector.db.db_path)

# Check total number of fixtures
fixtures_count = pd.read_sql_query(
    "SELECT COUNT(*) as count FROM fixtures", 
    conn
)
print(f"Total fixtures in database: {fixtures_count.iloc[0]['count']}")

# Check fixtures by league
fixtures_by_league = pd.read_sql_query(
    """
    SELECT 
        l.id as league_id,
        l.name as league_name,
        COUNT(*) as fixture_count
    FROM fixtures f
    JOIN leagues l ON f.league_id = l.id
    GROUP BY l.id, l.name
    ORDER BY fixture_count DESC
    """,
    conn
)
print("\nFixtures by league:")
print(fixtures_by_league)

conn.close()

Total fixtures in database: 0

Fixtures by league:
Empty DataFrame
Columns: [league_id, league_name, fixture_count]
Index: []


In [29]:
conn = sqlite3.connect(collector.db.db_path)
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
print("Tables in DB:", [row[0] for row in cursor.fetchall()])
conn.close()

Tables in DB: ['continents', 'countries', 'regions', 'cities', 'venues', 'states', 'leagues', 'seasons', 'stages', 'rounds', 'teams', 'players', 'team_squad', 'fixtures', 'fixture_participants', 'scores', 'metadata', 'event_types', 'periods', 'events', 'stat_types', 'fixture_team_stats', 'sqlite_sequence', 'sideline_types', 'sidelines', 'fixture_sidelines', 'weather_reports', 'player_stat_detail', 'fixture_player_stats', 'standings', 'top_performers', 'bookmakers', 'markets', 'odds', 'predictions']


In [64]:
# Connect to your actual DB file
conn = sqlite3.connect(collector.db.db_path)

# Load all fixtures for league_id = 1
df = pd.read_sql_query(
    "SELECT * FROM fixtures WHERE league_id = 8",
    conn
)

# Show the first 10 rows
print("First 10 rows of fixtures for League 8:")
print(df.head(10).to_string(index=False))

# Show schema & non-null counts
print("\nSchema & non-null counts:")
df.info()

conn.close()

First 10 rows of fixtures for League 8:
Empty DataFrame
Columns: [id, sport_id, league_id, season_id, stage_id, group_id, aggregate_id, round_id, state_id, venue_id, name, starting_at, result_info, leg, details, length, placeholder, has_odds, starting_at_timestamp]
Index: []

Schema & non-null counts:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 19 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   id                     0 non-null      object
 1   sport_id               0 non-null      object
 2   league_id              0 non-null      object
 3   season_id              0 non-null      object
 4   stage_id               0 non-null      object
 5   group_id               0 non-null      object
 6   aggregate_id           0 non-null      object
 7   round_id               0 non-null      object
 8   state_id               0 non-null      object
 9   venue_id               0 non-null     

In [66]:
import sqlite3
import pandas as pd

# 1) Connect to your DB
conn = sqlite3.connect("sportmonks_football.db")
cursor = conn.cursor()

# 2) List all tables
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = [row[0] for row in cursor.fetchall()]
print("Tables in database:", tables)

# 3) For each table, show schema and row count
for table in tables:
    print(f"\n=== {table} ===")
    # Schema via pandas (dtype inference)
    df = pd.read_sql_query(f"SELECT * FROM {table} LIMIT 0", conn)
    print("Columns and dtypes:")
    print(df.dtypes)
    # Row count
    cursor.execute(f"SELECT COUNT(*) FROM {table}")
    count = cursor.fetchone()[0]
    print(f"Total rows: {count}")

# Close connection when done
conn.close()


Tables in database: ['continents', 'countries', 'regions', 'cities', 'venues', 'states', 'leagues', 'seasons', 'stages', 'rounds', 'teams', 'players', 'team_squad', 'fixtures', 'fixture_participants', 'scores', 'metadata']

=== continents ===
Columns and dtypes:
id      object
name    object
code    object
dtype: object
Total rows: 7

=== countries ===
Columns and dtypes:
id              object
name            object
code            object
continent_id    object
dtype: object
Total rows: 25

=== regions ===
Columns and dtypes:
id            object
name          object
country_id    object
dtype: object
Total rows: 25

=== cities ===
Columns and dtypes:
id            object
name          object
region_id     object
country_id    object
dtype: object
Total rows: 25

=== venues ===
Columns and dtypes:
id               object
name             object
city_id          object
country_id       object
capacity         object
address          object
latitude         object
longitude        objec

In [67]:
import sqlite3
import pandas as pd
from IPython.display import display, Markdown

# Connect
conn = sqlite3.connect("sportmonks_football.db")
cursor = conn.cursor()

# Get all table names
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
tables = [row[0] for row in cursor.fetchall()]

for table in tables:
    display(Markdown(f"### Table: `{table}`"))
    
    # Row count
    cursor.execute(f"SELECT COUNT(*) FROM {table}")
    count = cursor.fetchone()[0]
    print(f"Total rows: {count}\n")
    
    # Schema & dtypes
    df0 = pd.read_sql_query(f"SELECT * FROM {table} LIMIT 0", conn)
    print("Columns and dtypes:")
    print(df0.dtypes, "\n")
    
    # Sample data
    df_sample = pd.read_sql_query(f"SELECT * FROM {table} LIMIT 5", conn)
    if not df_sample.empty:
        print("First 5 rows:")
        display(df_sample)
    else:
        print("No rows to preview.")
    
    print("\n---\n")

# Close connection
conn.close()


### Table: `continents`

Total rows: 7

Columns and dtypes:
id      object
name    object
code    object
dtype: object 

First 5 rows:


Unnamed: 0,id,name,code
0,1,Europe,EU
1,2,Asia,AS
2,3,Africa,AF
3,4,Oceania,OC
4,5,Antarctica,AT



---



### Table: `countries`

Total rows: 25

Columns and dtypes:
id              object
name            object
code            object
continent_id    object
dtype: object 

First 5 rows:


Unnamed: 0,id,name,code,continent_id
0,2,Poland,,1
1,5,Brazil,,7
2,11,Germany,,1
3,17,France,,1
4,20,Portugal,,1



---



### Table: `regions`

Total rows: 25

Columns and dtypes:
id            object
name          object
country_id    object
dtype: object 

First 5 rows:


Unnamed: 0,id,name,country_id
0,1,Al Qadisiyah,107
1,2,HaTsafon,802
2,3,Hawaii,3483
3,4,Constantine,614
4,5,Tiaret,614



---



### Table: `cities`

Total rows: 25

Columns and dtypes:
id            object
name          object
region_id     object
country_id    object
dtype: object 

First 5 rows:


Unnamed: 0,id,name,region_id,country_id
0,1,'Afak,1,107
1,2,'Afula 'Illit,2,802
2,3,'Aiea,3,24143344
3,4,'Ain Abid,4,614
4,5,'Ain Deheb,5,614



---



### Table: `venues`

Total rows: 0

Columns and dtypes:
id               object
name             object
city_id          object
country_id       object
capacity         object
address          object
latitude         object
longitude        object
surface          object
image_path       object
city_name        object
national_team    object
dtype: object 

No rows to preview.

---



### Table: `states`

Total rows: 0

Columns and dtypes:
id                object
state             object
name              object
short_name        object
developer_name    object
dtype: object 

No rows to preview.

---



### Table: `leagues`

Total rows: 25

Columns and dtypes:
id                object
name              object
country_id        object
logo_path         object
slug              object
type              object
active            object
sub_type          object
last_played_at    object
category          object
has_jerseys       object
sport_id          object
dtype: object 

First 5 rows:


Unnamed: 0,id,name,country_id,logo_path,slug,type,active,sub_type,last_played_at,category,has_jerseys,sport_id
0,8,Premier League,462,,,league,,,,,,
1,9,Championship,462,,,league,,,,,,
2,24,FA Cup,462,,,league,,,,,,
3,27,Carabao Cup,462,,,league,,,,,,
4,72,Eredivisie,38,,,league,,,,,,



---



### Table: `seasons`

Total rows: 46

Columns and dtypes:
id            object
name          object
league_id     object
start_date    object
end_date      object
dtype: object 

First 5 rows:


Unnamed: 0,id,name,league_id,start_date,end_date
0,2,2010/2011,8,,
1,3,2013/2014,8,,
2,6,2008/2009,8,,
3,7,2012/2013,8,,
4,8,2006/2007,8,,



---



### Table: `stages`

Total rows: 0

Columns and dtypes:
id           object
name         object
season_id    object
type         object
dtype: object 

No rows to preview.

---



### Table: `rounds`

Total rows: 0

Columns and dtypes:
id          object
name        object
stage_id    object
dtype: object 

No rows to preview.

---



### Table: `teams`

Total rows: 0

Columns and dtypes:
id                object
name              object
country_id        object
venue_id          object
short_code        object
founded           object
type              object
placeholder       object
gender            object
logo_path         object
sport_id          object
last_played_at    object
dtype: object 

No rows to preview.

---



### Table: `players`

Total rows: 0

Columns and dtypes:
id                      object
name                    object
common_name             object
country_id              object
nationality_id          object
position_id             object
detailed_position_id    object
type_id                 object
date_of_birth           object
height                  object
weight                  object
image_path              object
firstname               object
lastname                object
display_name            object
gender                  object
dtype: object 

No rows to preview.

---



### Table: `team_squad`

Total rows: 0

Columns and dtypes:
team_id          object
season_id        object
player_id        object
jersey_number    object
on_loan          object
dtype: object 

No rows to preview.

---



### Table: `fixtures`

Total rows: 0

Columns and dtypes:
id                       object
sport_id                 object
league_id                object
season_id                object
stage_id                 object
group_id                 object
aggregate_id             object
round_id                 object
state_id                 object
venue_id                 object
name                     object
starting_at              object
result_info              object
leg                      object
details                  object
length                   object
placeholder              object
has_odds                 object
starting_at_timestamp    object
dtype: object 

No rows to preview.

---



### Table: `fixture_participants`

Total rows: 0

Columns and dtypes:
fixture_id    object
team_id       object
location      object
winner        object
position      object
dtype: object 

No rows to preview.

---



### Table: `scores`

Total rows: 0

Columns and dtypes:
id                object
fixture_id        object
type_id           object
participant_id    object
goals             object
participant       object
description       object
dtype: object 

No rows to preview.

---



### Table: `metadata`

Total rows: 3

Columns and dtypes:
key      object
value    object
dtype: object 

First 5 rows:


Unnamed: 0,key,value
0,last_update_reference_data,2025-04-28T18:37:19.510959
1,last_update_leagues,2025-04-28T18:37:19.755180
2,last_update_seasons,2025-04-28T18:37:20.247491



---

