# Premier League Football Analytics Database

## Setup Database Connection

In [3]:
import pandas as pd
import numpy as np
import re
from datetime import datetime
import logging
import warnings
from data201 import db_connection, df_query

# Root logger: timestamped records at INFO level and above
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Suppress every Python warning for the remainder of the session
warnings.filterwarnings('ignore')

In [4]:
# Session-wide connection and cursor built from the project's .ini settings.
# NOTE(review): every function visible in this section opens and closes its
# own connection, so nothing shown here uses or closes this pair - confirm
# that a later cell relies on it.
conn = db_connection(config_file='premier_league_analytics.ini')
cursor = conn.cursor()

## Create the Database

## Create Staging Table

In [None]:
def create_staging_table():
    """Create the raw staging table whose columns mirror the source CSV.

    Opens a dedicated connection from ``premier_league_analytics.ini``, runs a
    single ``CREATE TABLE IF NOT EXISTS`` for ``stg_premier_league_raw`` and
    commits. Besides the CSV columns the table carries a surrogate ``Id``
    plus ``SourceFile``, ``LoadTimestamp`` and an indexed ``ProcessedFlag``.

    Returns:
        bool: True once the statement has run and been committed; False if
        any exception occurred (the error is logged and a rollback is
        attempted). The cursor and connection are closed in both cases.
    """
    connection = None
    db_cursor = None

    try:
        connection = db_connection(config_file='premier_league_analytics.ini')
        db_cursor = connection.cursor()

        # Identifiers are backtick-quoted throughout because some column
        # names (for example `AS`) collide with SQL keywords.
        staging_ddl = """
        CREATE TABLE IF NOT EXISTS `stg_premier_league_raw` (
          `Id`            INT AUTO_INCREMENT PRIMARY KEY,
          `Div`           VARCHAR(10),
          `Date`          DATE,
          `Time`          TIME,
          `HomeTeam`      VARCHAR(100),
          `AwayTeam`      VARCHAR(100),
          `FTHG`          INT,
          `FTAG`          INT,
          `FTR`           CHAR(1),
          `HTHG`          INT,
          `HTAG`          INT,
          `HTR`           CHAR(1),
          `Referee`       VARCHAR(100),
          `HS`            INT,
          `AS`            INT,
          `HST`           INT,
          `AST`           INT,
          `HF`            INT,
          `AF`            INT,
          `HC`            INT,
          `AC`            INT,
          `HY`            INT,
          `AY`            INT,
          `HR`            INT,
          `AR`            INT,
          `B365H`         FLOAT,
          `B365D`         FLOAT,
          `B365A`         FLOAT,
          `BWH`           FLOAT,
          `BWD`           FLOAT,
          `BWA`           FLOAT,
          `IWH`           FLOAT,
          `IWD`           FLOAT,
          `IWA`           FLOAT,
          `PSH`           FLOAT,
          `PSD`           FLOAT,
          `PSA`           FLOAT,
          `WHH`           FLOAT,
          `WHD`           FLOAT,
          `WHA`           FLOAT,
          `VCH`           FLOAT,
          `VCD`           FLOAT,
          `VCA`           FLOAT,
          `B365_2_5O`     FLOAT,
          `B365_2_5U`     FLOAT,
          `P_2_5O`        FLOAT,
          `P_2_5U`        FLOAT,
          `MaxH`          FLOAT,
          `MaxD`          FLOAT,
          `MaxA`          FLOAT,
          `AvgH`          FLOAT,
          `AvgD`          FLOAT,
          `AvgA`          FLOAT,
          `B365CH`        FLOAT,
          `B365CD`        FLOAT,
          `B365CA`        FLOAT,
          `BWCH`          FLOAT,
          `BWCD`          FLOAT,
          `BWCA`          FLOAT,
          `IWCH`          FLOAT,
          `IWCD`          FLOAT,
          `IWCA`          FLOAT,
          `PSCH`          FLOAT,
          `PSCD`          FLOAT,
          `PSCA`          FLOAT,
          `WHCH`          FLOAT,
          `WHCD`          FLOAT,
          `WHCA`          FLOAT,
          `VCCH`          FLOAT,
          `VCCD`          FLOAT,
          `VCCA`          FLOAT,
          `MaxCH`         FLOAT,
          `MaxCD`         FLOAT,
          `MaxCA`         FLOAT,
          `AvgCH`         FLOAT,
          `AvgCD`         FLOAT,
          `AvgCA`         FLOAT,
          `AHh`           FLOAT,
          `B365AHH`       FLOAT,
          `B365AHA`       FLOAT,
          `PAHH`          FLOAT,
          `PAHA`          FLOAT,
          `MaxAHH`        FLOAT,
          `MaxAHA`        FLOAT,
          `AvgAHH`        FLOAT,
          `AvgAHA`        FLOAT,
          `SourceFile`    VARCHAR(255),
          `LoadTimestamp` DATETIME NOT NULL,
          `ProcessedFlag` TINYINT DEFAULT 0,
          INDEX idx_processed (`ProcessedFlag`)
        ) 
        ENGINE=InnoDB
        DEFAULT CHARSET=utf8mb4;
        """
        db_cursor.execute(staging_ddl)

        connection.commit()
        logging.info("Staging table created successfully")
        return True
    except Exception as exc:
        # Undo anything pending before reporting the failure to the caller.
        if connection:
            connection.rollback()
        logging.error(f"Error creating staging table: {exc}")
        return False
    finally:
        # Release the cursor first, then the connection that owns it.
        if db_cursor:
            db_cursor.close()
        if connection:
            connection.close()

## Create Operational Tables

In [None]:
def create_operational_tables():
    """Create the operational schema and seed the default admin account.

    Using its own connection, this runs, in order: ``Users`` (plus an upsert
    of the ``Admin`` row), ``Teams``, ``Seasons``, ``Referees``,
    ``Divisions``, ``Matches``, ``MatchStatistics``, ``Bookmakers``,
    ``Markets``, ``BettingOdds``, ``ETLLog`` and ``ETLDeadLetter``. Parent
    tables are created before the tables whose foreign keys reference them.

    Returns:
        bool: True when every statement ran and the work was committed;
        False if any exception occurred (logged, rollback attempted). The
        cursor and connection are closed in both cases.
    """
    connection = None
    db_cursor = None

    try:
        connection = db_connection(config_file='premier_league_analytics.ini')
        db_cursor = connection.cursor()

        # Application accounts
        users_ddl = """
        CREATE TABLE IF NOT EXISTS Users (
            UserID INT AUTO_INCREMENT PRIMARY KEY,
            Username VARCHAR(50) NOT NULL UNIQUE,
            PasswordHash VARCHAR(255) NOT NULL,
            Role ENUM('admin', 'manager', 'user') NOT NULL,
            CreatedDate DATETIME DEFAULT CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Default administrator (username=Admin, password=admin).
        # NOTE(review): the ON DUPLICATE KEY UPDATE clause means every re-run
        # resets Admin's password hash and role to these defaults - confirm
        # that this is intended.
        admin_upsert = """
        INSERT INTO Users (Username, PasswordHash, Role)
        VALUES (
            'Admin',
            SHA2('admin', 256),
            'admin'
        )
        ON DUPLICATE KEY UPDATE
            PasswordHash = VALUES(PasswordHash),
            Role = VALUES(Role);
        """

        # Clubs
        teams_ddl = """
        CREATE TABLE IF NOT EXISTS `Teams` (
            `TeamID` INT AUTO_INCREMENT PRIMARY KEY,
            `TeamName` VARCHAR(100) NOT NULL UNIQUE,
            `ShortName` VARCHAR(50) NOT NULL,
            `Stadium` VARCHAR(100),
            `City` VARCHAR(50),
            `YearFounded` INT,
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP,
            `ModifiedDate` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Seasons with their date range
        seasons_ddl = """
        CREATE TABLE IF NOT EXISTS `Seasons` (
            `SeasonID` INT AUTO_INCREMENT PRIMARY KEY,
            `SeasonName` VARCHAR(20) NOT NULL UNIQUE,
            `StartDate` DATE NOT NULL,
            `EndDate` DATE NOT NULL,
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Match officials
        referees_ddl = """
        CREATE TABLE IF NOT EXISTS `Referees` (
            `RefereeID` INT AUTO_INCREMENT PRIMARY KEY,
            `RefereeName` VARCHAR(100) NOT NULL UNIQUE,
            `YearsExperience` INT,
            `Nationality` VARCHAR(50),
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP,
            `ModifiedDate` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # League divisions keyed by their short code
        divisions_ddl = """
        CREATE TABLE IF NOT EXISTS `Divisions` (
            `DivisionID` INT AUTO_INCREMENT PRIMARY KEY,
            `DivisionCode` VARCHAR(10) NOT NULL UNIQUE,
            `LeagueName` VARCHAR(100),
            `Country` VARCHAR(50),
            `Tier` INT,
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Fixtures and results; references Seasons, Divisions, Teams, Referees
        matches_ddl = """
        CREATE TABLE IF NOT EXISTS `Matches` (
            `MatchID` INT AUTO_INCREMENT PRIMARY KEY,
            `SeasonID` INT NOT NULL,
            `DivisionID` INT NOT NULL,
            `MatchDate` DATE NOT NULL,
            `MatchTime` TIME,
            `HomeTeamID` INT NOT NULL,
            `AwayTeamID` INT NOT NULL,
            `FTHG` INT NOT NULL,
            `FTAG` INT NOT NULL,
            `FTR` CHAR(1) NOT NULL,
            `HTHG` INT,
            `HTAG` INT,
            `HTR` CHAR(1),
            `RefereeID` INT,
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (`SeasonID`) REFERENCES `Seasons`(`SeasonID`),
            FOREIGN KEY (`DivisionID`) REFERENCES `Divisions`(`DivisionID`),
            FOREIGN KEY (`HomeTeamID`) REFERENCES `Teams`(`TeamID`),
            FOREIGN KEY (`AwayTeamID`) REFERENCES `Teams`(`TeamID`),
            FOREIGN KEY (`RefereeID`) REFERENCES `Referees`(`RefereeID`),
            UNIQUE (`HomeTeamID`, `AwayTeamID`, `MatchDate`, `MatchTime`),
            CHECK (`HomeTeamID` <> `AwayTeamID`),
            CHECK (`FTR` IN ('H', 'D', 'A')),
            CHECK (`HTR` IN ('H', 'D', 'A'))
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # One statistics row per match (MatchID is UNIQUE)
        match_statistics_ddl = """
        CREATE TABLE IF NOT EXISTS `MatchStatistics` (
            `StatID` INT AUTO_INCREMENT PRIMARY KEY,
            `MatchID` INT NOT NULL UNIQUE,
            `HomeShots` INT CHECK (`HomeShots` >= 0),
            `AwayShots` INT CHECK (`AwayShots` >= 0),
            `HomeShotsTarget` INT CHECK (`HomeShotsTarget` >= 0),
            `AwayShotsTarget` INT CHECK (`AwayShotsTarget` >= 0),
            `HomeCorners` INT CHECK (`HomeCorners` >= 0),
            `AwayCorners` INT CHECK (`AwayCorners` >= 0),
            `HomeFouls` INT CHECK (`HomeFouls` >= 0),
            `AwayFouls` INT CHECK (`AwayFouls` >= 0),
            `HomeYellowCards` INT CHECK (`HomeYellowCards` >= 0),
            `AwayYellowCards` INT CHECK (`AwayYellowCards` >= 0),
            `HomeRedCards` INT CHECK (`HomeRedCards` >= 0),
            `AwayRedCards` INT CHECK (`AwayRedCards` >= 0),
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (`MatchID`) REFERENCES `Matches`(`MatchID`)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Odds providers
        bookmakers_ddl = """
        CREATE TABLE IF NOT EXISTS `Bookmakers` (
            `BookmakerID` INT AUTO_INCREMENT PRIMARY KEY,
            `BookmakerCode` VARCHAR(10) NOT NULL UNIQUE,
            `BookmakerName` VARCHAR(100) NOT NULL,
            `Website` VARCHAR(100),
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Betting market definitions
        markets_ddl = """
        CREATE TABLE IF NOT EXISTS `Markets` (
            `MarketID` INT AUTO_INCREMENT PRIMARY KEY,
            `MarketType` VARCHAR(50) NOT NULL,
            `MarketSubtype` VARCHAR(50),
            `Parameter` VARCHAR(20),
            `Description` VARCHAR(255),
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP,
            UNIQUE (`MarketType`, `MarketSubtype`, `Parameter`)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Odds per match / bookmaker / market / outcome
        betting_odds_ddl = """
        CREATE TABLE IF NOT EXISTS `BettingOdds` (
            `OddsID` INT AUTO_INCREMENT PRIMARY KEY,
            `MatchID` INT NOT NULL,
            `BookmakerID` INT NOT NULL,
            `MarketID` INT NOT NULL,
            `OutcomeCode` VARCHAR(10) NOT NULL,
            `OddsValue` FLOAT NOT NULL CHECK (`OddsValue` > 1.0),
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (`MatchID`) REFERENCES `Matches`(`MatchID`),
            FOREIGN KEY (`BookmakerID`) REFERENCES `Bookmakers`(`BookmakerID`),
            FOREIGN KEY (`MarketID`) REFERENCES `Markets`(`MarketID`),
            CONSTRAINT `uq_odds_unique` UNIQUE (`MatchID`, `BookmakerID`, `MarketID`, `OutcomeCode`)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # ETL run log
        etl_log_ddl = """
        CREATE TABLE IF NOT EXISTS `ETLLog` (
            `LogID` INT AUTO_INCREMENT PRIMARY KEY,
            `ProcessName` VARCHAR(100) NOT NULL,
            `StartTime` DATETIME NOT NULL,
            `EndTime` DATETIME,
            `RecordsProcessed` INT DEFAULT 0,
            `RecordsFailed` INT DEFAULT 0,
            `Status` VARCHAR(20),
            `ErrorMessage` TEXT,
            `CreatedDate` DATETIME DEFAULT CURRENT_TIMESTAMP,
            `FileHash` VARCHAR(64)
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Dead-letter store for records that failed to load
        etl_dead_letter_ddl = """
        CREATE TABLE IF NOT EXISTS `ETLDeadLetter` (
            `Id` INT AUTO_INCREMENT PRIMARY KEY,
            `SourceTable` VARCHAR(100) NOT NULL,
            `SourceId` INT,
            `ErrorMessage` TEXT,
            `RawData` TEXT,
            `ErrorTimestamp` DATETIME DEFAULT CURRENT_TIMESTAMP
        ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """

        # Order matters: referenced tables must exist before their dependants.
        ordered_statements = (
            users_ddl,
            admin_upsert,
            teams_ddl,
            seasons_ddl,
            referees_ddl,
            divisions_ddl,
            matches_ddl,
            match_statistics_ddl,
            bookmakers_ddl,
            markets_ddl,
            betting_odds_ddl,
            etl_log_ddl,
            etl_dead_letter_ddl,
        )
        for statement in ordered_statements:
            db_cursor.execute(statement)

        connection.commit()
        logging.info("Operational tables created successfully")
        return True
    except Exception as exc:
        # Undo anything pending before reporting the failure to the caller.
        if connection:
            connection.rollback()
        logging.error(f"Error creating operational tables: {exc}")
        return False
    finally:
        # Release the cursor first, then the connection that owns it.
        if db_cursor:
            db_cursor.close()
        if connection:
            connection.close()

## Create Indexes

In [7]:
def _index_exists(cursor, table_name, index_name):
    """Return True if `table_name` in the current schema has an index `index_name`."""
    cursor.execute(
        """
        SELECT COUNT(*) AS count
        FROM INFORMATION_SCHEMA.STATISTICS
        WHERE TABLE_SCHEMA = DATABASE()
        AND TABLE_NAME = %s
        AND INDEX_NAME = %s
        """,
        (table_name, index_name),
    )
    # STATISTICS holds one row per indexed column, so a composite index
    # yields several rows; test for "any row" instead of comparing the
    # count with a number of indexes.
    return cursor.fetchone()[0] > 0


def _add_missing_indexes(cursor, table_name, index_columns):
    """Add the indexes in `index_columns` that `table_name` lacks; return their names."""
    missing = [
        index_name
        for index_name in index_columns
        if not _index_exists(cursor, table_name, index_name)
    ]
    if missing:
        clauses = ",\n".join(
            "ADD INDEX `{}` ({})".format(
                index_name,
                ", ".join(f"`{column}`" for column in index_columns[index_name]),
            )
            for index_name in missing
        )
        # A single ALTER per table, restricted to the absent indexes, so an
        # index that already exists is never re-added (which would fail with
        # a duplicate key name).
        cursor.execute(f"ALTER TABLE `{table_name}`\n{clauses}")
    return missing


def create_indexes():
    """Create the query-optimisation indexes that do not exist yet.

    For ``Matches``, ``MatchStatistics`` and ``BettingOdds`` each wanted
    index is looked up by name in ``INFORMATION_SCHEMA.STATISTICS`` and only
    the absent ones are added with ``ALTER TABLE ... ADD INDEX``.

    The previous implementation compared ``COUNT(*)`` over STATISTICS with
    the number of indexes. That count is per indexed column, so the
    two-column indexes inflated it and a missing single-column index could
    go unnoticed; and when only some indexes existed, the all-or-nothing
    ALTER tried to re-add the existing ones and failed.

    Returns:
        bool: True when every table was processed and the work committed
        (including the case where nothing needed adding); False if any
        exception occurred (logged, rollback attempted). The cursor and
        connection are closed in both cases.
    """
    # (table, {index name: indexed columns}, message logged when any index was added)
    index_plan = (
        (
            "Matches",
            {
                "idx_matches_season": ("SeasonID",),
                "idx_matches_division": ("DivisionID",),
                "idx_matches_date": ("MatchDate",),
                "idx_matches_teams": ("HomeTeamID", "AwayTeamID"),
                "idx_matches_referee": ("RefereeID",),
            },
            "Matches indexes created",
        ),
        (
            "MatchStatistics",
            {"idx_matchstats_match": ("MatchID",)},
            "MatchStatistics index created",
        ),
        (
            "BettingOdds",
            {
                "idx_betting_match": ("MatchID",),
                "idx_betting_bookmaker": ("BookmakerID",),
                "idx_betting_market": ("MarketID",),
                "idx_betting_match_market": ("MatchID", "MarketID"),
            },
            "BettingOdds indexes created",
        ),
    )

    conn = None
    cursor = None

    try:
        conn = db_connection(config_file='premier_league_analytics.ini')
        cursor = conn.cursor()

        for table_name, index_columns, created_message in index_plan:
            if _add_missing_indexes(cursor, table_name, index_columns):
                logging.info(created_message)

        conn.commit()
        logging.info("Indexes created successfully")
        return True
    except Exception as e:
        if conn:
            conn.rollback()
        logging.error(f"Error creating indexes: {e}")
        return False
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()

## Create Views

In [8]:
def create_views():
    """Create (or replace) the reporting views.

    Two views are defined over the operational tables:

    * ``vw_MatchDetails`` - one row per match with team, season, division
      and referee names plus the per-match statistics columns.
    * ``vw_LeagueTable`` - per season, division and team totals of games
      played, won, drawn, lost, goals for/against, goal difference and
      points (3 per win, 1 per draw), combining home and away results.

    Returns:
        bool: True when both statements ran and the work was committed;
        False if any exception occurred (logged, rollback attempted). The
        cursor and connection are closed in both cases.
    """
    connection = None
    db_cursor = None

    try:
        connection = db_connection(config_file='premier_league_analytics.ini')
        db_cursor = connection.cursor()

        # Teams are inner-joined; season, division, referee and statistics
        # are left-joined so a match still appears when one is absent.
        match_details_sql = """
        CREATE OR REPLACE VIEW `vw_MatchDetails` AS
        SELECT 
            m.`MatchID`,
            s.`SeasonName`,
            d.`LeagueName`,
            d.`DivisionCode`,
            m.`MatchDate`,
            m.`MatchTime`,
            ht.`TeamName` AS HomeTeam,
            at.`TeamName` AS AwayTeam,
            m.`FTHG`,
            m.`FTAG`,
            m.`FTR`,
            m.`HTHG`,
            m.`HTAG`,
            m.`HTR`,
            r.`RefereeName`,
            ms.`HomeShots`,
            ms.`AwayShots`,
            ms.`HomeShotsTarget`,
            ms.`AwayShotsTarget`,
            ms.`HomeCorners`,
            ms.`AwayCorners`,
            ms.`HomeFouls`,
            ms.`AwayFouls`,
            ms.`HomeYellowCards`,
            ms.`AwayYellowCards`,
            ms.`HomeRedCards`,
            ms.`AwayRedCards`
        FROM `Matches` m
        JOIN `Teams` ht ON m.`HomeTeamID` = ht.`TeamID`
        JOIN `Teams` at ON m.`AwayTeamID` = at.`TeamID`
        LEFT JOIN `Seasons` s ON m.`SeasonID` = s.`SeasonID`
        LEFT JOIN `Divisions` d ON m.`DivisionID` = d.`DivisionID`
        LEFT JOIN `Referees` r ON m.`RefereeID` = r.`RefereeID`
        LEFT JOIN `MatchStatistics` ms ON m.`MatchID` = ms.`MatchID`
        """

        # Home and away aggregates are computed separately inside the CTE
        # and then summed per team by the outer query.
        league_table_sql = """
        CREATE OR REPLACE VIEW `vw_LeagueTable` AS
        WITH TeamMatches AS (
            -- Home matches
            SELECT
                m.`SeasonID`,
                s.`SeasonName`,
                m.`DivisionID`,
                d.`DivisionCode`,
                d.`LeagueName`,
                m.`HomeTeamID` AS TeamID,
                t.`TeamName`,
                COUNT(*) AS Played,
                SUM(CASE WHEN m.`FTR` = 'H' THEN 1 ELSE 0 END) AS Won,
                SUM(CASE WHEN m.`FTR` = 'D' THEN 1 ELSE 0 END) AS Drawn,
                SUM(CASE WHEN m.`FTR` = 'A' THEN 1 ELSE 0 END) AS Lost,
                SUM(m.`FTHG`) AS GoalsFor,
                SUM(m.`FTAG`) AS GoalsAgainst,
                SUM(CASE WHEN m.`FTR` = 'H' THEN 3 WHEN m.`FTR` = 'D' THEN 1 ELSE 0 END) AS Points
            FROM `Matches` m
            JOIN `Teams` t ON m.`HomeTeamID` = t.`TeamID`
            JOIN `Seasons` s ON m.`SeasonID` = s.`SeasonID`
            JOIN `Divisions` d ON m.`DivisionID` = d.`DivisionID`
            GROUP BY m.`SeasonID`, s.`SeasonName`, m.`DivisionID`, d.`DivisionCode`, d.`LeagueName`, m.`HomeTeamID`, t.`TeamName`
            
            UNION ALL
            
            -- Away matches
            SELECT
                m.`SeasonID`,
                s.`SeasonName`,
                m.`DivisionID`,
                d.`DivisionCode`,
                d.`LeagueName`,
                m.`AwayTeamID` AS TeamID,
                t.`TeamName`,
                COUNT(*) AS Played,
                SUM(CASE WHEN m.`FTR` = 'A' THEN 1 ELSE 0 END) AS Won,
                SUM(CASE WHEN m.`FTR` = 'D' THEN 1 ELSE 0 END) AS Drawn,
                SUM(CASE WHEN m.`FTR` = 'H' THEN 1 ELSE 0 END) AS Lost,
                SUM(m.`FTAG`) AS GoalsFor,
                SUM(m.`FTHG`) AS GoalsAgainst,
                SUM(CASE WHEN m.`FTR` = 'A' THEN 3 WHEN m.`FTR` = 'D' THEN 1 ELSE 0 END) AS Points
            FROM `Matches` m
            JOIN `Teams` t ON m.`AwayTeamID` = t.`TeamID`
            JOIN `Seasons` s ON m.`SeasonID` = s.`SeasonID`
            JOIN `Divisions` d ON m.`DivisionID` = d.`DivisionID`
            GROUP BY m.`SeasonID`, s.`SeasonName`, m.`DivisionID`, d.`DivisionCode`, d.`LeagueName`, m.`AwayTeamID`, t.`TeamName`
        )
        SELECT
            `SeasonID`,
            `SeasonName`,
            `DivisionID`,
            `DivisionCode`,
            `LeagueName`,
            `TeamID`,
            `TeamName`,
            SUM(`Played`) AS Played,
            SUM(`Won`) AS Won,
            SUM(`Drawn`) AS Drawn,
            SUM(`Lost`) AS Lost,
            SUM(`GoalsFor`) AS GF,
            SUM(`GoalsAgainst`) AS GA,
            SUM(`GoalsFor`) - SUM(`GoalsAgainst`) AS GD,
            SUM(`Points`) AS Points
        FROM TeamMatches
        GROUP BY `SeasonID`, `SeasonName`, `DivisionID`, `DivisionCode`, `LeagueName`, `TeamID`, `TeamName`
        ORDER BY `SeasonID`, `DivisionID`, `Points` DESC, GD DESC, GF DESC
        """

        # Further view definitions can be appended to this tuple.
        for view_sql in (match_details_sql, league_table_sql):
            db_cursor.execute(view_sql)

        connection.commit()
        logging.info("Views created successfully")
        return True
    except Exception as exc:
        # Undo anything pending before reporting the failure to the caller.
        if connection:
            connection.rollback()
        logging.error(f"Error creating views: {exc}")
        return False
    finally:
        # Release the cursor first, then the connection that owns it.
        if db_cursor:
            db_cursor.close()
        if connection:
            connection.close()

## Initialize Reference Data

In [9]:
def _insert_if_absent(cursor, table, columns, key_columns, rows):
    """Insert each row of `rows` into `table` unless its key is already present.

    `columns` names the inserted columns in row order; `key_columns` is the
    subset that identifies a row. Keys are compared with the NULL-safe
    ``<=>`` operator so that a NULL key part matches an existing NULL.
    Returns the number of rows inserted.
    """
    key_positions = [columns.index(name) for name in key_columns]
    key_match = " AND ".join(f"`{name}` <=> %s" for name in key_columns)
    exists_sql = f"SELECT COUNT(*) FROM `{table}` WHERE {key_match}"

    column_list = ", ".join(f"`{name}`" for name in columns)
    placeholders = ", ".join(["%s"] * len(columns))
    insert_sql = f"INSERT INTO `{table}` ({column_list}) VALUES ({placeholders})"

    inserted = 0
    for row in rows:
        cursor.execute(exists_sql, tuple(row[i] for i in key_positions))
        if cursor.fetchone()[0] == 0:
            cursor.execute(insert_sql, row)
            inserted += 1
    return inserted


def init_reference_data():
    """Seed the bookmaker, market, division and season reference tables.

    Each row is inserted only when no row with the same key exists, so the
    function can be re-run without changing or duplicating existing data.

    Two problems of the earlier "insert and ignore any exception" approach
    are addressed:

    * ``Markets`` is unique on (MarketType, MarketSubtype, Parameter), but
      every seeded market has at least one NULL in those columns and a
      MySQL unique index admits repeated NULLs, so the insert never failed
      and each re-run duplicated all markets. The explicit NULL-safe
      existence check prevents that.
    * Errors other than "already exists" were silently discarded. They now
      reach the outer handler, which rolls back and returns False.

    Returns:
        bool: True when all reference data is present and committed; False
        if any exception occurred (logged, rollback attempted). The cursor
        and connection are closed in both cases.
    """
    # (code, name, website)
    bookmakers = [
        ('B365', 'Bet365', 'https://www.bet365.com'),
        ('BW', 'Bet&Win', 'https://www.bwin.com'),
        ('IW', 'Interwetten', 'https://www.interwetten.com'),
        ('PS', 'Pinnacle Sports', 'https://www.pinnacle.com'),
        ('WH', 'William Hill', 'https://www.williamhill.com'),
        ('VC', 'Victor Chandler', 'https://www.betvictor.com'),
        ('Max', 'Maximum Odds', None),
        ('Avg', 'Average Odds', None)
    ]

    # (market type, subtype, parameter, description)
    markets = [
        ('MatchResult', None, None, 'Match winner (Home/Draw/Away)'),
        ('OverUnder', None, '2.5', 'Over/Under 2.5 goals'),
        ('AsianHandicap', None, None, 'Asian handicap betting'),
        ('Corners', None, None, 'Corner count markets'),
        ('Corners', 'Handicap', None, 'Corner handicap markets')
    ]

    # (division code, league name, country, tier)
    divisions = [
        ('E0', 'English Premier League', 'England', 1),
        ('E1', 'English Championship', 'England', 2),
        ('SP1', 'Spanish La Liga', 'Spain', 1),
        ('D1', 'German Bundesliga', 'Germany', 1),
        ('I1', 'Italian Serie A', 'Italy', 1),
        ('F1', 'French Ligue 1', 'France', 1)
    ]

    # (season name, start date, end date)
    seasons = [
        ('2020-2021', '2020-08-01', '2021-05-31'),
        ('2021-2022', '2021-08-01', '2022-05-31'),
        ('2022-2023', '2022-08-01', '2023-05-31'),
        ('2023-2024', '2023-08-01', '2024-05-31'),
        ('2024-2025', '2024-08-01', '2025-05-31')
    ]

    # (table, inserted columns, key columns, rows) - the key columns match
    # the UNIQUE constraints declared when the tables are created.
    seed_plan = (
        ('Bookmakers',
         ('BookmakerCode', 'BookmakerName', 'Website'),
         ('BookmakerCode',),
         bookmakers),
        ('Markets',
         ('MarketType', 'MarketSubtype', 'Parameter', 'Description'),
         ('MarketType', 'MarketSubtype', 'Parameter'),
         markets),
        ('Divisions',
         ('DivisionCode', 'LeagueName', 'Country', 'Tier'),
         ('DivisionCode',),
         divisions),
        ('Seasons',
         ('SeasonName', 'StartDate', 'EndDate'),
         ('SeasonName',),
         seasons),
    )

    conn = None
    cursor = None

    try:
        conn = db_connection(config_file='premier_league_analytics.ini')
        cursor = conn.cursor()

        for table, columns, key_columns, rows in seed_plan:
            _insert_if_absent(cursor, table, columns, key_columns, rows)

        conn.commit()
        logging.info("Reference data initialized successfully")
        return True
    except Exception as e:
        if conn:
            conn.rollback()
        logging.error(f"Error initializing reference data: {e}")
        return False
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()

## Main Execution Function

In [10]:
def setup_database():
    """Run every setup stage of the analytics database in order.

    The stages are: staging table, operational tables, indexes, views and
    reference data. Execution stops at the first stage that reports failure.

    Returns:
        bool: True when all stages succeeded; False as soon as one stage
        returns a falsy value or raises (the exception is logged).
    """
    try:
        stages = (
            create_staging_table,
            create_operational_tables,
            create_indexes,
            create_views,
            init_reference_data,
        )
        # all() short-circuits, so later stages are skipped after a failure.
        if not all(stage() for stage in stages):
            return False

        logging.info("Premier League Football Analytics operational database setup completed successfully")
        return True
    except Exception as exc:
        logging.error(f"Error setting up database: {exc}")
        return False

## Execute Database Setup

In [11]:
if __name__ == "__main__":
    # Run the whole setup and report the outcome on stdout.
    result = setup_database()
    print(
        "Database setup completed successfully!"
        if result
        else "Database setup failed. Check the logs for details."
    )

2025-05-08 17:06:54,310 - INFO - Staging table created successfully
2025-05-08 17:06:54,380 - INFO - Operational tables created successfully
2025-05-08 17:06:54,411 - INFO - Matches indexes created
2025-05-08 17:06:54,416 - INFO - MatchStatistics index created
2025-05-08 17:06:54,422 - INFO - BettingOdds indexes created
2025-05-08 17:06:54,422 - INFO - Indexes created successfully
2025-05-08 17:06:54,445 - INFO - Views created successfully
2025-05-08 17:06:54,468 - INFO - Reference data initialized successfully
2025-05-08 17:06:54,470 - INFO - Premier League Football Analytics operational database setup completed successfully


Database setup completed successfully!
