In [None]:
import pandas as pd
from neo4j import GraphDatabase
from sklearn.metrics import accuracy_score

class Neo4jElo:
    """Maintain and query Elo ratings for teams stored in a Neo4j graph.

    Teams are ``(:Team {name, elo})`` nodes. Each game is a ``PLAYED``
    relationship pointing from the home team to the away team and carrying
    ``game_id``, ``home_score``, ``away_score`` and ``game_date``.

    The object can be used as a context manager so the driver is always
    closed::

        with Neo4jElo(uri, user, password) as elo:
            elo.calculate_elo()
    """

    # Rating every team starts from when ratings are (re)initialized.
    INITIAL_ELO = 1500

    def __init__(self, uri, user, password):
        """
        Initialize the connection to the Neo4j database.

        Args:
            uri (str): URI of the Neo4j instance.
            user (str): Username for Neo4j authentication.
            password (str): Password for Neo4j authentication.
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """Return self so the instance can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver when leaving the ``with`` block."""
        self.close()

    def close(self):
        """Close the connection to the Neo4j database."""
        self.driver.close()

    def create_teams(self, teams):
        """
        Create nodes for all teams in the NFL dataset.

        Existing teams are left untouched (``MERGE``), so the call is safe to
        repeat.

        Args:
            teams (iterable): Unique team names.
        """
        names = list(teams)
        if not names:
            return
        with self.driver.session() as session:
            # One UNWIND query instead of one round-trip per team.
            session.run(
                "UNWIND $names AS name MERGE (t:Team {name: name})",
                names=names,
            )

    def create_game(self, game_id, home_team, away_team, home_score, away_score, game_date):
        """
        Create (or update) the relationship between home_team and away_team for one game.

        The relationship is matched on ``game_id`` only and the remaining
        metrics are then set as properties. Re-importing a game with a
        corrected score or date therefore updates the existing relationship
        instead of adding a second one that would be counted twice by
        ``calculate_elo``.

        Args:
            game_id (int): Unique identifier for the game.
            home_team (str): Name of the home team.
            away_team (str): Name of the away team.
            home_score (int): Score of the home team.
            away_score (int): Score of the away team.
            game_date (str): Date of the game.
        """
        with self.driver.session() as session:
            session.run("""
                MATCH (home:Team {name: $home_team}), (away:Team {name: $away_team})
                MERGE (home)-[r:PLAYED {game_id: $game_id}]->(away)
                SET r.home_score = $home_score,
                    r.away_score = $away_score,
                    r.game_date = $game_date
            """, game_id=game_id, home_team=home_team, away_team=away_team,
                home_score=home_score, away_score=away_score, game_date=game_date)

    def initialize_elo(self):
        """Reset the ELO rating of every team to ``INITIAL_ELO``."""
        with self.driver.session() as session:
            session.run("MATCH (t:Team) SET t.elo = $elo", elo=self.INITIAL_ELO)

    @staticmethod
    def _expected_score(rating, opponent_rating):
        """Return the Elo win expectancy (0..1) of ``rating`` against ``opponent_rating``."""
        return 1 / (1 + 10 ** ((opponent_rating - rating) / 400))

    @staticmethod
    def _updated_ratings(home_elo, away_elo, home_score, away_score, k=20):
        """Return ``(new_home_elo, new_away_elo)`` after a single game.

        A home win counts as 1, a home loss as 0 and anything else (a tie)
        as 0.5 for the home side; the away side gets the complement.
        """
        expected_home = Neo4jElo._expected_score(home_elo, away_elo)
        expected_away = Neo4jElo._expected_score(away_elo, home_elo)

        if home_score > away_score:
            actual_home = 1
        elif home_score < away_score:
            actual_home = 0
        else:
            actual_home = 0.5

        new_home_elo = home_elo + k * (actual_home - expected_home)
        new_away_elo = away_elo + k * ((1 - actual_home) - expected_away)
        return new_home_elo, new_away_elo

    def calculate_elo(self, k=20):
        """
        Recalculate ELO ratings from scratch by replaying every stored game.

        All ratings are first reset via ``initialize_elo``. Games are then
        replayed in ``game_date`` order (ties broken by ``game_id``), because
        Elo updates are order-dependent and an unordered ``MATCH`` returns
        relationships in an arbitrary order.

        NOTE(review): ordering is done on the stored ``game_date`` value, so
        it is chronological only if that value sorts chronologically (e.g.
        ISO-8601 strings) -- confirm against the dataset.

        Args:
            k (int): K-factor for ELO rating calculation.
        """
        # Unconditional reset: the replay below must start from a clean slate.
        self.initialize_elo()

        with self.driver.session() as session:
            # Load every rating once; updates are applied in memory and
            # written back in a single query at the end.
            ratings = {
                record["name"]: record["elo"]
                for record in session.run(
                    "MATCH (t:Team) RETURN t.name AS name, t.elo AS elo"
                )
            }

            # Materialize the games before doing anything else on the session.
            games = list(session.run("""
                MATCH (home:Team)-[r:PLAYED]->(away:Team)
                RETURN home.name AS home_team, away.name AS away_team,
                r.home_score AS home_score, r.away_score AS away_score
                ORDER BY r.game_date, r.game_id
            """))

            for record in games:
                home_team = record["home_team"]
                away_team = record["away_team"]

                new_home_elo, new_away_elo = self._updated_ratings(
                    ratings[home_team],
                    ratings[away_team],
                    record["home_score"],
                    record["away_score"],
                    k,
                )
                ratings[home_team] = new_home_elo
                ratings[away_team] = new_away_elo

            if ratings:
                session.run(
                    """
                    UNWIND $rows AS row
                    MATCH (t:Team {name: row.name})
                    SET t.elo = row.elo
                    """,
                    rows=[{"name": name, "elo": elo} for name, elo in ratings.items()],
                )

    def get_team_elos(self, home_team, away_team):
        """
        Query Neo4j to get current ELO ratings for both teams.

        Args:
            home_team (str): Name of the home team.
            away_team (str): Name of the away team.

        Returns:
            tuple: Current ELO ratings ``(home_elo, away_elo)``.

        Raises:
            ValueError: If either team does not exist in the database.
        """
        with self.driver.session() as session:
            result = session.run("""
                MATCH (home:Team {name: $home_team}), (away:Team {name: $away_team})
                RETURN home.elo AS home_elo, away.elo AS away_elo
            """, home_team=home_team, away_team=away_team)

            record = result.single()
            if record is None:
                # The MATCH yields no row when at least one team is missing.
                raise ValueError(
                    f"Team not found in database: {home_team!r} and/or {away_team!r}"
                )
            return record["home_elo"], record["away_elo"]

    def predict_winner(self, home_team, away_team):
        """
        Predict winner based on current ELO ratings of both teams.

        Equal ratings (expected score of exactly 0.5) resolve to 'Away'.

        Args:
            home_team (str): Name of the home team.
            away_team (str): Name of the away team.

        Returns:
            str: Predicted winner ('Home' or 'Away').
        """
        home_elo, away_elo = self.get_team_elos(home_team, away_team)
        expected_home = self._expected_score(home_elo, away_elo)
        return "Home" if expected_home > 0.5 else "Away"

# Main function to load data and run predictions on last 500 games
def main():
    """Load the games CSV, build the graph, compute ELOs and report accuracy.

    Reads ``data/processed/elo_data.csv``, creates team nodes and game
    relationships in Neo4j, computes ELO ratings over all games and prints
    how often the higher-rated side matches the actual winner of the last
    500 rows of the file.

    NOTE(review): the ratings used for prediction are computed from every
    game, including the 500 being evaluated, so the printed figure is an
    in-sample accuracy rather than a true hold-out score.
    """
    uri = "bolt://localhost:7687"
    user = "neo4j"
    password = "password"

    elo_system = Neo4jElo(uri, user, password)

    # Make sure the driver is closed even if loading or a query fails.
    try:
        # Load your dataset from CSV file
        data_path = 'data/processed/elo_data.csv'  # Replace with your file path
        nfl_data = pd.read_csv(data_path)

        # Create nodes for all teams
        teams = pd.concat([nfl_data['home_team'], nfl_data['away_team']]).unique()
        elo_system.create_teams(teams)

        # Insert games into Neo4j and calculate ELOs
        for _, row in nfl_data.iterrows():
            elo_system.create_game(row['game_id'], row['home_team'], row['away_team'], row['score_home'], row['score_away'], row['game_date'])

        elo_system.calculate_elo()

        # Predict outcomes for last 500 games and calculate accuracy
        actual_results = []
        predicted_results = []

        last_500_games = nfl_data.tail(500)

        for _, row in last_500_games.iterrows():
            # Ties are labelled "Away" here, matching predict_winner's
            # two-class output ('Home' or 'Away').
            actual_result = "Home" if row['score_home'] > row['score_away'] else "Away"

            predicted_result = elo_system.predict_winner(row['home_team'], row['away_team'])

            actual_results.append(actual_result)
            predicted_results.append(predicted_result)

        accuracy = accuracy_score(actual_results, predicted_results)

        print(f"Prediction Accuracy on Last 500 Games: {accuracy:.2f}")
    finally:
        elo_system.close()

# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

Prediction Accuracy on Last 500 Games: 0.63
