# In [None]:
import pandas as pd
from collections import defaultdict

def load_passing_data(file_path):
    """
    Read the passing-event workbook into memory.

    Parameters:
    file_path (str): Location of the Excel file to read.

    Returns:
    DataFrame: The contents of the file as parsed by pandas.read_excel
        with its default options.
    """
    events = pd.read_excel(file_path)
    return events

def extract_passing_sequence(df):
    """
    Build the ordered list of (player, team) pairs from the event table.

    Parameters:
    df (DataFrame): Event data; must contain the columns 'playerName'
        and 'contestantId'.

    Returns:
    list: One [playerName, contestantId] pair per row, in row order.
        Rows missing a value in either column are left out.
    """
    wanted_columns = ['playerName', 'contestantId']

    # Keep only the two identifying columns, then discard incomplete rows
    pairs = df[wanted_columns]
    complete_pairs = pairs.dropna()

    return complete_pairs.values.tolist()

def identify_flow_motifs(passing_sequence):
    """
    Count the flow motifs found in every run of four consecutive entries.

    Each window of four entries is relabelled by order of first appearance
    (first distinct player -> 'A', second -> 'B', ...). A player is
    identified by the (name, contestant ID) pair, so equal names with
    different contestant IDs count as different players.

    Parameters:
    passing_sequence (list): Ordered (player, contestant_id) pairs,
        e.g. [['A', 1], ['B', 2], ['A', 1], ['C', 3], ...].

    Returns:
    dict: A defaultdict(int) mapping each observed motif ('ABAB', 'ABAC',
        'ABCA', 'ABCB', 'ABCD') to the number of windows that produced it.
        Windows giving any other pattern are ignored.
    """
    recognised = ('ABAB', 'ABAC', 'ABCA', 'ABCB', 'ABCD')
    tally = defaultdict(int)

    # Zipping the sequence with three shifted copies yields every window of
    # four consecutive entries; nothing is produced for shorter sequences.
    windows = zip(
        passing_sequence,
        passing_sequence[1:],
        passing_sequence[2:],
        passing_sequence[3:],
    )

    for window in windows:
        labels = {}
        letters = []
        for name, team in window:
            key = (name, team)
            if key not in labels:
                # The n-th distinct player in the window gets the n-th letter
                labels[key] = chr(ord('A') + len(labels))
            letters.append(labels[key])

        pattern = ''.join(letters)
        if pattern in recognised:
            tally[pattern] += 1

    return tally

def save_motif_percentages_to_excel(grouped_motif_counts, id_to_team, output_file):
    """
    Write each contestant's motif distribution to an Excel sheet.

    Parameters:
    grouped_motif_counts (dict): Maps a contestant ID to a dict of
        motif -> count.
    id_to_team (dict): Maps a contestant ID to a team name; IDs that are
        not present are written as 'Unknown'.
    output_file (str): Destination path of the Excel file.

    The sheet has one row per contestant with the columns ContestantID,
    Team, ABAB, ABAC, ABCA, ABCB and ABCD. Motif values are stored as
    fractions of the contestant's total (count / total, i.e. 0-1 rather
    than 0-100); a motif that was not observed is written as 0.
    """
    motif_names = ['ABAB', 'ABAC', 'ABCA', 'ABCB', 'ABCD']
    records = []

    for team_id, counts in grouped_motif_counts.items():
        total = sum(counts.values())

        # With no motifs at all every share falls back to 0 below
        if total > 0:
            shares = {name: occurrences / total for name, occurrences in counts.items()}
        else:
            shares = {}

        record = [team_id, id_to_team.get(team_id, 'Unknown')]
        record.extend(shares.get(name, 0) for name in motif_names)
        records.append(record)

    # One flat table, written to a single sheet without the index column
    table = pd.DataFrame(records, columns=['ContestantID', 'Team'] + motif_names)
    table.to_excel(output_file, index=False)

if __name__ == "__main__":
    # Input workbooks: the passing events and the contestant-id -> team lookup
    file_path = "/Users/marclambertes/Downloads/EREII.xlsx"  # Replace with your file path
    mapping_file_path = '/Users/marclambertes/Downloads/opta_club_rankings_19012025.xlsx'  # Replace with your mapping file path

    # Read the events first, then the mapping (expected columns: 'id' and 'team')
    df = load_passing_data(file_path)
    mapping_df = pd.read_excel(mapping_file_path)

    # Lookup table from contestant id to team name
    id_to_team = {
        team_id: team_name
        for team_id, team_name in zip(mapping_df['id'], mapping_df['team'])
    }

    # Full ordered list of [player, contestant id] pairs
    passing_sequence = extract_passing_sequence(df)

    # Split that list into one ordered sub-sequence per contestant
    grouped_sequences = defaultdict(list)
    for entry in passing_sequence:
        player_name, contestant_id = entry
        grouped_sequences[contestant_id].append((player_name, contestant_id))

    # Count motifs independently for every contestant
    grouped_motif_counts = {
        team_key: identify_flow_motifs(team_passes)
        for team_key, team_passes in grouped_sequences.items()
    }

    # Assemble the console report line by line and print it in one go
    report_lines = ["Flow Motif Counts by Contestant:"]
    for contestant_id, motif_counts in grouped_motif_counts.items():
        report_lines.append(f"Contestant {contestant_id}:")
        for motif, count in motif_counts.items():
            report_lines.append(f"  {motif}: {count}")
    print("\n".join(report_lines))

    # Persist the per-contestant motif shares
    output_file = "Pass Flow Motifs - Eerste Divisie.xlsx"
    save_motif_percentages_to_excel(grouped_motif_counts, id_to_team, output_file)
    print(f"Motif percentages saved to {output_file}")
