In [4]:
import numpy as np

# Hidden roles; their order fixes the row order of the probability arrays below
states = ["Defensive", "Neutral", "Attacking"]

# Observable actions; their order fixes the column order of emission_probs
observations = ["Pass", "Dribble", "Tackle", "Shot", "Cross"]

# transition_probs[i, j]: probability of moving from states[i] to states[j]
transition_probs = np.array([
    [0.5, 0.4, 0.1],  # from Defensive
    [0.2, 0.4, 0.4],  # from Neutral
    [0.1, 0.3, 0.6],  # from Attacking
])

# emission_probs[i, k]: probability of seeing observations[k] while in states[i]
emission_probs = np.array([
    [0.1, 0.1, 0.5, 0.2, 0.1],    # Defensive
    [0.4, 0.4, 0.05, 0.1, 0.05],  # Neutral
    [0.3, 0.4, 0.02, 0.2, 0.08],  # Attacking
])

# initial_probs[i]: probability that the sequence starts in states[i]
initial_probs = np.array([0.2, 0.5, 0.3])

# Example match data: the actions observed, in order
observed_sequence = ["Dribble", "Pass", "Shot", "Cross", "Pass"]

# Translate each action name into its column index in emission_probs
obs_indices = list(map(observations.index, observed_sequence))

# Viterbi Algorithm Function
def viterbi(obs_indices, states, initial_probs, transition_probs, emission_probs):
    """Find the most likely hidden-state sequence for a sequence of observations.

    The recursion is carried out in log-space. Multiplying raw probabilities
    underflows to 0.0 on long sequences, after which every argmax silently
    falls back to state 0 and the decoded path becomes meaningless.

    Args:
        obs_indices: Sequence of integer column indices into ``emission_probs``,
            one per time step.
        states: Sequence of state labels; ``states[i]`` labels row ``i`` of the
            probability arrays.
        initial_probs: 1-D array; entry ``i`` is the probability of starting in
            state ``i``.
        transition_probs: 2-D array; entry ``[i, j]`` is the probability of
            moving from state ``i`` to state ``j``.
        emission_probs: 2-D array; entry ``[i, k]`` is the probability of
            observing symbol ``k`` while in state ``i``.

    Returns:
        tuple: ``(best_path, dp)`` where ``best_path`` is the list of state
        labels (one per observation) and ``dp`` is a
        ``(num_states, num_obs)`` array whose ``[s, t]`` entry is the
        probability of the best path that ends in state ``s`` at step ``t``.
        For very long sequences the entries of ``dp`` may underflow to 0.0,
        but ``best_path`` is still decoded correctly. An empty observation
        sequence yields ``([], dp)`` with ``dp`` of shape ``(num_states, 0)``.
    """
    num_obs = len(obs_indices)  # Number of observations
    num_states = len(states)    # Number of hidden states

    # Nothing to decode: return an empty path instead of failing on dp[:, 0].
    if num_obs == 0:
        return [], np.zeros((num_states, 0))

    # log(0) = -inf is the intended encoding of an impossible event, so the
    # divide-by-zero warning NumPy would emit for it is suppressed.
    with np.errstate(divide="ignore"):
        log_initial = np.log(np.asarray(initial_probs, dtype=float))
        log_transition = np.log(np.asarray(transition_probs, dtype=float))
        log_emission = np.log(np.asarray(emission_probs, dtype=float))

    log_dp = np.full((num_states, num_obs), -np.inf)          # Log-probability table
    backpointer = np.zeros((num_states, num_obs), dtype=int)  # Pointer table

    # Initialization step
    log_dp[:, 0] = log_initial + log_emission[:, obs_indices[0]]

    # Recursion step
    for t in range(1, num_obs):
        # candidates[i, j]: best log-probability of being in state i at t-1
        # and then moving to state j. The emission term is the same for every
        # predecessor i, so it is added after the max.
        candidates = log_dp[:, t - 1][:, np.newaxis] + log_transition
        backpointer[:, t] = np.argmax(candidates, axis=0)
        log_dp[:, t] = np.max(candidates, axis=0) + log_emission[:, obs_indices[t]]

    # Backtracking step: collect states from last to first, then reverse once
    # (avoids the quadratic cost of repeatedly inserting at the list front).
    current_state = int(np.argmax(log_dp[:, -1]))  # Best final state
    reversed_path = [current_state]
    for t in range(num_obs - 1, 0, -1):
        current_state = int(backpointer[current_state, t])
        reversed_path.append(current_state)
    reversed_path.reverse()

    return [states[state] for state in reversed_path], np.exp(log_dp)

# Decode the most likely role sequence for the observed actions
best_path, dp_table = viterbi(
    obs_indices=obs_indices,
    states=states,
    initial_probs=initial_probs,
    transition_probs=transition_probs,
    emission_probs=emission_probs,
)

# Report the observed actions, the decoded roles, and the probability table
print("Observed Actions:", observed_sequence)
print("Most Likely States (Roles):", best_path)
print("\nProbability Table (DP):", dp_table, sep="\n")

# Generate Detailed Insights
def generate_insights(observed_sequence, best_path):
    """Build a one-line-per-step summary pairing each action with its inferred role.

    Steps are numbered from 1. Actions and roles are paired positionally, and
    pairing stops at the end of the shorter input.

    Args:
        observed_sequence: Iterable of action names, in order.
        best_path: Iterable of role names, in the same order.

    Returns:
        str: The summary lines joined by newlines (empty string if there are
        no pairs).
    """
    paired_steps = enumerate(zip(observed_sequence, best_path), start=1)
    return "\n".join(
        f"At step {step}, player performed '{action}' while likely in '{role}' role."
        for step, (action, role) in paired_steps
    )

# Print Detailed Insights
detailed_insights = generate_insights(observed_sequence, best_path)
# Header and body are printed separately: passing both to a single print()
# inserts the default " " separator right after the header's newline, which
# indents the first insight line by one space.
print("\nDetailed Insights:")
print(detailed_insights)


Observed Actions: ['Dribble', 'Pass', 'Shot', 'Cross', 'Pass']
Most Likely States (Roles): ['Neutral', 'Attacking', 'Attacking', 'Attacking', 'Attacking']

Probability Table (DP):
[[2.00000e-02 4.00000e-03 1.28000e-03 6.40000e-05 3.20000e-06]
 [2.00000e-01 3.20000e-02 1.28000e-03 4.32000e-05 1.65888e-05]
 [1.20000e-01 2.40000e-02 2.88000e-03 1.38240e-04 2.48832e-05]]

Detailed Insights:
 At step 1, player performed 'Dribble' while likely in 'Neutral' role.
At step 2, player performed 'Pass' while likely in 'Attacking' role.
At step 3, player performed 'Shot' while likely in 'Attacking' role.
At step 4, player performed 'Cross' while likely in 'Attacking' role.
At step 5, player performed 'Pass' while likely in 'Attacking' role.


In [1]:
import pandas as pd
import numpy as np

# Read the Excel file
def load_excel_data(excel_file):
    """Load an Excel file into a DataFrame.

    Args:
        excel_file: Path (or any other input accepted by ``pd.read_excel``)
            of the workbook to load.

    Returns:
        pd.DataFrame: The data returned by ``pd.read_excel``.

    Raises:
        Exception: Whatever ``pd.read_excel`` raised (for example
            ``FileNotFoundError``). The error is printed and then re-raised so
            the caller decides how to react.
    """
    try:
        df = pd.read_excel(excel_file)
    except Exception as e:
        print(f"Error loading Excel file: {e}")
        # Re-raise instead of calling exit(): exit() is an interactive helper
        # that is not guaranteed to exist, is the kernel-exit hook inside an
        # IPython/Jupyter session, and reports a success status on failure.
        raise
    print("Excel file loaded successfully.")
    return df

# Extract event IDs for a specific player
def get_event_ids(player_name, dataframe):
    """
    Collect the event IDs recorded for one player.

    Args:
        player_name (str): Name to match exactly against the 'playerName' column.
        dataframe (pd.DataFrame): Event data; must contain the 'playerName'
            and 'eventId' columns.

    Returns:
        list: Values of 'eventId' for the rows whose 'playerName' equals
        player_name, in row order (empty if no row matches).

    Raises:
        ValueError: If a required column is absent; the message names the
            first missing column.
    """
    # Find the first required column that is absent, if any
    first_missing = next(
        (name for name in ('playerName', 'eventId') if name not in dataframe.columns),
        None,
    )
    if first_missing is not None:
        raise ValueError(f"Column '{first_missing}' not found in the Excel file.")

    # Select the eventId values on the rows belonging to this player
    is_player_row = dataframe['playerName'] == player_name
    return dataframe.loc[is_player_row, 'eventId'].tolist()

# Hidden Markov Model (HMM) components
# Hidden roles; their order fixes the row order of the probability arrays below
states = ["Defensive", "Neutral", "Attacking"]
# Observable actions; their order fixes the column order of emission_probs
observations = ["Pass", "Dribble", "Tackle", "Shot", "Cross"]

# transition_probs[i, j]: probability of moving from states[i] to states[j]
transition_probs = np.array([
    [0.5, 0.4, 0.1],  # from Defensive
    [0.2, 0.4, 0.4],  # from Neutral
    [0.1, 0.3, 0.6],  # from Attacking
])

# emission_probs[i, k]: probability of seeing observations[k] while in states[i]
emission_probs = np.array([
    [0.1, 0.1, 0.5, 0.2, 0.1],    # Defensive
    [0.4, 0.4, 0.05, 0.1, 0.05],  # Neutral
    [0.3, 0.4, 0.02, 0.2, 0.08],  # Attacking
])

# initial_probs[i]: probability that the sequence starts in states[i]
initial_probs = np.array([0.2, 0.5, 0.3])

# Viterbi Algorithm Function
def viterbi(obs_indices, states, initial_probs, transition_probs, emission_probs):
    """Find the most likely hidden-state sequence for a sequence of observations.

    The recursion is carried out in log-space. Multiplying raw probabilities
    underflows to 0.0 on long sequences, after which every argmax silently
    falls back to state 0 and the decoded path becomes meaningless.

    Args:
        obs_indices: Sequence of integer column indices into ``emission_probs``,
            one per time step.
        states: Sequence of state labels; ``states[i]`` labels row ``i`` of the
            probability arrays.
        initial_probs: 1-D array; entry ``i`` is the probability of starting in
            state ``i``.
        transition_probs: 2-D array; entry ``[i, j]`` is the probability of
            moving from state ``i`` to state ``j``.
        emission_probs: 2-D array; entry ``[i, k]`` is the probability of
            observing symbol ``k`` while in state ``i``.

    Returns:
        tuple: ``(best_path, dp)`` where ``best_path`` is the list of state
        labels (one per observation) and ``dp`` is a
        ``(num_states, num_obs)`` array whose ``[s, t]`` entry is the
        probability of the best path that ends in state ``s`` at step ``t``.
        For very long sequences the entries of ``dp`` may underflow to 0.0,
        but ``best_path`` is still decoded correctly. An empty observation
        sequence yields ``([], dp)`` with ``dp`` of shape ``(num_states, 0)``.
    """
    num_obs = len(obs_indices)  # Number of observations
    num_states = len(states)    # Number of hidden states

    # Nothing to decode: return an empty path instead of failing on dp[:, 0].
    if num_obs == 0:
        return [], np.zeros((num_states, 0))

    # log(0) = -inf is the intended encoding of an impossible event, so the
    # divide-by-zero warning NumPy would emit for it is suppressed.
    with np.errstate(divide="ignore"):
        log_initial = np.log(np.asarray(initial_probs, dtype=float))
        log_transition = np.log(np.asarray(transition_probs, dtype=float))
        log_emission = np.log(np.asarray(emission_probs, dtype=float))

    log_dp = np.full((num_states, num_obs), -np.inf)          # Log-probability table
    backpointer = np.zeros((num_states, num_obs), dtype=int)  # Pointer table

    # Initialization step
    log_dp[:, 0] = log_initial + log_emission[:, obs_indices[0]]

    # Recursion step
    for t in range(1, num_obs):
        # candidates[i, j]: best log-probability of being in state i at t-1
        # and then moving to state j. The emission term is the same for every
        # predecessor i, so it is added after the max.
        candidates = log_dp[:, t - 1][:, np.newaxis] + log_transition
        backpointer[:, t] = np.argmax(candidates, axis=0)
        log_dp[:, t] = np.max(candidates, axis=0) + log_emission[:, obs_indices[t]]

    # Backtracking step: collect states from last to first, then reverse once
    # (avoids the quadratic cost of repeatedly inserting at the list front).
    current_state = int(np.argmax(log_dp[:, -1]))  # Best final state
    reversed_path = [current_state]
    for t in range(num_obs - 1, 0, -1):
        current_state = int(backpointer[current_state, t])
        reversed_path.append(current_state)
    reversed_path.reverse()

    return [states[state] for state in reversed_path], np.exp(log_dp)

# Function to generate insights
def generate_insights(observed_sequence, best_path):
    """Build a one-line-per-step summary pairing each action with its inferred role.

    Steps are numbered from 1. Actions and roles are paired positionally, and
    pairing stops at the end of the shorter input.

    Args:
        observed_sequence: Iterable of action names, in order.
        best_path: Iterable of role names, in the same order.

    Returns:
        str: The summary lines joined by newlines (empty string if there are
        no pairs).
    """
    paired_steps = enumerate(zip(observed_sequence, best_path), start=1)
    return "\n".join(
        f"At step {step}, player performed '{action}' while likely in '{role}' role."
        for step, (action, role) in paired_steps
    )

# Main function to load data, get event IDs, and run HMM
def main(excel_file='Pass.xlsx', player_name="Noa Lang", observed_sequence=None):
    """Load event data, list a player's event IDs, and decode roles with the HMM.

    Args:
        excel_file: Path of the Excel workbook to load. Defaults to
            'Pass.xlsx'.
        player_name: Player whose event IDs are listed. Defaults to
            "Noa Lang".
        observed_sequence: Optional list of action names (each must appear in
            the module-level ``observations`` list) to decode. When omitted, a
            fixed demonstration sequence is used.

    Returns:
        None. Results are printed. If a required column is missing from the
        workbook, the error message is printed and the function returns early.
    """
    # Load the data
    df = load_excel_data(excel_file)

    # Get the event IDs for the player
    try:
        event_ids = get_event_ids(player_name, df)
    except ValueError as e:
        print(e)
        return
    print(f"\nEvent IDs for {player_name}: {event_ids}")

    # TODO: event_ids are only reported above; they are not yet mapped to
    # actions. Until that mapping exists, the decoded sequence is either the
    # caller-supplied one or the demonstration sequence below.
    if observed_sequence is None:
        observed_sequence = ["Pass", "Dribble", "Shot", "Cross", "Pass"]
    obs_indices = [observations.index(obs) for obs in observed_sequence]

    # Run the Viterbi Algorithm to get the most likely states
    best_path, dp_table = viterbi(obs_indices, states, initial_probs, transition_probs, emission_probs)

    # Display the results
    print("\nMost Likely States (Roles):", best_path)
    print("\nProbability Table (DP):")
    print(dp_table)

    # Generate and display detailed insights. Header and body are printed
    # separately: a single print() call would insert the default " " separator
    # after the header's newline and indent the first insight line.
    detailed_insights = generate_insights(observed_sequence, best_path)
    print("\nDetailed Insights:")
    print(detailed_insights)

# Run the pipeline only when this code is executed directly (as a script or
# in a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Excel file loaded successfully.

Event IDs for Noa Lang: []

Most Likely States (Roles): ['Neutral', 'Attacking', 'Attacking', 'Attacking', 'Attacking']

Probability Table (DP):
[[2.00000e-02 4.00000e-03 1.28000e-03 6.40000e-05 3.20000e-06]
 [2.00000e-01 3.20000e-02 1.28000e-03 5.76000e-05 2.21184e-05]
 [9.00000e-02 3.20000e-02 3.84000e-03 1.84320e-04 3.31776e-05]]

Detailed Insights:
 At step 1, player performed 'Pass' while likely in 'Neutral' role.
At step 2, player performed 'Dribble' while likely in 'Attacking' role.
At step 3, player performed 'Shot' while likely in 'Attacking' role.
At step 4, player performed 'Cross' while likely in 'Attacking' role.
At step 5, player performed 'Pass' while likely in 'Attacking' role.
