### Code to implement collaborative filtering for soccer player substitutions:

##### Collaborative filtering methods in soccer analyze patterns of player performance and team dynamics to recommend substitutions based on similar historical situations. These methods typically use matrix factorization or neighborhood-based approaches to identify latent features and similarities between players, teams, and match contexts.

###### Our objective here is to get the Ranked lists of potential substitute players for the team northwestern wildcats based on their similarity to successful past substitutions in comparable match situations

In [4]:
import os
os.getcwd()

'c:\\Users\\John\\Desktop\\Hackathon 2024\\hackathon_test'

In [7]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


df = pd.read_csv('player-data/cleaned_player.csv')

# Filtering for Northwestern Wildcats team
nw_players = df[df['team'] == 'Northwestern Wildcats']

# Dynamic categorization function to classify positions into 'Back', 'Forward', 'Goalkeeper', and 'Midfield'
def categorize_position(position):
    """
    This function categorizes the player's position based on the string value in the 'Position' column.
    """
    if 'GK' in position:  # Goalkeeper
        return 'Goalkeeper'
    elif 'CB' in position or 'LB' in position or 'RB' in position or 'WB' in position:  # Defense/Back
        return 'Back'
    elif 'DMF' in position or 'CMF' in position or 'AMF' in position:  # Midfield
        if 'L' in position:
            return 'Midfield'  # Left Midfield
        elif 'R' in position:
            return 'Midfield'  # Right Midfield
        else:
            return 'Midfield'  # Central Midfield
    elif 'LW' in position or 'RW' in position or 'WF' in position:  # Wide Forward/Wing
        if 'L' in position:
            return 'Forward'  # Left Wing
        elif 'R' in position:
            return 'Forward'  # Right Wing
    elif 'CF' in position:  # Center Forward
        return 'Forward'
    else:
        return 'Other'

# Applying the dynamic categorization to 'Simple_Position' column
nw_players['Simple_Position'] = nw_players['Position'].apply(categorize_position)

# Function to create a player profile based on performance statistics
def create_player_profile(player_data):
    """
    This function calculates the average statistics for each player.
    The statistics considered are goals, assists, shots, shots on target, passes into box, and successful dribbles.
    """
    return player_data[['goals', 'assists', 'shots', 'shots_on_target', 'passes_into_box', 'successful_dribbles']].mean()

# Group the data by player and create player profiles (average of performance statistics)
player_profiles = nw_players.groupby('player_name', group_keys=False).apply(create_player_profile).fillna(0)

# Calculating the similarity matrix using cosine similarity
similarity_matrix = cosine_similarity(player_profiles)


# Function to get substitute recommendations for a given player
def get_substitute_recommendations(player_name, top_n=3):
    """
    This function generates substitute recommendations based on player similarity.
    It considers players with similar profiles and matching positions, who have played less than 90 minutes.
    """
    if player_name not in player_profiles.index:
        return "Player not found in dataset."
    
    
    player_idx = player_profiles.index.get_loc(player_name)
    player_position = nw_players[nw_players['player_name'] == player_name]['Simple_Position'].iloc[0]
    
    # Getting the similarities of the input player with all other players
    player_similarities = similarity_matrix[player_idx]
    
    # Creating a Series of player similarities sorted in descending order, excluding the input player itself
    similar_players = pd.Series(player_similarities, index=player_profiles.index)
    similar_players = similar_players.sort_values(ascending=False)[1:]  # Exclude the player itself
    
    # Preparing the list of substitute recommendations
    recommendations = []
    for similar_player in similar_players.index:
        similar_player_data = nw_players[nw_players['player_name'] == similar_player].iloc[0]
        
        # Checking if the similar player has the same position and has played less than 90 minutes
        if similar_player_data['Simple_Position'] == player_position and similar_player_data['Minutes played'] < 90:
            recommendations.append((
                similar_player,  # Player's name
                similar_player_data['Simple_Position'],  # Player's position
                similar_player_data['Minutes played'],  # Player's minutes played
                similar_players[similar_player]  # Cosine similarity
            ))
        
        # Stop the loop once the required number of recommendations are met
        if len(recommendations) == top_n:
            break
    
    return recommendations

#getting substitute recommendations for a specific player say
player_name = "T. Glassberg" 
recommendations = get_substitute_recommendations(player_name)

print(f"Recommended substitutes for {player_name}:")
if isinstance(recommendations, list):  
    for i, (name, pos, mins, similarity) in enumerate(recommendations, 1):
        print(f"{i}. {name} (Position: {pos}, Avg. Minutes: {mins:.2f}, Similarity: {similarity:.2f})")
else:
    print(recommendations)  # If no recommendations found, print the message


Recommended substitutes for T. Glassberg:
1. H. Richter (Position: Back, Avg. Minutes: 13.00, Similarity: 0.90)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nw_players['Simple_Position'] = nw_players['Position'].apply(categorize_position)
  player_profiles = nw_players.groupby('player_name', group_keys=False).apply(create_player_profile).fillna(0)
