In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Load the FIFA 22 player table. Forward slashes avoid the invalid "\p" escape
# sequence (a SyntaxWarning) and resolve on both Windows and POSIX systems.
df = pd.read_csv('dataset_fifa/players_22.csv')

# Ordinal score for each work-rate level.
work_rate_mapping = {'Low': 1, 'Medium': 2, 'High': 3}


def work_rate_to_numeric(work_rate):
    """Convert an "<attacking>/<defensive>" work-rate string into one number.

    Each half is stripped of whitespace, looked up in `work_rate_mapping`,
    and the two scores are averaged (e.g. "High/Medium" -> 2.5).

    Raises ValueError if the string does not split into exactly two parts and
    KeyError if a part is not a key of `work_rate_mapping`.
    """
    attack_label, defence_label = work_rate.split('/')
    levels = [work_rate_mapping[label.strip()] for label in (attack_label, defence_label)]
    return sum(levels) / 2

# Numeric work rate: mean of the attacking and defensive levels.
df['work_rate_numeric'] = df['work_rate'].apply(work_rate_to_numeric)

# BMI = weight (kg) / height (m)^2; height_cm is converted to metres first.
df['BMI'] = df['weight_kg'].div(df['height_cm'].div(100).pow(2))

# Stamina score: mean of stamina, sprint speed and strength.
df['stamina_score'] = (
    df['power_stamina']
    .add(df['movement_sprint_speed'])
    .add(df['power_strength'])
    .div(3)
)

# Injury risk: mean of (aggression - strength) and (100 - physic).
df['injury_risk'] = (
    df['mentality_aggression']
    .sub(df['power_strength'])
    .add(df['physic'].rsub(100))
    .div(2)
)

# Training intensity: mean of skill moves, numeric work rate and stamina.
df['training_intensity'] = (
    df['skill_moves']
    .add(df['work_rate_numeric'])
    .add(df['power_stamina'])
    .div(3)
)

# Recovery time: 100 minus the mean of stamina, jumping and reactions.
df['recovery_time'] = (
    df['power_stamina']
    .add(df['power_jumping'])
    .add(df['movement_reactions'])
    .div(3)
    .rsub(100)
)

# Show the first two rows of the derived columns.
print(df[['BMI', 'stamina_score', 'injury_risk', 'training_intensity', 'recovery_time']].head(2))



  df = pd.read_csv('dataset_fifa\players_22.csv')


         BMI  stamina_score  injury_risk  training_intensity  recovery_time
0  24.913495      73.666667          5.0           25.833333      22.000000
1  23.666910      80.333333          6.5           27.500000      15.333333


  df = pd.read_csv('dataset_fifa\players_22.csv')


In [3]:
from sklearn.preprocessing import OneHotEncoder

# Assuming df is your existing dataframe

# Step 1: rename the physical and derived columns to the Title_Case names used below.
# ('Previous_Injuries' needs no entry: mapping a name to itself is a no-op.)
df.rename(columns={
    'age': 'Player_Age',
    'weight_kg': 'Player_Weight',
    'height_cm': 'Player_Height',
    'training_intensity': 'Training_Intensity',
    'recovery_time': 'Recovery_Time',
}, inplace=True)

# Step 2: classify BMI.
# With right=False, pd.cut builds half-open bins [low, high), so the edges must be
# the real thresholds (25, 30, ...). Edges such as 24.9 put a BMI of 24.91 into
# 'Overweight' instead of 'Normal'.
gaps = [-float('inf'), 18.5, 25, 30, 35, 40, float('inf')]
categories = ['Underweight', 'Normal', 'Overweight', 'Obesity I', 'Obesity II', 'Obesity III']

# Create "BMI_Classification" column
df['BMI_Classification'] = pd.cut(df['BMI'], bins=gaps, labels=categories, right=False)

# Step 3: Adding age groups
# The outer edges are open-ended on purpose:
#   * a lower edge of 18 leaves every player younger than 18 without a group (NaN);
#   * an upper edge of Player_Age.max() makes the bin edges non-increasing (pd.cut
#     raises) whenever nobody in the data is older than 34.
# Players younger than 18 therefore fall into the "18-22" group; ages of 18 and
# above are grouped exactly as before.
df["Age_Group"] = pd.cut(
    df["Player_Age"],
    bins=[-float('inf'), 22, 26, 30, 34, float('inf')],
    labels=["18-22", "23-26", "27-30", "31-34", "35+"],
)

# Step 4: one-hot encode the two categorical columns.
one_hot_cols = ["BMI_Classification", "Age_Group"]
df_categorical = df[one_hot_cols]

encoder = OneHotEncoder()
encoded_data = encoder.fit_transform(df_categorical)

# Label the dense encoded matrix with the feature names generated by the encoder.
one_hot_feature_names = encoder.get_feature_names_out(one_hot_cols)
df_encoded = pd.DataFrame(encoded_data.toarray(), columns=one_hot_feature_names)

# Step 5: append the encoded columns to the original frame, side by side.
df_final = pd.concat([df, df_encoded], axis=1)

# Step 6: columns the final frame must expose, in output order.
required_columns = [
    'Player_Age', 'Player_Weight', 'Player_Height', 'Previous_Injuries',
    'Training_Intensity', 'Recovery_Time',
    'BMI_Classification_Normal',
    'BMI_Classification_Obesity I',
    'BMI_Classification_Obesity II',
    'BMI_Classification_Overweight',
    'BMI_Classification_Underweight',
]

# A required column that is absent (e.g. a category with no rows) is created as zeros.
for col in required_columns:
    if col in df_final.columns:
        continue
    df_final[col] = np.zeros(len(df_final))

# Step 7: keep only the required columns, in the order listed above.
df_final = df_final[required_columns]


In [4]:
# df['player_positions'].unique()

In [4]:
# Columns removed from the player table: the short position-code columns
# followed by the image / logo / flag URL columns.
columns_to_drop = [
    'ls', 'st', 'rs',
    'lw', 'lf', 'cf', 'rf', 'rw',
    'lam', 'cam', 'ram',
    'lm', 'lcm', 'cm', 'rcm', 'rm',
    'lwb', 'ldm', 'cdm', 'rdm', 'rwb',
    'lb', 'lcb', 'cb', 'rcb', 'rb',
    'gk',
    'player_face_url', 'club_logo_url', 'club_flag_url',
    'nation_logo_url', 'nation_flag_url',
]

# drop() returns a new frame; rebind df to it.
df = df.drop(labels=columns_to_drop, axis='columns')

# Display the DataFrame to verify the columns have been removed
# df.head()


In [5]:
# Remove the club, league, nationality and name/URL columns, then show what is left.
df = df.drop(
    labels=[
        'club_team_id',
        'club_name',
        'league_name',
        'league_level',
        'club_position',
        'club_jersey_number',
        'club_loaned_from',
        'club_joined',
        'club_contract_valid_until',
        'nationality_id',
        'nationality_name',
        'long_name',
        'player_url',
    ],
    axis='columns',
)
df.columns


Index(['sofifa_id', 'short_name', 'player_positions', 'overall', 'potential',
       'value_eur', 'wage_eur', 'Player_Age', 'dob', 'Player_Height',
       'Player_Weight', 'nation_team_id', 'nation_position',
       'nation_jersey_number', 'preferred_foot', 'weak_foot', 'skill_moves',
       'international_reputation', 'work_rate', 'body_type', 'real_face',
       'release_clause_eur', 'player_tags', 'player_traits', 'pace',
       'shooting', 'passing', 'dribbling', 'defending', 'physic',
       'attacking_crossing', 'attacking_finishing',
       'attacking_heading_accuracy', 'attacking_short_passing',
       'attacking_volleys', 'skill_dribbling', 'skill_curve',
       'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control',
       'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
       'movement_reactions', 'movement_balance', 'power_shot_power',
       'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots',
       'mentality_aggression',

In [7]:
# Display the column labels currently present in df.
df.columns

Index(['sofifa_id', 'short_name', 'player_positions', 'overall', 'potential',
       'value_eur', 'wage_eur', 'Player_Age', 'dob', 'Player_Height',
       'Player_Weight', 'nation_team_id', 'nation_position',
       'nation_jersey_number', 'preferred_foot', 'weak_foot', 'skill_moves',
       'international_reputation', 'work_rate', 'body_type', 'real_face',
       'release_clause_eur', 'player_tags', 'player_traits', 'pace',
       'shooting', 'passing', 'dribbling', 'defending', 'physic',
       'attacking_crossing', 'attacking_finishing',
       'attacking_heading_accuracy', 'attacking_short_passing',
       'attacking_volleys', 'skill_dribbling', 'skill_curve',
       'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control',
       'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
       'movement_reactions', 'movement_balance', 'power_shot_power',
       'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots',
       'mentality_aggression',

In [6]:
import pandas as pd
import numpy as np
# Player columns that are averaged per team to build the match features.
attributes = [
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
    'overall',
    'Player_Age', 'Player_Height', 'Player_Weight',
    'BMI', 'stamina_score', 'injury_risk',
    'Training_Intensity', 'Recovery_Time',
]


# Number of players drafted per team for each position code, in draft order.
# NOTE(review): the counts add up to 12 players per team, not 11 - confirm the intended formation.
team_composition = dict(
    GK=1,   # Goalkeeper
    CB=2,   # Center Backs
    LB=1,   # Left Back
    RB=1,   # Right Back
    CDM=1,  # Central Defensive Midfielder
    CM=2,   # Central Midfielders
    CAM=1,  # Central Attacking Midfielder
    LW=1,   # Left Winger
    RW=1,   # Right Winger
    ST=1,   # Striker
)

def create_balanced_teams(df, random_state=None, composition=None):
    """Draw two disjoint teams from `df`, filling the same positional slots in each.

    For every position in the composition, the required number of players is
    sampled for Team A and then for Team B from the players not yet drafted.

    Parameters
    ----------
    df : pandas.DataFrame
        Player table with a 'sofifa_id' column and a comma-separated
        'player_positions' column (e.g. "RW, ST").
    random_state : None, int or numpy.random.RandomState, optional
        Seed or generator for the draws. None (default) gives a fresh draw on
        every call; pass an int for a reproducible pair of teams. A fixed
        seed inside the function would return the same two teams on every call.
    composition : dict, optional
        Mapping of position code -> players per team. Defaults to the
        module-level `team_composition`.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        Team A and Team B rows, ordered by composition slot.

    Raises
    ------
    ValueError
        If fewer than twice the required players remain for a position.
    """
    if composition is None:
        composition = team_composition

    # One generator is shared by all draws so they are not correlated.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    def plays(listed, position):
        # Exact match on the comma-separated codes; a substring test would
        # wrongly accept 'LWB' when looking for 'LW'.
        return position in {code.strip() for code in str(listed).split(',')}

    available_players = df
    team_a_parts = []
    team_b_parts = []

    for position, num_players in composition.items():
        eligible_mask = available_players['player_positions'].apply(
            lambda listed: plays(listed, position)
        ).astype(bool)
        eligible = available_players[eligible_mask]

        if len(eligible) < 2 * num_players:
            raise ValueError(f"Not enough players available for position: {position}")

        team_a_pick = eligible.sample(num_players, random_state=rng)
        remaining = eligible[~eligible['sofifa_id'].isin(team_a_pick['sofifa_id'])]
        team_b_pick = remaining.sample(num_players, random_state=rng)

        # Remove BOTH picks from the pool so a multi-position player cannot be
        # drafted again for a later slot by either team.
        drafted_ids = pd.concat([team_a_pick['sofifa_id'], team_b_pick['sofifa_id']])
        available_players = available_players[~available_players['sofifa_id'].isin(drafted_ids)]

        team_a_parts.append(team_a_pick)
        team_b_parts.append(team_b_pick)

    if not team_a_parts:
        # Empty composition: nothing to draft.
        return pd.DataFrame(), pd.DataFrame()

    team_a_players = pd.concat(team_a_parts, ignore_index=True)
    team_b_players = pd.concat(team_b_parts, ignore_index=True)
    return team_a_players, team_b_players

# Example function to compare two players based on their statistics
def compare_players(player_a, player_b, player_position):
    """Compare two players stat by stat and report who wins more of them.

    Parameters
    ----------
    player_a, player_b : mapping (e.g. a pandas row or a dict)
        Indexable by stat name.
    player_position : str
        A position code ('ST') or a comma-separated list of codes
        ('RW, ST'), as stored in the 'player_positions' column. The first
        recognized code selects the position-specific stats; if none is
        recognized only the general stats are compared.

    Returns
    -------
    int
        1 if player A wins more stats, -1 if player B does, 0 on a tie.
    """
    position_stats = {
        'GK': ['goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_reflexes', 'goalkeeping_positioning'],
        'DF': ['defending_marking_awareness', 'defending_standing_tackle', 'mentality_interceptions', 'physic'],
        'MF': ['passing', 'dribbling', 'attacking_short_passing', 'mentality_vision'],
        'FW': ['shooting', 'dribbling', 'attacking_finishing'],
        'General': ['overall', 'potential', 'work_rate_numeric', 'stamina_score', 'injury_risk'],
    }

    # Position code -> stat group.
    position_groups = {
        'GK': 'GK',
        'DF': 'DF', 'LB': 'DF', 'RB': 'DF', 'CB': 'DF',
        'MF': 'MF', 'CM': 'MF', 'CDM': 'MF', 'CAM': 'MF',
        'FW': 'FW', 'ST': 'FW', 'LW': 'FW', 'RW': 'FW',
    }

    # Stats where the smaller value is the better one.
    lower_is_better = {'injury_risk'}

    # A raw 'player_positions' value such as "RW, ST" never equals a single
    # code, so split it and use the first code that is recognized.
    codes = [code.strip() for code in str(player_position).split(',')]
    group = next((position_groups[code] for code in codes if code in position_groups), None)

    if group is None:
        stats_to_compare = position_stats['General']
    else:
        stats_to_compare = position_stats[group] + position_stats['General']

    player_a_score = 0
    player_b_score = 0

    for stat in stats_to_compare:
        value_a, value_b = player_a[stat], player_b[stat]
        if stat in lower_is_better:
            # Swap so that "greater" below still means "better".
            value_a, value_b = value_b, value_a
        # Comparisons involving NaN are False both ways, so such stats score nothing.
        if value_a > value_b:
            player_a_score += 1
        elif value_b > value_a:
            player_b_score += 1

    if player_a_score > player_b_score:
        return 1  # Player A wins
    if player_b_score > player_a_score:
        return -1  # Player B wins
    return 0  # Draw


# Function to evaluate team performance based on player comparison
def evaluate_team_performance(team_a, team_b):
    """Decide a match by comparing the two teams slot by slot.

    Row i of `team_a` is compared with row i of `team_b` via
    `compare_players`, using Team A's 'player_positions' value for that row,
    and the per-slot results (+1 / 0 / -1) are summed.

    Every paired row is compared. A hard-coded count of 11 skips the last
    slot of a 12-row team and raises IndexError on shorter teams.

    Returns
    -------
    str
        "Team A wins", "Team B wins" or "It's a draw".
    """
    total_score = 0

    # Both teams are expected to list players in the same slot order.
    paired_slots = min(len(team_a), len(team_b))
    for slot in range(paired_slots):
        player_a = team_a.iloc[slot]
        player_b = team_b.iloc[slot]
        position = player_a['player_positions']
        total_score += compare_players(player_a, player_b, position)

    if total_score > 0:
        return "Team A wins"
    if total_score < 0:
        return "Team B wins"
    return "It's a draw"

def aggregate_team_stats(team_players):
    """Return the column-wise mean of the `attributes` columns of `team_players`."""
    team_attribute_values = team_players[attributes]
    return team_attribute_values.mean()
# Main function to generate match data
def generate_match_data(df, num_matches=3000, max_attempts=10000):
    """Build a table of simulated matches and their outcome labels.

    NOTE: a later cell defines `generate_match_data` again with the same
    signature; when the notebook runs top to bottom that later definition
    replaces this one.

    Parameters
    ----------
    df : pandas.DataFrame
        Player table passed to `create_balanced_teams`.
    num_matches : int
        Number of distinct team pairings to generate.
    max_attempts : int
        Maximum number of consecutive repeated pairings tolerated before
        giving up. Without this cap the loop never ends once
        `create_balanced_teams` keeps returning a pairing already seen.

    Returns
    -------
    tuple of (pandas.DataFrame, list of str)
        One row of '_a' / '_b' suffixed team means per match, and the label
        returned by `evaluate_team_performance` for each match.
    """
    match_data = []
    match_labels = []
    unique_team_compositions = set()
    consecutive_duplicates = 0

    while len(match_data) < num_matches and consecutive_duplicates < max_attempts:
        team_a_players, team_b_players = create_balanced_teams(df)

        # Sorted id tuples identify a pairing regardless of row order.
        team_a_ids = tuple(sorted(team_a_players['sofifa_id'].tolist()))
        team_b_ids = tuple(sorted(team_b_players['sofifa_id'].tolist()))
        match_identifier = (team_a_ids, team_b_ids)

        if match_identifier in unique_team_compositions:
            consecutive_duplicates += 1
            continue
        unique_team_compositions.add(match_identifier)

        team_a_stats = aggregate_team_stats(team_a_players).add_suffix('_a')
        team_b_stats = aggregate_team_stats(team_b_players).add_suffix('_b')
        match_features = pd.concat([team_a_stats, team_b_stats], axis=0)

        winner = evaluate_team_performance(team_a_players, team_b_players)

        match_data.append(match_features)
        match_labels.append(winner)
        consecutive_duplicates = 0

    if len(match_data) < num_matches:
        print(f"Warning: Only generated {len(match_data)} unique matches after {max_attempts} attempts.")

    return pd.DataFrame(match_data), match_labels


def generate_mvp_data(df, top_fraction=0.1):
    """Build per-player MVP training data: stat features and a 0/1 MVP label.

    Each available MVP attribute is min-max normalized across all players,
    the normalized values are averaged into one score per player, and the
    players whose score reaches the (1 - top_fraction) quantile are labelled 1.

    Normalizing one player's row across its own attributes and taking
    `np.argmax` of the resulting scalar mean always yields 0, so the label
    has to be derived from a comparison between players instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Player table; MVP attributes missing from it are reported and skipped.
    top_fraction : float, optional
        Share of players labelled as MVP (0 < top_fraction <= 1). The default
        of 0.1 is a modelling choice, not derived from data.

    Returns
    -------
    tuple of (pandas.DataFrame, list of int)
        The players' values for the available attributes (fresh 0..n-1
        index) and one 0/1 label per player.

    Raises
    ------
    ValueError
        If `top_fraction` is outside (0, 1].
    """
    if not 0 < top_fraction <= 1:
        raise ValueError("top_fraction must be in the interval (0, 1]")

    # Attributes considered for MVP selection
    mvp_attributes = ['overall', 'shooting', 'passing', 'dribbling', 'attacking_finishing',
                      'skill_dribbling', 'physic', 'stamina_score', 'Training_Intensity']

    available_attributes = [attr for attr in mvp_attributes if attr in df.columns]
    missing_attributes = [attr for attr in mvp_attributes if attr not in df.columns]

    if missing_attributes:
        print(f"Warning: The following attributes are missing in the DataFrame: {missing_attributes}")

    features = df[available_attributes].reset_index(drop=True)

    # Column-wise min-max scaling; a constant column has zero range and is
    # turned into NaN so it is ignored by the mean instead of dividing by zero.
    lows = features.min()
    spans = (features.max() - lows).replace(0, np.nan)
    normalized = (features - lows) / spans

    # Mean over the attributes a player actually has (NaN values are skipped).
    scores = normalized.mean(axis=1)

    if scores.notna().any():
        threshold = scores.quantile(1 - top_fraction)
        labels = (scores >= threshold).astype(int).tolist()
    else:
        # No usable score for anyone (empty table or no attributes).
        labels = [0] * len(features)

    return features, labels

# Stat columns summed into a team's offensive and defensive scores for goal prediction.
offensive_stats = ['shooting', 'attacking_finishing', 'passing', 'dribbling']
defensive_stats = [
    'defending',
    'physic',
    'goalkeeping_diving',
    'goalkeeping_positioning',
    'goalkeeping_reflexes',
]


def goal_probability(offensive_score, defensive_score):
    """Turn an attack-versus-defence score gap into a goal count between 0 and 4.

    The gap is divided by 50, floored at 0, rounded with `np.round` and
    clipped to at most 4.
    """
    scaled_margin = (offensive_score - defensive_score) / 50
    expected_goals = max(0, scaled_margin)  # a negative margin means no goals
    return np.clip(np.round(expected_goals), 0, 4)

def generate_goal_data(df, num_matches=3000):
    """Simulate `num_matches` matches and return their features and goal labels.

    For each match two teams are drawn with `create_balanced_teams`; each
    side's goals come from `goal_probability` applied to its summed mean
    offensive stats against the opponent's summed mean defensive stats.

    Returns
    -------
    tuple of (pandas.DataFrame, list)
        One row of '_a' / '_b' suffixed team means per match, and a
        [goals_team_a, goals_team_b] pair per match.
    """
    feature_rows = []
    goal_labels = []

    for _ in range(num_matches):
        team_a_players, team_b_players = create_balanced_teams(df)

        # Team strength = sum over the stat columns of the team's mean value.
        attack_a = team_a_players[offensive_stats].mean().sum()
        defence_a = team_a_players[defensive_stats].mean().sum()
        attack_b = team_b_players[offensive_stats].mean().sum()
        defence_b = team_b_players[defensive_stats].mean().sum()

        # Each attack is measured against the opposing defence.
        goals_team_a = goal_probability(attack_a, defence_b)
        goals_team_b = goal_probability(attack_b, defence_a)

        side_a = aggregate_team_stats(team_a_players).add_suffix('_a')
        side_b = aggregate_team_stats(team_b_players).add_suffix('_b')

        feature_rows.append(pd.concat([side_a, side_b], axis=0))
        goal_labels.append([goals_team_a, goals_team_b])

    return pd.DataFrame(feature_rows), goal_labels




In [7]:
import pandas as pd

def generate_match_data(df, num_matches=3000, max_attempts=10000):
    """Build a table of distinct simulated matches and their outcome labels.

    Teams are drawn with `create_balanced_teams` until `num_matches` distinct
    pairings exist or `max_attempts` consecutive draws repeated a pairing
    already seen. A warning is printed when fewer matches than requested
    were produced.

    Returns
    -------
    tuple of (pandas.DataFrame, list)
        One row of '_a' / '_b' suffixed team means per match, and the label
        returned by `evaluate_team_performance` for each match.
    """
    feature_rows = []
    outcome_labels = []
    seen_pairings = set()
    attempts = 0  # consecutive draws that repeated a known pairing

    while len(feature_rows) < num_matches and attempts < max_attempts:
        team_a_players, team_b_players = create_balanced_teams(df)

        # Sorted id tuples identify a pairing independently of row order.
        pairing = (
            tuple(sorted(team_a_players['sofifa_id'].tolist())),
            tuple(sorted(team_b_players['sofifa_id'].tolist())),
        )

        if pairing in seen_pairings:
            attempts += 1
            continue
        seen_pairings.add(pairing)

        side_a = aggregate_team_stats(team_a_players).add_suffix('_a')
        side_b = aggregate_team_stats(team_b_players).add_suffix('_b')
        row = pd.concat([side_a, side_b], axis=0)

        outcome = evaluate_team_performance(team_a_players, team_b_players)

        feature_rows.append(row)
        outcome_labels.append(outcome)

        # A new pairing was found, so the duplicate counter starts over.
        attempts = 0

    if len(feature_rows) < num_matches:
        print(f"Warning: Only generated {len(feature_rows)} unique matches after {max_attempts} attempts.")

    return pd.DataFrame(feature_rows), outcome_labels


In [9]:
import pandas as pd
import numpy as np



# Example usage
# df should be your DataFrame containing player data
# team_a, team_b = create_balanced_teams(df)


In [8]:
# team_a_players, team_b_players = create_balanced_teams(df)
# team_a_stats = aggregate_team_stats(team_a_players).add_suffix('_a')
# team_b_stats = aggregate_team_stats(team_b_players).add_suffix('_b')
# winner = evaluate_team_performance(team_a_players, team_b_players)
# Build the match feature table X and the outcome labels y from the player table,
# using generate_match_data's default arguments.
X, y = generate_match_data(df)



In [1]:
import joblib

# Hold out 20% of the simulated matches for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the match-outcome classifier (fit() returns the estimator itself).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Persist the fitted model to disk.
model_filename = 'teammodel.pkl'
joblib.dump(model, model_filename)

# Mean accuracy on the held-out matches.
accuracy = model.score(X_test, y_test)
print(f'Model accuracy: {accuracy:.2f}')

NameError: name 'train_test_split' is not defined

In [65]:

# Build per-player MVP features and labels from the already-loaded player table.
X_mvp, y_mvp = generate_mvp_data(df)

# Split data into training and test sets
X_train_mvp, X_test_mvp, y_train_mvp, y_test_mvp = train_test_split(X_mvp, y_mvp, test_size=0.2, random_state=42)

# Train the MVP prediction model.
# (A stray backtick inside `X_train_mvp` made this cell a SyntaxError.)
mvp_model = RandomForestClassifier(n_estimators=10, random_state=42)
mvp_model.fit(X_train_mvp, y_train_mvp)

# Predict MVP labels for the held-out players
mvp_prediction = mvp_model.predict(X_test_mvp)
print(f"Predicted MVP: {mvp_prediction}")
# Attributes used for injury prediction. The age column is called 'Player_Age'
# after the earlier rename; listing 'age' would silently drop it from the features.
# injury_attributes = ['age', 'strength', 'power_jumping', 'sprint_speed','stamina']
injury_attributes = ['Player_Age', 'power_jumping']


def generate_injury_data(df, random_state=None, injury_rate=0.15):
    """Build a synthetic injury dataset: player features plus random 0/1 labels.

    The labels are NOT derived from the features: each player is
    independently marked injured with probability `injury_rate`. This is a
    placeholder until real injury history is available.

    Parameters
    ----------
    df : pandas.DataFrame
        Player table; entries of `injury_attributes` missing from it are
        reported and skipped.
    random_state : None or int, optional
        Seed for the label draw. None (default) uses NumPy's global random
        state, so the labels differ between runs unless that state is seeded.
    injury_rate : float, optional
        Probability of the label 1 (default 0.15).

    Returns
    -------
    tuple of (pandas.DataFrame, list of int)
        The available injury attributes for every player (index preserved)
        and one 0/1 label per player.
    """
    available_attributes = [attr for attr in injury_attributes if attr in df.columns]
    missing_attributes = [attr for attr in injury_attributes if attr not in df.columns]

    if missing_attributes:
        print(f"Warning: The following attributes are missing in the DataFrame: {missing_attributes}")

    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Column selection replaces a row-by-row loop; one vectorized draw covers all players.
    injury_data = df[available_attributes].copy()
    injury_labels = rng.choice(
        [0, 1], size=len(df), p=[1 - injury_rate, injury_rate]
    ).tolist()

    return injury_data, injury_labels

# Injury model: build the dataset from the already-loaded player table.
X_injury, y_injury = generate_injury_data(df)

# Hold out 20% of the players for testing.
X_train_injury, X_test_injury, y_train_injury, y_test_injury = train_test_split(
    X_injury, y_injury, test_size=0.2, random_state=42
)

# Fit the injury classifier.
injury_model = RandomForestClassifier(n_estimators=100, random_state=42)
injury_model.fit(X_train_injury, y_train_injury)

# Predicted injury labels for the held-out players.
injury_prediction = injury_model.predict(X_test_injury)
print(f"Injury Likelihood Prediction: {injury_prediction}")


# Goal model: simulated matches labelled with [goals_team_a, goals_team_b].
X_goals, y_goals = generate_goal_data(df)

# Hold out 20% of the matches for testing.
X_train_goals, X_test_goals, y_train_goals, y_test_goals = train_test_split(
    X_goals, y_goals, test_size=0.2, random_state=42
)

# Fit the regressor on the first entry of each label pair, i.e. Team A's goals.
goal_model = RandomForestRegressor(n_estimators=100, random_state=42)
goal_model.fit(X_train_goals, [pair[0] for pair in y_train_goals])

# Placeholder line-ups: 11 randomly sampled players per side (replace with actual IDs).
# NOTE(review): the two samples are drawn independently, so the same player can
# appear on both sides.
team_a_players = df[df['sofifa_id'].isin(list(df['sofifa_id'].sample(11)))]
team_b_players = df[df['sofifa_id'].isin(list(df['sofifa_id'].sample(11)))]

# Aggregate stats for both teams
team_a_stats = aggregate_team_stats(team_a_players).add_suffix('_a')
team_b_stats = aggregate_team_stats(team_b_players).add_suffix('_b')

# Combine both teams' stats into a single one-row frame
match_features = pd.concat([team_a_stats, team_b_stats], axis=0).to_frame().T

# Team Dominance Prediction
team_dominance = model.predict(match_features)

# evaluate_team_performance labels matches with the strings "Team A wins",
# "Team B wins" and "It's a draw", so a model fitted on those labels predicts
# strings. Comparing the prediction with 0 never matches them and always
# selected Team B. The numeric label 0 is still honoured for models fitted on
# numeric labels.
predicted_outcome = team_dominance[0]
if isinstance(predicted_outcome, str):
    team_a_dominates = predicted_outcome == "Team A wins"
else:
    team_a_dominates = predicted_outcome == 0

# As before, every outcome other than a Team A win (including a predicted draw)
# selects Team B.
if team_a_dominates:
    dominant_team = 'Team A'
    dominant_team_players = team_a_players
    dominant_team_stats = team_a_stats
else:
    dominant_team = 'Team B'
    dominant_team_players = team_b_players
    dominant_team_stats = team_b_stats

print(f"{dominant_team} is predicted to dominate!")

# --- MVP prediction for the dominant team ---
mvp_attributes = ['overall', 'shooting', 'passing', 'dribbling', 'attacking_finishing',
                  'skill_dribbling', 'physic', 'stamina_score', 'Training_Intensity']

# Split the MVP attributes into those the team frame has and those it lacks.
available_mvp_attributes = []
missing_mvp_attributes = []
for attr in mvp_attributes:
    if attr in dominant_team_players.columns:
        available_mvp_attributes.append(attr)
    else:
        missing_mvp_attributes.append(attr)

if missing_mvp_attributes:
    print(f"Warning: The following MVP attributes are missing for {dominant_team}: {missing_mvp_attributes}")

# One MVP label per player of the dominant team.
mvp_prediction = mvp_model.predict(dominant_team_players[available_mvp_attributes])
print(f"Predicted MVP from {dominant_team}: {mvp_prediction}")

# --- Injury prediction for the dominant team ---
# Same split for the injury attributes.
available_injury_attributes = []
missing_injury_attributes = []
for attr in injury_attributes:
    if attr in dominant_team_players.columns:
        available_injury_attributes.append(attr)
    else:
        missing_injury_attributes.append(attr)

if missing_injury_attributes:
    print(f"Warning: The following injury attributes are missing for {dominant_team}: {missing_injury_attributes}")

# One injury label per player of the dominant team.
injury_likelihood = injury_model.predict(dominant_team_players[available_injury_attributes])
print(f"Injury Likelihood for {dominant_team} players: {injury_likelihood}")

# Build a one-row feature frame whose columns line up with the training features.
def prepare_team_stats_for_prediction(team_a_players, team_b_players):
    """Return a one-row DataFrame of match features aligned to `X_train.columns`.

    The attribute means of the two teams are suffixed with '_a' and '_b',
    concatenated into a single row and reindexed to the training columns;
    training columns that are absent are filled with 0.
    """
    side_a = aggregate_team_stats(team_a_players).add_suffix('_a')
    side_b = aggregate_team_stats(team_b_players).add_suffix('_b')

    # Series -> single-row frame.
    single_row = pd.concat([side_a, side_b], axis=0).to_frame().T

    # Same column names and order as the features the model was trained on.
    return single_row.reindex(columns=X_train.columns, fill_value=0)

# goal_model is fitted on the first entry of each goal pair, i.e. the goals of the
# side that occupies the '_a' feature columns. To predict the dominant team's
# goals, that team must be passed as the first ('_a') side. Always passing Team A
# first reports Team A's predicted goals even when Team B is the dominant team.
if dominant_team == 'Team A':
    dominant_team_stats = prepare_team_stats_for_prediction(team_a_players, team_b_players)
else:
    dominant_team_stats = prepare_team_stats_for_prediction(team_b_players, team_a_players)

# Predict the number of goals for the dominant team
predicted_goals = goal_model.predict(dominant_team_stats)


print(f"Predicted number of goals for {dominant_team}: {round(predicted_goals[0])}")

Predicted MVP: [0 0 0 ... 0 0 0]
Injury Likelihood Prediction: [0 0 0 ... 0 0 0]


In [None]:
# Install the shap package (no version is pinned here).
%pip install shap
