In [1]:
#imports
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, r2_score, make_scorer
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
import numpy as np
import joblib
import matplotlib.pyplot as plt
from scipy.optimize import linprog
import time
from datetime import datetime

2024-10-17 13:34:04.387462: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Load and Combine Seasons

1. **Base URL and Seasons**: It defines a base URL to access raw data from a GitHub repository and specifies the seasons of interest (2024-25, 2023-24, 2022-23, 2021-22).

2. **Model Name**: It sets a model name (`MODEL_NAME`) that presumably refers to a pre-trained model file.

3. **Loading Season Data**: The function `load_season_data(season)` performs the following tasks:
   - Initializes a dictionary `data` to store data for various categories (players, player IDs, fixtures, teams, and player history).
   - Defines a dictionary of file names corresponding to each category.
   - Iterates over the file names, reading the CSV files from the specified season's folder using the base URL. 
   - For the "teams" category, it only loads specific columns (`id` and `name`).
   - Adds a new column, `season`, to each dataframe to indicate the season from which the data is sourced.

4. **Loading All Seasons Data**: The function `load_all_seasons_data(seasons)`:
   - Initializes a dictionary `all_data` to hold lists for each data category.
   - Calls `load_season_data(season)` for each specified season to load the data.
   - Appends the data from each season to the respective lists in `all_data`.
   - Finally, concatenates the lists into a single dataframe for each category, ignoring index conflicts.

5. **Combining Data**: After defining the functions, the code calls `load_all_seasons_data(seasons)` to load and combine data from all specified seasons.

6. **Accessing Combined Data**: It stores the combined dataframes for players, fixtures, player IDs, player history, and teams in separate variables for further analysis or processing.

In summary, the section fetches and organizes fantasy football data from multiple seasons into structured dataframes for subsequent use.

In [2]:
base_url = 'https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/'
seasons = ['2024-25', '2023-24', '2022-23', '2021-22']
MODEL_NAME = 'FPL_NeuralNet_20241017.joblib'
SCALER_FILENAME = 'FPL_NeuralNet_scaler.pkl'

def load_season_data(season):
    data = {}
    files = {
        'players': 'players_raw.csv',
        'player_ids': 'player_idlist.csv',
        'fixtures': 'fixtures.csv',
        'teams': 'teams.csv',
        'player_history': 'gws/merged_gw.csv'
    }
    
    for key, file in files.items():
        if key == 'teams':
            data[key] = pd.read_csv(f'{base_url}{season}/{file}', usecols=['id', 'name'])
        else:
            data[key] = pd.read_csv(f'{base_url}{season}/{file}')
        
        data[key]['season'] = season
    
    return data

def load_all_seasons_data(seasons):
    all_data = {key: [] for key in ['players', 'player_ids', 'fixtures', 'teams', 'player_history']}
    
    for season in seasons:
        season_data = load_season_data(season)
        for key in all_data:
            all_data[key].append(season_data[key])
    
    return {key: pd.concat(value, ignore_index=True) for key, value in all_data.items()}

# Load and combine data for all seasons
combined_data = load_all_seasons_data(seasons)

# Access the combined dataframes
players = combined_data['players']
fixtures = combined_data['fixtures']
player_ids = combined_data['player_ids']
player_history = combined_data['player_history']
teams = combined_data['teams']

# Clean Data and Prep for Model

### 1. **Creating Unique IDs**
- **Function `create_unique_ids(player_ids, teams)`**: 
  - Generates unique identifiers for players and teams.
  - Combines player first and second names to create a full name and drops duplicates to create a unique player dataframe (`players_unique`).
  - Creates a unique ID for each player based on their index.
  - Drops duplicates from the teams dataframe and creates a unique ID for each team.

### 2. **Merging Unique IDs**
- **Function `merge_unique_ids(players, player_history, teams, players_unique, teams_unique, fixtures)`**: 
  - Merges unique player IDs into player data and player history based on names.
  - Merges unique team IDs into team data and renames columns to better reflect the data.
  - Merges team IDs into players and player history based on their respective teams for the season.
  - Adds unique team IDs to fixtures and creates a combined `game` identifier for easier analysis.

### 3. **Cleaning Player Data**
- **Function `clean_players(df)`**: 
  - Cleans player data by converting certain columns to numeric types, filling missing values, and dropping unnecessary columns.
  - Renames the full name column for consistency.

### 4. **Cleaning Player History Data**
- **Function `clean_player_history(df)`**: 
  - Cleans player history data by dropping rows without unique player IDs and filling missing values in key columns.
  - Converts columns to appropriate data types and calculates cumulative statistics (points, minutes) and averages for performance metrics.

### 5. **Formatting Columns**
- **Function `format_columns(df)`**: 
  - Converts specific columns to strings or numeric types and rounds numeric values for consistency.

### 6. **Calculating Rolling Averages**
- **Functions `calculate_overall_rolling_averages(df, window=5)` and `calculate_opponent_rolling_difficulty(df, window=5)`**: 
  - Calculate rolling averages for goals scored and conceded for teams over a specified window (default is 5).
  - Determine opponent difficulty by calculating rolling averages of team difficulties.

### 7. **Merging Rolling Averages**
- **Function `merge_rolling_avgs(df, overall_averages, fixture_difficulty)`**: 
  - Merges calculated rolling averages and difficulty metrics into the player history dataframe for comprehensive analysis.

### 8. **Main Processing Function**
- **Function `process_data(player_ids, teams, players, player_history, fixtures)`**: 
  - Integrates all previous functions to clean and process data.
  - Creates unique IDs, merges them, cleans player and player history data, calculates rolling averages, and ensures that required columns are present in the final output.
  - Returns the processed dataframes for players, player history, teams, fixtures, unique IDs, required columns, overall averages, and fixture difficulty.

### 9. **Usage**
- The final comment shows how to call the `process_data` function with the relevant dataframes to perform the entire data processing workflow.

Overall, this section efficiently organizes and prepares the fantasy football data for analysis by generating unique identifiers, cleaning data, calculating performance metrics, and ensuring that all necessary information is readily available.

In [3]:
def create_unique_ids(player_ids, teams):
    # Create unique ID for each player
    player_ids['name'] = player_ids['first_name'] + ' ' + player_ids['second_name']
    players_unique = player_ids.drop(['id', 'season'], axis=1).drop_duplicates()
    players_unique['PlayerUniqueID'] = players_unique.index

    # Create unique ID for each team
    teams_unique = teams.drop(['id', 'season'], axis=1).drop_duplicates().reset_index(drop=True)
    teams_unique['TeamUniqueID'] = teams_unique.index + 1

    return players_unique, teams_unique

def merge_unique_ids(players, player_history, teams, players_unique, teams_unique, fixtures):
    # Merge player unique IDs
    players = players.merge(players_unique, on=['first_name', 'second_name'], how='left')
    player_history = player_history.merge(players_unique, on='name', how='left')

    # Merge team unique IDs
    teams = teams.merge(teams_unique, on='name', how='left')
    teams.rename(columns={'name': 'team_name'}, inplace=True)
    teams_opponent = teams.rename(columns={'team_name': 'team_name_oppo', 'TeamUniqueID': 'TeamUniqueID_oppo'})

    # Merge team IDs to players and player_history
    players = players.merge(teams, left_on=['team', 'season'], right_on=['id', 'season'], how='left')
    player_history = player_history.merge(teams, left_on=['team', 'season'], right_on=['team_name', 'season'], how='left')
    player_history = player_history.merge(teams_opponent, left_on=['opponent_team', 'season'], right_on=['id', 'season'], how='left')
    player_history.drop(['id_x', 'id_y'], axis=1, inplace=True)
    
    #add unique team ids to fixtures
    fixtures = fixtures.merge(teams, left_on=['team_h','season'], right_on=['id','season'], how='left')
    fixtures = fixtures.merge(teams_opponent, left_on=['team_a','season'], right_on=['id','season'], how='left')
    fixtures.drop(['id_x','id_y'],axis = 1,inplace=True)

    #add in column for season and gameweek
    fixtures['game']=fixtures['season'].str.replace('-','') + fixtures['event'].astype(str).str.zfill(2)

    return players, player_history, teams, fixtures

def clean_players(df):
    replace_cols = ['chance_of_playing_next_round', 'chance_of_playing_this_round', 'corners_and_indirect_freekicks_order',
                    'direct_freekicks_order', 'penalties_order', 'clean_sheets_per_90', 'expected_assists',
                    'expected_assists_per_90', 'expected_goal_involvements', 'expected_goal_involvements_per_90',
                    'expected_goals', 'expected_goals_conceded', 'expected_goals_conceded_per_90', 'expected_goals_per_90',
                    'form_rank', 'form_rank_type', 'goals_conceded_per_90', 'now_cost_rank', 'now_cost_rank_type',
                    'points_per_game_rank', 'points_per_game_rank_type', 'saves_per_90', 'selected_rank', 'selected_rank_type',
                    'starts', 'starts_per_90']

    drop_cols = ['corners_and_indirect_freekicks_text', 'direct_freekicks_text', 'news', 'news_added', 'penalties_text',
                 'photo', 'special', 'squad_number']

    df['form'] = df['form'].astype(float)
    df['total_points'] = df['total_points'].astype(float)
    df['minutes'] = df['minutes'].astype(float)
    df['cost'] = df['now_cost'] / 10
    df['name'] = df['first_name'] + ' ' + df['second_name']

    df[replace_cols] = df[replace_cols].fillna(0).replace('None', 0)
    df[replace_cols] = df[replace_cols].apply(pd.to_numeric, errors='coerce')
    df.drop(columns=drop_cols, inplace=True)

    return df

def clean_player_history(df):
    df.dropna(subset=["PlayerUniqueID"], inplace=True)
    
    replace_cols = ['expected_assists', 'expected_goal_involvements', 'expected_goals', 'expected_goals_conceded', 'starts']
    df[replace_cols] = df[replace_cols].fillna(0)

    for col in ['influence', 'creativity', 'threat', 'ict_index']:
        df[col] = df[col].astype(float)

    df['cost'] = df['value'] / 10
    df['game'] = df['season'].str.replace('-', '') + df['GW'].astype(str).str.zfill(2)
    df['was_home'] = df['was_home'].astype(int)

    df = df.sort_values(by=['PlayerUniqueID', 'season', 'GW'])
    df['cumulative_points'] = df.groupby(['PlayerUniqueID', 'season'])['total_points'].cumsum()
    df['cumulative_minutes'] = df.groupby(['PlayerUniqueID', 'season'])['minutes'].cumsum()
    df['ppm'] = (df['cumulative_points'] / df['cumulative_minutes']).fillna(0).replace([np.inf, -np.inf], 0).round(5)
    df['points_per_cost'] = (df['cumulative_points'] / df['cost']).round(5)
    df['rolling_avg_points'] = df.groupby('PlayerUniqueID')['total_points'].rolling(window=5, min_periods=1).mean().reset_index(level=0, drop=True).round(5)

    df['position'] = df['position'].map({'GK': 1, 'DEF': 2, 'MID': 3, 'FWD': 4})
    df.dropna(subset=['position'], inplace=True)

    return df

def format_columns(df):
    strings = ['position', 'PlayerUniqueID', 'TeamUniqueID', 'TeamUniqueID_oppo', 'game']
    nums = ['xP', 'assists', 'clean_sheets', 'creativity', 'expected_assists',
            'expected_goal_involvements', 'expected_goals', 'expected_goals_conceded', 'goals_conceded',
            'goals_scored', 'influence', 'minutes', 'own_goals', 'penalties_missed', 'penalties_saved',
            'red_cards', 'saves', 'selected', 'starts', 'threat', 'transfers_balance', 'cost', 'was_home',
            'yellow_cards', 'cumulative_points', 'cumulative_minutes', 'ppm', 'rolling_avg_points',
            'rolling_avg_goals_scored', 'rolling_avg_goals_conceded', 'rolling_team_difficulty']

    df[strings] = df[strings].astype(str)
    df[nums] = df[nums].round(5)
    
    return df

def calculate_overall_rolling_averages(df, window=5):
    team_data = pd.concat([
        df[['game', 'TeamUniqueID', 'team_h_score', 'team_a_score']].rename(columns={'TeamUniqueID': 'team_id', 'team_h_score': 'goals_scored', 'team_a_score': 'goals_conceded'}),
        df[['game', 'TeamUniqueID_oppo', 'team_a_score', 'team_h_score']].rename(columns={'TeamUniqueID_oppo': 'team_id', 'team_a_score': 'goals_scored', 'team_h_score': 'goals_conceded'})
    ])
    
    team_data = team_data.sort_values(['team_id', 'game'])
    
    team_data['rolling_avg_goals_scored'] = team_data.groupby('team_id')['goals_scored'].rolling(window=window, min_periods=1).mean().reset_index(0, drop=True)
    team_data['rolling_avg_goals_conceded'] = team_data.groupby('team_id')['goals_conceded'].rolling(window=window, min_periods=1).mean().reset_index(0, drop=True)
    
    return team_data

def calculate_opponent_rolling_difficulty(df, window=5):
    team_data_diff = pd.concat([
        df[['game', 'TeamUniqueID', 'team_h_difficulty']].rename(columns={'TeamUniqueID': 'team_id', 'team_h_difficulty': 'team_difficulty'}),
        df[['game', 'TeamUniqueID_oppo', 'team_a_difficulty']].rename(columns={'TeamUniqueID_oppo': 'team_id', 'team_a_difficulty': 'team_difficulty'})
    ])
    
    team_data_diff = team_data_diff.sort_values(['team_id', 'game'])
    team_data_diff['rolling_team_difficulty'] = team_data_diff.groupby('team_id')['team_difficulty'].rolling(window=window, min_periods=1).mean().reset_index(0, drop=True)
    
    return team_data_diff

def merge_rolling_avgs(df, overall_averages, fixture_difficulty):
    df = pd.merge(df, overall_averages[['game', 'team_id', 'rolling_avg_goals_scored', 'rolling_avg_goals_conceded']],
                  left_on=['game', 'TeamUniqueID'], right_on=['game', 'team_id'], how='left')
    df.drop(['team_id'], axis=1, inplace=True)

    df = pd.merge(df, fixture_difficulty[['game', 'team_id', 'team_difficulty', 'rolling_team_difficulty']],
                  left_on=['game', 'TeamUniqueID'], right_on=['game', 'team_id'], how='left')
    df.drop(['team_id'], axis=1, inplace=True)
    
    return df

def process_data(player_ids, teams, players, player_history, fixtures):
    players_unique, teams_unique = create_unique_ids(player_ids, teams)
    players, player_history, teams, fixtures = merge_unique_ids(players, player_history, teams, players_unique, teams_unique, fixtures)
    
    players = clean_players(players)
    player_history = clean_player_history(player_history)
    #clean_player_history function has things in it out of order to the rolling and merge functions
    
    overall_averages = calculate_overall_rolling_averages(fixtures)
    fixture_difficulty = calculate_opponent_rolling_difficulty(fixtures)
    
    player_history = merge_rolling_avgs(player_history, overall_averages, fixture_difficulty)
    
    player_history=format_columns(player_history)
    
    player_history['TeamUniqueID'] = player_history['TeamUniqueID'].astype(str)
    players['TeamUniqueID'] = players['TeamUniqueID'].astype(str)
    
    required_columns = [
        'position', 'xP', 'assists', 'clean_sheets', 'creativity', 'expected_assists',
        'expected_goal_involvements', 'expected_goals', 'expected_goals_conceded', 'goals_conceded', 
        'goals_scored', 'influence', 'minutes', 'own_goals', 'penalties_missed', 'penalties_saved', 
        'red_cards', 'saves', 'starts', 'threat', 'transfers_balance', 'cost', 'was_home',
        'yellow_cards', 'PlayerUniqueID', 'TeamUniqueID', 'TeamUniqueID_oppo', 'cumulative_points',
        'cumulative_minutes', 'ppm', 'rolling_avg_points', 'rolling_avg_goals_scored', 
        'rolling_avg_goals_conceded', 'rolling_team_difficulty', 'game'
    ]
    
    for col in required_columns:
        if col not in player_history.columns:
            player_history[col] = np.nan
    #some of these columns are exported for use later on
    return players, player_history, teams, fixtures, players_unique, required_columns, overall_averages, fixture_difficulty

# Usage
# players, player_history, teams, fixtures = process_data(player_ids, teams, players, player_history, fixtures)

In [4]:
players, player_history, teams, fixtures, players_unique, required_columns, overall_averages, fixture_difficulty = process_data(player_ids, teams, players, player_history, fixtures)

# Run The Model

This code is designed to train and update a machine learning model that predicts player performance across game weeks, specifically for fantasy football, using player history data. Here's a breakdown of what each function does:

1. **Saving and Loading Model/Scaler**:
   - `save_model_and_scaler`: Saves a trained model and a scaler using `joblib`.
   - `load_model_and_scaler`: Loads a previously saved model and scaler. Handles file-not-found errors.

2. **Data Preparation**:
   - `prepare_data`: Prepares the player data by imputing missing numeric values, converting categorical columns, and ensuring columns like 'game' and 'position' are ordered.
   - `add_season_weights`: Adds a weight to each player's data based on how recent the season is, allowing for older seasons to have less influence in the model.

3. **Preprocessing**:
   - `preprocess_data`: Scales the feature data using `StandardScaler` and converts training labels and sample weights to numpy arrays for compatibility with TensorFlow.

4. **Model Creation**:
   - `create_model`: Creates a neural network using Keras with customizable parameters such as learning rate, number of neurons, layers, and dropout. The model is compiled with weighted metrics (`MAE`, `MSE`).

5. **Model Evaluation**:
   - `evaluate_model`: Evaluates the model on test data, calculates `MAE` and `R2`, and returns the results.

6. **Model Training**:
   - `train_nn_model_with_grid_search`: Trains a neural network using grid search over basic parameter settings. Implements early stopping to prevent overfitting.

7. **Dynamic Training Loop**:
   - `dynamic_model_training_with_updates`: Automates the process of iteratively training the model with new game data. It loads the latest model, preprocesses the data, trains or updates the model, and evaluates predictions for each game week. The model and scaler are saved after each game week.

8. **Model Update**:
   - `update_model`: Retrains the model on new data, ensuring the features are scaled correctly and optionally using sample weights.

9. **Usage**:
   - The main part of the script defines features, target, and current season games. It calls `dynamic_model_training_with_updates` to train the model on player history data across multiple game weeks and make predictions.

### Overall Workflow:
The section prepares player data, trains or updates a neural network for game outcomes, evaluates its performance, and saves the model for future use. Each game week is processed in a loop, allowing for dynamic updates to the model based on the most recent data.

In [5]:
def save_model_and_scaler(model, scaler, model_name, scaler_filename):
    # Save the model
    joblib.dump(model, model_name)
    print(f"Model saved as {model_name}")
    
    # Save the scaler
    joblib.dump(scaler, scaler_filename)
    print(f"Scaler saved as {scaler_filename}")

def load_model_and_scaler(model_name, scaler_filename):
    try:
        # Load the model
        model = joblib.load(model_name)
        print(f"Model loaded from {model_name}")
        
        # Load the scaler
        scaler = joblib.load(scaler_filename)
        print(f"Scaler loaded from {scaler_filename}")
        
        return model, scaler
    except FileNotFoundError as e:
        print(f"File not found: {str(e)}")
        return None, None

def prepare_data(player_history, features, target):
    #print(f"Shape of player_history before prepare_data: {player_history.shape}")
    
    # Identify numeric and categorical features
    numeric_features = player_history[features].select_dtypes(include=[np.number]).columns.tolist()
    categorical_features = player_history[features].select_dtypes(include=[object]).columns.tolist()

    # Impute numeric features
    numeric_imputer = SimpleImputer(strategy='mean')
    player_history[numeric_features] = numeric_imputer.fit_transform(player_history[numeric_features])

    # Convert object columns to 'category' dtype
    for col in categorical_features:
        player_history[col] = player_history[col].astype('category')
        
    # Ensure 'game' column is ordered
    if 'game' in player_history.columns:
        player_history['game'] = pd.Categorical(player_history['game'], ordered=True)

    # Ensure 'position' column is ordered if applicable
#     if 'position' in player_history.columns:
#         player_history['position'] = pd.Categorical(player_history['position'], 
#                                                     categories=['GK', 'DEF', 'MID', 'FWD'], 
#                                                     ordered=True)
    
    #print(f"Shape of player_history after prepare_data: {player_history.shape}")
    return player_history

def add_season_weights(player_history):
    #print(f"Shape of player_history before add_season_weights: {player_history.shape}")
    
    # Extract the year from the 'season' column
    player_history['season'] = player_history['season'].str.split('-').str[0]

    # Convert 'season' to numeric
    player_history['season'] = pd.to_numeric(player_history['season'], errors='coerce')
    
    # Drop any rows where 'season' is NaN
    player_history = player_history.dropna(subset=['season'])
    
    current_season = player_history['season'].max()
    min_season = player_history['season'].min()
    
    player_history['season_weight'] = (player_history['season'] - min_season + 1) / (current_season - min_season + 1)
    
    #print(f"Shape of player_history after add_season_weights: {player_history.shape}")
    return player_history

def preprocess_data(X_train, y_train, sample_weights=None):
    """
    Preprocess the data to ensure compatibility with TensorFlow.
    """
    # Convert y_train and sample_weights to numpy arrays
    y_train = np.array(y_train, dtype=np.float32)
    
    if sample_weights is not None:
        sample_weights = np.array(sample_weights, dtype=np.float32)
    
    # Scale the features using the DataFrame directly to keep column names
    scaler = StandardScaler()
    X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
    
    return X_train_scaled, y_train, sample_weights, scaler

def create_model(input_dim, learning_rate=0.001, neurons=64, layers=2, dropout_rate=0.2, activation='relu'):
    """Create and compile the neural network model with weighted metrics"""
    model = Sequential()
    
    # Input layer
    model.add(Dense(neurons, input_dim=input_dim, activation=activation))
    model.add(Dropout(dropout_rate))
    
    # Hidden layers
    for _ in range(layers - 1):
        model.add(Dense(neurons, activation=activation))
        model.add(Dropout(dropout_rate))
    
    # Output layer
    model.add(Dense(1))
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    
    # Compile with weighted metrics explicitly specified
    model.compile(
        optimizer=optimizer,
        loss='mse',
        metrics=['mae', 'mse'],  # Regular metrics
        weighted_metrics=['mae', 'mse']  # Specify which metrics should be weighted
    )
    
    return model

def evaluate_model(model, X_test, y_test, sample_weights=None):
    """Evaluate model performance using weighted metrics"""
    if sample_weights is not None:
        evaluation = model.evaluate(
            X_test, 
            y_test, 
            sample_weight=sample_weights,
            verbose=0
        )
    else:
        evaluation = model.evaluate(
            X_test, 
            y_test, 
            verbose=0
        )
    
    # Get predictions for R2 score calculation
    predictions = model.predict(X_test, verbose=0)
    r2 = r2_score(y_test, predictions)
    
    # The first value is the loss, second is MAE, third is weighted MAE
    mae = evaluation[1]  # Use the unweighted MAE
    
    return mae, r2

def train_nn_model_with_grid_search(X_train, y_train, sample_weights=None):
    """
    Train neural network model with basic parameter settings
    """
    # Ensure all inputs are float32
    X_train_scaled = np.array(X_train, dtype=np.float32)
    y_train_processed = np.array(y_train, dtype=np.float32)
    
    if sample_weights is not None:
        sample_weights = np.array(sample_weights, dtype=np.float32)
    
    # Scale the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_scaled)
    
    # Model parameters
    params = {
        'learning_rate': 0.001,
        'neurons': 64,
        'layers': 2,
        'dropout_rate': 0.2,
        'activation': 'relu'
    }
    
    # Create model
    model = create_model(
        input_dim=X_train_scaled.shape[1],
        **params
    )
    
    # Early stopping callback
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    )
    
    try:
        # Train the model
        if sample_weights is not None:
            history = model.fit(
                X_train_scaled,
                y_train_processed,
                sample_weight=sample_weights,
                validation_split=0.2,
                epochs=50,
                batch_size=32,
                callbacks=[early_stopping],
                verbose=1
            )
        else:
            history = model.fit(
                X_train_scaled,
                y_train_processed,
                validation_split=0.2,
                epochs=50,
                batch_size=32,
                callbacks=[early_stopping],
                verbose=1
            )
        
        return model, scaler
        
    except Exception as e:
        print(f"Error during training: {str(e)}")
        raise
        
def dynamic_model_training_with_updates(player_history, features, target, current_season_games, model_name, scaler_filename):
    player_history = prepare_data(player_history, features, target)
    player_history = add_season_weights(player_history)
    
    # Load the latest model and scaler
    latest_model, scaler = load_model_and_scaler(model_name, scaler_filename)
    
    all_predictions = []
    maes = []
    r2s = []
    
    # Loop through game weeks
    for game in current_season_games:
        print(f"\nProcessing game week: {game}")
        start_time = time.time()
        
        train_data = player_history[player_history['game'] < game]
        test_data = player_history[player_history['game'] == game]
        
        X_train = train_data[features]
        y_train = train_data[target]
        X_test = test_data[features]
        y_test = test_data[target]
        
        sample_weights = train_data['season_weight'].values.astype(np.float32)
        
        # Train the model if no previous model exists
        if latest_model is None:
            print("Training new model...")
            model, scaler = train_nn_model_with_grid_search(X_train, y_train, sample_weights)
            latest_model = model
        else:
            model = update_model(latest_model, X_train, y_train, X_test, y_test, scaler, sample_weights)
            latest_model = model
        
        # Scale the test data
        X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)
        
        # Make predictions
        predictions = model.predict(X_test_scaled, verbose=0)
        
        # Evaluate performance
        mae = mean_absolute_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)
        
        maes.append(mae)
        r2s.append(r2)
        all_predictions.extend(predictions)
        
        player_history.loc[player_history['game'] == game, 'predicted_points'] = predictions.flatten()
        
        end_time = time.time()
        elapsed_time = end_time - start_time
        print(f"Training time: {int(elapsed_time // 60)} minutes and {elapsed_time % 60:.2f} seconds")
        print(f"MAE: {mae:.4f}, R2: {r2:.4f}")
        
        # Save the updated model and scaler
        save_model_and_scaler(latest_model, scaler, model_name, scaler_filename)
    
    overall_mae = np.mean(maes)
    overall_r2 = np.mean(r2s)
    
    print(f"\nOverall MAE: {overall_mae:.4f}")
    print(f"Overall R2: {overall_r2:.4f}")
    
    return all_predictions, overall_mae, overall_r2, latest_model

def update_model(model, X_train, y_train, X_test, y_test, scaler, sample_weights=None):
    """Update existing model with new data"""
    if scaler is not None:
        X_train_scaled = scaler.transform(X_train)
        X_test_scaled = scaler.transform(X_test)
    else:
        X_train_scaled = X_train
        X_test_scaled = X_test
    
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    )
    
    try:
        if sample_weights is not None:
            model.fit(
                X_train_scaled,
                y_train,
                sample_weight=sample_weights,
                validation_data=(X_test_scaled, y_test),
                epochs=50,
                batch_size=32,
                callbacks=[early_stopping],
                verbose=1
            )
        else:
            model.fit(
                X_train_scaled,
                y_train,
                validation_data=(X_test_scaled, y_test),
                epochs=50,
                batch_size=32,
                callbacks=[early_stopping],
                verbose=1
            )
        
        return model
        
    except Exception as e:
        print(f"Error during model update: {str(e)}")
        raise

In [6]:
# Usage

#Start time
print("Start time:", datetime.now().strftime("%m-%d-%Y %H:%M:%S"))
#------

features = [
    'position', 'xP', 'assists', 'clean_sheets', 'creativity', 'expected_assists',
    'expected_goal_involvements', 'expected_goals', 'expected_goals_conceded', 'goals_conceded', 
    'goals_scored', 'influence', 'minutes', 'own_goals', 'penalties_missed', 'penalties_saved', 
    'red_cards', 'saves', 'starts', 'threat', 'transfers_balance', 'cost', 'was_home',
    'yellow_cards', 'PlayerUniqueID', 'TeamUniqueID', 'TeamUniqueID_oppo', 'cumulative_points',
    'cumulative_minutes', 'ppm', 'rolling_avg_points', 'rolling_avg_goals_scored', 
    'rolling_avg_goals_conceded', 'rolling_team_difficulty', 'game'
]
target = 'total_points'
current_season_games = ['20242501', '20242502', '20242503', '20242504', '20242505', '20242506', '20242507']

predictions, overall_mae, overall_r2, final_model = dynamic_model_training_with_updates(
    player_history, 
    features, 
    target, 
    current_season_games,
    MODEL_NAME,
    SCALER_FILENAME
)

# To load the model later
#loaded_model = load_model(MODEL_NAME)

Start time: 10-17-2024 13:34:11
File not found: [Errno 2] No such file or directory: 'FPL_NeuralNet_20241017.joblib'

Processing game week: 20242501
Training new model...


2024-10-17 13:34:12.114197: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Training time: 2 minutes and 13.68 seconds
MAE: 0.1194, R2: 0.9795




INFO:tensorflow:Assets written to: ram://c1a4d031-9fd3-4954-adcf-ec157a2a662b/assets
Model saved as FPL_NeuralNet_20241017.joblib
Scaler saved as FPL_NeuralNet_scaler.pkl

Processing game week: 20242502




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Training time: 0 minutes and 45.89 seconds
MAE: 0.1612, R2: 0.9862




INFO:tensorflow:Assets written to: ram://df2ff220-2014-45ed-8f3b-75c05c407eae/assets
Model saved as FPL_NeuralNet_20241017.joblib
Scaler saved as FPL_NeuralNet_scaler.pkl

Processing game week: 20242503




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Training time: 1 minutes and 25.32 seconds
MAE: 0.1018, R2: 0.9870




INFO:tensorflow:Assets written to: ram://81e7aa02-c2c2-4c7f-a900-46f566de7542/assets
Model saved as FPL_NeuralNet_20241017.joblib
Scaler saved as FPL_NeuralNet_scaler.pkl

Processing game week: 20242504




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Training time: 1 minutes and 43.90 seconds
MAE: 0.1202, R2: 0.9849




INFO:tensorflow:Assets written to: ram://fe53f07a-405c-4dac-84c7-8e3ec0b140d3/assets
Model saved as FPL_NeuralNet_20241017.joblib
Scaler saved as FPL_NeuralNet_scaler.pkl

Processing game week: 20242505




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Training time: 1 minutes and 26.95 seconds
MAE: 0.1149, R2: 0.9815




INFO:tensorflow:Assets written to: ram://8c0e716c-cad4-42f2-9651-2eba998ae2ed/assets
Model saved as FPL_NeuralNet_20241017.joblib
Scaler saved as FPL_NeuralNet_scaler.pkl

Processing game week: 20242506




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Training time: 0 minutes and 54.24 seconds
MAE: 0.1149, R2: 0.9851




INFO:tensorflow:Assets written to: ram://f32d9343-e0c3-4532-98c3-962d00f0fd02/assets
Model saved as FPL_NeuralNet_20241017.joblib
Scaler saved as FPL_NeuralNet_scaler.pkl

Processing game week: 20242507




Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Training time: 0 minutes and 51.26 seconds
MAE: 0.1289, R2: 0.9772




INFO:tensorflow:Assets written to: ram://66ea1d7b-bfec-445c-a9f2-17d2406791dc/assets
Model saved as FPL_NeuralNet_20241017.joblib
Scaler saved as FPL_NeuralNet_scaler.pkl

Overall MAE: 0.1231
Overall R2: 0.9831


# Load Current Gameweek Data and run model


1. **`load_and_preprocess_data()`**: 
   - Loads raw player data from a CSV file.
   - Selects and renames columns, adds game week information, and calculates metrics such as points per minute (`ppm`) and average minutes played per game.
   - Outputs the processed player data for the upcoming game week.

2. **`load_and_process_fixtures()`**: 
   - Loads fixture data for the current season.
   - Filters the data to get only the fixtures for the current game week.

3. **`prepare_for_prediction(df, teams, players_unique, required_columns)`**:
   - Merges player data with team data, cleaning up columns.
   - Merges player data with unique player identifiers.
   - Calculates rolling averages for performance metrics and team difficulty ratings.
   - Outputs the prepared data for prediction.

4. **`merge_player_and_fixture_data(next_gw, fixtures)`**:
   - Merges player data with fixture data for both home and away teams.
   - Calculates additional metrics like transfers balance, player cost, and whether the player is playing at home.
   - Outputs the combined player and fixture data.

5. **`calculate_rolling_averages(df, window=5)`**:
   - Calculates rolling averages (over 5 games by default) for team-level statistics like goals scored and conceded.

6. **`calculate_team_difficulty(df)`**:
   - Calculates the average difficulty for each team based on fixture difficulty ratings and maps this back to the main dataframe.

7. **`convert_dtypes_for_model(df, model_type)`**:
   - Converts specific columns (like player and team IDs) into categorical data types, which are needed for model input.

8. **`calculate_advanced_metrics(df)`**:
   - Calculates advanced metrics like expected goals and a probability of scoring based on expected goals per minute.

### Usage Flow:
1. The script loads and preprocesses raw player data using `load_and_preprocess_data()`.
2. Fixture data for the current game week is loaded with `load_and_process_fixtures()`.
3. Player data is merged with fixture data using `merge_player_and_fixture_data()`.
4. The merged data is prepared for model input with `prepare_for_prediction()`, which calculates rolling averages and team difficulty.
5. Advanced metrics are computed with `calculate_advanced_metrics()`.
6. The processed data is then scaled and passed to a trained neural network model, which predicts player points for the upcoming game week.

Finally, the predicted points are normalized and added to the player data for analysis.


This section is structured to prepare and analyze fantasy football data, facilitating predictions based on player and team performance metrics.

In [9]:
def load_and_preprocess_data():
    # Load raw player data
    raw_2024_25 = pd.read_csv(RAW_URL)
    
    # Select and rename columns
    columns_to_select = [
        'element_type', 'ep_next', 'assists', 'bonus', 'bps', 'clean_sheets', 'creativity',
        'expected_assists', 'expected_goal_involvements', 'expected_goals', 'expected_goals_conceded',
        'goals_conceded', 'goals_scored', 'influence', 'minutes', 'own_goals', 'penalties_missed',
        'penalties_saved', 'red_cards', 'saves', 'selected_by_percent', 'starts', 'threat',
        'transfers_in', 'transfers_out', 'now_cost', 'yellow_cards', 'total_points',
        'points_per_game', 'team', 'team_code', 'first_name', 'second_name'
    ]
    columns_to_rename = {
        'element_type': 'position', 'ep_next': 'xP', 'now_cost': 'value',
        'minutes': 'cumulative_minutes', 'total_points': 'cumulative_points',
        'points_per_game': 'rolling_avg_points'
    }
    next_gw = raw_2024_25[columns_to_select].rename(columns=columns_to_rename)
    
    # Add game week info and calculate new columns
    next_gw['game'] = GAME_ID
    next_gw['ppm'] = next_gw['cumulative_points'] / next_gw['cumulative_minutes'].replace(0, 1)
    next_gw['minutes'] = next_gw['cumulative_minutes'] / (CURRENT_GW - 1)
    return next_gw

def load_and_process_fixtures():
    fixtures_2024_25 = pd.read_csv(FIXTURES_URL, usecols=['event', 'team_a', 'team_h', 'team_h_difficulty', 'team_a_difficulty'])
    return fixtures_2024_25[fixtures_2024_25['event'] == CURRENT_GW]

def prepare_for_prediction(df, teams, players_unique, required_columns):
    # Merge with team data
    teams_new = teams[teams['season'] == CURRENT_SEASON]
    df = df.merge(teams_new, left_on='team', right_on='id', how='left')
    df = df.merge(teams_new, left_on='team_h', right_on='id', how='left', suffixes=('', '_oppo'))
    
    # Clean up columns
    columns_to_drop = ['id', 'id_oppo', 'team_name', 'team_name_oppo', 'season', 'season_oppo']
    df.drop(columns=columns_to_drop, errors='ignore', inplace=True)
    df.rename(columns={'TeamUniqueID': 'TeamUniqueID', 'TeamUniqueID_oppo': 'TeamUniqueID_oppo'}, inplace=True)
    
    # Merge with player data
    df = df.merge(players_unique, on=['first_name', 'second_name'], how='left')
    
    # Calculate rolling averages
    df = calculate_rolling_averages(df)
    
    # Calculate team difficulty
    df = calculate_team_difficulty(df)
    
    return df

def merge_player_and_fixture_data(next_gw, fixtures):
    # Merge for both home and away teams
    merged_home = pd.merge(next_gw, fixtures, left_on='team', right_on='team_h', how='inner')
    merged_away = pd.merge(next_gw, fixtures, left_on='team', right_on='team_a', how='inner')
    final_merged_df = pd.concat([merged_home, merged_away])
    
    # Additional processing
    final_merged_df['transfers_balance'] = final_merged_df['transfers_in'] - final_merged_df['transfers_out']
    final_merged_df['cost'] = final_merged_df['value'] / 10
    final_merged_df['was_home'] = np.where(final_merged_df['team'] == final_merged_df['team_h'], 1, 0)
    return final_merged_df

def calculate_rolling_averages(df, window=5):
    # Sort the dataframe by team and game
    df = df.sort_values(['team', 'game'])
    
    # Calculate rolling averages for team-level statistics
    df['rolling_avg_goals_scored'] = df.groupby('team')['goals_scored'].transform(lambda x: x.rolling(window, min_periods=1).mean())
    df['rolling_avg_goals_conceded'] = df.groupby('team')['goals_conceded'].transform(lambda x: x.rolling(window, min_periods=1).mean())
    
    return df

def calculate_team_difficulty(df):
    # Calculate average difficulty for each team
    team_difficulty = df.groupby('team')[['team_h_difficulty', 'team_a_difficulty']].mean().mean(axis=1)
    
    # Map team difficulty back to the dataframe
    df['rolling_team_difficulty'] = df['team'].map(team_difficulty)
    
    return df

def convert_dtypes_for_model(df, model_type):
    category_columns = ['position', 'PlayerUniqueID', 'TeamUniqueID', 'TeamUniqueID_oppo', 'game']
    for col in category_columns:
        df[col] = df[col].astype('category')
    return df

def calculate_advanced_metrics(df):
    df['expected_goals'] = (df['expected_goal_involvements'] * 0.6)
    df['expected_assists'] = df['expected_assists']
    df['goal_scoring_probability'] = (df['expected_goals'] / df['minutes'].replace(0, 1)) * 100
    df['goal_scoring_probability'] = df['goal_scoring_probability'].clip(0, 100)
    return df

In [19]:
#Usage

# Constants
RAW_URL = 'https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2024-25/players_raw.csv'
FIXTURES_URL = 'https://raw.githubusercontent.com/vaastav/Fantasy-Premier-League/master/data/2024-25/fixtures.csv'
CURRENT_SEASON = '2024-25'
CURRENT_GW = 8
GAME_ID = f'2024{CURRENT_GW:02d}'

# Define required columns based on your model's features
required_columns = [
    'position', 'xP', 'assists', 'clean_sheets', 'creativity', 'expected_assists',
'expected_goal_involvements', 'expected_goals', 'expected_goals_conceded', 'goals_conceded', 
'goals_scored', 'influence', 'minutes', 'own_goals', 'penalties_missed', 'penalties_saved', 
'red_cards', 'saves', 'starts', 'threat', 'transfers_balance', 'cost', 'was_home',
'yellow_cards', 'PlayerUniqueID', 'TeamUniqueID', 'TeamUniqueID_oppo', 'cumulative_points',
'cumulative_minutes', 'ppm', 'rolling_avg_points', 'rolling_avg_goals_scored', 
'rolling_avg_goals_conceded', 'rolling_team_difficulty', 'game'
]


# Load the trained neural network model and StandardScaler
nn_model = joblib.load(MODEL_NAME)
scaler = joblib.load(SCALER_FILENAME)

# Load and preprocess data
next_gw = load_and_preprocess_data()
fixtures = load_and_process_fixtures()
final_merged_df = merge_player_and_fixture_data(next_gw, fixtures)

upcoming_game_week_data = prepare_for_prediction(final_merged_df, teams, players_unique, required_columns)
upcoming_game_week_data = convert_dtypes_for_model(upcoming_game_week_data, 'neural_net')

upcoming_game_week_data = calculate_advanced_metrics(upcoming_game_week_data)

# Scale the feature data
X_upcoming = upcoming_game_week_data[required_columns]
X_upcoming_scaled = scaler.transform(X_upcoming)

# Make predictions using the neural network model
predictions_upcoming = nn_model.predict(X_upcoming_scaled)

# Add predictions to the dataframe
upcoming_game_week_data['predicted_points'] = predictions_upcoming.flatten()/6
#dividing by 6 right now as it looks more realistic





In [20]:
## NOTE ##
# Advanced Metrics are a work in progress and don't look all too accurate or correct. 

# Select Best Team

1. **Team Selection Optimization**:
   - **`optimize_team_selection(upcoming_game_week_data, budget=100)`**: This function uses linear programming to select an optimal team of players within a specified budget.
     - **Position Constraints**: It defines constraints for player positions (goalkeeper, defenders, midfielders, forwards) and their minimum and maximum counts.
     - **Objective Function**: The goal is to maximize the predicted points while adhering to the budget and positional constraints.
     - The function returns a selection of players (indicated by a binary array) based on the optimization results.

2. **Team Preparation**:
   - **`prepare_selected_team(upcoming_game_week_data, selected_players, teams)`**: This function prepares the final selected team by filtering for the chosen players and merging their data with team information.
     - It maps position codes to their text representations (e.g., GK, DEF) and formats other relevant columns for output.
     - The function returns a DataFrame containing details about the selected players, including their teams, positions, costs, and predicted points.

3. **Main Function**:
   - **`main(upcoming_game_week_data, teams, budget=100)`**: This function coordinates the selection and preparation of the optimized team by calling the previous two functions. It returns the final team lineup.

4. **Execution Block** (commented out): The code includes a section that would execute the `main()` function if the script is run as a standalone program, assuming the required data is already loaded.

Overall, the section is designed to optimize fantasy football team selections based on player performance predictions, budget constraints, and positional requirements.

In [21]:
def optimize_team_selection(upcoming_game_week_data, budget=100):
    # Constants and mappings
    POSITION_MAPPING = {1: 'GK', 2: 'DEF', 3: 'MID', 4: 'FWD'}
    POSITION_COUNTS = {
        'GK': (1, 2),
        'DEF': (3, 5),
        'MID': (3, 5),
        'FWD': (1, 3)
    }

    # Prepare data
    player_costs = upcoming_game_week_data['cost'].values
    player_points = upcoming_game_week_data['predicted_points'].values
    player_positions = upcoming_game_week_data['position'].values
    num_players = len(player_costs)

    # Objective function: maximize points (minimize negative points)
    c = -player_points

    # Constraints
    A = [player_costs]  # Budget constraint
    b = [budget]

    # Position constraints
    for pos, (min_count, max_count) in POSITION_COUNTS.items():
        pos_indicator = (player_positions == [k for k, v in POSITION_MAPPING.items() if v == pos][0]).astype(int)
        A.extend([pos_indicator, -pos_indicator])
        b.extend([max_count, -min_count])

    A = np.array(A)
    b = np.array(b)

    # Bounds for each player (0 or 1 - either selected or not)
    bounds = [(0, 1) for _ in range(num_players)]

    # Solve the linear programming problem
    result = linprog(c, A_ub=A, b_ub=b, bounds=bounds, method='highs')

    # Get the selected players
    selected_players = result.x.round().astype(int)

    return selected_players

def prepare_selected_team(upcoming_game_week_data, selected_players, teams):
    # Filter for selected players
    selected_team = upcoming_game_week_data[selected_players == 1].copy()

    # Merge with team data
    current_season_teams = teams[teams['season'] == '2024-25']
    selected_team = selected_team.merge(current_season_teams[['TeamUniqueID', 'team_name']], on='TeamUniqueID', how='left')
    selected_team = selected_team.merge(current_season_teams[['TeamUniqueID', 'team_name']], left_on='TeamUniqueID_oppo', right_on='TeamUniqueID', how='left', suffixes=('', '_oppo'))
    
    # Map position codes to text
    position_mapping = {1: 'GK', 2: 'DEF', 3: 'MID', 4: 'FWD'}
    selected_team['position_txt'] = selected_team['position'].map(position_mapping)

    # Prepare output
    selected_team['was_home'] = selected_team['was_home'].apply(lambda x: 'H' if x == 1 else 'A')
    columns_to_display = ['team_name', 'name', 'position_txt', 'cost', 'predicted_points', 'team_name_oppo',
                          'was_home','expected_goals', 'expected_assists', 'goal_scoring_probability']

    selected_team = selected_team[columns_to_display].sort_values('predicted_points', ascending=False)

    selected_team.rename(columns={'team_name': 'Team', 'name':'Player', 'position_txt':'Position',
                                 'cost':'Player Cost', 'predicted_points':'Predicted Gameweek Points',
                                 'team_name_oppo':'Opposition Team Name', 'was_home':'Home or Away',
                                 'expected_goals':'Xg', 'expected_assists':'Xa',
                                 'goal_scoring_probability':'Goal Scoring Probability (%)'}, inplace=True)
    return selected_team

def main(upcoming_game_week_data, teams, budget=100):
    selected_players = optimize_team_selection(upcoming_game_week_data, budget)
    optimized_team = prepare_selected_team(upcoming_game_week_data, selected_players, teams)
    return optimized_team

# if __name__ == "__main__":
#     # Assuming upcoming_game_week_data and teams are already loaded
#     optimized_team = main(upcoming_game_week_data, teams)
#     print(optimized_team)

In [22]:
# Usage
optimized_team = main(upcoming_game_week_data, teams)
print(optimized_team)

         Team           Player Position  Player Cost  \
12  Liverpool    Mohamed Salah      MID         12.7   
5     Chelsea      Cole Palmer      MID         10.8   
1     Arsenal      Bukayo Saka      MID         10.1   
11  Liverpool        Luis Díaz      MID          8.1   
0     Arsenal      Kai Havertz      FWD          8.3   
4     Chelsea  Nicolas Jackson      FWD          7.9   
6     Everton    Dwight McNeil      MID          5.6   
3    Brighton    Danny Welbeck      FWD          5.8   
2   Brentford   Nathan Collins      DEF          4.5   
10  Leicester     James Justin      DEF          4.5   
14     Wolves  Rayan Aït-Nouri      DEF          4.4   
7     Ipswich       Leif Davis      DEF          4.5   
13    Man Utd      André Onana       GK          5.0   
8   Leicester        Wout Faes      DEF          4.1   
9   Leicester   Mads Hermansen       GK          4.5   

    Predicted Gameweek Points Opposition Team Name Home or Away     Xg    Xa  \
12                  11.

In [23]:
# Save the selected team to a CSV file
optimized_team.to_csv('selected_team_NN_20242508.csv', index=False)

In [15]:
#Create requirements file
#!pip freeze > requirements.txt