In [1]:
import json
from datetime import datetime
from urllib.parse import quote

import pandas as pd
import requests

In [2]:
# Maps the short display name used in the defensive-summary CSVs to the full
# name used when searching the Transfermarkt API.
# Each short name must appear exactly once: a repeated key in a dict literal is
# silently collapsed (the last value wins), which hides copy/paste mistakes.
player_full_names = {
    "R. Shawcross": "Ryan Shawcross",
    "L. Dunk": "Lewis Dunk",
    "C. Clark": "Ciaran Clark",
    "S. Duffy": "Shane Duffy",
    "F. Lejeune": "Florian Lejeune",
    "W. Hoedt": "Wesley Hoedt",
    "Ahmed Hegazy": "Ahmed Elsayed Ali Elsayed Hegazy",
    "A. Wan-Bissaka": "Aaron Wan-Bissaka",
    "C. Kabasele": "Christian Kabasele",
    "K. Long": "Kevin Long",
    "C. Smalling": "Chris Smalling",
    "A. Williams": "Ashley Williams",
    "M. Rojo": "Marcos Rojo",
    "S. Mustafi": "Shkodran Mustafi",
    "C. Schindler": "Christopher Schindler",
    "M. Keane": "Michael Keane",
    "J. Tomkins": "James Tomkins",
    "A. Ogbonna": "Angelo Ogbonna",
    "P. Jagielka": "Phil Jagielka",
    "J. Kenny": "Jonjoe Kenny",
    "N. Pareja": "Nicolás Pareja",
    # Listed once even though this short name shows up in both the Spain and
    # Italy top lists; both occurrences map to the same full name.
    "L. Cabrera": "Leandro Cabrera",
    "Bartra": "Marc Bartra",
    "G. Maripán": "Guillermo Maripán",
    "F. Schär": "Fabian Schär",
    "Jesús Vallejo": "Jesús Vallejo",
    "Djené": "Djené Dakonam",
    "Juan Cala": "Juan Cala",
    "Rúben Vezo": "Rúben Vezo",
    "E. Muñoz": "Ezequiel Muñoz",
    "Z. Feddal": "Zouhair Feddal",
    "J. Murillo": "Jeison Murillo",
    "Luisinho": "Luis Carlos Almeida da Cunha",
    "Bóveda": "Eneko Bóveda Altube",
    "M. Dos Santos": "Mauro Dos Santos",
    "Víctor Ruíz": "Víctor Ruíz",
    "Rodrigo Ely": "Rodrigo Ely",
    "C. Lenglet": "Clément Lenglet",
    "S. Kjær": "Simon Kjær",
    "Muniesa": "Marc Muniesa",
    "P. Lienhart": "Philipp Lienhart",
    "A. Hack": "Alexander Hack",
    "L. Balogun": "Leon Balogun",
    "N. Stark": "Niklas Stark",
    "Felipe": "Felipe",
    "D. Maroh": "Dominique Maroh",
    "S. Bell": "Stefan Bell",
    "M. Gulde": "Manuel Gulde",
    "D. Heintz": "Dominik Heintz",
    "B. Hübner": "Benjamin Hübner",
    "T. Hübers": "Timo Hübers",
    "F. Sørensen": "Frederik Sørensen",
    "K. Papadopoulos": "Kyriakos Papadopoulos",
    "S. Sané": "Salif Sané",
    "J. Elez": "Josip Elez",
    "M. Akanji": "Manuel Akanji",
    "J. Vestergaard": "Jannik Vestergaard",
    "J. Bruma": "Jeffrey Bruma",
    "B. Pavard": "Benjamin Pavard",
    "M. Mavraj": "Mërgim Mavraj",
    "F. Helander": "Filip Helander",
    "F. Ceccherini": "Federico Ceccherini",
    "M. Lemos": "Mauricio Lemos",
    "L. Ceppitelli": "Luca Ceppitelli",
    "N. Spolli": "Nicolas Spolli",
    "D. Dainelli": "Dario Dainelli",
    "J. Andersen": "Joachim Andersen",
    "B. Djimsiti": "Berat Djimsiti",
    "M. Bani": "Mattia Bani",
    "T. Heurtaux": "Thomas Heurtaux",
    "E. Goldaniga": "Edoardo Goldaniga",
    "G. Pezzella": "Germán Pezzella",
    "T. Cionek": "Thiago Cionek",
    "A. Caracciolo": "Antonio Caracciolo",
    "A. Ferrari": "Alessandro Ferrari",
    "T. Letschert": "Timo Letschert",
    "João Miranda": "João Miranda",
    "A. Izzo": "Armando Izzo",
    "E. Zukanović": "Ervin Zukanović",
    "K. Adénon": "Kassimou Adénon",
    "P. Djilobodji": "Papy Djilobodji",
    "D. Da Silva": "Damien Da Silva",
    "Diego Carlos": "Diego Carlos",
    "P. Gouano": "Prince-Désir Gouano",
    "Edgar Ié": "Edgar Ié",
    "B. Dibassy": "Bakaye Dibassy",
    "M. Debuchy": "Mathieu Debuchy",
    "N. Pallois": "Nicolas Pallois",
    "K. Théophile Catherine": "Kévin Théophile-Catherine",
    "C. Jullien": "Christopher Jullien",
    "I. Cissokho": "Issa Cissokho",
    "K. Mangane": "Kader Mangane",
    "N. Subotić": "Neven Subotić",
    "W. Lautoa": "Wesley Lautoa",
    "C. Kerbrat": "Christophe Kerbrat",
    "J. Koundé": "Jules Koundé",
    "M. Niakhaté": "Mouhamadou Niakhaté",
    "V. Selimovic": "Vojislav Selimović",
    "J. Cordoval": "Jimmy Cordoval"
}


In [3]:
# Sanity check: number of distinct short-name keys in the mapping.
len(player_full_names)

99

In [None]:
def _fetch_json(url, fetch_label, decode_label, timeout):
    """
    GET `url` and decode the body as JSON, reporting failures via print.

    Args:
        url (str): Fully built URL to request.
        fetch_label (str): Text used in the "Error fetching ..." message.
        decode_label (str): Text used in the "Error decoding JSON from ..." message.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        tuple: (True, decoded_payload) on success, (False, None) on any
               network, HTTP-status or JSON-decoding failure.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Raise an HTTPError for bad responses (4xx or 5xx status codes)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {fetch_label}: {e}")
        return False, None

    # Decode separately from the request: depending on the installed requests
    # version the decode error may or may not be a RequestException, but it is
    # always a ValueError, so this keeps the two failure messages distinct.
    try:
        return True, response.json()
    except ValueError:
        print(f"Error decoding JSON from {decode_label}.")
        return False, None


def get_player_market_value_history(player_name, timeout=30):
    """
    Fetches the market value history for a given player name using the Transfermarkt API.

    The lookup is two requests: a name search (the first hit is assumed to be
    the wanted player) followed by a market-value request for that player's ID.

    Args:
        player_name (str): The name of the player to search for.
        timeout (float): Seconds to wait for each HTTP request. Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        list: A list of dictionaries, where each dictionary represents an entry
              in the player's market value history, or None if the player
              is not found or an error occurs.
    """
    base_url = "https://transfermarkt-api.fly.dev"

    # Step 1: Get player ID
    # Percent-encode the whole name (spaces, accents and reserved characters
    # such as '/' or '?') so it is always a single, valid path segment.
    encoded_name = quote(player_name, safe="")
    search_url = f"{base_url}/players/search/{encoded_name}?page_number=1"
    print(f"Searching for player ID for '{player_name}': {search_url}")
    ok, search_data = _fetch_json(
        search_url,
        f"player ID for {player_name}",
        f"player ID search for {player_name}",
        timeout,
    )
    if not ok:
        return None

    # Check if any results were returned (also guards against non-dict payloads)
    results = search_data.get("results") if isinstance(search_data, dict) else None
    if not results:
        print(f"Player '{player_name}' not found.")
        return None

    # Assuming the first result is the desired player for simplicity
    first_match = results[0]
    player_id = first_match.get("id")
    if player_id is None:
        # A hit without an ID cannot be followed up; treat it as "not found".
        print(f"Player '{player_name}' not found.")
        return None
    player_found_name = first_match.get("name", player_name)
    print(f"Found player: {player_found_name} with ID: {player_id}")

    # Step 2: Get market value history
    market_value_url = f"{base_url}/players/{player_id}/market_value"
    print(f"Fetching market value history for '{player_found_name}': {market_value_url}")
    ok, market_value_data = _fetch_json(
        market_value_url,
        f"market value for {player_found_name}",
        f"market value API for {player_found_name}",
        timeout,
    )
    if not ok or not isinstance(market_value_data, dict):
        return None

    # Return the market value history list if available, otherwise None
    return market_value_data.get("marketValueHistory")


def get_multiple_players_market_values(player_names):
    """
    Look up the market value history of every player in `player_names`.

    Args:
        player_names (list): Player names (strings) to look up, in order.

    Returns:
        dict: Maps each player name to whatever
              get_player_market_value_history returned for it: a list of
              history dicts, or None when the player was not found or the
              lookup failed. A name that occurs more than once is looked up
              each time and the last result is kept.
    """
    def _announce_and_fetch(player):
        # Print the progress banner first, then do the lookup.
        print(f"\n--- Processing {player} ---")
        return get_player_market_value_history(player)

    return {player: _announce_and_fetch(player) for player in player_names}



# Build the dataset by looking up every full name in the mapping
market_value_dataset = get_multiple_players_market_values(list(player_full_names.values()))

In [5]:
def analyze_market_value_changes(market_value_dataset, end_of_2017_18_season_date_str="2018-06-30"):
    """
    Analyzes the market value changes for each player in the dataset.
    Specifically, it finds the market value at the end of the 2017/2018 season,
    their age at that time, and the highest value achieved after that date.

    Args:
        market_value_dataset (dict): Maps player name to a list of history
            entries (dicts with a "date" in YYYY-MM-DD format and optional
            "marketValue" / "age"), or to None/empty when no history exists.
            Entries may be in any order.
        end_of_2017_18_season_date_str (str): The cutoff date for the end of the 2017/2018 season (YYYY-MM-DD).

    Returns:
        dict: A dictionary containing analysis results for each player.
              Each player's entry will include:
              - 'value_end_2017_18_season': Market value of the latest entry at or before the cutoff date.
              - 'age_end_2017_18_season': Player's age in that same entry.
              - 'highest_value_after': Highest numeric market value after the cutoff date.
              - 'difference': highest_value_after minus value_end_2017_18_season,
                or None when either side is missing.
              - 'status': 'Analyzed', 'No history data', or a message naming
                which of the two values could not be found.

    Raises:
        KeyError: If a history entry has no "date" key.
        ValueError: If a date does not match the YYYY-MM-DD format.
    """
    date_format = "%Y-%m-%d"
    cutoff_date = datetime.strptime(end_of_2017_18_season_date_str, date_format)
    analysis_results = {}

    for player_name, history in market_value_dataset.items():
        if not history:
            analysis_results[player_name] = {
                'value_end_2017_18_season': None,
                'age_end_2017_18_season': None,
                'highest_value_after': None,
                'difference': None,
                'status': 'No history data'
            }
            continue

        # Parse every date once and order the entries explicitly instead of
        # trusting the input to be chronological. The sort is stable, so
        # entries sharing a date keep their original relative order.
        dated_entries = sorted(
            ((datetime.strptime(entry["date"], date_format), entry) for entry in history),
            key=lambda dated_entry: dated_entry[0],
        )

        value_at_season_end = None
        age_at_season_end = None
        highest_value_after = None

        for entry_date, entry in dated_entries:
            if entry_date <= cutoff_date:
                # Later entries overwrite earlier ones, leaving the latest
                # entry at or before the cutoff.
                value_at_season_end = entry.get("marketValue")
                age_at_season_end = entry.get("age")
                continue

            # After the cutoff: track the maximum, ignoring non-numeric values.
            market_value = entry.get("marketValue")
            if isinstance(market_value, (int, float)) and (
                highest_value_after is None or market_value > highest_value_after
            ):
                highest_value_after = market_value

        difference = None
        status = 'Analyzed'
        if value_at_season_end is not None and highest_value_after is not None:
            difference = highest_value_after - value_at_season_end
        elif value_at_season_end is None:
            status = 'No value found at end of 2017/18 season'
        else:
            status = 'No highest value found after 2017/18 season'

        analysis_results[player_name] = {
            'value_end_2017_18_season': value_at_season_end,
            'age_end_2017_18_season': age_at_season_end,
            'highest_value_after': highest_value_after,
            'difference': difference,
            'status': status
        }
    return analysis_results

In [6]:
# Perform the analysis using the function's default cutoff date (2018-06-30)
market_value_analysis = analyze_market_value_changes(market_value_dataset)

In [7]:
# The analysis dict is keyed by player name, so the constructor puts players in
# columns; transpose to get one row per player and one column per metric.
df_market_value = pd.DataFrame(market_value_analysis).T

In [8]:
def format_value(value):
    """
    Format a monetary amount in euros as a short human-readable string.

    Args:
        value: The amount to format. None and NaN are treated as missing.
            Anything that is not an int or float is passed through str().

    Returns:
        str: "N/A" for missing values; otherwise the amount scaled to
             millions ("€58.0m") or thousands ("€1.5k") by its magnitude, or
             unscaled below 1,000 ("€500"). Negative amounts keep the same
             scaling and carry a leading minus sign ("-€5.0m").
    """
    if value is None:
        return "N/A"
    if isinstance(value, (int, float)):
        # NaN is the only value that is not equal to itself; pandas uses it
        # for missing numbers, so report it like None.
        if value != value:
            return "N/A"
        # Scale by magnitude so losses get the same k/m treatment as gains.
        sign = "-" if value < 0 else ""
        magnitude = abs(value)
        if magnitude >= 1_000_000:
            return f"{sign}€{magnitude / 1_000_000:.1f}m"
        if magnitude >= 1_000:
            return f"{sign}€{magnitude / 1_000:.1f}k"
        return f"{sign}€{magnitude}"
    return str(value)

# Add a display-only text version of 'difference'; the numeric column is kept for sorting and statistics.
df_market_value['difference_formatted'] = df_market_value['difference'].apply(format_value)

In [9]:
# Largest value increase first. Reassign instead of using inplace=True so the
# cell produces its result explicitly rather than mutating the frame in place.
df_market_value = df_market_value.sort_values(by='difference', ascending=False)

In [10]:
pd.set_option('display.max_rows', None)  # None disables row truncation when frames are displayed

In [22]:
# First 10 rows among players aged 28 or younger at the end of 2017/18; this is a
# "top 10 by difference" only because the frame was sorted by 'difference' earlier.
# NOTE(review): the saved output shows 'rank_of_DCI_in_league', which is only added
# in a later cell, and this cell's execution count is out of sequence — re-run the
# notebook top to bottom to confirm the output.
df_market_value[df_market_value['age_end_2017_18_season'] <= 28].head(10)

Unnamed: 0,value_end_2017_18_season,age_end_2017_18_season,highest_value_after,difference,status,difference_formatted,rank_of_DCI_in_league
Jules Koundé,7000000,19,65000000,58000000,Analyzed,€58.0m,17
Diego Carlos,10000000,25,50000000,40000000,Analyzed,€40.0m,4
Aaron Wan-Bissaka,1000000,20,40000000,39000000,Analyzed,€39.0m,8
Clément Lenglet,25000000,22,60000000,35000000,Analyzed,€35.0m,18
Joachim Andersen,1500000,22,35000000,33500000,Analyzed,€33.5m,8
Manuel Akanji,22000000,22,45000000,23000000,Analyzed,€23.0m,16
Berat Djimsiti,1000000,25,22000000,21000000,Analyzed,€21.0m,9
Benjamin Pavard,30000000,22,50000000,20000000,Analyzed,€20.0m,19
Philipp Lienhart,2500000,21,20000000,17500000,Analyzed,€17.5m,1
Lewis Dunk,10000000,26,27000000,17000000,Analyzed,€17.0m,2


In [23]:
# Show the whole frame; 'display.max_rows' was set to None earlier, so rows are not truncated.
df_market_value

Unnamed: 0,value_end_2017_18_season,age_end_2017_18_season,highest_value_after,difference,status,difference_formatted,rank_of_DCI_in_league
Jules Koundé,7000000.0,19.0,65000000.0,58000000.0,Analyzed,€58.0m,17
Diego Carlos,10000000.0,25.0,50000000.0,40000000.0,Analyzed,€40.0m,4
Aaron Wan-Bissaka,1000000.0,20.0,40000000.0,39000000.0,Analyzed,€39.0m,8
Clément Lenglet,25000000.0,22.0,60000000.0,35000000.0,Analyzed,€35.0m,18
Joachim Andersen,1500000.0,22.0,35000000.0,33500000.0,Analyzed,€33.5m,8
Manuel Akanji,22000000.0,22.0,45000000.0,23000000.0,Analyzed,€23.0m,16
Berat Djimsiti,1000000.0,25.0,22000000.0,21000000.0,Analyzed,€21.0m,9
Benjamin Pavard,30000000.0,22.0,50000000.0,20000000.0,Analyzed,€20.0m,19
Philipp Lienhart,2500000.0,21.0,20000000.0,17500000.0,Analyzed,€17.5m,1
Lewis Dunk,10000000.0,26.0,27000000.0,17000000.0,Analyzed,€17.0m,2


In [12]:
countries = ['England', 'Spain', 'Germany', 'Italy', 'France']
rank_of_DCI_in_league = []


def fix_unicode_escape(s):
    """
    Decode literal Unicode escape sequences (e.g. the six characters
    ``\\u00e9``) in a string into the characters they stand for (``é``).

    Characters that are already real non-ASCII text are left intact: the
    string is encoded as Latin-1 with ``backslashreplace`` so that every
    character becomes either a single byte or an escape sequence, both of
    which ``unicode_escape`` decodes back to the same character. (Encoding as
    UTF-8 first would turn an existing ``é`` into ``Ã©``.)

    Args:
        s: The value to fix. Non-string values (e.g. NaN for a missing cell)
            are returned unchanged.

    Returns:
        The decoded string, or `s` unchanged if it is not a string or cannot
        be decoded.
    """
    if not isinstance(s, str):
        return s
    try:
        return s.encode('latin-1', 'backslashreplace').decode('unicode_escape')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Malformed escape sequence: keep the original text rather than fail.
        return s


for country in countries:
    country_df = pd.read_csv(f'defensive_summaries/{country}_defensive_summary.csv')

    # Repair escaped characters in the two text columns that are displayed / looked up.
    country_df['name'] = [fix_unicode_escape(x) for x in country_df['name']]
    country_df['team'] = [fix_unicode_escape(x) for x in country_df['team']]

    # Keep rows with total_time above 200 and take the 20 best by
    # new_DCI_per_90; resetting the index makes row position match rank order.
    final_df = (
        country_df[country_df['total_time'] > 200]
        .sort_values(by='new_DCI_per_90', ascending=False)
        .head(20)
        .reset_index(drop=True)
    )

    display(final_df[['team', 'name']])

    # Rank is the 1-based position within this league's top list. The lookup
    # is deliberately strict: a short name missing from player_full_names
    # raises KeyError so the mapping gets extended instead of silently skipped.
    for rank, short_name in enumerate(final_df['name'], start=1):
        rank_of_DCI_in_league.append({
            'name': player_full_names[short_name],
            'rank': rank
        })

Unnamed: 0,team,name
0,Stoke City,R. Shawcross
1,Brighton & Hove Albion,L. Dunk
2,Newcastle United,C. Clark
3,Brighton & Hove Albion,S. Duffy
4,Newcastle United,F. Lejeune
5,Southampton,W. Hoedt
6,West Bromwich Albion,Ahmed Hegazy
7,Crystal Palace,A. Wan-Bissaka
8,Watford,C. Kabasele
9,Burnley,K. Long


Unnamed: 0,team,name
0,Sevilla,N. Pareja
1,Getafe,L. Cabrera
2,Real Betis,Bartra
3,Deportivo Alavés,G. Maripán
4,Deportivo La Coruña,F. Schär
5,Real Madrid,Jesús Vallejo
6,Getafe,Djené
7,Getafe,Juan Cala
8,Valencia,Rúben Vezo
9,Leganés,E. Muñoz


Unnamed: 0,team,name
0,Freiburg,P. Lienhart
1,Mainz 05,A. Hack
2,Mainz 05,L. Balogun
3,Hertha BSC,N. Stark
4,Hannover 96,Felipe
5,Köln,D. Maroh
6,Mainz 05,S. Bell
7,Freiburg,M. Gulde
8,Köln,D. Heintz
9,Hoffenheim,B. Hübner


Unnamed: 0,team,name
0,Bologna,F. Helander
1,Crotone,F. Ceccherini
2,Sassuolo,M. Lemos
3,Cagliari,L. Ceppitelli
4,Genoa,N. Spolli
5,Crotone,L. Cabrera
6,Chievo,D. Dainelli
7,Sampdoria,J. Andersen
8,Benevento,B. Djimsiti
9,Chievo,M. Bani


Unnamed: 0,team,name
0,Amiens SC,K. Adénon
1,Dijon,P. Djilobodji
2,Caen,D. Da Silva
3,Nantes,Diego Carlos
4,Amiens SC,P. Gouano
5,Lille,Edgar Ié
6,Amiens SC,B. Dibassy
7,Saint-Étienne,M. Debuchy
8,Nantes,N. Pallois
9,Saint-Étienne,K. Théophile Catherine


In [13]:
# Start with an empty rank column; players that appear in a league top list get their rank filled in by the following cell.
df_market_value['rank_of_DCI_in_league'] = None

In [14]:
# Copy each collected league rank onto the matching player row of df_market_value.
# Entries whose player has no row in the frame are skipped; if a player occurs
# more than once, the later entry overwrites the earlier one.
for ranking in rank_of_DCI_in_league:
    full_name, league_rank = ranking['name'], ranking['rank']
    if full_name not in df_market_value.index:
        continue
    df_market_value.at[full_name, 'rank_of_DCI_in_league'] = league_rank

In [15]:
# Despite the name, the filter is inclusive (<= 28): players aged 28 or younger
# at the end of 2017/18, i.e. "younger than 29" as the summary text puts it.
under_28 = df_market_value[df_market_value['age_end_2017_18_season'] <= 28]

In [16]:
# Summary statistics for the age-filtered cohort. The two shares are counts of
# rows meeting the condition divided by the number of rows in the cohort.
differences = under_28["difference"]
cohort_size = len(under_28)

average_difference = format_value(differences.mean())
print(f'Average expected profit from a randomly selected player younger than 29: {average_difference}')

profit_share = sum(differences > 0) / cohort_size
print(f'Chance of making a profit from a randomly selected player younger than 29: {profit_share:.2%}')

break_even_share = sum(differences >= 0) / cohort_size
print(f'Chance of at least breaking even (not losing value) from a randomly selected player younger than 29: {break_even_share:.2%}')

Average expected profit from a randomly selected player younger than 29: €7.3m
Chance of making a profit from a randomly selected player younger than 29: 65.08%
Chance of at least breaking even (not losing value) from a randomly selected player younger than 29: 87.30%
