In [3]:
import pandas as pd
import plotly.express as px

In [18]:
pdf_transfers = pd.read_csv('data/input/transfers.csv')
pdf_transfers["overpayment"] = pdf_transfers["transfer_fee"] - pdf_transfers["market_value_in_eur"]

In [None]:
# Config variable to exclude players with market value or transfer fee equal to 0
exclude_zero_values = True

# Group by year and calculate mean transfer fee and market value
pdf_transfers["year"] = pd.to_datetime(pdf_transfers["transfer_date"]).dt.year
yearly_means = pdf_transfers.groupby("year")[["transfer_fee", "market_value_in_eur"]].mean().reset_index()



if exclude_zero_values:
    yearly_means_filtered = pdf_transfers[
        (pdf_transfers["market_value_in_eur"] > 0) & (pdf_transfers["transfer_fee"] > 0)
    ].groupby("year")[["transfer_fee", "market_value_in_eur"]].mean().reset_index()
else:
    yearly_means_filtered = yearly_means

fig_yearly = px.line(
    yearly_means_filtered,
    x="year",
    y=["transfer_fee", "market_value_in_eur"],
    labels={"value": "EUR", "variable": "Metric"},
    title="Mean Transfer Fee and Market Value per Year"
)
fig_yearly.update_layout(
    xaxis_title="Year",
    yaxis_title="Mean Value (EUR)",
    legend_title="Metric"
)
fig_yearly.show()

In [20]:
pdf_transfers.sort_values("overpayment")

Unnamed: 0,player_id,transfer_date,transfer_season,from_club_id,to_club_id,from_club_name,to_club_name,transfer_fee,market_value_in_eur,player_name,overpayment
4926,342229,2024-07-01,24/25,583,418,Paris SG,Real Madrid,0.0,180000000.0,Kylian Mbappé,-180000000.0
49883,342229,2018-06-30,17/18,583,162,Paris SG,Monaco,0.0,120000000.0,Kylian Mbappé,-120000000.0
2206,401923,2024-09-04,24/25,6195,141,Napoli,Galatasaray,0.0,100000000.0,Victor Osimhen,-100000000.0
40047,80444,2019-08-19,19/20,131,27,Barcelona,Bayern Munich,0.0,90000000.0,Philippe Coutinho,-90000000.0
20978,418560,2022-07-01,22/23,16,281,Bor. Dortmund,Man City,60000000.0,150000000.0,Erling Haaland,-90000000.0
...,...,...,...,...,...,...,...,...,...,...,...
79641,35207,1995-07-01,95/96,42281,11896,PTSV Dort. Yth.,Dortmund Yth.,0.0,,Marco Reus,
79642,22141,1994-07-01,94/95,59178,29463,Castellammare,Sorrento Youth,,,Antonio Mirante,
79643,33829,1994-07-01,94/95,58405,53957,FC Penafiel You,Sporting Yth.,,,José Fonte,
79644,40680,1994-07-01,94/95,47320,47318,Lichtenw. Yth.,Schornbach Yth.,0.0,,Sven Ulreich,


In [19]:
pdf_transfers.sort_values("overpayment" ,ascending=False)

Unnamed: 0,player_id,transfer_date,transfer_season,from_club_id,to_club_id,from_club_name,to_club_name,transfer_fee,market_value_in_eur,player_name,overpayment
51801,288230,2017-08-25,17/18,16,131,Bor. Dortmund,Barcelona,135000000.0,33000000.0,Ousmane Dembélé,102000000.0
16528,648195,2023-01-31,22/23,294,631,Benfica,Chelsea,121000000.0,55000000.0,Enzo Fernández,66000000.0
47851,342229,2018-07-01,18/19,162,583,Monaco,Paris SG,180000000.0,120000000.0,Kylian Mbappé,60000000.0
18559,602105,2022-08-30,22/23,610,985,Ajax,Man Utd,95000000.0,35000000.0,Antony,60000000.0
46468,192279,2018-08-08,18/19,621,631,Athletic,Chelsea,80000000.0,20000000.0,Kepa Arrizabalaga,60000000.0
...,...,...,...,...,...,...,...,...,...,...,...
79641,35207,1995-07-01,95/96,42281,11896,PTSV Dort. Yth.,Dortmund Yth.,0.0,,Marco Reus,
79642,22141,1994-07-01,94/95,59178,29463,Castellammare,Sorrento Youth,,,Antonio Mirante,
79643,33829,1994-07-01,94/95,58405,53957,FC Penafiel You,Sporting Yth.,,,José Fonte,
79644,40680,1994-07-01,94/95,47320,47318,Lichtenw. Yth.,Schornbach Yth.,0.0,,Sven Ulreich,


In [8]:
pdf_transfers.sort_values("transfer_fee" ,ascending=False)

Unnamed: 0,player_id,transfer_date,transfer_season,from_club_id,to_club_id,from_club_name,to_club_name,transfer_fee,market_value_in_eur,player_name
47851,342229,2018-07-01,18/19,162,583,Monaco,Paris SG,180000000.0,120000000.0,Kylian Mbappé
51801,288230,2017-08-25,17/18,16,131,Bor. Dortmund,Barcelona,135000000.0,33000000.0,Ousmane Dembélé
50887,80444,2018-01-08,17/18,31,131,Liverpool,Barcelona,135000000.0,90000000.0,Philippe Coutinho
41214,462250,2019-07-03,19/20,294,13,Benfica,Atlético Madrid,127200000.0,70000000.0,João Félix
16528,648195,2023-01-31,22/23,294,631,Benfica,Chelsea,121000000.0,55000000.0,Enzo Fernández
...,...,...,...,...,...,...,...,...,...,...
79635,35247,1997-07-01,97/98,54530,54529,Vitória SC Yth,Vitória SC U15,,,Vieirinha
79638,33829,1996-07-01,96/97,53957,53956,Sporting Yth.,Sporting Sub-15,,,José Fonte
79639,52480,1996-07-01,96/97,58405,54532,FC Penafiel You,Boavista CJ,,,Rúben Ribeiro
79642,22141,1994-07-01,94/95,59178,29463,Castellammare,Sorrento Youth,,,Antonio Mirante


In [11]:
# Filter transfers where Chelsea is either the buying or selling club
pdf_chelsea = pdf_transfers[
    (pdf_transfers["from_club_name"] == "Chelsea") | (pdf_transfers["to_club_name"] == "Chelsea")
]

pdf_chelsea

Unnamed: 0,player_id,transfer_date,transfer_season,from_club_id,to_club_id,from_club_name,to_club_name,transfer_fee,market_value_in_eur,player_name
1,1138758,2026-07-01,26/27,336,631,Sporting CP,Chelsea,52140000.0,45000000.0,Geovany Quenda
84,670717,2025-07-01,25/26,336,631,Sporting CP,Chelsea,22270000.0,15000000.0,Dário Essugo
302,386047,2025-06-30,24/25,405,631,Aston Villa,Chelsea,0.0,25000000.0,Axel Disasi
331,401173,2025-06-30,24/25,631,985,Chelsea,Man Utd,0.0,30000000.0,Jadon Sancho
383,462250,2025-06-30,24/25,5,631,AC Milan,Chelsea,0.0,25000000.0,João Félix
...,...,...,...,...,...,...,...,...,...,...
75805,52119,2009-12-31,09/10,990,631,Coventry,Chelsea,0.0,,Patrick van Aanholt
75882,74683,2009-08-18,09/10,779,631,MFK Kosice,Chelsea,1750000.0,1500000.0,Nemanja Matic
75891,52119,2009-08-07,09/10,631,990,Chelsea,Coventry,0.0,,Patrick van Aanholt
75996,52119,2009-07-01,09/10,6918,631,Chelsea U18,Chelsea,,,Patrick van Aanholt


In [17]:
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots

# Add a column to indicate if Chelsea is the buying or selling club
pdf_chelsea["chelsea_role"] = pdf_chelsea.apply(
    lambda row: "Bought by Chelsea" if row["to_club_name"] == "Chelsea" else "Sold by Chelsea",
    axis=1
)

fig = px.scatter(
    pdf_chelsea,
    x="transfer_fee",
    y="market_value_in_eur",
    color="chelsea_role",
    hover_data=["player_name", "from_club_name", "to_club_name"],
    title="Chelsea Transfers: Transfer Fee vs Market Value"
)

# Add y = x diagonal line
max_val = max(pdf_chelsea["transfer_fee"].max(), pdf_chelsea["market_value_in_eur"].max())
fig.add_shape(
    type="line",
    x0=0, y0=0, x1=max_val, y1=max_val,
    line=dict(color="gray", dash="dash"),
    name="y = x"
)

fig.update_layout(
    title={
        "text": "Chelsea Transfers: Transfer Fee vs Market Value<br><sup>Points below the diagonal line indicate players bought for more than their market value.</sup>",
        "x": 0.5
    },
    xaxis_title="Transfer Fee (EUR)",
    yaxis_title="Market Value at Transfer (EUR)",
    legend_title="Chelsea Role",
    showlegend=True
)

fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

