In [13]:
import pandas as pd

year = 2023

# Tournament calendar (its file name uses the two-digit year) and the
# player/event registrations for the same year.
df_tornei = pd.read_csv(f"tornei/tornei_{str(year)[-2:]}.csv")
df_iscrizioni = pd.read_csv(f"df_iscrizioni_player_event/df_{year}.csv")

# 1) Distinct, non-null player codes and event ids
players = df_iscrizioni["PlayerCode"].dropna().unique()
events = df_tornei["EventId"].dropna().unique()

# 2) Full grid: one row for every (player, event) combination
grid = pd.MultiIndex.from_product(
    [players, events],
    names=["PlayerCode", "EventId"],
).to_frame(index=False)

# 3) Keep only the key columns; every remaining row means "registered"
iscr_key = (
    df_iscrizioni[["PlayerCode", "EventId"]]
    .drop_duplicates()
    .assign(iscritto=1)
)

# 4) Left-join the registrations onto the grid (pairs without a match get 0),
# 5) then attach the tournament attributes once per row.
out = (
    grid
    .merge(iscr_key, on=["PlayerCode", "EventId"], how="left")
    .assign(iscritto=lambda frame: frame["iscritto"].fillna(0).astype(int))
    .merge(df_tornei, on="EventId", how="left")
)

display(out)


Unnamed: 0,PlayerCode,EventId,iscritto,year,date_tournament,EventName,EventType,EventCountry,Surface,TotPrizeMoney
0,mv14,328,0,2024,2024-10-21,Basel,500,Switzerland,Hard,2385100
1,mv14,7485,0,2024,2024-10-14,Antwerp,250,Belgium,Hard,690135
2,mv14,329,0,2024,2024-09-23,Tokyo,500,Japan,Hard,1818380
3,mv14,6242,0,2024,2024-08-19,Winston-Salem,250,"NC, U.S.A.",Hard,779780
4,mv14,314,0,2024,2024-07-15,Gstaad,250,Switzerland,Clay,579320
...,...,...,...,...,...,...,...,...,...,...
18355,m0bn,315,0,2024,2024-07-15,Newport,250,"RI, U.S.A.",Grass,661585
18356,m0bn,7694,0,2024,2024-05-20,Lyon,250,France,Clay,579320
18357,m0bn,360,0,2024,2024-04-01,Marrakech,250,Morocco,Clay,579320
18358,m0bn,316,0,2024,2024-07-15,Bastad,250,Sweden,Clay,579320


In [14]:
# Build df_finale: the (PlayerCode, EventId) pairs whose player nationality
# equals the alpha-3 code of the country hosting the event.
# NOTE(review): both inputs below are pinned to the 2025 files, while the grid
# in `out` is built from `year`; confirm that this mismatch is intentional.
tornei = pd.read_csv("df_iscrizioni_player_event/df_2025.csv")
players = pd.read_csv("ranking_from_atp/atp_senza_ripetizioni_2025.csv")

# Keep only the columns needed for the comparison
tornei = tornei[["PlayerCode", "EventId", "EventCountry"]]
players = players[["Player Id", "Nationality"]]

# Attach each player's nationality; the "Player Id" key column is kept
# alongside "PlayerCode" in the merged frame.
df = tornei.merge(
    players,
    left_on="PlayerCode",
    right_on="Player Id",
    how="left",
)

# Country lookup table. Read the file once; geo2 is not used in this cell and
# is kept only as an independent copy in case other cells reference it.
geo1 = pd.read_csv("geografia.csv")
geo2 = geo1.copy()

# Resolve the event country name to its row in the lookup table.
# NOTE(review): EventCountry values such as "NC, U.S.A." appear in the
# tournament data; they presumably have no exact match in `name`, so those
# events would never be flagged -- verify against geografia.csv.
df_primo_join = df.merge(
    geo1,
    left_on="EventCountry",
    right_on="name",
    how="left",
)

# Upper-case the nationality before comparing it with the alpha-3 code
df_primo_join["Nationality"] = df_primo_join["Nationality"].str.upper()

# Keep only matching rows and the columns of interest; .copy() makes the
# result an independent frame rather than a slice of df_primo_join.
same_country = df_primo_join["Nationality"] == df_primo_join["alpha-3"]
df_finale = df_primo_join.loc[
    same_country,
    ["PlayerCode", "EventId", "EventCountry", "Nationality"],
].copy()
df_finale

Unnamed: 0,PlayerCode,EventId,EventCountry,Nationality
0,s0re,5014,China,CHN
1,s0re,747,China,CHN
2,s0re,7581,China,CHN
13,bd06,8994,Spain,ESP
19,bd06,1536,Spain,ESP
...,...,...,...,...
9632,b0bu,7009,France,FRA
9637,b0bu,2973,France,FRA
9639,b0bu,9162,France,FRA
9641,b0bu,7874,France,FRA


In [15]:
# Add a 0/1 Same_Nationality column to `out`: 1 when the row's
# (PlayerCode, EventId) pair is present in df_finale, 0 otherwise.
# A single left merge with an indicator replaces the previous row-by-row
# scan of df_finale, which re-filtered the whole frame for every row of `out`.
# NOTE(review): df_finale is derived from the registrations file, so pairs the
# player never registered for presumably stay 0 even when the countries
# match -- confirm this is the intended meaning of the flag.
pair_cols = ["PlayerCode", "EventId"]
same_nat_pairs = df_finale[pair_cols].drop_duplicates()

pair_lookup = out[pair_cols].merge(
    same_nat_pairs,
    on=pair_cols,
    how="left",
    indicator=True,
)

# The left merge against de-duplicated keys yields exactly one row per row of
# `out`, in the same order, so the flags can be assigned positionally.
out["Same_Nationality"] = (pair_lookup["_merge"] == "both").to_numpy().astype(int)
out

Unnamed: 0,PlayerCode,EventId,iscritto,year,date_tournament,EventName,EventType,EventCountry,Surface,TotPrizeMoney,Same_Nationality
0,mv14,328,0,2024,2024-10-21,Basel,500,Switzerland,Hard,2385100,0
1,mv14,7485,0,2024,2024-10-14,Antwerp,250,Belgium,Hard,690135,0
2,mv14,329,0,2024,2024-09-23,Tokyo,500,Japan,Hard,1818380,0
3,mv14,6242,0,2024,2024-08-19,Winston-Salem,250,"NC, U.S.A.",Hard,779780,0
4,mv14,314,0,2024,2024-07-15,Gstaad,250,Switzerland,Clay,579320,0
...,...,...,...,...,...,...,...,...,...,...,...
18355,m0bn,315,0,2024,2024-07-15,Newport,250,"RI, U.S.A.",Grass,661585,0
18356,m0bn,7694,0,2024,2024-05-20,Lyon,250,France,Clay,579320,0
18357,m0bn,360,0,2024,2024-04-01,Marrakech,250,Morocco,Clay,579320,0
18358,m0bn,316,0,2024,2024-07-15,Bastad,250,Sweden,Clay,579320,0


In [16]:
# Write the grid to a per-year CSV file, without the index column.
output_path = f"df_per_aggiungere_pts_def/df_{year}.csv"
out.to_csv(output_path, index=False)