### transfermarkt 전처리코드
1. 대륙별 축구연맹 결측치 처리
2. 국가명 표기 통일
3. 리그명 결측치 처리
4. 선수 포지션 표기 통일
5. 선수 몸값 (유로 -> 원화 표기변경)

In [1]:
import pandas as pd
import numpy as np
from unidecode import unidecode

In [2]:
player_df = pd.read_csv('../data/tfm_totaldata.csv', encoding='utf-8-sig')
player_df

Unnamed: 0,name,position,age,nation,club,value,confederation,league
0,Kaoru Mitoma,Left Winger,28,Japan,Brighton & Hove Albion,€40.00m,UEFA,
1,Ryoya Yamashita,Right Winger,27,Japan,Gamba Osaka,€850k,UEFA,
2,Motohiko Nakajima,Second Striker,26,Japan,Cerezo Osaka,€850k,UEFA,
3,Hirokazu Ishihara,Right-Back,26,Japan,Urawa Red Diamonds,€850k,UEFA,
4,Katsuya Nagato,Left-Back,30,Japan,Vissel Kobe,€850k,UEFA,
...,...,...,...,...,...,...,...,...
5348,John Hou Saeter,Attacking Midfield,27,China,Yunnan Yukun,€350k,,Chinese_SuperLeague
5349,Shiqin Wang,Left-Back,22,China,Zhejiang FC,€350k,,Chinese_SuperLeague
5350,Bin Xu,Centre-Back,21,China,Qingdao West Coast,€350k,,Chinese_SuperLeague
5351,Rodrigo Henrique,Left Winger,32,Brazil,Meizhou Hakka,€350k,,Chinese_SuperLeague


In [None]:
print(player_df.nunique())
print(player_df.dtypes)
print(player_df.isna().sum())

name             5308
position           16
age                26
nation            147
club              760
value             109
confederation       6
league             20
dtype: int64
name             object
position         object
age               int64
nation           object
club             object
value            object
confederation    object
league           object
dtype: object
name                0
position            0
age                 0
nation              0
club                0
value               0
confederation    2488
league           4405
dtype: int64


- confederation값 전처리

In [4]:
player_df['name'] = player_df['name'].apply(unidecode)

In [5]:
# 대륙 연맹별 국가 리스트

# 유럽 축구 연맹
UEFA = [ "Albania", "Andorra", "Armenia", "Austria", "Azerbaijan",
    "Belarus", "Belgium", "Bosnia-Herzegovina", "Bulgaria",
    "Croatia", "Cyprus", "Czech Republic", "Denmark", "England",
    "Estonia", "Faroe Islands", "Finland", "France", "Georgia",
    "Germany", "Gibraltar", "Greece", "Hungary", "Iceland",
    "Israel", "Italy", "Kazakhstan", "Kosovo", "Latvia", "Liechtenstein",
    "Lithuania", "Luxembourg", "Malta", "Moldova", "Monaco",
    "Montenegro", "Netherlands", "North Macedonia", "Northern Ireland",
    "Norway", "Poland", "Portugal", "Ireland", "Romania",
    "Russia", "San Marino", "Scotland", "Serbia", "Slovakia",
    "Slovenia", "Spain", "Sweden", "Switzerland", "Türkiye",
    "Ukraine", "Wales" ]

# 아시아 축구 연맹
AFC = [ "Afghanistan", "Australia", "Bahrain", "Bangladesh", "Bhutan",
    "Brunei", "Cambodia", "China", "Chinese Taipei", "Guam",
    "Hong Kong", "India", "Indonesia", "Iran", "Iraq",
    "Japan", "Jordan", "Kuwait", "Kyrgyzstan", "Laos",
    "Lebanon", "Macau", "Malaysia", "Maldives", "Mongolia",
    "Myanmar", "Nepal", "Korea, North", "Oman", "Pakistan",
    "Palestine", "Philippines", "Qatar", "Saudi Arabia", "Singapore",
    "Korea, South", "Sri Lanka", "Syria", "Tajikistan", "Thailand",
    "Timor-Leste", "Turkmenistan", "United Arab Emirates", "Uzbekistan", "Vietnam",
    "Yemen" ]

# 아프리카 축구 연맹
CAF = [ "Algeria", "Angola", "Benin", "Botswana", "Burkina Faso",
    "Burundi", "Cabo Verde", "Cameroon", "Cote d'Ivoire", "Central African Republic", "Chad",
    "Comoros", "Congo", "Democratic Republic of the Congo", "Djibouti", "Egypt",
    "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon",
    "Gambia", "Ghana", "Guinea", "Guinea-Bissau", "Ivory Coast",
    "Kenya", "Lesotho", "Liberia", "Libya", "Madagascar",
    "Malawi", "Mali", "Mauritania", "Mauritius", "Morocco",
    "Mozambique", "Namibia", "Niger", "Nigeria", "Rwanda",
    "São Tomé and Príncipe", "Senegal", "Seychelles", "Sierra Leone", "Somalia",
    "South Africa", "South Sudan", "Sudan", "Tanzania", "Togo",
    "Tunisia", "Uganda", "Zambia", "Zimbabwe" ]

# 북중미 축구 연맹
CONCACAF = [ "Anguilla", "Antigua and Barbuda", "Aruba", "Bahamas", "Barbados",
    "Belize", "Bermuda", "Bonaire", "British Virgin Islands", "Canada",
    "Cayman Islands", "Costa Rica", "Cuba", "Curacao", "Dominica",
    "Dominican Republic", "El Salvador", "French Guiana", "Grenada", "Guadeloupe",
    "Guatemala", "Guyana", "Haiti", "Honduras", "Jamaica",
    "Martinique", "Mexico", "Montserrat", "Nicaragua", "Panama",
    "Puerto Rico", "Saint Kitts and Nevis", "Saint Lucia", "Saint Martin", "Saint Vincent and the Grenadines",
    "Sint Maarten", "Suriname", "Trinidad and Tobago", "Turks and Caicos Islands", "United States",
    "US Virgin Islands" ]

# 남미 축구 연맹
CONMEBOL = [ "Argentina", "Bolivia", "Brazil", "Chile", "Colombia",
    "Ecuador", "Paraguay", "Peru", "Uruguay", "Venezuela" ]

# 오세아니아 축구 연맹
OFC = [ "American Samoa", "Cook Islands", "Fiji", "New Caledonia", "New Zealand",
    "Papua New Guinea", "Samoa", "Solomon Islands", "Tahiti", "Tonga",
    "Vanuatu" ]

In [6]:
confederation_map = {
    "UEFA": UEFA,
    "AFC": AFC,
    "CAF": CAF,
    "CONCACAF": CONCACAF,
    "CONMEBOL": CONMEBOL,
    "OFC": OFC
}

def get_confederation(nation):
    for confed, country_list in confederation_map.items():
        if nation.strip() in country_list:
            return confed
    return 'Unknown'

player_df['confederation'] = player_df['nation'].apply(get_confederation)
player_df

Unnamed: 0,name,position,age,nation,club,value,confederation,league
0,Kaoru Mitoma,Left Winger,28,Japan,Brighton & Hove Albion,€40.00m,AFC,
1,Ryoya Yamashita,Right Winger,27,Japan,Gamba Osaka,€850k,AFC,
2,Motohiko Nakajima,Second Striker,26,Japan,Cerezo Osaka,€850k,AFC,
3,Hirokazu Ishihara,Right-Back,26,Japan,Urawa Red Diamonds,€850k,AFC,
4,Katsuya Nagato,Left-Back,30,Japan,Vissel Kobe,€850k,AFC,
...,...,...,...,...,...,...,...,...
5348,John Hou Saeter,Attacking Midfield,27,China,Yunnan Yukun,€350k,AFC,Chinese_SuperLeague
5349,Shiqin Wang,Left-Back,22,China,Zhejiang FC,€350k,AFC,Chinese_SuperLeague
5350,Bin Xu,Centre-Back,21,China,Qingdao West Coast,€350k,AFC,Chinese_SuperLeague
5351,Rodrigo Henrique,Left Winger,32,Brazil,Meizhou Hakka,€350k,CONMEBOL,Chinese_SuperLeague


In [79]:
print(player_df.isna().sum())
print(player_df['confederation'].value_counts())

name                0
position            0
age                 0
nation              0
club                0
value               0
confederation       0
league           4405
dtype: int64
confederation
UEFA        2335
CONMEBOL     865
AFC          639
CAF          523
CONCACAF     485
OFC          450
Unknown       56
Name: count, dtype: int64


In [7]:
print(player_df['nation'].unique())

['Japan' 'Korea, South' 'China' 'Qatar' 'Australia' 'Iran' 'Saudi Arabia'
 'Thailand' 'Uzbekistan' 'United Arab Emirates' 'Iraq' 'Malaysia'
 'Palestine' 'Oman' 'Pakistan' 'Indonesia' 'Syria' 'Jordan' 'Lebanon'
 'Philippines' 'Hongkong' 'Bangladesh' 'Chinese Taipei' 'Bahrain'
 'Tajikistan' 'Spain' 'Netherlands' 'Greece' 'Germany' 'Italy' 'France'
 'Serbia' 'Switzerland' 'Portugal' 'England' 'Ukraine' 'Croatia' 'Sweden'
 'Belgium' 'Denmark' 'Türkiye' 'Norway' 'Ireland' 'Scotland' 'Poland'
 'Northern Ireland' 'Georgia' 'Slovakia' 'Slovenia' 'Hungary' 'Wales'
 'Czech Republic' 'Iceland' 'Russia' 'Austria' 'Bosnia-Herzegovina'
 'Romania' 'Armenia' 'Kosovo' 'Israel' 'New Zealand' 'Fiji'
 'Solomon Islands' 'Vanuatu' 'Samoa' 'Tonga' 'Neukaledonien' 'Cookinseln'
 'Uruguay' 'Brazil' 'Argentina' 'Chile' 'Ecuador' 'Colombia' 'Paraguay'
 'Venezuela' 'Curacao' 'United States' 'Mexico' 'Canada' 'Honduras'
 'Jamaica' 'Suriname' 'Guyana' 'Haiti' 'Guatemala' 'Panama'
 'Dominican Republic' 'Costa Rica' '

In [8]:
unknown_nations = player_df[player_df['confederation'] == 'Unknown']['nation'].unique().tolist()
print(unknown_nations)

['Hongkong', 'Neukaledonien', 'Cookinseln', 'DR Congo', 'Cape Verde', 'The Gambia']


In [9]:
nation_name_map = {
    'Hongkong': 'Hong Kong',
    'Neukaledonien': 'New Caledonia',
    'Cookinseln': 'Cook Islands',
    'DR Congo': 'Congo',
    'Cape Verde': 'Cabo Verde',
    'The Gambia': 'Gambia'
}

nation_conf_map = {
    'Hong Kong': 'AFC',
    'New Caledonia': 'OFC',
    'Cook Islands': 'OFC',
    'Congo': 'CAF',
    'Cabo Verde': 'CAF',
    'Gambia': 'CAF'
}

In [10]:
# 국가명 변환
player_df['nation'] = player_df['nation'].replace(nation_name_map)

# confederation 매핑
player_df.loc[(player_df['confederation'] == 'Unknown') & (player_df['nation'].isin(nation_conf_map)),'confederation'] = player_df['nation'].map(nation_conf_map)
print(player_df['confederation'].value_counts())

confederation
UEFA        2335
CONMEBOL     865
AFC          641
CAF          574
CONCACAF     485
OFC          453
Name: count, dtype: int64


In [11]:
print(player_df.isna().sum())

name                0
position            0
age                 0
nation              0
club                0
value               0
confederation       0
league           4405
dtype: int64


- league값 전처리

In [12]:
# 현재 클럽 리스트 & 클럽 종류 수 
club_list = player_df['club'].unique().tolist()
print(club_list)
print(player_df['club'].nunique())

['Brighton & Hove Albion', 'Gamba Osaka', 'Cerezo Osaka', 'Urawa Red Diamonds', 'Vissel Kobe', 'Jeonbuk Hyundai Motors', 'Daejeon Hana Citizen', 'Shanghai Shenhua', 'Al-Sadd SC', 'Sparta Rotterdam', 'FC Seoul', 'Avispa Fukuoka', 'Kashima Antlers', 'Kawasaki Frontale', 'Aberdeen FC', 'Nagoya Grampus', 'Sepahan FC', 'NEOM SC', 'NEC Nijmegen', 'BG Pathum United', 'Neftchi Fergana', 'Al-Hilal SFC', 'Kalba FC', 'Shimizu S-Pulse', 'Zakho SC', 'Machida Zelvia', 'Pakhtakor Tashkent', 'Chengdu Rongcheng', 'Kashiwa Reysol', 'Royal Antwerp FC', 'SV Darmstadt 98', 'Nasaf Qarshi', 'Ajman Club', 'Middlesbrough FC', 'Celtic FC', 'Al-Jazira Club', 'BK Häcken', 'Grasshopper Club Zurich', 'Al-Ahli SFC', 'FC Tokyo', 'Excelsior Rotterdam', 'Al-Ittihad Club', 'Mohun Bagan Super Giant', 'Yokohama F. Marinos', 'Minnesota United FC', 'Tractor FC', 'FC OKMK Olmaliq', 'Esteghlal FC', 'Heart of Midlothian FC', 'Le Havre AC', 'Buriram United', 'CS Universitatea Craiova', 'Al-Riyadh SC', 'True Bangkok United', 'Ul

In [13]:
# 현재 리그 리스트 & 리그 종류 수 
league_list = player_df['league'].unique().tolist()
print(league_list)
print(player_df['league'].nunique())

[nan, 'MLS', 'Championship', 'Eredivisie', 'SuperLig', 'Scottish_Premiership', 'Swiss_SuperLeague', 'Austria_Bundesliga', 'Danish_Superligaen', 'Saudi_ProLeague', 'Liga_Portugal', 'Russian_PremierLiga', 'Brasileiro_SerieA', 'Liga_Profesional', 'Liga_MX', 'Jupiler_ProLeague', 'SuperLeague1', 'A_League', 'J1_League', 'K_League1', 'Chinese_SuperLeague']
20


In [14]:
# club -> 리그별 구분 리스트 (25-26시즌 기준)

# 잉글랜드 1부리그
PremierLeague = [
    'Arsenal FC', 'Aston Villa', 'AFC Bournemouth', 'Brentford FC',
    'Brighton & Hove Albion', 'Burnley FC', 'Chelsea FC', 'Crystal Palace',
    'Everton FC', 'Fulham FC', 'Leeds United', 'Liverpool FC',
    'Manchester City', 'Manchester United', 'Newcastle United', 'Nottingham Forest',
    'Sunderland AFC', 'Tottenham Hotspur', 'West Ham United', 'Wolverhampton Wanderers'
]
# 잉글랜드 2부리그
Championship = [
    'Birmingham City', 'Blackburn Rovers', 'Bristol City', 'Charlton Athletic',
    'Coventry City', 'Derby County', 'Hull City', 'Ipswich Town',
    'Leicester City', 'Middlesbrough FC', 'Millwall FC', 'Norwich City',
    'Oxford United', 'Portsmouth FC', 'Preston North End', 'Queens Park Rangers',
    'Sheffield United', 'Sheffield Wednesday', 'Southampton FC', 'Stoke City',
    'Swansea City', 'Watford FC', 'West Bromwich Albion', 'Wrexham AFC'
]
# 스페인 1부리그
LaLiga = [
    'FC Barcelona', 'Atlético de Madrid', 'Real Madrid', 'Athletic Bilbao',
    'Villarreal CF', 'Real Betis Balompié', 'Rayo Vallecano', 'Celta de Vigo',
    'CA Osasuna', 'RCD Mallorca', 'Real Sociedad', 'Valencia CF',
    'Getafe CF', 'Deportivo Alavés', 'Girona FC', 'Sevilla FC',
    'RCD Espanyol Barcelona', 'Levante UD', 'Elche CF', 'Real Oviedo'
]
# 스페인 2부리그
LaLiga_2 = [
    'Real Valladolid CF', 'AD Ceuta FC', 'UD Las Palmas', 'SD Eibar',
    'Albacete Balompié', 'Burgos CF', 'Córdoba CF', 'Deportivo de La Coruña',
    'Cádiz CF', 'Málaga CF', 'Sporting Gijón', 'CD Castellón',
    'Cultural Leonesa', 'CD Leganés', 'UD Almería', 'Granada CF',
    'SD Huesca', 'Racing Santander', 'Real Zaragoza', 'FC Andorra',
    'CD Mirandés', 'Real Sociedad B'
]
# 독일 1부리그
Bundesliga = [
    'Bayern Munich', 'Bayer 04 Leverkusen', 'Eintracht Frankfurt', 'Borussia Dortmund',
    'SC Freiburg', '1.FSV Mainz 05', 'RB Leipzig', 'SV Werder Bremen',
    'VfB Stuttgart', 'Borussia Mönchengladbach', 'VfL Wolfsburg', 'FC Augsburg',
    '1.FC Union Berlin', 'FC St. Pauli', 'TSG 1899 Hoffenheim', '1.FC Heidenheim 1846',
    '1.FC Köln', 'Hamburger SV'
]
# 독일 2부리그
Bundesliga_2 = [
    'Holstein Kiel', 'VfL Bochum', 'SV 07 Elversberg', 'SC Paderborn 07',
    '1.FC Magdeburg', 'Fortuna Düsseldorf', '1.FC Kaiserslautern', 'Karlsruher SC',
    'Hannover 96', '1.FC Nuremberg', 'Hertha BSC', 'SV Darmstadt 98',
    'SpVgg Greuther Fürth', 'FC Schalke 04', 'Preußen Münster', 'Eintracht Braunschweig',
    'Arminia Bielefeld', 'SG Dynamo Dresden'
]
# 이탈리아 1부리그
Serie_A = [
    'SSC Napoli', 'Inter Milan', 'Atalanta BC', 'Juventus FC',
    'AS Roma', 'ACF Fiorentina', 'SS Lazio', 'AC Milan',
    'Bologna FC 1909', 'Como 1907', 'Torino FC', 'Udinese Calcio',
    'Genoa CFC', 'Hellas Verona', 'Cagliari Calcio', 'Parma Calcio 1913',
    'US Lecce', 'US Sassuolo', 'Pisa Sporting Club', 'US Cremonese'
]
# 이탈리아 2부리그
Serie_B= [
    'FC Empoli', 'Venezia FC', 'AC Monza', 'Spezia Calcio',
    'SS Juve Stabia', 'US Catanzaro', 'Cesena FC', 'Palermo FC',
    'SSC Bari', 'FC Südtirol', 'Modena FC', 'Carrarese Calcio 1908',
    'AC Reggiana 1919', 'Mantova 1911', 'Frosinone Calcio', 'UC Sampdoria',
    'Virtus Entella', 'Calcio Padova', 'US Avellino 1912', 'Delfino Pescara 1936'
]
# 프랑스 1부리그
Ligue1 = [
    'Paris Saint-Germain', 'Olympique Marseille', 'AS Monaco', 'OGC Nice',
    'LOSC Lille', 'Olympique Lyon', 'RC Strasbourg Alsace', 'RC Lens',
    'Stade Brestois 29', 'FC Toulouse', 'AJ Auxerre', 'Stade Rennais FC',
    'FC Nantes', 'Angers SCO', 'Le Havre AC', 'FC Lorient',
    'Paris FC', 'FC Metz'
]
# 프랑스 2부리그
Ligue2 = [
    'Stade Reims', 'AS Saint-Étienne', 'Montpellier HSC', 'USL Dunkerque',
    'EA Guingamp', 'FC Annecy', 'Stade Lavallois', 'SC Bastia',
    'Grenoble Foot 38', 'ESTAC Troyes', 'Amiens SC', 'Pau FC',
    'Rodez AF', 'Red Star FC', 'Clermont Foot 63', 'AS Nancy-Lorraine',
    'Le Mans FC', 'US Boulogne'
]
# 미국 리그
MLS = [
    'Chicago Fire FC', 'Atlanta United FC', 'Orlando City SC', 'New York City FC',
    'Philadelphia Union', 'Toronto FC', 'FC Cincinnati', 'Nashville SC',
    'Inter Miami CF', 'Charlotte FC', 'CF Montréal', 'D.C. United',
    'Columbus Crew', 'New England Revolution', 'New York Red Bulls', 'Austin FC',
    'Minnesota United FC', 'Los Angeles FC', 'St. Louis CITY SC', 'San Jose Earthquakes',
    'San Diego FC', 'Seattle Sounders FC', 'Houston Dynamo FC', 'FC Dallas',
    'Real Salt Lake City', 'Vancouver Whitecaps FC', 'Portland Timbers', 'Sporting Kansas City',
    'Colorado Rapids', 'Los Angeles Galaxy'
]
# 네덜란드 리그
Eredivisie = [
    'Ajax Amsterdam', 'AZ Alkmaar', 'Excelsior Rotterdam', 'Feyenoord Rotterdam',
    'Fortuna Sittard', 'Go Ahead Eagles', 'FC Groningen', 'SC Heerenveen',
    'Heracles Almelo', 'NAC Breda', 'NEC Nijmegen', 'PEC Zwolle',
    'PSV Eindhoven', 'Sparta Rotterdam', 'SC Telstar', 'Twente Enschede FC',
    'FC Utrecht', 'FC Volendam'
]
# 터키 리그
SuperLig = [
    'Galatasaray', 'Fenerbahce', 'Samsunspor', 'Besiktas JK',
    'Basaksehir FK', 'Eyüpspor', 'Trabzonspor', 'Göztepe',
    'Caykur Rizespor', 'Kasimpasa', 'Konyaspor', 'Alanyaspor',
    'Kayserispor', 'Gaziantep FK', 'Antalyaspor', 'Kocaelispor',
    'Genclerbirligi Ankara', 'Fatih Karagümrük'
]
# 스코틀랜드 리그
Scottish_Premiership = [
    'Aberdeen FC', 'Celtic FC', 'Dundee FC', 'Dundee United FC',
    'Falkirk FC', 'Heart of Midlothian FC', 'Hibernian FC', 'Kilmarnock FC',
    'Livingston FC', 'Motherwell FC', 'Rangers FC', 'St. Mirren FC'
]
# 스위스 리그
Swiss_SuperLeague = [
    'FC Basel 1893', 'Servette FC', 'BSC Young Boys', 'FC Luzern',
    'FC Lugano', 'FC Lausanne-Sport', 'FC St. Gallen 1879', 'FC Zürich',
    'FC Thun', 'FC Sion', 'FC Winterthur', 'Grasshopper Club Zurich'
]
# 오스트리아 리그
Austria_Bundesliga = [
    'SK Sturm Graz', 'Red Bull Salzburg', 'Austria Vienna', 'Wolfsberger AC',
    'Rapid Vienna', 'FC Blau-Weiss Linz', 'LASK', 'TSV Hartberg',
    'WSG Tirol', 'Grazer AK 1902', 'SCR Altach', 'SV Ried'
]
# 덴마크 리그
Danish_Superligaen = [
    'FC Copenhagen', 'FC Midtjylland', 'Bröndby IF', 'Randers FC',
    'FC Nordsjaelland', 'Aarhus GF', 'Silkeborg IF', 'Viborg FF',
    'Sönderjyske Fodbold', 'Vejle Boldklub', 'Odense Boldklub', 'FC Fredericia'
]
# 사우디 리그
Saudi_ProLeague = [
    'Al-Ittihad Club', 'Al-Hilal SFC', 'Al-Nassr FC', 'Al-Qadsiah FC',
    'Al-Ahli SFC', 'Al-Shabab FC', 'Al-Ettifaq FC', 'Al-Taawoun FC',
    'Al-Kholood Club', 'Al-Fateh SC', 'Al-Riyadh SC', 'Al-Khaleej FC',
    'Al-Fayha FC', 'Damac FC', 'Al-Okhdood Club', 'NEOM SC',
    'Al-Najma SC', 'Al-Hazem SC', 'Al Ahly FC'
]
# 포르투갈 리그
Liga_Portugal = [
    'Sporting CP', 'SL Benfica', 'FC Porto', 'SC Braga',
    'CD Santa Clara', 'Vitória Guimarães SC', 'FC Famalicão', 'GD Estoril Praia',
    'Casa Pia AC', 'Moreirense FC', 'Rio Ave FC', 'FC Arouca',
    'Gil Vicente FC', 'CD Nacional', 'CF Estrela Amadora', 'Avs Futebol',
    'CD Tondela', 'FC Alverca'
]
# 러시아 리그
Russian_PremierLiga = [
    'FC Krasnodar', 'Zenit St. Petersburg', 'CSKA Moscow', 'Spartak Moscow',
    'Dynamo Moscow', 'Lokomotiv Moscow', 'Rubin Kazan', 'FC Rostov',
    'Akron Togliatti', 'Krylya Sovetov Samara', 'Dinamo Makhachkala', 'FC Pari Nizhniy Novgorod',
    'Akhmat Grozny', 'Baltika Kaliningrad', 'FC Orenburg', 'FC Sochi'
]
# 브라질 리그
Brasileiro_SerieA = [
    'Clube Atlético Mineiro', 'Esporte Clube Bahia', 'Botafogo de Futebol e Regatas', 'Red Bull Bragantino',
    'Ceará Sporting Club', 'Sport Club Corinthians Paulista', 'Cruzeiro Esporte Clube', 'CR Flamengo',
    'Fluminense Football Club', 'Fortaleza Esporte Clube', 'Grêmio Foot-Ball Porto Alegrense', 'Sport Club Internacional',
    'Esporte Clube Juventude', 'Sociedade Esportiva Palmeiras', 'Mirassol Futebol Clube (SP)', 'Santos FC',
    'São Paulo Futebol Clube', 'Sport Club do Recife', 'Clube de Regatas Vasco da Gama', 'Esporte Clube Vitória'
]
# 아르헨티나 리그
Liga_Profesional = [
    'CA Huracán', 'CA Central Córdoba (SdE)', 'CA Barracas Central', 'CA Aldosivi',
    'CS Independiente Rivadavia', 'Club Atlético Tigre', 'CA Unión (Santa Fe)', 'Club Atlético Belgrano',
    'Defensa y Justicia', 'CA Boca Juniors', 'Racing Club', 'CA Newell\'s Old Boys',
    'Club Estudiantes de La Plata', 'AA Argentinos Juniors', 'CA Banfield', 'CA River Plate',
    'CA Rosario Central', 'Club Deportivo Riestra', 'Club Atlético Tucumán', 'CD Godoy Cruz Antonio Tomba',
    'CA Sarmiento (Junin)', 'CA Lanús', 'Club Atlético Platense', 'CA San Martín (San Juan)',
    'CA Vélez Sarsfield', 'CA Talleres', 'Club de Gimnasia y Esgrima La Plata', 'CA Independiente',
    'Instituto ACC', 'CA San Lorenzo de Almagro'
]
# 멕시코 리그
Liga_MX = [
    'CF América', 'Atlas Guadalajara', 'Atlético de San Luis', 'CD Cruz Azul', 'Deportivo Guadalajara', 'FC Juárez', 
    'Club León FC', 'Mazatlán FC', 'CF Monterrey', 'Club Necaxa', 'CF Pachuca', 'Puebla FC', 'Querétaro FC', 'Santos Laguna', 
    'Tigres UANL', 'Club Tijuana', 'Deportivo Toluca', 'UNAM Pumas'
]
# 벨기에 리그
Jupiler_ProLeague = [
    'Royal Antwerp FC', 'RSC Anderlecht', 'KRC Genk', 'KAA Gent',
    'Club Brugge KV', 'RAAL La Louvière', 'Zulte Waregem', 'Oud-Heverlee Leuven',
    'KV Mechelen', 'Sint-Truidense VV', 'Cercle Brugge', 'KVC Westerlo',
    'Union Saint-Gilloise', 'Royal Charleroi SC', 'Standard Liège', 'FCV Dender EH'
]
# 그리스 리그
SuperLeague1 = [ 
    'Olympiacos Piraeus', 'Panathinaikos FC', 'PAOK Thessaloniki', 'AEK Athens',
    'Aris Thessaloniki', 'Asteras Aktor', 'Atromitos Athens', 'OFI Crete FC',
    'APO Levadiakos', 'Panetolikos GFS', 'Volos NPS', 'Panserraikos', 'AE Larisa', 'AE Kifisias'
]
# 호주 리그
A_League = [
    'Adelaide United', 'Auckland FC', 'Brisbane Roar', 'Central Coast Mariners',
    'Macarthur FC', 'Melbourne City FC', 'Melbourne Victory', 'Newcastle United Jets',
    'Perth Glory', 'Sydney FC', 'Wellington Phoenix', 'Western Sydney Wanderers',
    'Western United FC'
]
# K리그1
K_League1 = [
    'Ulsan HD FC', 'Gangwon FC', 'Gimcheon Sangmu', 'FC Seoul',
    'Suwon FC', 'Pohang Steelers', 'Jeju SK', 'Daejeon Hana Citizen',
    'Gwangju FC', 'Jeonbuk Hyundai Motors', 'Daegu FC', 'FC Anyang'
]
# K리그2
K_League2 = [
    'Incheon United', 'Chungnam Asan', 'Seoul E-Land', 'Jeonnam Dragons',
    'Busan IPark', 'Suwon Samsung Bluewings', 'Gimpo FC', 'Bucheon FC 1995',
    'Cheonan City', 'Chungbuk Cheongju FC', 'Ansan Greeners', 'Gyeongnam FC',
    'Seongnam FC', 'Hwaseong FC'
]
# 일본 리그
J1_League = [
    'Kashima Antlers', 'Urawa Red Diamonds', 'Kashiwa Reysol', 'FC Tokyo',
    'Tokyo Verdy', 'Machida Zelvia', 'Kawasaki Frontale', 'Yokohama F. Marinos',
    'Yokohama FC', 'Shonan Bellmare', 'Albirex Niigata', 'Shimizu S-Pulse',
    'Nagoya Grampus', 'Kyoto Sanga', 'Gamba Osaka', 'Cerezo Osaka',
    'Vissel Kobe', 'Fagiano Okayama', 'Sanfrecce Hiroshima', 'Avispa Fukuoka'
]
# 중국 리그
CSL = [
    'Shanghai Port', 'Shanghai Shenhua', 'Chengdu Rongcheng', 'Beijing Guoan',
    'Shandong Taishan', 'Tianjin Jinmen Tiger', 'Zhejiang FC', 'Henan FC',
    'Changchun Yatai', 'Qingdao West Coast', 'Wuhan Three Towns', 'Qingdao Hainiu',
    'Shenzhen Peng City', 'Yunnan Yukun', 'Dalian Yingbo', 'Meizhou Hakka'
]
# 카타르 리그
QSL = [
    'Al-Sadd SC', 'Al-Duhail SC', 'Al-Gharafa SC', 'Al-Ahli SC',
    'Al-Rayyan SC', 'Al-Shamal SC', 'Al-Shahania SC', 'Al-Wakrah SC',
    'Al-Arabi SC', 'Qatar SC', 'Umm Salal SC', 'Al-Sailiya SC', 'Al-Khor SC'
]
# UAE 리그
UAE_ProLeague = [
    'Shabab Al-Ahli Club', 'Sharjah FC', 'Al-Wahda FC', 'Al-Wasl FC',
    'Al-Ain FC', 'Al-Nasr SC (UAE)', 'Al-Jazira Club', 'Khor Fakkan SSC',
    'Kalba FC', 'Ajman Club', 'Al-Bataeh CSC', 'FC Baniyas',
    'Al-Dhafra FC', 'Dibba SCC'
]
# 우크라이나 리그
Ukrainian_PremierLeague = [
    'Dynamo Kyiv', 'FC Oleksandriya', 'Shakhtar Donetsk', 'Polissya Zhytomyr',
    'Kryvbas Kryvyi Rig', 'Karpaty Lviv', 'Zorya Lugansk', 'Rukh Lviv',
    'NK Veres Rivne', 'Kolos Kovalivka', 'Obolon Kyiv', 'LNZ Cherkasy',
    'Epicentr Kamyanets-Podilskyi', 'SC Poltava', 'Metalist 1925 Kharkiv', 'FC Kudrivka'
]
# 폴란드 리그
Ekstraklasa = [ 
    'Lech Poznan', 'Raków Częstochowa', 'Jagiellonia Bialystok', 'Pogon Szczecin',
    'Legia Warszawa', 'Cracovia', 'Motor Lublin', 'GKS Katowice',
    'Górnik Zabrze', 'Piast Gliwice', 'Korona Kielce', 'Radomiak Radom',
    'Widzew Lodz', 'Lechia Gdansk', 'Zaglebie Lubin', 'Arka Gdynia',
    'Bruk-Bet Termalica Nieciecza', 'Wisla Plock'
]
# 노르웨이 리그
Eliteserien = [ 
    'FK Bodø/Glimt', 'SK Brann', 'Viking FK', 'Rosenborg BK',
    'Molde FK', 'Fredrikstad FK', 'Strømsgodset IF', 'KFUM-Kameratene Oslo',
    'Sarpsborg 08 FF', 'Sandefjord Fotball', 'Kristiansund BK', 'Hamarkameratene',
    'Tromsø IL', 'FK Haugesund', 'Vålerenga Fotball Elite', 'Bryne FK'
]
# 스웨덴 리그
Allsvenskan = [ 
    'Malmö FF', 'Hammarby IF', 'AIK', 'Djurgårdens IF',
    'Mjällby AIF', 'GAIS', 'IF Elfsborg', 'BK Häcken',
    'IK Sirius', 'IF Brommapojkarna', 'IFK Norrköping', 'Halmstads BK',
    'IFK Göteborg', 'IFK Värnamo', 'Degerfors IF', 'Östers IF'
]
# 칠레 리그
Primera_División = [
    'Club Universidad de Chile', 'CSD Colo-Colo', 'CD Universidad Católica', 'Unión Española',
    'Deportes La Serena', 'Audax Italiano', 'Huachipato FC', 'CD Palestino',
    'CD Everton', 'Coquimbo Unido', 'CD O\'Higgins', 'CD Ñublense',
    'CD Cobresal', 'Club Deportes Iquique', 'Unión La Calera', 'Deportes Limache'
]
# 크로아티아 리그
SuperSport_HNL = [
    'HNK Rijeka', 'GNK Dinamo Zagreb', 'HNK Hajduk Split', 'NK Varazdin',
    'Slaven Belupo Koprivnica', 'NK Istra 1961', 'NK Osijek', 'NK Lokomotiva Zagreb',
    'HNK Gorica', 'HNK Vukovar 1991'
]
# 체코 리그
Chance_Liga = [
    'SK Slavia Prague', 'FC Viktoria Plzen', 'FC Banik Ostrava', 'AC Sparta Prague',
    'FK Jablonec', 'SK Sigma Olomouc', 'FC Hradec Kralove', 'Bohemians Prague 1905',
    'FC Slovan Liberec', 'MFK Karvina', 'FK Teplice', 'FK Mlada Boleslav',
    '1.FC Slovacko', 'FK Dukla Prague', 'FK Pardubice', 'FC Zlin'
]
# 세르비아 리그
Superliga_Srbjie = [
    'Red Star Belgrade', 'FK Partizan Belgrade', 'FK Novi Pazar', 'FK Radnicki 1923 Kragujevac',
    'FK Vojvodina Novi Sad', 'OFK Beograd', 'FK TSC Backa Topola', 'FK Mladost Lucani',
    'Zeleznicar Pancevo', 'FK Cukaricki', 'FK IMT Belgrad', 'FK Spartak Subotica',
    'FK Radnicki Nis', 'FK Napredak Krusevac', 'FK Radnik Surdulica', 'FK Javor-Matis Ivanjica'
]
# 루마니아 리그
Romaina_SuperLiga = [
    'FCSB', 'CFR Cluj', 'CS Universitatea Craiova', 'FC Universitatea Cluj',
    'FC Rapid 1923', 'FC Dinamo 1948', 'FC Hermannstadt', 'SC Otelul Galati',
    'FCV Farul Constanta', 'Petrolul Ploiesti', 'UTA Arad', 'FC Botosani',
    'AFC Unirea 04 Slobozia', 'ACSC FC Arges', 'FK Csikszereda Miercurea Ciuc', 'FC Metaloglobus Bucharest'
]
# 잉글랜드 3부리그
League_One = [
    'AFC Wimbledon', 'Barnsley FC', 'Blackpool FC', 'Bolton Wanderers',
    'Bradford City', 'Burton Albion', 'Cardiff City', 'Doncaster Rovers',
    'Exeter City', 'Huddersfield Town', 'Leyton Orient', 'Lincoln City',
    'Luton Town', 'Mansfield Town', 'Northampton Town', 'Peterborough United',
    'Plymouth Argyle', 'Port Vale FC', 'Reading FC', 'Rotherham United',
    'Stevenage FC', 'Stockport County', 'Wigan Athletic', 'Wycombe Wanderers'
]
# 우즈벡키스탄 리그
Uzbekistan_SuperLeague = [
    'Nasaf Qarshi', 'FC OKMK Olmaliq', 'Navbahor Namangan', 'Sogdiana Jizzakh',
    'Neftchi Fergana', 'Pakhtakor Tashkent', 'Surkhon Termiz', 'Dinamo Samarqand',
    'FC Andijon', 'Bunyodkor Tashkent', 'FC Qizilqum', 'Mash\'al Mubarek',
    'FC Buxoro', 'FC Kokand 1912', 'Shurtan Guzar', 'Xorazm Urganch'
]
# 피지 리그
Fiji_PremierLeague = [
    'Rewa FA', 'Labasa FC', 'Lautoka FC', 'Navua FC', 'Ba FC', 'Nadi FA', 'Suva FA', 'Nadroga FC', 'Nasinu FC', 'Tavua FC'
]
# 뉴질랜드 리그
NewZealand_FootballLeague = [
    'Auckland City FC', 'Auckland FC Reserves', 'Auckland United FC', 'Bay Olympic FC',
    'Birkenhead United', 'East Coast Bays AFC', 'Eastern Suburbs AFC', 'Fencibles United AFC',
    'Manurewa AFC', 'Tauranga City AFC', 'West Coast Rangers', 'Western Springs AFC',
    'Island Bay United', 'Miramar Rangers AFC', 'Napier City Rovers', 'North Wellington FC',
    'Petone FC', 'Upper Hutt City FC', 'Waterside Karori AFC', 'Wellington Olympic AFC',
    'Wellington Phoenix FC Reserves', 'Western Suburbs FC', 'Cashmere Technical FC', 'Christchurch United',
    'Coastal Spirit FC', 'Dunedin City Royals', 'Ferrymead Bays FC', 'Nelson Suburbs FC',
    'Nomads United AFC', 'Selwyn United FC', 'University of Canterbury', 'Wanaka FC'
]
# 이란 리그
Persian_GulfProLeague = [
    'Tractor FC', 'Sepahan FC', 'Persepolis FC', 'Foolad FC',
    'Gol Gohar Sirjan FC', 'Zob Ahan Esfahan', 'Aluminium Arak FC', 'Esteghlal FC',
    'Chadormalu SC', 'Esteghlal Khuzestan', 'Malavan Bandar Anzali', 'Kheybar Khorramabad FC',
    'Shams Azar Qazvin', 'Mes Rafsanjan', 'Fajr Sepasi Shiraz', 'Paykan FC'
]
# 불가리아 리그
Efbet_Liga = [
    'Ludogorets Razgrad', 'Levski Sofia', 'Cherno More Varna', 'Arda Kardzhali',
    'CSKA Sofia', 'Botev Plovdiv', 'Spartak Varna', 'Beroe Stara Zagora',
    'Slavia Sofia', 'CSKA 1948', 'Lokomotiv Sofia', 'Septemvri Sofia',
    'Lokomotiv Plovdiv', 'Botev Vratsa', 'Dobrudzha Dobrich', 'Montana'
]
# 콜롬비아 리그
Liga_Dimayor = [
    'Alianza FC', 'CD América de Cali', 'Atlético Bucaramanga', 'Atlético Nacional',
    'Boyacá Chicó FC', 'Deportes Tolima', 'Deportivo Cali', 'Asociación Deportivo Pasto',
    'Deportivo Pereira', 'Envigado FC', 'Fortaleza CEIF', 'Independiente Medellín',
    'Independiente Santa Fe', 'Junior FC', 'CD La Equidad Seguros SA', 'Llaneros FC',
    'Millonarios FC', 'Once Caldas', 'AD Union Magdalena', 'Rionegro Águilas'
]
# 소속팀 없음
Without_Club = ['Unknown', 'Without Club']


league_dict = {
    'PremierLeague': PremierLeague,
    'LaLiga': LaLiga,
    'Bundesliga': Bundesliga,
    'Serie_A': Serie_A,
    'Ligue1': Ligue1,
    'MLS': MLS,
    'Championship': Championship,
    'LaLiga_2': LaLiga_2,
    'Bundesliga_2': Bundesliga_2,
    'Serie_B': Serie_B,
    'Ligue2': Ligue2,
    'Eredivisie': Eredivisie,
    'SuperLig': SuperLig,
    'Scottish_Premiership': Scottish_Premiership,
    'Swiss_SuperLeague': Swiss_SuperLeague,
    'Austria_Bundesliga': Austria_Bundesliga,
    'Danish_Superligaen': Danish_Superligaen,
    'Saudi_ProLeague': Saudi_ProLeague,
    'Liga_Portugal': Liga_Portugal,
    'Russian_PremierLiga': Russian_PremierLiga,
    'Brasileiro_SerieA': Brasileiro_SerieA,
    'Liga_Profesional': Liga_Profesional,
    'Liga_MX': Liga_MX,
    'Jupiler_ProLeague': Jupiler_ProLeague,
    'SuperLeague1': SuperLeague1,
    'A_League': A_League,
    'K_League1': K_League1,
    'K_League2': K_League2,
    'J1_League': J1_League,
    'Chinese_SuperLeague': CSL,
    'QSL': QSL,
    'UAE_ProLeague': UAE_ProLeague,
    'Ukrainian_PremierLeague': Ukrainian_PremierLeague,
    'Ekstraklasa': Ekstraklasa,
    'Eliteserien': Eliteserien,
    'Allsvenskan': Allsvenskan,
    'Primera_División': Primera_División,
    'Chance_Liga': Chance_Liga,
    'Superliga_Srbjie': Superliga_Srbjie,
    'SuperSport_HNL': SuperSport_HNL,
    'Romaina_SuperLiga': Romaina_SuperLiga,
    'League_One': League_One,
    'Uzbekistan_SuperLeague': Uzbekistan_SuperLeague,
    'Fiji_PremierLeague': Fiji_PremierLeague,
    'NewZealand_FootballLeague': NewZealand_FootballLeague,
    'Persian_GulfProLeague': Persian_GulfProLeague,
    'Efbet_Liga': Efbet_Liga,
    'Liga_Dimayor': Liga_Dimayor,
    'Without_Club': Without_Club
}

In [15]:
for league_name, clubs in league_dict.items():
    player_df.loc[player_df['club'].isin(clubs), 'league'] = league_name

print(player_df['league'].value_counts(dropna=False))

league
PremierLeague                478
Serie_A                      288
MLS                          254
Bundesliga                   253
Liga_MX                      227
Ligue1                       221
LaLiga                       220
Brasileiro_SerieA            200
NewZealand_FootballLeague    192
NaN                          184
Championship                 173
Without_Club                 159
J1_League                    156
Liga_Profesional             142
SuperLig                     140
Fiji_PremierLeague           138
Eredivisie                   135
Saudi_ProLeague              132
Jupiler_ProLeague            129
Russian_PremierLiga          125
A_League                     120
Liga_Portugal                119
SuperLeague1                 108
Swiss_SuperLeague            106
Scottish_Premiership         104
Danish_Superligaen           104
Austria_Bundesliga           102
K_League1                    101
Chinese_SuperLeague          100
UAE_ProLeague                 73
QSL

In [16]:
# 'league' 값이 없는 팀들만 필터링
nan_teams_df = player_df[player_df['league'].isna()][['club']]
nan_teams_df.to_csv('../data/tfm_filtering.csv', index=False)

- 다음 과정을 반복하면서 계속 리그 군집이 가능할때까지 필터링을 한다. 
- 기타 변방리그나 2/3부리그 + 유소년 리그 등 군집이 불가능한 클럽들은 기타리그로 분류하였음

In [17]:
player_df['league'] = player_df['league'].fillna('Other_leagues') # Nan값 -> 기타리그
print(player_df.isna().sum())

name             0
position         0
age              0
nation           0
club             0
value            0
confederation    0
league           0
dtype: int64


- position값 전처리

In [18]:
player_df

Unnamed: 0,name,position,age,nation,club,value,confederation,league
0,Kaoru Mitoma,Left Winger,28,Japan,Brighton & Hove Albion,€40.00m,AFC,PremierLeague
1,Ryoya Yamashita,Right Winger,27,Japan,Gamba Osaka,€850k,AFC,J1_League
2,Motohiko Nakajima,Second Striker,26,Japan,Cerezo Osaka,€850k,AFC,J1_League
3,Hirokazu Ishihara,Right-Back,26,Japan,Urawa Red Diamonds,€850k,AFC,J1_League
4,Katsuya Nagato,Left-Back,30,Japan,Vissel Kobe,€850k,AFC,J1_League
...,...,...,...,...,...,...,...,...
5348,John Hou Saeter,Attacking Midfield,27,China,Yunnan Yukun,€350k,AFC,Chinese_SuperLeague
5349,Shiqin Wang,Left-Back,22,China,Zhejiang FC,€350k,AFC,Chinese_SuperLeague
5350,Bin Xu,Centre-Back,21,China,Qingdao West Coast,€350k,AFC,Chinese_SuperLeague
5351,Rodrigo Henrique,Left Winger,32,Brazil,Meizhou Hakka,€350k,CONMEBOL,Chinese_SuperLeague


In [19]:
# 포지션 종류별 개수 출력
position_counts = player_df['position'].value_counts(dropna=False)
print(position_counts)

position
Centre-Back           821
Centre-Forward        748
Goalkeeper            626
Central Midfield      584
Defensive Midfield    458
Attacking Midfield    437
Right Winger          423
Left Winger           406
Right-Back            335
Left-Back             315
Midfield               40
Right Midfield         39
Left Midfield          35
Second Striker         32
Defender               30
Attack                 24
Name: count, dtype: int64


In [None]:
attack_players = player_df[player_df['position'] == 'Attack'].head(5)
print(attack_players)

# 예상치 못하게 기타리그 일부 선수들은 세부포지션이 미분류 되어있었음을 확인

                      name position  age           nation  \
1029          Ashneel Raju   Attack   30             Fiji   
1039        Asivorosi Rabo   Attack   20             Fiji   
1042          Jimson Abana   Attack   20  Solomon Islands   
1067        Caspar Mcgavin   Attack   19      New Zealand   
1102  Simione Ragoneturaga   Attack   22             Fiji   

                     club  value confederation                     league  
1029            Labasa FC  €125k           OFC         Fiji_PremierLeague  
1039              Rewa FA  €150k           OFC         Fiji_PremierLeague  
1042            Labasa FC  €150k           OFC         Fiji_PremierLeague  
1067  Western Springs AFC  €100k           OFC  NewZealand_FootballLeague  
1102            Labasa FC  €125k           OFC         Fiji_PremierLeague  


In [21]:
# 전체 포지션 분류
FW = ['Centre-Forward', 'Second Striker', 'Winger', 'Right Winger', 'Left Winger', 'Attack', 'ST', 'CF', 'LW', 'RW']
MF = ['Attacking Midfield', 'Right Midfield', 'Left Midfield', 'Central Midfield', 'Defensive Midfield', 'Midfield', 'MOC', 'CM', 'CDM', 'LM', 'RM']
DF = ['Centre-Back', 'Full-Back', 'Right-Back', 'Left-Back', 'Sweeper', 'Defender', 'CB', 'RB', 'LB']
GK = ['Goalkeeper', 'GK']

conditions = [
    player_df['position'].isin(FW),
    player_df['position'].isin(MF),
    player_df['position'].isin(DF),
    player_df['position'].isin(GK)
]
choices = ['FW', 'MF', 'DF', 'GK']

player_df['field'] = np.select(conditions, choices, default='Unknown')

In [22]:
player_df['field'].value_counts()

field
FW    1633
MF    1593
DF    1501
GK     626
Name: count, dtype: int64

In [23]:
position_mapping = {
    # Striker
    'Centre-Forward': 'Striker', 'Second Striker': 'Striker', 'ST': 'Striker', 'CF': 'Striker',

    # Winger
    'Right Winger': 'Right_Winger', 'RW': 'Right_Winger',
    'Left Winger': 'Left_Winger', 'LW': 'Left_Winger',
    'Attack': '(Uncategorized)Forwards',

    # Midfield
    'Attacking Midfield': 'Attacking_Midfielder', 'MOC': 'Attacking_Midfielder',
    'Central Midfield': 'Central_Midfielder', 'CM': 'Central_Midfielder',
    'Right Midfield': 'Side_Midfielder', 'Left Midfield': 'Side_Midfielder',
    'LM': 'Side_Midfielder', 'RM': 'Side_Midfielder',
    'Defensive Midfield': 'Defensive_Midfielder', 'CDM': 'Defensive_Midfielder',
    'Midfield': '(Uncategorized)Midfield',

    # Defender
    'Centre-Back': 'Centre_Back', 'Sweeper': 'Centre_Back', 'CB': 'Centre_Back',
    'Right-Back': 'Right_Back', 'RB': 'Right_Back',
    'Left-Back': 'Left_Back', 'LB': 'Left_Back',
    'Defender': '(Uncategorized)Defender',

    # Goalkeeper
    'Goalkeeper': 'Goalkeeper', 'GK': 'Goalkeeper'
}

player_df['position'] = player_df['position'].replace(position_mapping)

In [24]:
print(player_df['position'].value_counts())

position
Centre_Back                821
Striker                    780
Goalkeeper                 626
Central_Midfielder         584
Defensive_Midfielder       458
Attacking_Midfielder       437
Right_Winger               423
Left_Winger                406
Right_Back                 335
Left_Back                  315
Side_Midfielder             74
(Uncategorized)Midfield     40
(Uncategorized)Defender     30
(Uncategorized)Forwards     24
Name: count, dtype: int64


In [25]:
cols = player_df.columns.tolist()
cols

['name',
 'position',
 'age',
 'nation',
 'club',
 'value',
 'confederation',
 'league',
 'field']

In [31]:
new_column_order = ['name', 'field', 'position', 'age', 'nation', 'confederation', 'club', 'league', 'value']
player_df = player_df[new_column_order]
player_df.head()

Unnamed: 0,name,field,position,age,nation,confederation,club,league,value
0,Kaoru Mitoma,FW,Left_Winger,28,Japan,AFC,Brighton & Hove Albion,PremierLeague,€40.00m
1,Ryoya Yamashita,FW,Right_Winger,27,Japan,AFC,Gamba Osaka,J1_League,€850k
2,Motohiko Nakajima,FW,Striker,26,Japan,AFC,Cerezo Osaka,J1_League,€850k
3,Hirokazu Ishihara,DF,Right_Back,26,Japan,AFC,Urawa Red Diamonds,J1_League,€850k
4,Katsuya Nagato,DF,Left_Back,30,Japan,AFC,Vissel Kobe,J1_League,€850k


- 선수가치 유로(€) 표기 전처리

In [32]:
# 유로 금액을 float으로 변환
def convert_value(value_str):
    if pd.isna(value_str):
        return None
    value_str = value_str.strip().lower().replace('€', '')
    try:
        if value_str.endswith('m'):
            return float(value_str.replace('m', '')) * 1_000_000
        elif value_str.endswith('k'):
            return float(value_str.replace('k', '')) * 1_000
        else:
            return float(value_str)
    except:
        return None

In [None]:
exchange_rate = 1625 # 환율 설정 (25/07/28 기준)

# 원화(억원 단위)로 환산
def convert_to_krw(value_str, rate=exchange_rate):
    euro = convert_value(value_str)
    if euro is None:
        return None
    won = euro * rate
    krw = round(won / 100_000_000, 1)
    return krw

player_df.loc[:, 'value_billion_krw'] = player_df['value'].apply(convert_to_krw)

In [37]:
# 유로 + 원화
player_df.to_csv('../data/tfm_filtering3.csv', index=False, encoding='utf-8-sig')
player_df

Unnamed: 0,name,field,position,age,nation,confederation,club,league,value,value_billion_krw
0,Kaoru Mitoma,FW,Left_Winger,28,Japan,AFC,Brighton & Hove Albion,PremierLeague,€40.00m,650.0
1,Ryoya Yamashita,FW,Right_Winger,27,Japan,AFC,Gamba Osaka,J1_League,€850k,13.8
2,Motohiko Nakajima,FW,Striker,26,Japan,AFC,Cerezo Osaka,J1_League,€850k,13.8
3,Hirokazu Ishihara,DF,Right_Back,26,Japan,AFC,Urawa Red Diamonds,J1_League,€850k,13.8
4,Katsuya Nagato,DF,Left_Back,30,Japan,AFC,Vissel Kobe,J1_League,€850k,13.8
...,...,...,...,...,...,...,...,...,...,...
5348,John Hou Saeter,MF,Attacking_Midfielder,27,China,AFC,Yunnan Yukun,Chinese_SuperLeague,€350k,5.7
5349,Shiqin Wang,DF,Left_Back,22,China,AFC,Zhejiang FC,Chinese_SuperLeague,€350k,5.7
5350,Bin Xu,DF,Centre_Back,21,China,AFC,Qingdao West Coast,Chinese_SuperLeague,€350k,5.7
5351,Rodrigo Henrique,FW,Left_Winger,32,Brazil,CONMEBOL,Meizhou Hakka,Chinese_SuperLeague,€350k,5.7


In [None]:
# 원화 가치 칼럼만 살리고 내림차순 정렬
player_df.drop(columns=['value'], inplace=True)
player_df.sort_values(by='value_billion_krw', ascending=False)
player_df

Unnamed: 0,name,field,position,age,nation,confederation,club,league,value_billion_krw
0,Kaoru Mitoma,FW,Left_Winger,28,Japan,AFC,Brighton & Hove Albion,PremierLeague,650.0
1,Ryoya Yamashita,FW,Right_Winger,27,Japan,AFC,Gamba Osaka,J1_League,13.8
2,Motohiko Nakajima,FW,Striker,26,Japan,AFC,Cerezo Osaka,J1_League,13.8
3,Hirokazu Ishihara,DF,Right_Back,26,Japan,AFC,Urawa Red Diamonds,J1_League,13.8
4,Katsuya Nagato,DF,Left_Back,30,Japan,AFC,Vissel Kobe,J1_League,13.8
...,...,...,...,...,...,...,...,...,...
5348,John Hou Saeter,MF,Attacking_Midfielder,27,China,AFC,Yunnan Yukun,Chinese_SuperLeague,5.7
5349,Shiqin Wang,DF,Left_Back,22,China,AFC,Zhejiang FC,Chinese_SuperLeague,5.7
5350,Bin Xu,DF,Centre_Back,21,China,AFC,Qingdao West Coast,Chinese_SuperLeague,5.7
5351,Rodrigo Henrique,FW,Left_Winger,32,Brazil,CONMEBOL,Meizhou Hakka,Chinese_SuperLeague,5.7


In [None]:
print(player_df['value_billion_krw'].dtype) 

float64


> Issue파악: 원화가치 내림차순이 안된이유가 새로 바뀐 krw value가 str타입으로 되어있었음

In [40]:
player_df['value_billion_krw'] = player_df['value_billion_krw'].astype(float)
player_df = player_df.sort_values(by='value_billion_krw', ascending=False)
player_df

Unnamed: 0,name,field,position,age,nation,confederation,club,league,value_billion_krw
998,Lamine Yamal,FW,Right_Winger,18,Spain,UEFA,FC Barcelona,LaLiga,3250.0
659,Erling Haaland,FW,Striker,25,Norway,UEFA,Manchester City,PremierLeague,2925.0
660,Kylian Mbappe,FW,Striker,26,France,UEFA,Real Madrid,LaLiga,2925.0
623,Jude Bellingham,MF,Attacking_Midfielder,22,England,UEFA,Real Madrid,LaLiga,2925.0
1756,Vinicius Junior,FW,Left_Winger,25,Brazil,CONMEBOL,Real Madrid,LaLiga,2762.5
...,...,...,...,...,...,...,...,...,...
1314,Vuniuci Tikomaimereke,DF,Left_Back,35,Fiji,OFC,Nadroga FC,Fiji_PremierLeague,0.8
1315,Frank Clarke,FW,Striker,27,New Zealand,OFC,Fencibles United AFC,NewZealand_FootballLeague,0.8
1316,Emori Ragata,GK,Goalkeeper,37,Fiji,OFC,Nasinu FC,Fiji_PremierLeague,0.8
1263,Kazik Swain,MF,Attacking_Midfielder,19,New Zealand,OFC,Bay Olympic FC,NewZealand_FootballLeague,0.8


In [41]:
player_df.to_csv('../data/tfm_finaldata.csv', index=False, encoding='utf-8-sig')