In [1]:
import pandas as pd
import re
from datetime import datetime

In [2]:
# Create a list to store DataFrames
dfs = []

# Read each CSV file into a DataFrame and append to the list
for i in range(1,14):
    df = pd.read_csv(f"../raw_data/names_{i}.csv")
    dfs.append(df)

# Concatenate the list of DataFrames into a single DataFrame
merged_df = pd.concat(dfs, ignore_index=True)

In [3]:
merged_df

Unnamed: 0,name,club,profile_image,year,market_value
0,Kevin De Bruyne,Manchester City,https://img.a.transfermarkt.technology/portrai...,32,€70.00m
1,Harry Kane,Bayern Munich,https://img.a.transfermarkt.technology/portrai...,30,€110.00m
2,Mohamed Salah,Liverpool FC,https://img.a.transfermarkt.technology/portrai...,31,€65.00m
3,Sadio Mané,Al-Nassr FC,https://img.a.transfermarkt.technology/portrai...,31,€25.00m
4,,,,,
...,...,...,...,...,...
174319,LPM Football Agents Ltd.,,,,
174320,,,,,
174321,SQUARE SPORTS GROUP,,,,
174322,25 HOURS SPORTS,,,,


In [4]:
def fill_duplicates_with_none(df):
    # Find duplicate rows based on all columns
    duplicate_rows = df[df.duplicated()]

    # Iterate through duplicate rows and fill them with None
    for index, row in duplicate_rows.iterrows():
        df.loc[index] = ['None' if pd.notnull(value) else 'None' for value in row]

    return df

# Example usage:
# Assuming your DataFrame is named 'your_dataframe'
# Replace this with your actual DataFrame
#your_dataframe = pd.DataFrame({
#    'Name': ['John', 'Alice', 'Bob', 'John', 'Alice'],
#    'Age': [25, 30, 22, 25, 30],
#    'City': ['New York', 'Paris', 'London', 'New York', 'Paris']
#})
#your_dataframe

In [5]:
merged_df = fill_duplicates_with_none(merged_df)

In [6]:
merged_df

Unnamed: 0,name,club,profile_image,year,market_value
0,Kevin De Bruyne,Manchester City,https://img.a.transfermarkt.technology/portrai...,32,€70.00m
1,Harry Kane,Bayern Munich,https://img.a.transfermarkt.technology/portrai...,30,€110.00m
2,Mohamed Salah,Liverpool FC,https://img.a.transfermarkt.technology/portrai...,31,€65.00m
3,Sadio Mané,Al-Nassr FC,https://img.a.transfermarkt.technology/portrai...,31,€25.00m
4,,,,,
...,...,...,...,...,...
174319,,,,,
174320,,,,,
174321,,,,,
174322,,,,,


In [7]:
df = pd.read_csv("../raw_data/clean_data.csv")

In [8]:
final_df = pd.merge(merged_df, df, left_index=True, right_index=True, how='inner')

In [9]:
final_df

Unnamed: 0,name_x,club_x,profile_image,year,market_value,name_y,club_y,nat,position,dob,...,midfielder,striker,winger,division_rating,nat_rating,club_rating,either_left,either_right,left,right
0,Kevin De Bruyne,Manchester City,https://img.a.transfermarkt.technology/portrai...,32,€70.00m,Kevin De Bruyne,Man City,BEL,"M (RLC), AM (C)",28/6/1991 (29 years old),...,1,0,0,93.8,1793.71,2013.0,0,1,0,0
1,Harry Kane,Bayern Munich,https://img.a.transfermarkt.technology/portrai...,30,€110.00m,Harry Kane,Tottenham,ENG,"AM (C), ST (C)",28/7/1993 (27 years old),...,0,1,0,93.8,1807.88,1837.0,1,0,0,0
2,Mohamed Salah,Liverpool FC,https://img.a.transfermarkt.technology/portrai...,31,€65.00m,Mohamed Salah,Liverpool,EGY,"AM (RL), ST (C)",15/6/1992 (28 years old),...,0,0,1,93.8,1511.95,2042.0,0,0,1,0
3,Sadio Mané,Al-Nassr FC,https://img.a.transfermarkt.technology/portrai...,31,€25.00m,Sadio Mané,Liverpool,SEN,"AM (RL), ST (C)",10/4/1992 (28 years old),...,0,0,1,93.8,1600.82,2042.0,0,0,0,1
4,,,,,,Son Heung-Min,Tottenham,KOR,"M/AM (RL), ST (C)",8/7/1992 (28 years old),...,0,0,1,93.8,1540.35,1837.0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,,,,,,Nick Brown,Charlotte FC Academy,USA,ST (C),13/12/2004 (15 years old),...,0,1,0,40.8,1675.89,1156.0,0,0,0,1
174320,,,,,,Luis Alcalá,Charlotte FC Academy,USA,ST (C),11/5/2004 (16 years old),...,0,1,0,40.8,1675.89,1156.0,0,0,0,1
174321,,,,,,Devin Lee,Charlotte FC Academy,USA,D (C),4/4/2003 (17 years old),...,0,0,0,40.8,1675.89,1156.0,0,0,0,1
174322,,,,,,Yeferson Suárez,Charlotte FC Academy,USA,M (C),4/1/2004 (16 years old),...,1,0,0,40.8,1675.89,1156.0,0,0,0,1


In [10]:
final_df[final_df.club_x == 'Inter Miami CF']

Unnamed: 0,name_x,club_x,profile_image,year,market_value,name_y,club_y,nat,position,dob,...,midfielder,striker,winger,division_rating,nat_rating,club_rating,either_left,either_right,left,right
655,Sergiy Kryvtsov,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,32,€2.00m,Sergiy Kryvtsov,Shakhtar,UKR,D (C),15/3/1991 (29 years old),...,0,0,0,69.5,1549.33,1714.0,0,0,0,1
667,Kieran Gibbs,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,34,-,Kieran Gibbs,West Brom,ENG,D/WB (L),26/9/1989 (30 years old),...,0,0,0,93.8,1807.88,1478.0,0,0,1,0
709,DeAndre Yedlin,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,30,€2.00m,DeAndre Yedlin,Galatasaray,USA,D/WB (R),9/7/1993 (27 years old),...,0,0,0,80.8,1675.89,1524.0,0,0,0,1
2091,Nicolás Stefanelli,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,29,€1.20m,Nicolás Stefanelli,AIK,ARG,"AM (RLC), ST (C)",22/1/1994 (26 years old),...,0,1,0,74.2,1861.29,1538.0,0,0,0,1
3033,Sergio Busquets,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,35,€3.50m,Sergi,Istra 1961,ESP,D/WB (L),26/5/1995 (25 years old),...,0,0,0,40.8,1725.97,1245.0,0,0,1,0
3495,Robert Taylor,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,29,€1.50m,Robert Taylor,Brann,FIN,D/WB/M/AM (R),21/10/1994 (25 years old),...,0,0,0,72.1,1388.46,1156.0,0,0,0,1
5923,Christopher McVey,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,26,€600k,Christopher McVey,IF Elfsborg,SWE,D/WB (R),12/4/1997 (23 years old),...,0,0,0,74.2,1545.76,1468.0,0,0,0,1
15681,Diego Gómez,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,20,€3.00m,Diego Gómez,Other,COL,AM (RLC),21/10/1988 (31 years old),...,1,0,0,40.8,1626.6,1156.0,0,0,0,1
18197,Harvey Neville,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,21,€175k,Harvey Neville,Man Utd,IRL,D (R),26/6/2002 (18 years old),...,0,0,0,93.8,1406.94,1717.0,0,0,0,1
68559,Lionel Messi,Inter Miami CF,https://img.a.transfermarkt.technology/portrai...,36,€35.00m,Lionel Messi,Barcelona,ARG,"AM (RC), ST (C)",24/6/1987 (33 years old),...,0,1,0,90.8,1861.29,1851.0,0,0,1,0


In [11]:
final_df.columns

Index(['name_x', 'club_x', 'profile_image', 'year', 'market_value', 'name_y',
       'club_y', 'nat', 'position', 'dob', 'age', 'height', 'weight', 'wage',
       'last trans. fee', 'value', 'agg', 'jum', 'pun', 'vis', 'l th', 'lon',
       'otb', 'tck', 'tec', 'tea', 'cmp', 'fre', 'ref', 'pos', 'pen', 'pas',
       'fla', 'ant', 'cro', 'mar', 'ldr', 'cor', 'cnt', 'det', 'dec', 'hea',
       'fir', 'com', 'acc', 'pac', 'aer', 'str', 'thr', 'han', 'ecc', 'dri',
       'bal', 'kic', 'sta', 'agi', 'wor', 'bra', 'cmd', 'fin', '1v1', 'tro',
       'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
       'winger', 'division_rating', 'nat_rating', 'club_rating', 'either_left',
       'either_right', 'left', 'right'],
      dtype='object')

In [12]:
def fill_none_with_values(df):
    for index, row in df.iterrows():
        if row['name_x'] == 'None' or row['club_x'] == 'None':
            df.at[index, 'name_x'] = row['name_y']
            df.at[index, 'club_x'] = row['club_y']

    return df

In [13]:
final_df = fill_none_with_values(final_df)

In [14]:
final_df

Unnamed: 0,name_x,club_x,profile_image,year,market_value,name_y,club_y,nat,position,dob,...,midfielder,striker,winger,division_rating,nat_rating,club_rating,either_left,either_right,left,right
0,Kevin De Bruyne,Manchester City,https://img.a.transfermarkt.technology/portrai...,32,€70.00m,Kevin De Bruyne,Man City,BEL,"M (RLC), AM (C)",28/6/1991 (29 years old),...,1,0,0,93.8,1793.71,2013.0,0,1,0,0
1,Harry Kane,Bayern Munich,https://img.a.transfermarkt.technology/portrai...,30,€110.00m,Harry Kane,Tottenham,ENG,"AM (C), ST (C)",28/7/1993 (27 years old),...,0,1,0,93.8,1807.88,1837.0,1,0,0,0
2,Mohamed Salah,Liverpool FC,https://img.a.transfermarkt.technology/portrai...,31,€65.00m,Mohamed Salah,Liverpool,EGY,"AM (RL), ST (C)",15/6/1992 (28 years old),...,0,0,1,93.8,1511.95,2042.0,0,0,1,0
3,Sadio Mané,Al-Nassr FC,https://img.a.transfermarkt.technology/portrai...,31,€25.00m,Sadio Mané,Liverpool,SEN,"AM (RL), ST (C)",10/4/1992 (28 years old),...,0,0,1,93.8,1600.82,2042.0,0,0,0,1
4,Son Heung-Min,Tottenham,,,,Son Heung-Min,Tottenham,KOR,"M/AM (RL), ST (C)",8/7/1992 (28 years old),...,0,0,1,93.8,1540.35,1837.0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,Nick Brown,Charlotte FC Academy,,,,Nick Brown,Charlotte FC Academy,USA,ST (C),13/12/2004 (15 years old),...,0,1,0,40.8,1675.89,1156.0,0,0,0,1
174320,Luis Alcalá,Charlotte FC Academy,,,,Luis Alcalá,Charlotte FC Academy,USA,ST (C),11/5/2004 (16 years old),...,0,1,0,40.8,1675.89,1156.0,0,0,0,1
174321,Devin Lee,Charlotte FC Academy,,,,Devin Lee,Charlotte FC Academy,USA,D (C),4/4/2003 (17 years old),...,0,0,0,40.8,1675.89,1156.0,0,0,0,1
174322,Yeferson Suárez,Charlotte FC Academy,,,,Yeferson Suárez,Charlotte FC Academy,USA,M (C),4/1/2004 (16 years old),...,1,0,0,40.8,1675.89,1156.0,0,0,0,1


In [15]:
final_df.columns

Index(['name_x', 'club_x', 'profile_image', 'year', 'market_value', 'name_y',
       'club_y', 'nat', 'position', 'dob', 'age', 'height', 'weight', 'wage',
       'last trans. fee', 'value', 'agg', 'jum', 'pun', 'vis', 'l th', 'lon',
       'otb', 'tck', 'tec', 'tea', 'cmp', 'fre', 'ref', 'pos', 'pen', 'pas',
       'fla', 'ant', 'cro', 'mar', 'ldr', 'cor', 'cnt', 'det', 'dec', 'hea',
       'fir', 'com', 'acc', 'pac', 'aer', 'str', 'thr', 'han', 'ecc', 'dri',
       'bal', 'kic', 'sta', 'agi', 'wor', 'bra', 'cmd', 'fin', '1v1', 'tro',
       'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
       'winger', 'division_rating', 'nat_rating', 'club_rating', 'either_left',
       'either_right', 'left', 'right'],
      dtype='object')

In [16]:
def extract_date(input_string):
# Use regular expression to extract the date
    date_match = re.search(r'\b\d{1,2}/\d{1,2}/\d{4}\b', input_string)

    if date_match:
        return date_match.group()
    else:
        return None

final_df.dob = final_df.dob.apply(extract_date)

In [17]:
final_df

Unnamed: 0,name_x,club_x,profile_image,year,market_value,name_y,club_y,nat,position,dob,...,midfielder,striker,winger,division_rating,nat_rating,club_rating,either_left,either_right,left,right
0,Kevin De Bruyne,Manchester City,https://img.a.transfermarkt.technology/portrai...,32,€70.00m,Kevin De Bruyne,Man City,BEL,"M (RLC), AM (C)",28/6/1991,...,1,0,0,93.8,1793.71,2013.0,0,1,0,0
1,Harry Kane,Bayern Munich,https://img.a.transfermarkt.technology/portrai...,30,€110.00m,Harry Kane,Tottenham,ENG,"AM (C), ST (C)",28/7/1993,...,0,1,0,93.8,1807.88,1837.0,1,0,0,0
2,Mohamed Salah,Liverpool FC,https://img.a.transfermarkt.technology/portrai...,31,€65.00m,Mohamed Salah,Liverpool,EGY,"AM (RL), ST (C)",15/6/1992,...,0,0,1,93.8,1511.95,2042.0,0,0,1,0
3,Sadio Mané,Al-Nassr FC,https://img.a.transfermarkt.technology/portrai...,31,€25.00m,Sadio Mané,Liverpool,SEN,"AM (RL), ST (C)",10/4/1992,...,0,0,1,93.8,1600.82,2042.0,0,0,0,1
4,Son Heung-Min,Tottenham,,,,Son Heung-Min,Tottenham,KOR,"M/AM (RL), ST (C)",8/7/1992,...,0,0,1,93.8,1540.35,1837.0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,Nick Brown,Charlotte FC Academy,,,,Nick Brown,Charlotte FC Academy,USA,ST (C),13/12/2004,...,0,1,0,40.8,1675.89,1156.0,0,0,0,1
174320,Luis Alcalá,Charlotte FC Academy,,,,Luis Alcalá,Charlotte FC Academy,USA,ST (C),11/5/2004,...,0,1,0,40.8,1675.89,1156.0,0,0,0,1
174321,Devin Lee,Charlotte FC Academy,,,,Devin Lee,Charlotte FC Academy,USA,D (C),4/4/2003,...,0,0,0,40.8,1675.89,1156.0,0,0,0,1
174322,Yeferson Suárez,Charlotte FC Academy,,,,Yeferson Suárez,Charlotte FC Academy,USA,M (C),4/1/2004,...,1,0,0,40.8,1675.89,1156.0,0,0,0,1


In [18]:
df = final_df.copy()

In [19]:
# Convert 'dob' column to datetime
df['dob'] = pd.to_datetime(df['dob'], errors='coerce')

  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df

  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df

  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df

  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df

  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df

  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df

  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df

  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df['dob'], errors='coerce')
  df['dob'] = pd.to_datetime(df

In [20]:
# Convert 'dob' column to datetime
df['dob'] = pd.to_datetime(df['dob'])

# Calculate the age based on the current date
current_date = datetime.now()
df['age_in_days'] = (current_date - df['dob'])

# Convert age in days to years
df['current_age'] = df['age_in_days'] / 365.25  # Allowing for leap years

# Convert 'dob' column to string
df['current_age'] = df['current_age'].astype(str)


# Extract the days portion and convert it to an integer
df['current_age'] = df['current_age'].str.extract('(\d+)').astype(int)

In [21]:
df.current_age

0         32
1         30
2         31
3         31
4         31
          ..
174319    18
174320    19
174321    20
174322    19
174323    19
Name: current_age, Length: 174324, dtype: int64

In [22]:
df.columns

Index(['name_x', 'club_x', 'profile_image', 'year', 'market_value', 'name_y',
       'club_y', 'nat', 'position', 'dob', 'age', 'height', 'weight', 'wage',
       'last trans. fee', 'value', 'agg', 'jum', 'pun', 'vis', 'l th', 'lon',
       'otb', 'tck', 'tec', 'tea', 'cmp', 'fre', 'ref', 'pos', 'pen', 'pas',
       'fla', 'ant', 'cro', 'mar', 'ldr', 'cor', 'cnt', 'det', 'dec', 'hea',
       'fir', 'com', 'acc', 'pac', 'aer', 'str', 'thr', 'han', 'ecc', 'dri',
       'bal', 'kic', 'sta', 'agi', 'wor', 'bra', 'cmd', 'fin', '1v1', 'tro',
       'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
       'winger', 'division_rating', 'nat_rating', 'club_rating', 'either_left',
       'either_right', 'left', 'right', 'age_in_days', 'current_age'],
      dtype='object')

In [23]:
good_df = df[['name_y', 'club_x', 'nat','profile_image', 'value', 'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
   'winger', 'either_left','either_right', 'left', 'right','current_age']]

In [24]:
good_df.head(20)

Unnamed: 0,name_y,club_x,nat,profile_image,value,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right,current_age
0,Kevin De Bruyne,Manchester City,BEL,https://img.a.transfermarkt.technology/portrai...,94000000.0,0,0,0,1,0,0,0,1,0,0,32
1,Harry Kane,Bayern Munich,ENG,https://img.a.transfermarkt.technology/portrai...,88000000.0,0,0,0,0,1,0,1,0,0,0,30
2,Mohamed Salah,Liverpool FC,EGY,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,1,0,31
3,Sadio Mané,Al-Nassr FC,SEN,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,0,1,31
4,Son Heung-Min,Tottenham,KOR,,77000000.0,0,0,0,0,0,1,1,0,0,0,31
5,Raheem Sterling,Chelsea FC,ENG,https://img.a.transfermarkt.technology/portrai...,76000000.0,0,0,0,0,0,1,0,0,0,1,29
6,Bernardo Silva,Manchester City,POR,https://img.a.transfermarkt.technology/portrai...,76000000.0,0,0,0,1,0,0,0,0,1,0,29
7,Roberto Firmino,Al-Ahli SFC,BRA,https://img.a.transfermarkt.technology/portrai...,73000000.0,0,0,0,0,1,0,0,0,0,1,32
8,Virgil van Dijk,Liverpool FC,NED,https://img.a.transfermarkt.technology/portrai...,71000000.0,1,0,0,0,0,0,0,0,0,1,32
9,Bruno Fernandes,Manchester United,POR,https://img.a.transfermarkt.technology/portrai...,69000000.0,0,0,0,1,0,0,0,0,0,1,29


In [25]:
good_df.columns = ['name_y', 'club_name', 'nat','profile_image', 'value', 'centerback', 'fullback', 'goalkeeper', 'midfielder', 'striker',
   'winger', 'either_left','either_right', 'left', 'right','current_age']

In [26]:
good_df

Unnamed: 0,name_y,club_name,nat,profile_image,value,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right,current_age
0,Kevin De Bruyne,Manchester City,BEL,https://img.a.transfermarkt.technology/portrai...,94000000.0,0,0,0,1,0,0,0,1,0,0,32
1,Harry Kane,Bayern Munich,ENG,https://img.a.transfermarkt.technology/portrai...,88000000.0,0,0,0,0,1,0,1,0,0,0,30
2,Mohamed Salah,Liverpool FC,EGY,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,1,0,31
3,Sadio Mané,Al-Nassr FC,SEN,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,0,1,31
4,Son Heung-Min,Tottenham,KOR,,77000000.0,0,0,0,0,0,1,1,0,0,0,31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,Nick Brown,Charlotte FC Academy,USA,,110.0,0,0,0,0,1,0,0,0,0,1,18
174320,Luis Alcalá,Charlotte FC Academy,USA,,110.0,0,0,0,0,1,0,0,0,0,1,19
174321,Devin Lee,Charlotte FC Academy,USA,,110.0,1,0,0,0,0,0,0,0,0,1,20
174322,Yeferson Suárez,Charlotte FC Academy,USA,,110.0,0,0,0,1,0,0,0,0,0,1,19


In [27]:
clubs = pd.read_csv("../raw_data/clubs_logo.csv")

In [28]:
clubs

Unnamed: 0,club_name,club_logo,fm_name
0,Manchester City,https://tmssl.akamaized.net/images/wappen/smal...,Man City
1,Tottenham Hotspur,https://tmssl.akamaized.net/images/wappen/smal...,Tottenham
2,Liverpool FC,https://tmssl.akamaized.net/images/wappen/smal...,Liverpool
3,Manchester United,https://tmssl.akamaized.net/images/wappen/smal...,Man Utd
4,Aston Villa,https://tmssl.akamaized.net/images/wappen/smal...,Aston Villa
...,...,...,...
3735,Daytona Rush SC,https://tmssl.akamaized.net/images/wappen/smal...,Daytona Rush
3736,,,Eastside FC
3737,OVF Alliance,https://tmssl.akamaized.net/images/wappen/smal...,OVF Alliance
3738,Austin FC Academy,https://tmssl.akamaized.net/images/wappen/smal...,Austin FC Academy


In [63]:
good_df.club_name[4]

'Tottenham'

In [86]:
clubs[clubs.fm_name == 'Al-Nassr (KSA)'].club_logo

2989    None
Name: club_logo, dtype: object

if club_name in good_df == club_name or fm_name in the clubs dataframe than 
set the club_link column to club_logo of the clubs dataframe

In [58]:
def make_links(df, clubs):
    links = []
    for club in df.club_name:
        if club == clubs.fm_name:
            result = clubs[clubs.fm_name == club].club_logo.iloc[0]
        elif club == clubs.club_name:
            result = clubs[clubs.club_name == club].club_logo.iloc[0]
        return links.append(result)
    
    return links
    

In [59]:
good_df[:5]

Unnamed: 0,name_y,club_name,nat,profile_image,value,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right,current_age
0,Kevin De Bruyne,Manchester City,BEL,https://img.a.transfermarkt.technology/portrai...,94000000.0,0,0,0,1,0,0,0,1,0,0,32
1,Harry Kane,Bayern Munich,ENG,https://img.a.transfermarkt.technology/portrai...,88000000.0,0,0,0,0,1,0,1,0,0,0,30
2,Mohamed Salah,Liverpool FC,EGY,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,1,0,31
3,Sadio Mané,Al-Nassr FC,SEN,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,0,1,31
4,Son Heung-Min,Tottenham,KOR,,77000000.0,0,0,0,0,0,1,1,0,0,0,31


In [70]:
def make_links2(df, clubs):
    links = []
    for club in df.club_name:
        if club in clubs.fm_name.values:
            result = clubs[clubs.fm_name == club].club_logo.iloc[0]
        elif club in clubs.club_name.values:
            result = clubs[clubs.club_name == club].club_logo.iloc[0]
        else:
            result = None  # Handle the case where no match is found
        links.append(result)
    
    return links

In [73]:
club_links = make_links2(good_df, clubs)

In [74]:
good_df['club_image'] = club_links

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  good_df['club_image'] = club_links


In [76]:
good_df.head(20)

Unnamed: 0,name_y,club_name,nat,profile_image,value,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right,current_age,club_image
0,Kevin De Bruyne,Manchester City,BEL,https://img.a.transfermarkt.technology/portrai...,94000000.0,0,0,0,1,0,0,0,1,0,0,32,https://tmssl.akamaized.net/images/wappen/smal...
1,Harry Kane,Bayern Munich,ENG,https://img.a.transfermarkt.technology/portrai...,88000000.0,0,0,0,0,1,0,1,0,0,0,30,https://tmssl.akamaized.net/images/wappen/smal...
2,Mohamed Salah,Liverpool FC,EGY,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,1,0,31,https://tmssl.akamaized.net/images/wappen/smal...
3,Sadio Mané,Al-Nassr FC,SEN,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,0,1,31,
4,Son Heung-Min,Tottenham,KOR,,77000000.0,0,0,0,0,0,1,1,0,0,0,31,https://tmssl.akamaized.net/images/wappen/smal...
5,Raheem Sterling,Chelsea FC,ENG,https://img.a.transfermarkt.technology/portrai...,76000000.0,0,0,0,0,0,1,0,0,0,1,29,
6,Bernardo Silva,Manchester City,POR,https://img.a.transfermarkt.technology/portrai...,76000000.0,0,0,0,1,0,0,0,0,1,0,29,https://tmssl.akamaized.net/images/wappen/smal...
7,Roberto Firmino,Al-Ahli SFC,BRA,https://img.a.transfermarkt.technology/portrai...,73000000.0,0,0,0,0,1,0,0,0,0,1,32,
8,Virgil van Dijk,Liverpool FC,NED,https://img.a.transfermarkt.technology/portrai...,71000000.0,1,0,0,0,0,0,0,0,0,1,32,https://tmssl.akamaized.net/images/wappen/smal...
9,Bruno Fernandes,Manchester United,POR,https://img.a.transfermarkt.technology/portrai...,69000000.0,0,0,0,1,0,0,0,0,0,1,29,https://tmssl.akamaized.net/images/wappen/smal...


In [87]:
xmv = pd.read_csv("../raw_data/df_with_XMV.csv")

In [88]:
xmv

Unnamed: 0,name,club,nat,position,dob,age,height,weight,wage,last trans. fee,...,winger,division_rating,nat_rating,club_rating,either_left,either_right,left,right,log_XMV,XMV
0,Kevin De Bruyne,Man City,BEL,"M (RLC), AM (C)",28/6/1991 (29 years old),29.0,181.0,68.0,1091000.0,60000000.0,...,0,93.8,1793.71,2013.0,0,1,0,0,18.780400,1.432926e+08
1,Harry Kane,Tottenham,ENG,"AM (C), ST (C)",28/7/1993 (27 years old),26.0,188.0,86.0,948000.0,0.0,...,0,93.8,1807.88,1837.0,1,0,0,0,18.609700,1.208063e+08
2,Mohamed Salah,Liverpool,EGY,"AM (RL), ST (C)",15/6/1992 (28 years old),28.0,175.0,72.0,948000.0,37500000.0,...,1,93.8,1511.95,2042.0,0,0,1,0,18.454720,1.034625e+08
3,Sadio Mané,Liverpool,SEN,"AM (RL), ST (C)",10/4/1992 (28 years old),28.0,175.0,69.0,854000.0,40000000.0,...,1,93.8,1600.82,2042.0,0,0,0,1,17.908072,5.989308e+07
4,Son Heung-Min,Tottenham,KOR,"M/AM (RL), ST (C)",8/7/1992 (28 years old),28.0,183.0,77.0,450000.0,24000000.0,...,1,93.8,1540.35,1837.0,1,0,0,0,17.961506,6.318048e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,Nick Brown,Charlotte FC Academy,USA,ST (C),13/12/2004 (15 years old),15.0,181.0,65.0,9.0,0.0,...,0,40.8,1675.89,1156.0,0,0,0,1,3.883437,4.859093e+01
174320,Luis Alcalá,Charlotte FC Academy,USA,ST (C),11/5/2004 (16 years old),16.0,178.0,65.0,9.0,0.0,...,0,40.8,1675.89,1156.0,0,0,0,1,4.532295,9.297166e+01
174321,Devin Lee,Charlotte FC Academy,USA,D (C),4/4/2003 (17 years old),17.0,196.0,82.0,9.0,0.0,...,0,40.8,1675.89,1156.0,0,0,0,1,4.509244,9.085310e+01
174322,Yeferson Suárez,Charlotte FC Academy,USA,M (C),4/1/2004 (16 years old),16.0,162.0,55.0,9.0,0.0,...,0,40.8,1675.89,1156.0,0,0,0,1,4.691098,1.089727e+02


In [89]:
xmv.XMV

0         1.432926e+08
1         1.208063e+08
2         1.034625e+08
3         5.989308e+07
4         6.318048e+07
              ...     
174319    4.859093e+01
174320    9.297166e+01
174321    9.085310e+01
174322    1.089727e+02
174323    2.017134e+02
Name: XMV, Length: 174324, dtype: float64

In [90]:
good_df['xmv'] = xmv.XMV

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  good_df['xmv'] = xmv.XMV


In [91]:
good_df

Unnamed: 0,name_y,club_name,nat,profile_image,value,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right,current_age,club_image,xmv
0,Kevin De Bruyne,Manchester City,BEL,https://img.a.transfermarkt.technology/portrai...,94000000.0,0,0,0,1,0,0,0,1,0,0,32,https://tmssl.akamaized.net/images/wappen/smal...,1.432926e+08
1,Harry Kane,Bayern Munich,ENG,https://img.a.transfermarkt.technology/portrai...,88000000.0,0,0,0,0,1,0,1,0,0,0,30,https://tmssl.akamaized.net/images/wappen/smal...,1.208063e+08
2,Mohamed Salah,Liverpool FC,EGY,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,1,0,31,https://tmssl.akamaized.net/images/wappen/smal...,1.034625e+08
3,Sadio Mané,Al-Nassr FC,SEN,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,0,1,31,,5.989308e+07
4,Son Heung-Min,Tottenham,KOR,,77000000.0,0,0,0,0,0,1,1,0,0,0,31,https://tmssl.akamaized.net/images/wappen/smal...,6.318048e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174319,Nick Brown,Charlotte FC Academy,USA,,110.0,0,0,0,0,1,0,0,0,0,1,18,https://tmssl.akamaized.net/images/wappen/smal...,4.859093e+01
174320,Luis Alcalá,Charlotte FC Academy,USA,,110.0,0,0,0,0,1,0,0,0,0,1,19,https://tmssl.akamaized.net/images/wappen/smal...,9.297166e+01
174321,Devin Lee,Charlotte FC Academy,USA,,110.0,1,0,0,0,0,0,0,0,0,1,20,https://tmssl.akamaized.net/images/wappen/smal...,9.085310e+01
174322,Yeferson Suárez,Charlotte FC Academy,USA,,110.0,0,0,0,1,0,0,0,0,0,1,19,https://tmssl.akamaized.net/images/wappen/smal...,1.089727e+02


In [93]:
good_df.to_csv("../raw_data/final_df.csv")

In [98]:
good_df.head(225)

Unnamed: 0,name_y,club_name,nat,profile_image,value,centerback,fullback,goalkeeper,midfielder,striker,winger,either_left,either_right,left,right,current_age,club_image,xmv
0,Kevin De Bruyne,Manchester City,BEL,https://img.a.transfermarkt.technology/portrai...,94000000.0,0,0,0,1,0,0,0,1,0,0,32,https://tmssl.akamaized.net/images/wappen/smal...,143292608.0
1,Harry Kane,Bayern Munich,ENG,https://img.a.transfermarkt.technology/portrai...,88000000.0,0,0,0,0,1,0,1,0,0,0,30,https://tmssl.akamaized.net/images/wappen/smal...,120806336.0
2,Mohamed Salah,Liverpool FC,EGY,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,1,0,31,https://tmssl.akamaized.net/images/wappen/smal...,103462472.0
3,Sadio Mané,Al-Nassr FC,SEN,https://img.a.transfermarkt.technology/portrai...,84000000.0,0,0,0,0,0,1,0,0,0,1,31,,59893076.0
4,Son Heung-Min,Tottenham,KOR,,77000000.0,0,0,0,0,0,1,1,0,0,0,31,https://tmssl.akamaized.net/images/wappen/smal...,63180480.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,Ashley Barnes,Norwich City,ENG,https://img.a.transfermarkt.technology/portrai...,18250000.0,0,0,0,0,0,1,0,0,0,1,34,https://tmssl.akamaized.net/images/wappen/smal...,7510230.0
221,Brandon Williams,Ipswich Town,ENG,https://img.a.transfermarkt.technology/portrai...,18000000.0,0,1,0,0,0,0,0,0,0,1,23,https://tmssl.akamaized.net/images/wappen/smal...,13879331.0
222,Adam Webster,Brighton & Hove Albion,ENG,https://img.a.transfermarkt.technology/portrai...,17750000.0,1,0,0,0,0,0,0,1,0,0,28,https://tmssl.akamaized.net/images/wappen/smal...,17065880.0
223,Jan Bednarek,Southampton FC,POL,https://img.a.transfermarkt.technology/portrai...,17750000.0,1,0,0,0,0,0,0,0,0,1,27,https://tmssl.akamaized.net/images/wappen/smal...,15457784.0
