In [None]:
# Imports
import os
import re
import time
import unicodedata
from typing import List, Tuple

import pandas as pd

In [41]:
# Functions
def convert_valuecol(df: pd.DataFrame, col: str) -> List[float]:
    """
    Convert a column of formatted monetary strings to floats in millions of euros.

    Every cell is cast to ``str`` and searched for the first number that is
    directly followed by a unit suffix: ``m`` (millions) or ``k`` (thousands),
    in either case. For example ``'€55.00m'`` -> ``55.0`` and
    ``'€100k'`` -> ``0.1``. Cells that contain no such number (``'-'``,
    ``'free transfer'``, missing values, free text) are mapped to ``0.0``.

    Parameters
    ----------
    df : pd.DataFrame
        dataframe holding the column to convert
    col : str
        column name

    Output
    ------
    value : list of float
        one value per row of ``df[col]``, in row order, in millions of euros
    """
    # A number immediately followed by its unit letter. Tying the unit to the
    # number (instead of testing "'m' in val") keeps stray letters in free text
    # such as 'Unknown' or 'Summer' from being read as a unit; with no digits
    # in the cell that used to raise IndexError. The trailing \b rejects words
    # that merely start with the letter (e.g. '2 months', '5km').
    amount_with_unit = re.compile(r'(\d+(?:\.\d+)?)([mk])\b', re.IGNORECASE)

    # Divisor that brings each unit onto the "millions" scale.
    divisors = {'m': 1.0, 'k': 1e3}

    values = []
    for val in df[col].astype(str):
        match = amount_with_unit.search(val)
        if match is None:
            # No recognisable amount: treated as zero.
            values.append(0.0)
            continue

        amount, unit = match.groups()
        values.append(float(amount) / divisors[unit.lower()])

    return values



In [2]:
# Read the current team table from data/currentteaminfo.csv. The path is built from
# os.getcwd(), so the kernel's working directory must contain the data/ folder.
# NOTE(review): the rendered frame shows an 'Unnamed: 0' column, which suggests the
# CSV was saved together with its index - consider index_col=0 if it is not needed.
teamdf = pd.read_csv(os.getcwd() + '/' + 'data/currentteaminfo.csv')
teamdf

Unnamed: 0,Player,Age,Position,Value (€ in M)
0,David Raya,29,Goalkeeper,40.0
1,Kepa Arrizabalaga,30,Goalkeeper,10.0
2,Karl Hein,23,Goalkeeper,3.0
3,William Saliba,24,Defender,80.0
4,Gabriel Magalhães,27,Defender,75.0
5,Cristhian Mosquera,21,Defender,30.0
6,Jakub Kiwior,25,Defender,28.0
7,Myles Lewis-Skelly,18,Defender,45.0
8,Riccardo Calafiori,23,Defender,35.0
9,Oleksandr Zinchenko,28,Defender,20.0


In [15]:
# Load the market-value history (marketinfo.csv) and the transfer history
# (transferinfo.csv) of a single player, kai-havertz, relative to the working directory.
marktest = pd.read_csv(os.getcwd() + '/' + 'data/players/kai-havertz/marketinfo.csv')
trantest = pd.read_csv(os.getcwd() + '/' + 'data/players/kai-havertz/transferinfo.csv')

# Preview the first rows of the market-value frame.
marktest.head()

Unnamed: 0,y,mw,datum_mw,verein,age
0,100000,€100k,"Jul 1, 2016",Bayer 04 Leverkusen,17
1,1500000,€1.50m,"Feb 7, 2017",Bayer 04 Leverkusen,17
2,5000000,€5.00m,"Mar 22, 2017",Bayer 04 Leverkusen,17
3,8000000,€8.00m,"Jun 19, 2017",Bayer 04 Leverkusen,18
4,18000000,€18.00m,"Dec 28, 2017",Bayer 04 Leverkusen,18


In [16]:
# Inspect the raw column dtypes of the market-value frame before any conversion.
marktest.dtypes

y            int64
mw          object
datum_mw    object
verein      object
age          int64
dtype: object

In [17]:
# `y` is the market value (`mw`) as an integer number of euros: rescale it to
# millions and store it as a float column.
marktest['value (euros in millions)'] = marktest['y'].div(1000000).astype(float)
# Parse the textual `datum_mw` dates into a datetime `Date` column.
marktest['Date'] = pd.to_datetime(marktest['datum_mw'])
# Rename the `verein` column to `Team`.
marktest.rename(columns={'verein': 'Team'}, inplace=True)

marktest.head()

Unnamed: 0,y,mw,datum_mw,Team,age,value (euros in millions),Date
0,100000,€100k,"Jul 1, 2016",Bayer 04 Leverkusen,17,0.1,2016-07-01
1,1500000,€1.50m,"Feb 7, 2017",Bayer 04 Leverkusen,17,1.5,2017-02-07
2,5000000,€5.00m,"Mar 22, 2017",Bayer 04 Leverkusen,17,5.0,2017-03-22
3,8000000,€8.00m,"Jun 19, 2017",Bayer 04 Leverkusen,18,8.0,2017-06-19
4,18000000,€18.00m,"Dec 28, 2017",Bayer 04 Leverkusen,18,18.0,2017-12-28


In [8]:
# Preview the first rows of the transfer-history frame.
trantest.head()


Unnamed: 0,date,season,marketValue,fee,to.clubName
0,"Jul 1, 2023",23/24,€55.00m,€75.00m,Arsenal
1,"Sep 4, 2020",20/21,€81.00m,€80.00m,Chelsea
2,"Jul 1, 2016",16/17,€100k,-,B. Leverkusen
3,"Jul 1, 2014",14/15,-,-,Leverkusen U17
4,"Jul 1, 2010",10/11,-,free transfer,Leverkusen Yth.


In [10]:
# Inspect the raw column dtypes of the transfer-history frame before any conversion.
trantest.dtypes

date           object
season         object
marketValue    object
fee            object
to.clubName    object
dtype: object

In [18]:
# Parse the textual `date` column into a datetime `Date` column and store
# `season` explicitly as strings.
# The formatted `marketValue` and `fee` strings are not touched here; they are
# converted to floats in millions of euros in a separate cell with convert_valuecol.

trantest['Date'] = pd.to_datetime(trantest['date'])
trantest['season'] = trantest['season'].astype(str)

trantest

Unnamed: 0,date,season,marketValue,fee,to.clubName,Date
0,"Jul 1, 2023",23/24,€55.00m,€75.00m,Arsenal,2023-07-01
1,"Sep 4, 2020",20/21,€81.00m,€80.00m,Chelsea,2020-09-04
2,"Jul 1, 2016",16/17,€100k,-,B. Leverkusen,2016-07-01
3,"Jul 1, 2014",14/15,-,-,Leverkusen U17,2014-07-01
4,"Jul 1, 2010",10/11,-,free transfer,Leverkusen Yth.,2010-07-01
5,"Jul 1, 2009",09/10,-,free transfer,Aachen Yth.,2009-07-01


In [42]:
# Add numeric versions of the formatted `marketValue` and `fee` strings, expressed in
# millions of euros. Entries such as '-' or 'free transfer' come out as 0.0.
trantest['marketValue (euros in millions)'] = convert_valuecol(trantest,'marketValue')
trantest['Fee (euros in millions)'] = convert_valuecol(trantest,'fee')
trantest

Unnamed: 0,date,season,marketValue,fee,to.clubName,Date,marketValue (euros in millions),Fee (euros in millions)
0,"Jul 1, 2023",23/24,€55.00m,€75.00m,Arsenal,2023-07-01,55.0,75.0
1,"Sep 4, 2020",20/21,€81.00m,€80.00m,Chelsea,2020-09-04,81.0,80.0
2,"Jul 1, 2016",16/17,€100k,-,B. Leverkusen,2016-07-01,0.1,0.0
3,"Jul 1, 2014",14/15,-,-,Leverkusen U17,2014-07-01,0.0,0.0
4,"Jul 1, 2010",10/11,-,free transfer,Leverkusen Yth.,2010-07-01,0.0,0.0
5,"Jul 1, 2009",09/10,-,free transfer,Aachen Yth.,2009-07-01,0.0,0.0
