In [48]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PowerTransformer, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import pickle

# Read the raw training data; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv('fifa21_train.csv')



#### Subfunctions (per-column cleaning helpers)

In [49]:
def standadize(df2):
    """Return ``df2`` with normalized column labels.

    Every column label is lower-cased and its spaces are replaced with
    underscores. A label that normalizes to ``team_&_contract`` is renamed
    to ``team_contract``.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame whose column labels are strings.

    Returns
    -------
    pandas.DataFrame
        A new frame carrying the normalized labels. The frame passed in
        keeps its original labels.
    """
    def _normalize(label):
        label = label.lower().replace(' ', '_')
        return 'team_contract' if label == 'team_&_contract' else label

    # ``rename`` with a callable builds the new labels on a fresh frame, so
    # the caller's object is not modified as a side effect.
    return df2.rename(columns=_normalize)


def add(df):
    """Recode the 27 position-rating columns as integers, in place.

    For each listed column, every value is indexed like a string: its
    second character and its first character are each stripped of
    whitespace, concatenated in that order, and parsed with ``int``. The
    column is overwritten with the parsed values and the same frame object
    is returned.

    NOTE(review): the second character is placed *before* the first and
    everything after the second character is ignored, so ``'65+2'`` becomes
    ``56``. Confirm this is the intended encoding.
    """
    position_columns = (
        'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram',
        'lm', 'lcm', 'cm', 'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb',
        'lb', 'lcb', 'cb', 'rcb', 'rb', 'gk',
    )
    for name in position_columns:
        df[name] = [int(cell[1].strip() + cell[0].strip()) for cell in df[name]]

    return df


def lbs(df2):
    """Turn the ``weight`` column into integers by removing ``'lbs'``.

    Each value has every occurrence of the substring ``'lbs'`` removed and
    is then parsed with ``int``. The column is overwritten on ``df2`` and
    the same frame object is returned.
    """
    def _pounds(text):
        return int(text.replace('lbs', ''))

    df2['weight'] = list(map(_pounds, df2['weight']))
    return df2


def finance(df2):
    """Convert the abbreviated amount columns to plain floats, in place.

    Handles ``wage``, ``hits``, ``release_clause`` and ``value``. A string
    value has its ``'€'`` sign removed; a trailing ``'M'`` multiplies the
    remaining number by 1,000,000 and a trailing ``'K'`` by 1,000. Values
    that are not strings (already numeric, or NaN) are passed through
    ``float`` unchanged.

    The suffix is applied as a real multiplier rather than by deleting the
    decimal point and appending zeros: the zero-padding approach is only
    correct for ``M`` values with exactly one decimal digit and ``K`` values
    with none (it turned ``'€2M'`` into 200000 and ``'1.6K'`` into 16000).

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame containing the four columns listed above.

    Returns
    -------
    pandas.DataFrame
        The same frame object, with the four columns overwritten.

    Raises
    ------
    decimal.InvalidOperation
        If a string value is not a number once the sign and suffix are
        removed.
    """
    # Local import keeps the block self-contained; Decimal scales the parsed
    # number exactly, so '4.35K' cannot pick up binary rounding noise.
    from decimal import Decimal

    multipliers = {'M': 1000000, 'K': 1000}

    def _to_number(value):
        if not isinstance(value, str):
            return float(value)
        text = value.replace('€', '').strip()
        factor = 1
        if text and text[-1] in multipliers:
            factor = multipliers[text[-1]]
            text = text[:-1]
        return float(Decimal(text) * factor)

    for name in ['wage', 'hits', 'release_clause', 'value']:
        df2[name] = [_to_number(cell) for cell in df2[name]]
    return df2
        

def star(df2):
    """Convert the star-rating columns ``w/f``, ``sm`` and ``ir`` to floats.

    A string value has any ``'★'`` characters removed and surrounding
    whitespace stripped before being parsed with ``float``. Values that are
    not strings (a column pandas already read as numeric) go straight
    through ``float``.

    The branch for values without a star previously referenced an undefined
    name (``elm``) and raised ``NameError``; both cases now share one code
    path.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame containing the three columns listed above.

    Returns
    -------
    pandas.DataFrame
        The same frame object, with the three columns overwritten.
    """
    def _rating(value):
        if isinstance(value, str):
            # Removing '★' is a no-op when the character is absent, so no
            # separate branch is needed for plain numeric strings.
            return float(value.replace('★', '').strip())
        return float(value)

    for name in ['w/f', 'sm', 'ir']:
        df2[name] = [_rating(cell) for cell in df2[name]]
    return df2


def nan(df2):
    """Handle missing values and drop the ``loan_date_end`` column.

    Steps, in order:

    1. Drop every row that has a missing value in any of the required
       columns listed below.
    2. Fill missing ``composure`` values with the mean of ``composure``
       computed over the rows that survived step 1.
    3. Drop the ``loan_date_end`` column.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame containing the required columns, ``composure`` and
        ``loan_date_end``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is not modified.

    Raises
    ------
    KeyError
        If any required column, ``composure`` or ``loan_date_end`` is absent.
    """
    required = ['club', 'position', 'joined', 'volleys', 'curve', 'agility',
                'balance', 'jumping', 'interceptions', 'vision',
                'sliding_tackle', 'a/w', 'd/w']

    # Take an explicit copy of the filtered rows so the column assignment
    # below writes to an independent frame instead of a slice of the input
    # (which triggers pandas' chained-assignment warning).
    cleaned = df2.dropna(subset=required).copy()

    # Dealing with composure: impute with the mean of the remaining rows.
    cleaned['composure'] = cleaned['composure'].fillna(cleaned['composure'].mean())

    # NOTE(review): this function used to fill missing 'position' from 'bp'
    # at this point, but rows with a missing 'position' are already dropped
    # above, so that fill could never change anything and has been removed.
    # If the fallback to 'bp' is what is actually wanted, take 'position'
    # out of ``required`` and fill it before dropping rows.

    return cleaned.drop(columns='loan_date_end')


def cleanhight(df2):
    """Convert the ``height`` column from feet'inches text to centimetres.

    Double-quote characters are removed from each value, the text is split
    on the single quote, and the first two pieces are parsed as whole feet
    and whole inches. The column is overwritten with
    ``feet * 30.48 + inches * 2.54`` and the same frame object is returned.
    """
    def _without_inch_mark(text):
        return text.replace('"', '') if '"' in text else text

    pieces = df2['height'].map(_without_inch_mark).map(lambda text: text.split("'"))

    centimetres_from_feet = pieces.map(lambda parts: int(parts[0])) * 30.48
    centimetres_from_inches = pieces.map(lambda parts: int(parts[1])) * 2.54

    df2['height'] = centimetres_from_feet + centimetres_from_inches
    return df2





#### Clean Function

In [50]:
def clean_dataset(df):
    """Run every cleaning helper over a copy of ``df`` and return the result.

    The helpers are applied in a fixed order, each receiving the previous
    helper's return value: ``standadize``, ``nan``, ``finance``, ``add``,
    ``lbs``, ``star``, ``cleanhight``. The pipeline starts from
    ``df.copy()``, not from ``df`` itself.
    """
    pipeline = (standadize, nan, finance, add, lbs, star, cleanhight)

    cleaned = df.copy()
    for step in pipeline:
        cleaned = step(cleaned)

    return cleaned

# Build the cleaned frame used by the cells below; clean_dataset starts from
# a copy, so the raw `df` stays available.
dfn = clean_dataset(df)


#### Linear Model

In [51]:
def linear_model(df, model_path='model_group4.sav'):
    """Score a pickled model on a held-out split of ``df``.

    The features ``reactions``, ``base_stats`` and ``composure`` and the
    target ``ova`` are split 70/30 with ``random_state=100``. A
    ``PowerTransformer`` followed by a ``MinMaxScaler`` is fitted on the
    training features and applied to the test features. The model is then
    unpickled from ``model_path`` and its ``score`` method is called on the
    transformed test split.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned frame containing the four columns named above.
    model_path : str, optional
        Location of the pickled model. Defaults to ``'model_group4.sav'``.

    Returns
    -------
    The value returned by ``model.score`` (presumably R² if the pickle
    holds a scikit-learn regressor; the stored type is not visible here).
    The value is also printed.
    """
    features = df[['reactions', 'base_stats', 'composure']]
    target = df[['ova']]

    # Only the test targets are needed: the model is loaded, not trained.
    X_train, X_test, _, y_test = train_test_split(
        features, target, test_size=0.3, random_state=100)

    # NOTE(review): the transformers are re-fitted here instead of being
    # loaded with the model. The score is only meaningful if the model was
    # trained on features transformed the same way from the same split.
    pt = PowerTransformer()
    pt.fit(X_train)
    X_train_pt = pt.transform(X_train)
    X_test_pt = pt.transform(X_test)

    scaler = MinMaxScaler()
    scaler.fit(X_train_pt)
    X_test_pt_scaled = scaler.transform(X_test_pt)

    # SECURITY: unpickling runs arbitrary code from the file, so only load
    # model files from a trusted source. The context manager makes sure the
    # file handle is closed even if unpickling fails.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    result = model.score(X_test_pt_scaled, y_test)
    print(result)

    return result
