In [47]:
import pandas as pd
import seaborn as sns
import numpy as np

from pandas.api.types import CategoricalDtype

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVR, SVC

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score, accuracy_score

import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation and convergence warnings raised by the
# libraries imported above. Consider narrowing it to a specific category.
warnings.filterwarnings('ignore')

In [48]:
# Location of the Euro 2012 team statistics CSV (raw file hosted on GitHub).
EURO_2012_TEAM_STATS_URL = (
    "https://raw.githubusercontent.com/Gabvaztor/data_science_apr_2021/main/"
    "week10_ML_competition_pca_kmeans/day1_gridsearch_pipelines/data/"
    "Euro_2012_stats_TEAM.csv"
)

# Load the CSV into a DataFrame and print its column/dtype summary.
df = pd.read_csv(EURO_2012_TEAM_STATS_URL)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 35 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Team                        16 non-null     object 
 1   Goals                       16 non-null     int64  
 2   Shots on target             16 non-null     int64  
 3   Shots off target            16 non-null     int64  
 4   Shooting Accuracy           16 non-null     object 
 5   % Goals-to-shots            16 non-null     object 
 6   Total shots (inc. Blocked)  16 non-null     int64  
 7   Hit Woodwork                16 non-null     int64  
 8   Penalty goals               16 non-null     int64  
 9   Penalties not scored        16 non-null     int64  
 10  Headed goals                16 non-null     int64  
 11  Passes                      16 non-null     int64  
 12  Passes completed            16 non-null     int64  
 13  Passing Accuracy            16 non-nu

In [49]:
# Swap each team name for an integer code. The fitted encoder is kept in
# `le` so the original names remain recoverable from it.
le = LabelEncoder()
df["Team"] = le.fit_transform(df["Team"])

# Show the encoded column.
df["Team"]

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
Name: Team, dtype: int32

In [50]:
# Collect the names of every column whose dtype is "object", in frame order.
col_obj_col = [name for name, dtype in df.dtypes.items() if dtype == "object"]
col_obj_col

['Shooting Accuracy',
 '% Goals-to-shots',
 'Passing Accuracy',
 'Saves-to-shots ratio']

In [51]:
# Inspect one of the object columns: the values are strings ending in "%".
df["Shooting Accuracy"]

0     51.9%
1     41.9%
2     50.0%
3     50.0%
4     37.9%
5     47.8%
6     30.7%
7     43.0%
8     25.0%
9     39.4%
10    34.3%
11    36.8%
12    22.5%
13    55.9%
14    47.2%
15    21.2%
Name: Shooting Accuracy, dtype: object

In [52]:
# Re-print the column summary after encoding `Team`; the percentage columns
# are still dtype object at this point.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 35 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Team                        16 non-null     int32  
 1   Goals                       16 non-null     int64  
 2   Shots on target             16 non-null     int64  
 3   Shots off target            16 non-null     int64  
 4   Shooting Accuracy           16 non-null     object 
 5   % Goals-to-shots            16 non-null     object 
 6   Total shots (inc. Blocked)  16 non-null     int64  
 7   Hit Woodwork                16 non-null     int64  
 8   Penalty goals               16 non-null     int64  
 9   Penalties not scored        16 non-null     int64  
 10  Headed goals                16 non-null     int64  
 11  Passes                      16 non-null     int64  
 12  Passes completed            16 non-null     int64  
 13  Passing Accuracy            16 non-nu

In [53]:
# Convert the percentage columns (strings such as "51.9%") to plain floats.
#
# The values are routed through str and only a trailing "%" is stripped,
# rather than slicing off the last character unconditionally. This keeps the
# cell safe to re-run: a column that is already numeric is re-parsed to the
# same floats instead of raising TypeError on `x[:-1]`, and a value that has
# no "%" suffix no longer silently loses its last digit. Missing values pass
# through as NaN. Text that is not a number still raises ValueError.
for col in col_obj_col:
    df[col] = df[col].astype(str).str.rstrip("%").astype(float)

In [54]:
# Sanity check: list any columns that are still dtype "object".
# An empty list means every percentage column was converted.
[name for name, dtype in df.dtypes.items() if dtype == "object"]

[]

In [55]:
# Show the dtype of every column after the percentage conversion.
df.dtypes

Team                            int32
Goals                           int64
Shots on target                 int64
Shots off target                int64
Shooting Accuracy             float64
% Goals-to-shots              float64
Total shots (inc. Blocked)      int64
Hit Woodwork                    int64
Penalty goals                   int64
Penalties not scored            int64
Headed goals                    int64
Passes                          int64
Passes completed                int64
Passing Accuracy              float64
Touches                         int64
Crosses                         int64
Dribbles                        int64
Corners Taken                   int64
Tackles                         int64
Clearances                      int64
Interceptions                   int64
Clearances off line           float64
Clean Sheets                    int64
Blocks                          int64
Goals conceded                  int64
Saves made                      int64
Saves-to-sho

In [56]:
# Display the cleaned frame (encoded `Team`, percentage columns as floats).
df

Unnamed: 0,Team,Goals,Shots on target,Shots off target,Shooting Accuracy,% Goals-to-shots,Total shots (inc. Blocked),Hit Woodwork,Penalty goals,Penalties not scored,...,Saves made,Saves-to-shots ratio,Fouls Won,Fouls Conceded,Offsides,Yellow Cards,Red Cards,Subs on,Subs off,Players Used
0,0,4,13,12,51.9,16.0,32,0,0,0,...,13,81.3,41,62,2,9,0,9,9,16
1,1,4,13,18,41.9,12.9,39,0,0,0,...,9,60.1,53,73,8,7,0,11,11,19
2,2,4,10,10,50.0,20.0,27,1,0,0,...,10,66.7,25,38,8,4,0,7,7,15
3,3,5,11,18,50.0,17.2,40,0,0,0,...,22,88.1,43,45,6,5,0,11,11,16
4,4,3,22,24,37.9,6.5,65,1,0,0,...,6,54.6,36,51,5,6,0,11,11,19
5,5,10,32,32,47.8,15.6,80,2,1,0,...,10,62.6,63,49,12,4,0,15,15,17
6,6,5,8,18,30.7,19.2,32,1,1,1,...,13,65.1,67,48,12,9,1,12,12,20
7,7,6,34,45,43.0,7.5,110,2,0,0,...,20,74.1,101,89,16,16,0,18,18,19
8,8,2,12,36,25.0,4.1,60,2,0,0,...,12,70.6,35,30,3,5,0,7,7,15
9,9,2,15,23,39.4,5.2,48,0,0,0,...,6,66.7,48,56,3,7,1,7,7,17
