# Cristiano Ronaldo Performance Analysis

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Reading the dataset

In [30]:
cr = pd.read_csv('data/Cristiano_Ronaldo/yds_data.csv')
cr.head()
cr7 = cr.copy()

In [3]:
cr7.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30697 entries, 0 to 30696
Data columns (total 28 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             30697 non-null  int64  
 1   match_event_id         29134 non-null  float64
 2   location_x             29236 non-null  float64
 3   location_y             29157 non-null  float64
 4   remaining_min          29135 non-null  float64
 5   power_of_shot          29211 non-null  float64
 6   knockout_match         29180 non-null  float64
 7   game_season            24835 non-null  object 
 8   remaining_sec          29103 non-null  float64
 9   distance_of_shot       29130 non-null  float64
 10  is_goal                24429 non-null  float64
 11  area_of_shot           29195 non-null  object 
 12  shot_basics            29122 non-null  object 
 13  range_of_shot          29133 non-null  object 
 14  team_name              29162 non-null  object 
 15  da

In [4]:
cr7.columns

Index(['Unnamed: 0', 'match_event_id', 'location_x', 'location_y',
       'remaining_min', 'power_of_shot', 'knockout_match', 'game_season',
       'remaining_sec', 'distance_of_shot', 'is_goal', 'area_of_shot',
       'shot_basics', 'range_of_shot', 'team_name', 'date_of_game',
       'home/away', 'shot_id_number', 'lat/lng', 'type_of_shot',
       'type_of_combined_shot', 'match_id', 'team_id', 'remaining_min.1',
       'power_of_shot.1', 'knockout_match.1', 'remaining_sec.1',
       'distance_of_shot.1'],
      dtype='object')

In [5]:
cr7.describe()

Unnamed: 0.1,Unnamed: 0,match_event_id,location_x,location_y,remaining_min,power_of_shot,knockout_match,remaining_sec,distance_of_shot,is_goal,shot_id_number,match_id,team_id,remaining_min.1,power_of_shot.1,knockout_match.1,remaining_sec.1,distance_of_shot.1
count,30697.0,29134.0,29236.0,29157.0,29135.0,29211.0,29180.0,29103.0,29130.0,24429.0,29134.0,30697.0,30697.0,29162.0,29158.0,29204.0,29158.0,29129.0
mean,15348.0,249.576028,7.383876,91.126933,4.883233,2.519359,0.145956,28.329382,33.448884,0.445331,15368.581039,24764070.0,1610613000.0,18.204615,15.994109,16.599402,39.027303,38.801852
std,8861.604943,150.186019,110.263049,87.676395,3.452533,1.153976,0.353068,17.470663,9.369656,0.497013,8866.520772,7755175.0,0.0,29.416973,29.676815,35.172016,29.835284,18.787711
min,0.0,2.0,-250.0,-44.0,0.0,1.0,0.0,0.0,20.0,0.0,1.0,20000010.0,1610613000.0,0.0,1.0,0.0,0.0,9.4
25%,7674.0,111.0,-68.0,4.0,2.0,1.0,0.0,13.0,25.0,0.0,7693.25,20500080.0,1610613000.0,3.0,2.0,0.0,17.0,26.0
50%,15348.0,254.0,0.0,74.0,5.0,3.0,0.0,28.0,35.0,0.0,15378.5,20900350.0,1610613000.0,6.0,3.0,0.0,35.0,36.0
75%,23022.0,369.0,95.0,160.0,8.0,3.0,0.0,43.0,41.0,1.0,23055.75,29600470.0,1610613000.0,11.0,4.0,1.0,52.0,44.0
max,30696.0,659.0,248.0,791.0,11.0,7.0,1.0,59.0,99.0,1.0,30697.0,49900090.0,1610613000.0,128.7616,118.36,141.35232,144.7856,115.728


# Data Manipulation (Pre-processing)

* Dropping unwanted columns 
* Filling nan values with either zero or mean or some other value based on each column.


## Functions for data manipulation

In [6]:
def meanvaluefill(colname = [],inplace = False):
    for i in colname:
        cr7[i] = cr7[i].fillna(cr7[i].mean())
def zerovaluefill(colname = [], inplace = False):
    for i in colname:
        cr7[i] = cr7[i].fillna(0)
def replacewithnum(colname = [], inplace = False):
    for i in colname:
        x = cr7[i].unique()
        cr7[i].replace(to_replace = x,value = np.arange(len(x)),inplace = inplace)
# If season value is nan, we are copying the previous season value to the nan value since the data seems to be by season.
def prevtonext(colname = [], inplace = False):
    for j in colname:
        for i in range(len(cr7[j])):
            if cr7[j].isnull()[i] == True:
                cr7[j][i] = cr7[j][i-1]
            else:
                pass

## Operations for data manipulation

In [7]:
inplace = True

cr7.drop(['Unnamed: 0','remaining_min.1','power_of_shot.1', 'knockout_match.1', 
          'remaining_sec.1','distance_of_shot.1','match_event_id','team_id',
         'shot_id_number','match_id','date_of_game'],inplace = inplace, axis = 1)

zerovaluefill(colname = ['location_x','location_y','knockout_match'], inplace = inplace)

meanvaluefill(['remaining_min','power_of_shot','remaining_sec','distance_of_shot'], 
              inplace = inplace)

prevtonext(colname = ['game_season'], inplace = inplace)

replacewithnum(colname = ['area_of_shot','game_season','shot_basics',
                          'range_of_shot','team_name','lat/lng','type_of_shot',
                         'type_of_combined_shot','home/away'], inplace = inplace)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


In [8]:
cr7.isnull().sum()

location_x                  0
location_y                  0
remaining_min               0
power_of_shot               0
knockout_match              0
game_season                 0
remaining_sec               0
distance_of_shot            0
is_goal                  6268
area_of_shot                0
shot_basics                 0
range_of_shot               0
team_name                   0
home/away                   0
lat/lng                     0
type_of_shot                0
type_of_combined_shot       0
dtype: int64

## Splitting the data into train and test
* We dont have certain values for is_goal. We will be using that as the test data.

In [14]:
train = cr7[cr7['is_goal'].notnull()]
test = cr7[cr7['is_goal'].isnull()]
test.drop('is_goal',axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [26]:
train.isnull().sum()

location_x               0
location_y               0
remaining_min            0
power_of_shot            0
knockout_match           0
game_season              0
remaining_sec            0
distance_of_shot         0
is_goal                  0
area_of_shot             0
shot_basics              0
range_of_shot            0
team_name                0
home/away                0
lat/lng                  0
type_of_shot             0
type_of_combined_shot    0
dtype: int64

In [16]:
train

Unnamed: 0,location_x,location_y,remaining_min,power_of_shot,knockout_match,game_season,remaining_sec,distance_of_shot,is_goal,area_of_shot,shot_basics,range_of_shot,team_name,home/away,lat/lng,type_of_shot,type_of_combined_shot
1,-157.0,0.0,10.000000,1.0,0.0,0,22.0,35.0,0.0,1,0,1,0,0,0,1,0
2,-101.0,135.0,7.000000,1.0,0.0,0,45.0,36.0,1.0,2,0,0,0,1,0,2,0
3,138.0,175.0,6.000000,1.0,0.0,0,52.0,42.0,0.0,3,0,0,0,0,0,3,1
4,0.0,0.0,4.883233,2.0,0.0,0,19.0,20.0,1.0,4,1,2,1,0,0,3,2
5,-145.0,-11.0,9.000000,3.0,0.0,0,32.0,34.0,0.0,1,0,1,0,0,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30691,0.0,0.0,4.883233,4.0,1.0,19,4.0,20.0,0.0,4,1,2,1,15,1,10,0
30692,1.0,48.0,6.000000,4.0,1.0,19,5.0,24.0,0.0,4,4,2,0,15,1,17,0
30694,-134.0,166.0,3.000000,4.0,1.0,19,28.0,41.0,1.0,2,0,0,0,15,3,3,1
30695,31.0,267.0,2.000000,4.0,1.0,19,10.0,46.0,0.0,4,3,4,0,15,1,50,0


In [17]:
test

Unnamed: 0,location_x,location_y,remaining_min,power_of_shot,knockout_match,game_season,remaining_sec,distance_of_shot,area_of_shot,shot_basics,range_of_shot,team_name,home/away,lat/lng,type_of_shot,type_of_combined_shot
0,167.0,72.0,10.0,1.0,0.0,0,27.0,38.0,0,0,0,0,0,0,0,0
7,1.0,28.0,8.0,3.0,0.0,0,5.0,22.0,4,1,2,0,0,0,3,1
16,0.0,0.0,0.0,1.0,0.0,0,1.0,20.0,5,1,2,0,2,1,3,3
19,0.0,0.0,10.0,3.0,0.0,0,46.0,20.0,4,4,2,0,2,1,10,0
21,134.0,127.0,9.0,3.0,0.0,0,4.0,38.0,3,0,0,0,2,1,12,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30681,-18.0,261.0,0.0,2.0,1.0,19,2.0,46.0,4,3,3,0,15,1,4,0
30682,0.0,48.0,11.0,3.0,1.0,19,30.0,28.0,1,2,1,0,15,1,6,0
30686,16.0,93.0,5.0,3.0,1.0,19,37.0,29.0,4,2,1,0,15,1,6,0
30687,40.0,100.0,3.0,3.0,1.0,19,18.0,30.0,4,2,1,0,1,1,13,0


### Data is now ready to be passed into an ML model