# 1. Setting the Stage
This project is about interpretable and explainable ML: training classifiers and then producing explanations for their predictions.

In [1]:
import numpy as np
import pandas as pd
from jupyterthemes import jtplot
# Apply the "monokai" jupyterthemes plotting style (ticks on, grid off) for this notebook.
jtplot.style(
    theme='monokai',
    context='notebook',
    ticks=True,
    grid=False,
)

In [2]:
# Read the match statistics CSV (path is relative to the working directory)
# and preview the first five rows.
data = pd.read_csv(filepath_or_buffer='FIFA 2018 Statistics.csv')
data.head(n=5)

Unnamed: 0,Date,Team,Opponent,Goal Scored,Ball Possession %,Attempts,On-Target,Off-Target,Blocked,Corners,...,Yellow Card,Yellow & Red,Red,Man of the Match,1st Goal,Round,PSO,Goals in PSO,Own goals,Own goal Time
0,14-06-2018,Russia,Saudi Arabia,5,40,13,7,3,3,6,...,0,0,0,Yes,12.0,Group Stage,No,0,,
1,14-06-2018,Saudi Arabia,Russia,0,60,6,0,3,3,2,...,0,0,0,No,,Group Stage,No,0,,
2,15-06-2018,Egypt,Uruguay,0,43,8,3,3,2,0,...,2,0,0,No,,Group Stage,No,0,,
3,15-06-2018,Uruguay,Egypt,1,57,14,4,6,4,5,...,0,0,0,Yes,89.0,Group Stage,No,0,,
4,15-06-2018,Morocco,Iran,0,64,13,3,6,4,5,...,1,0,0,No,,Group Stage,No,0,1.0,90.0


In [3]:
# Target (dependent variable): True where the 'Man of the Match' column equals "Yes",
# i.e. the categorical "Yes"/"No" strings become booleans.
y = data['Man of the Match'].eq("Yes")

# Features: every column whose dtype is int64, kept in the original column order.
feature_names = [name for name, dtype in data.dtypes.items() if dtype == np.int64]
X = data[feature_names]
X

Unnamed: 0,Goal Scored,Ball Possession %,Attempts,On-Target,Off-Target,Blocked,Corners,Offsides,Free Kicks,Saves,Pass Accuracy %,Passes,Distance Covered (Kms),Fouls Committed,Yellow Card,Yellow & Red,Red,Goals in PSO
0,5,40,13,7,3,3,6,3,11,0,78,306,118,22,0,0,0,0
1,0,60,6,0,3,3,2,1,25,2,86,511,105,10,0,0,0,0
2,0,43,8,3,3,2,0,1,7,3,78,395,112,12,2,0,0,0
3,1,57,14,4,6,4,5,1,13,3,86,589,111,6,0,0,0,0
4,0,64,13,3,6,4,5,0,14,2,86,433,101,22,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,1,46,11,1,6,4,4,3,24,5,79,479,148,14,1,0,0,0
124,2,43,12,4,3,5,4,1,5,5,88,510,108,11,1,0,0,0
125,0,57,15,5,7,3,5,0,12,2,92,698,110,5,2,0,0,0
126,4,39,8,6,1,1,2,1,14,1,75,271,99,14,2,0,0,0


## 2. Making Predictions with a Decision Tree Classifier

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

In [5]:
# Split features and target into training and validation parts;
# the fixed random_state makes the split repeatable.
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    random_state=1,
)

In [6]:
# Build a decision tree limited to depth 5 (nodes need at least 5 samples to split)
# and fit it on the training split; `fit` returns the estimator itself.
tree_model = DecisionTreeClassifier(
    max_depth=5,
    min_samples_split=5,
    random_state=0,
).fit(train_X, train_y)
tree_model

DecisionTreeClassifier(max_depth=5, min_samples_split=5, random_state=0)

In [7]:
# Decision-tree predictions for the validation rows.
y_pred = tree_model.predict(X=val_X)

In [8]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [9]:
# Confusion matrix (printed) and accuracy (cell output) of `y_pred` against the validation labels.
cm = confusion_matrix(y_true=val_y, y_pred=y_pred)
print(cm)
accuracy_score(y_true=val_y, y_pred=y_pred)

[[ 9  7]
 [ 6 10]]


0.59375

This matrix can be interpreted as follows (rows are the actual classes and columns the predicted classes, each ordered `False`, `True`). In total, 9 negatives have been correctly predicted as negative and 10 positives have been correctly predicted as positive. So, in 19 cases, the predicted values coincide with the values in our validation / test data set. However, 7 cases were predicted as positive but turned out to be negative, and 6 cases were predicted as negative but turned out to be positive: 13 in total. So, out of the 32 cases, 19 were correct, whereas 13 were incorrect. Hence the accuracy of ~59% (0.59375).


## 3. Making Predictions with a Random Forest Classifier

In [10]:
from sklearn.ensemble import RandomForestClassifier

In [11]:
# Build a 100-tree random forest with a fixed seed and fit it on the training split;
# `fit` returns the estimator itself.
my_model = RandomForestClassifier(
    random_state=0,
    n_estimators=100,
).fit(train_X, train_y)
my_model

RandomForestClassifier(random_state=0)

In [12]:
# Random-forest predictions for the validation rows (replaces the earlier `y_pred`).
y_pred = my_model.predict(X=val_X)

In [13]:
# Confusion matrix (printed) and accuracy (cell output) of `y_pred` against the validation labels.
cm = confusion_matrix(y_true=val_y, y_pred=y_pred)
print(cm)
accuracy_score(y_true=val_y, y_pred=y_pred)

[[10  6]
 [ 4 12]]


0.6875

## 4. Extracting Most Impactful Features

In [14]:
# One-time setup if the `eli5` package is missing (kept commented out): pip install eli5

In [15]:
from eli5.sklearn import PermutationImportance

In [16]:
# Permutation importance of the random forest, computed on the validation split.
perm = PermutationImportance(
    my_model,
    random_state=1,
).fit(val_X, val_y)

In [17]:
import eli5

In [18]:
# Render the importance table stored in `perm`, labelled with the validation column names.
eli5.show_weights(perm, feature_names=list(val_X.columns))

Weight,Feature
0.1750  ± 0.0848,Goal Scored
0.0500  ± 0.0637,Distance Covered (Kms)
0.0437  ± 0.0637,Yellow Card
0.0187  ± 0.0500,Off-Target
0.0187  ± 0.0637,Free Kicks
0.0187  ± 0.0637,Fouls Committed
0.0125  ± 0.0637,Pass Accuracy %
0.0125  ± 0.0306,Blocked
0.0063  ± 0.0612,Saves
0.0063  ± 0.0250,Ball Possession %


In [19]:
# Permutation importance of the decision tree, computed on the validation split.
# Stored under its own name so the random-forest result in `perm` is not overwritten:
# re-running the earlier `show_weights(perm, ...)` cell then still shows the forest.
tree_perm = PermutationImportance(tree_model, random_state=1).fit(val_X, val_y)
eli5.show_weights(tree_perm, feature_names=val_X.columns.tolist())

Weight,Feature
0.1437  ± 0.1159,Goal Scored
0.0312  ± 0.0395,On-Target
0.0187  ± 0.0306,Distance Covered (Kms)
0.0063  ± 0.0729,Off-Target
0  ± 0.0000,Red
0  ± 0.0000,Ball Possession %
0  ± 0.0000,Blocked
0  ± 0.0000,Offsides
0  ± 0.0000,Goals in PSO
0  ± 0.0000,Saves


## 5. Extracting the most influential values of Impactful Features

In [38]:
pip install PDPbox

Collecting PDPbox
  Using cached PDPbox-0.2.1-py3-none-any.whl
Collecting matplotlib==3.1.1
  Using cached matplotlib-3.1.1.tar.gz (37.8 MB)
Building wheels for collected packages: matplotlib
  Building wheel for matplotlib (setup.py): started
  Building wheel for matplotlib (setup.py): finished with status 'error'
  Running setup.py clean for matplotlib
Failed to build matplotlib
Installing collected packages: matplotlib, PDPbox
  Attempting uninstall: matplotlib
    Found existing installation: matplotlib 3.3.4
    Uninstalling matplotlib-3.3.4:
      Successfully uninstalled matplotlib-3.3.4
    Running setup.py install for matplotlib: started
    Running setup.py install for matplotlib: finished with status 'error'
  Rolling back uninstall of matplotlib
  Moving to c:\users\kiit\anaconda3\lib\site-packages\__pycache__\pylab.cpython-38.pyc
   from C:\Users\KIIT\AppData\Local\Temp\pip-uninstall-0ju4sk59\pylab.cpython-38.pyc
  Moving to c:\users\kiit\anaconda3\lib\site-packages\matplo

  ERROR: Command errored out with exit status 1:
   command: 'C:\Users\KIIT\anaconda3\python.exe' -u -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'C:\\Users\\KIIT\\AppData\\Local\\Temp\\pip-install-c0b2w7nr\\matplotlib_cf2d0511fe3c40dd9628345a8533f805\\setup.py'"'"'; __file__='"'"'C:\\Users\\KIIT\\AppData\\Local\\Temp\\pip-install-c0b2w7nr\\matplotlib_cf2d0511fe3c40dd9628345a8533f805\\setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' bdist_wheel -d 'C:\Users\KIIT\AppData\Local\Temp\pip-wheel-s4etpt6u'
       cwd: C:\Users\KIIT\AppData\Local\Temp\pip-install-c0b2w7nr\matplotlib_cf2d0511fe3c40dd9628345a8533f805\
  Complete output (499 lines):
  Edit setup.cfg to change the build options
  
  BUILDING MATPLOTLIB
    matplotlib: yes [3.1.1]
        python: yes [3.8.8 (default, Apr 13 2021, 15:08:03) [MSC v.1916 64 bit
                    (AMD64)]]
      pl

  copying lib\matplotlib\backends\backend_qt5agg.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_qt5cairo.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_svg.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_template.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_tkagg.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_tkcairo.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_webagg.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_webagg_core.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_wx.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying lib\matplotlib\backends\backend_wxagg.py -> build\lib.win-amd64-3.8\matplotlib\backends
  copying

    copying lib\matplotlib\backends\backend_gtk3cairo.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_macosx.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_mixed.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_nbagg.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_pdf.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_pgf.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_ps.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_qt4.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_qt4agg.py -> build\lib.win-amd64-3.8\matplotlib\backends
    copying lib\matplotlib\backends\backend_qt4cairo.py -> build\lib.win-amd64-3.8\matplotlib\backends


    copying lib\matplotlib\backends\web_backend\single_figure.html -> build\lib.win-amd64-3.8\matplotlib\backends\web_backend
    copying lib\matplotlib\mpl-data\stylelib\fivethirtyeight.mplstyle -> build\lib.win-amd64-3.8\matplotlib\mpl-data\stylelib
    copying lib\matplotlib\backends\web_backend\jquery-ui-1.12.1\jquery-ui.structure.min.css -> build\lib.win-amd64-3.8\matplotlib\backends\web_backend\jquery-ui-1.12.1
    copying lib\matplotlib\mpl-data\images\back_large.png -> build\lib.win-amd64-3.8\matplotlib\mpl-data\images
    UPDATING build\lib.win-amd64-3.8\matplotlib\_version.py
    set build\lib.win-amd64-3.8\matplotlib\_version.py to '3.1.1'
    running build_ext
    building 'matplotlib.ft2font' extension
    error: Microsoft Visual C++ 14.0 or greater is required. Get it with "Microsoft C++ Build Tools": https://visualstudio.microsoft.com/visual-cpp-build-tools/
    ----------------------------------------
ERROR: Command errored out with exit status 1: 'C:\Users\KIIT\anacond

In [None]:
from matplotlib import pyplot as plt
from pdpbox import pdp, get_dataset, info_plots

In [None]:
# Name of the feature passed to the partial dependence cells below.
feature_to_plot = "Distance Covered (Kms)"

In [None]:
# Partial dependence of the decision tree on `feature_to_plot`, evaluated on the validation rows.
pdp_dist = pdp.pdp_isolate(
    model=tree_model,
    dataset=val_X,
    model_features=feature_names,
    feature=feature_to_plot,
)

In [None]:
# Draw the partial dependence plot for the result currently held in `pdp_dist`.
pdp.pdp_plot(
    pdp_dist,
    feature_to_plot,
)
plt.show()

In [None]:
# Partial dependence of the random forest on `feature_to_plot`, evaluated on the
# validation rows (replaces the earlier `pdp_dist`).
pdp_dist = pdp.pdp_isolate(
    model=my_model,
    dataset=val_X,
    model_features=feature_names,
    feature=feature_to_plot,
)

In [None]:
# Draw the partial dependence plot for the result currently held in `pdp_dist`.
pdp.pdp_plot(
    pdp_dist,
    feature_to_plot,
)
plt.show()