##### imports

In [1]:
import numpy as np
import pandas as pd

### Lecture 1 - Introducing a New Dataset

In [2]:
players_data = pd.read_csv('../data/soccer.csv')

In [3]:
players_data

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,4.5,198,4.5,0.30%,29,1,England,0,2,20,0,0
463,Ashley Fletcher,West+Ham,21,CF,1,1.0,412,4.5,5.90%,16,1,England,0,1,20,0,1


In [4]:
players_data.info(verbose=False, memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 465 entries, 0 to 464
Columns: 17 entries, name to new_signing
dtypes: float64(2), int64(10), object(5)
memory usage: 190.7 KB


In [5]:
players_data.dtypes.value_counts()

int64      10
object      5
float64     2
dtype: int64

In [6]:
players_data.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


### Lecture 2 -  Quick Review: Indexing With Boolean Masks

In [7]:
# boolean indexing:
#   step 1: generate sequence of booleans
#   step 2: use boolean sequence in [] or .loc[]

In [8]:
# question: what are the players that have a market value exceed 40M?

In [9]:
players_data.market_value

0      65.0
1      50.0
2       7.0
3      20.0
4      22.0
       ... 
460     5.0
461     7.0
462     4.5
463     1.0
464    10.0
Name: market_value, Length: 465, dtype: float64

In [10]:
players_data.market_value > 40

0       True
1       True
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Name: market_value, Length: 465, dtype: bool

In [11]:
players_data[players_data.market_value > 40]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
96,Eden Hazard,Chelsea,26,LW,1,75.0,4220,10.5,2.30%,224,2,Belgium,0,3,5,1,0
97,Diego Costa,Chelsea,28,CF,1,50.0,4454,10.0,3.00%,196,2,Spain,0,4,5,1,0
108,N%27Golo Kante,Chelsea,26,DM,2,50.0,4042,5.0,13.80%,83,2,France,0,3,5,1,1
218,Philippe Coutinho,Liverpool,25,AM,1,45.0,2958,9.0,30.80%,171,3,Brazil,0,3,10,1,0
244,Kevin De Bruyne,Manchester+City,26,AM,1,65.0,2252,10.0,17.50%,199,2,Belgium,0,3,11,1,0
245,Sergio Aguero,Manchester+City,29,CF,1,65.0,4046,11.5,9.70%,175,3,Argentina,0,4,11,1,0
246,Raheem Sterling,Manchester+City,22,LW,1,45.0,2074,8.0,3.80%,149,1,England,0,2,11,1,0
264,Romelu Lukaku,Manchester+United,24,CF,1,50.0,3727,11.5,45.00%,221,2,Belgium,0,2,12,1,0


In [12]:
players_data[players_data.market_value > 40].shape

(13, 17)

### Lecture 3 - More Approaches To Boolean Masking

In [13]:
# check all valid player position
players_data.position.unique()

array(['LW', 'AM', 'GK', 'RW', 'CB', 'RB', 'CF', 'LB', 'DM', 'RM', 'CM',
       nan, 'SS', 'LM'], dtype=object)

In [14]:
# check all valid player positons size
players_data.position.unique().size

14

In [15]:
# get boolean mask of defenders positions (LB, CB, RB)
players_data.position.isin(['LB', 'CB', 'RB'])

0      False
1      False
2      False
3      False
4       True
       ...  
460    False
461     True
462     True
463    False
464    False
Name: position, Length: 465, dtype: bool

In [16]:
# get the df of theese defenders
players_data[players_data.position.isin(['LB', 'CB', 'RB'])]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
7,Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0
8,Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1
17,Gabriel Paulista,Arsenal,26,CB,3,13.0,552,5.0,0.10%,45,3,Brazil,0,3,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
458,Angelo Ogbonna,West+Ham,29,CB,3,9.0,247,4.5,1.10%,45,2,Italy,0,4,20,0,0
459,Pablo Zabaleta,West+Ham,32,RB,3,7.0,698,5.0,2.70%,45,3,Argentina,0,5,20,0,0
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1


In [17]:
# let get all players that have market value between 40M and 50M
players_data.market_value.between(40, 50, inclusive=False)

  players_data.market_value.between(40, 50, inclusive=False)


0      False
1      False
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Name: market_value, Length: 465, dtype: bool

In [18]:
# see the results in df
players_data[players_data.market_value.between(40, 50, inclusive=False)]

  players_data[players_data.market_value.between(40, 50, inclusive=False)]


Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
218,Philippe Coutinho,Liverpool,25,AM,1,45.0,2958,9.0,30.80%,171,3,Brazil,0,3,10,1,0
246,Raheem Sterling,Manchester+City,22,LW,1,45.0,2074,8.0,3.80%,149,1,England,0,2,11,1,0
380,Dele Alli,Tottenham,21,CM,2,45.0,4626,9.5,38.60%,225,1,England,0,1,17,1,0


In [19]:
# we can check all players that are less or equal than 25 yo
players_data.age <= 25

0      False
1      False
2      False
3      False
4      False
       ...  
460     True
461     True
462     True
463     True
464    False
Name: age, Length: 465, dtype: bool

In [20]:
# see the result in df
players_data.loc[players_data.age <= 25]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
8,Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1
9,Alex Iwobi,Arsenal,21,LW,1,10.0,1812,5.5,1.00%,89,4,Nigeria,0,1,1,1,0
10,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
11,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,Pedro Obiang,West+Ham,25,CM,2,9.0,286,4.5,0.30%,55,2,Spain,0,3,20,0,0
460,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,4.5,198,4.5,0.30%,29,1,England,0,2,20,0,0


In [21]:
# another way to do this is with 'le'
players_data.loc[players_data.age.le(25)]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
8,Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1
9,Alex Iwobi,Arsenal,21,LW,1,10.0,1812,5.5,1.00%,89,4,Nigeria,0,1,1,1,0
10,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
11,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,Pedro Obiang,West+Ham,25,CM,2,9.0,286,4.5,0.30%,55,2,Spain,0,3,20,0,0
460,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,4.5,198,4.5,0.30%,29,1,England,0,2,20,0,0


In [22]:
players_data.age.le(25).equals(players_data.age <= 25)

True

### Lecture 4 - Binary Operators With Booleans

In [23]:
# Binary OR -> |

In [24]:
True | False

True

In [25]:
False | True

True

In [26]:
True | True

True

In [27]:
False | False

False

In [28]:
# Binary AND -> &

In [29]:
True & False

False

In [30]:
False & True

False

In [31]:
False & False

False

In [32]:
True & True

True

In [33]:
# create a Seies with booleans

In [34]:
f = pd.Series(False)

In [35]:
f

0    False
dtype: bool

In [36]:
t = pd.Series(True)

In [37]:
t

0    True
dtype: bool

In [38]:
# combine theese two Series

In [39]:
f | t

0    True
dtype: bool

In [40]:
f & t

0    False
dtype: bool

In [41]:
# create a Series with a sequence of booleans

In [42]:
t = pd.Series([True if i % 2 == 0 else False for i in range(10)])

In [43]:
t

0     True
1    False
2     True
3    False
4     True
5    False
6     True
7    False
8     True
9    False
dtype: bool

In [44]:
f = pd.Series(False for i in range(10))

In [45]:
f

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool

In [46]:
t & f

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool

In [47]:
t | f

0     True
1    False
2     True
3    False
4     True
5    False
6     True
7    False
8     True
9    False
dtype: bool

In [48]:
# lets see that the labels are important when combine two series
f = pd.Series(data=[False, True, True], index=['c', 'b', 'a'])
t = pd.Series(data=[True, False, False], index=['a', 'b', 'c'])

In [49]:
f

c    False
b     True
a     True
dtype: bool

In [50]:
t

a     True
b    False
c    False
dtype: bool

In [51]:
f & t

a     True
b    False
c    False
dtype: bool

In [52]:
f | t

a     True
b     True
c    False
dtype: bool

### BONUS - XOR and Complement Binary Ops

In [53]:
# Binary XOR -> ^

In [55]:
True ^ False

True

In [56]:
False ^ True

True

In [58]:
False ^ False

False

In [59]:
True ^ True

False

In [60]:
True ^ (False | False & True) | False

True

In [54]:
# (Two's) Complement -> ~

In [61]:
~False

-1

In [62]:
~True

-2

In [63]:
t = pd.Series([True, True, False])

In [64]:
t

0     True
1     True
2    False
dtype: bool

In [65]:
~t

0    False
1    False
2     True
dtype: bool

### Lecture 5 - Combining Conditions 

In [66]:
# select all the left backs 

In [67]:
players_data.position == 'LB'

0      False
1      False
2      False
3      False
4      False
       ...  
460    False
461     True
462    False
463    False
464    False
Name: position, Length: 465, dtype: bool

In [68]:
players_data[players_data.position == 'LB']

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
7,Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0
18,Kieran Gibbs,Arsenal,27,LB,3,10.0,489,5.0,0.50%,45,1,England,0,3,1,1,0
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
34,Charlie Daniels,Bournemouth,30,LB,3,3.0,185,5.0,19.80%,134,1,England,0,4,2,0,0
54,Brad Smith,Bournemouth,23,LB,3,2.0,297,4.0,3.30%,4,4,Australia,0,2,2,0,0
62,Gaetan Bong,Brighton+and+Hove,29,LB,3,1.5,97,4.5,0.20%,0,4,Cameroon,0,4,3,0,0
65,Markus Suttner,Brighton+and+Hove,30,LB,3,2.0,23,4.5,0.20%,0,2,Austria,0,4,3,0,0
82,Stephen Ward,Burnley,31,LB,3,1.5,152,4.5,2.50%,91,2,Ireland,0,4,4,0,0
99,Marcos Alonso Mendoza,Chelsea,26,LB,3,25.0,3069,7.0,12.40%,177,2,Spain,0,3,5,1,1
112,Kenedy,Chelsea,21,LB,3,7.0,566,5.0,0.10%,3,3,Brazil,0,1,5,1,0


In [69]:
# lets combine age and positon

In [71]:
players_data[(players_data.position == 'LB') & (players_data.age <= 25)]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
54,Brad Smith,Bournemouth,23,LB,3,2.0,297,4.0,3.30%,4,4,Australia,0,2,2,0,0
112,Kenedy,Chelsea,21,LB,3,7.0,566,5.0,0.10%,3,3,Brazil,0,1,5,1,0
128,Jeffrey Schlupp,Crystal+Palace,24,LB,3,8.0,385,5.0,0.30%,47,4,Ghana,0,2,6,0,0
212,Ben Chilwell,Leicester+City,20,LB,3,2.5,288,4.5,0.80%,19,1,England,0,1,9,0,0
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0
294,Paul Dummett,Newcastle+United,25,LB,3,3.5,177,4.5,1.00%,0,2,Wales,0,3,13,0,0
298,Massadio Haidara,Newcastle+United,24,LB,3,1.5,114,4.0,0.50%,0,2,France,0,2,13,0,0
328,Matt Targett,Southampton,21,LB,3,3.0,110,4.5,0.20%,12,1,England,0,1,14,0,0


In [72]:
# one more condition

In [73]:
players_data[(players_data.position == 'LB') & (players_data.age <= 25) & (players_data.market_value >= 10)]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0
389,Ben Davies,Tottenham,24,LB,3,12.0,396,5.5,1.80%,90,2,Wales,0,2,17,1,0


In [74]:
 # lets try exclude some players

In [80]:
players_data[
    (players_data.position == 'LB') &
    (players_data.age <= 25) &
    (players_data.market_value >= 10) &
    ~(players_data.club.isin(['Arsenal', 'Tottenham']))
]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0
