# Intro to Pandas

## Select Entries

In [1]:
# imports
import numpy as np
from pandas import Series,DataFrame
import pandas as pd

### First let's work on Series

In [2]:
# Build a small labelled Series to practise indexing on.
# The raw values 0..3 are scaled by 3 so that the values (0, 3, 6, 9) can never
# be mistaken for the integer positions (0, 1, 2, 3) in the examples below.
ser1 = Series(np.arange(4), index=list('ABCD')) * 3

# Display the result
ser1

A    0
B    3
C    6
D    9
dtype: int32

By index name

In [3]:
# Grab an entry by its index label: 'B' holds the value 3
ser1['B']

3

By position (integer location)

In [4]:
# Grab a value by its integer position (0-based): position 1 holds the value 3.
# Use .iloc for positional access; plain ser1[1] on a label-indexed Series relies
# on a positional fallback that pandas has deprecated.
ser1.iloc[1]

3

By position range (slice)

In [5]:
# Grab a contiguous run of entries by position: positions 0, 1 and 2
# (the end position 3 is excluded).
ser1.iloc[0:3]

A    0
B    3
C    6
dtype: int32

By a list of index names

In [6]:
# Or grab several entries at once by passing a list of index labels
ser1[['A','B','D']]

A    0
B    3
D    9
dtype: int32

By logical operator

In [7]:
# Or filter with a boolean condition: keep only the entries whose value is greater than 3
ser1[ser1>3]

C    6
D    9
dtype: int32

By logical operator. Changing values

In [8]:
# A boolean mask also works on the left-hand side of an assignment:
# every entry greater than 3 is overwritten with 10 (ser1 is modified in place).
above_three = ser1 > 3
ser1[above_three] = 10

# Display the updated Series
ser1

A     0
B     3
C    10
D    10
dtype: int32

## Select Entries on Data Frame

#### Data Frame cheat sheet

In [9]:
# Open the Data Frame cheat sheet in the system's default web browser.
# Note: this has a side effect (a browser tab opens) every time the cell runs.
import webbrowser
url ='http://datacamp-community-prod.s3.amazonaws.com/dbed353d-2757-4617-8206-8767ab379ab3'
webbrowser.open(url)

True

#### Let's work on our "premier league" df

In [10]:
# Load the Premier League table from the Excel workbook (path is relative to this notebook)
# and display it.
# NOTE(review): the rendered GD value for West Ham United is shown with a Unicode minus
# sign ("−3"), which suggests the GD column may be loaded as text rather than integers —
# confirm its dtype before doing arithmetic on it.
premier_df = pd.read_excel("../Data/premier_league.xlsx")
premier_df

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Manchester City,38,32,2,4,95,23,72,98
1,2,Liverpool,38,30,7,1,89,22,67,97
2,3,Chelsea,38,21,9,8,63,39,24,72
3,4,Tottenham Hotspur,38,23,2,13,67,39,28,71
4,5,Arsenal,38,21,7,10,73,51,22,70
5,6,Manchester United,38,19,9,10,65,54,11,66
6,7,Wolverhampton Wanderers,38,16,9,13,47,46,1,57
7,8,Everton,38,15,9,14,54,46,8,54
8,9,Leicester City,38,15,7,16,51,48,3,52
9,10,West Ham United,38,15,7,16,52,55,−3,52


### Selecting By Name

#### The [ ] slicing method

Select a single column by name

In [13]:
# Select a single column by name; the result is a Series
premier_df['Team']

0           Manchester City 
1                  Liverpool
2                    Chelsea
3          Tottenham Hotspur
4                    Arsenal
5          Manchester United
6    Wolverhampton Wanderers
7                    Everton
8             Leicester City
9            West Ham United
Name: Team, dtype: object

Select multiple columns by name

In [14]:
# Select several columns by passing a list of column names; the result is a DataFrame
premier_df[['Team','W','D']]

Unnamed: 0,Team,W,D
0,Manchester City,32,2
1,Liverpool,30,7
2,Chelsea,21,9
3,Tottenham Hotspur,23,2
4,Arsenal,21,7
5,Manchester United,19,9
6,Wolverhampton Wanderers,16,9
7,Everton,15,9
8,Leicester City,15,7
9,West Ham United,15,7


### Selecting By Position

#### The .iloc[] indexer

Single row

In [15]:
# .iloc selects by integer position: this returns the row at position 1 (the second row) as a Series
premier_df.iloc[1]

Pos             2
Team    Liverpool
Pld            38
W              30
D               7
L               1
GF             89
GA             22
GD             67
Pts            97
Name: 1, dtype: object

Multiple rows by slicing

In [16]:
# Slice of rows by position, all columns: positions 1 through 5 (the end position 6 is excluded)
premier_df.iloc[1:6]

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
1,2,Liverpool,38,30,7,1,89,22,67,97
2,3,Chelsea,38,21,9,8,63,39,24,72
3,4,Tottenham Hotspur,38,23,2,13,67,39,28,71
4,5,Arsenal,38,21,7,10,73,51,22,70
5,6,Manchester United,38,19,9,10,65,54,11,66


Specific rows by passing a list

In [17]:
# A list of integer positions returns exactly those rows, with all columns
premier_df.iloc[[2,4,6,8]]

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
2,3,Chelsea,38,21,9,8,63,39,24,72
4,5,Arsenal,38,21,7,10,73,51,22,70
6,7,Wolverhampton Wanderers,38,16,9,13,47,46,1,57
8,9,Leicester City,38,15,7,16,51,48,3,52


Specific rows and specific columns. **Note:** With `.iloc`, columns are specified by their integer position, not by their name.

In [18]:
# Select rows and columns together: first list = row positions, second list = column positions
# (column positions 0, 1 and 9 are 'Pos', 'Team' and 'Pts')
premier_df.iloc[[2,4,6,8], [0,1,9]]

Unnamed: 0,Pos,Team,Pts
2,3,Chelsea,72
4,5,Arsenal,70
6,7,Wolverhampton Wanderers,57
8,9,Leicester City,52


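## Selecting by label

#### The .loc[] indexer

**Note:** `.loc` is label-based for both rows and columns. The row labels `2, 4, 6, 8` match the row positions here only because the DataFrame uses the default integer index.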

In [19]:
# .loc selects by label: rows with index labels 2, 4, 6, 8 and the columns named 'Pos', 'Team', 'Pts'
premier_df.loc[[2,4,6,8], ['Pos','Team','Pts']]

Unnamed: 0,Pos,Team,Pts
2,3,Chelsea,72
4,5,Arsenal,70
6,7,Wolverhampton Wanderers,57
8,9,Leicester City,52


In [20]:
# Show the full DataFrame again for reference
premier_df

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Manchester City,38,32,2,4,95,23,72,98
1,2,Liverpool,38,30,7,1,89,22,67,97
2,3,Chelsea,38,21,9,8,63,39,24,72
3,4,Tottenham Hotspur,38,23,2,13,67,39,28,71
4,5,Arsenal,38,21,7,10,73,51,22,70
5,6,Manchester United,38,19,9,10,65,54,11,66
6,7,Wolverhampton Wanderers,38,16,9,13,47,46,1,57
7,8,Everton,38,15,9,14,54,46,8,54
8,9,Leicester City,38,15,7,16,51,48,3,52
9,10,West Ham United,38,15,7,16,52,55,−3,52


### Boolean indexing

By values on a single column

In [24]:
# Boolean mask on one column: keep only the rows where 'Pts' is greater than 70
premier_df[premier_df['Pts'] > 70]

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Manchester City,38,32,2,4,95,23,72,98
1,2,Liverpool,38,30,7,1,89,22,67,97
2,3,Chelsea,38,21,9,8,63,39,24,72
3,4,Tottenham Hotspur,38,23,2,13,67,39,28,71


In [25]:
# Show the original df: the filter above returned a new DataFrame and left premier_df unchanged
premier_df

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Manchester City,38,32,2,4,95,23,72,98
1,2,Liverpool,38,30,7,1,89,22,67,97
2,3,Chelsea,38,21,9,8,63,39,24,72
3,4,Tottenham Hotspur,38,23,2,13,67,39,28,71
4,5,Arsenal,38,21,7,10,73,51,22,70
5,6,Manchester United,38,19,9,10,65,54,11,66
6,7,Wolverhampton Wanderers,38,16,9,13,47,46,1,57
7,8,Everton,38,15,9,14,54,46,8,54
8,9,Leicester City,38,15,7,16,51,48,3,52
9,10,West Ham United,38,15,7,16,52,55,−3,52


Specific range of a column's values

In [26]:
# Keep the rows whose number of wins lies strictly between 15 and 21.
# Each comparison yields a boolean Series; & combines them element-wise.
wins = premier_df['W']
premier_df[(wins < 21) & (wins > 15)]

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
5,6,Manchester United,38,19,9,10,65,54,11,66
6,7,Wolverhampton Wanderers,38,16,9,13,47,46,1,57


Specific range on two columns

In [28]:
# Combine conditions on two different columns:
# at least 19 wins AND at most 72 points.
enough_wins = premier_df['W'] >= 19
capped_points = premier_df['Pts'] <= 72
premier_df[enough_wins & capped_points]

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
2,3,Chelsea,38,21,9,8,63,39,24,72
3,4,Tottenham Hotspur,38,23,2,13,67,39,28,71
4,5,Arsenal,38,21,7,10,73,51,22,70
5,6,Manchester United,38,19,9,10,65,54,11,66


# Exercises! 