# Indexing & Selection & Filtering in Pandas

In [1]:
import pandas as pd
import numpy as np

### Create a Data Frame

In [3]:
# Sample roster used throughout the notebook: each key becomes a column and
# each list holds that column's seven values, in row order.
# NOTE(review): "Karete" looks like a typo for "Karate"; it is kept verbatim
# so the saved outputs below stay in sync with the code.
data = dict(
    name=["Bill", "Tom", "Tim", "John", "Alex", "Vanessa", "Kate"],
    score=[90, 80, 85, 75, 95, 60, 65],
    sport=["Wrestling", "Football", "Skiing", "Swimming", "Tennis", "Karete", "Surfing"],
    sex=["M", "M", "M", "M", "F", "F", "F"],
)

In [4]:
# Turn the dict of equal-length lists into a table: keys become column names
# and rows get the default integer labels 0..6. The bare `df` displays it.
df = pd.DataFrame(data)
df

Unnamed: 0,name,score,sport,sex
0,Bill,90,Wrestling,M
1,Tom,80,Football,M
2,Tim,85,Skiing,M
3,John,75,Swimming,M
4,Alex,95,Tennis,F
5,Vanessa,60,Karete,F
6,Kate,65,Surfing,F


### Creating a DataFrame from Series

In [10]:
# Build every column as its own Series carrying an explicit 0..6 integer
# index, then let DataFrame align the Series on that index.
row_labels = range(0, 7)
name_column = pd.Series(
    ["Bill", "Tom", "Tim", "John", "Alex", "Vanessa", "Kate"],
    index=row_labels,
)
score_column = pd.Series([90, 80, 85, 75, 95, 60, 65], index=row_labels)
data = {"name": name_column, "score": score_column}

df = pd.DataFrame(data)
df

Unnamed: 0,name,score
0,Bill,90
1,Tom,80
2,Tim,85
3,John,75
4,Alex,95
5,Vanessa,60
6,Kate,65


#### 1 - Access one column or series from the data frame

In [12]:
# List the column labels; these are the keys usable with df[...].
df.columns

Index(['name', 'score'], dtype='object')

In [13]:
# Indexing with a single column label returns that column as a Series.
df['name']

0       Bill
1        Tom
2        Tim
3       John
4       Alex
5    Vanessa
6       Kate
Name: name, dtype: object

#### 2 - Accessing single or multiple rows by label with `loc`

In [20]:
# Label-based lookup: fetch the row whose index label is 3 as a Series.
df.loc[3]

name     John
score      75
Name: 3, dtype: object

In [21]:
# Label slices with loc include BOTH endpoints, so this returns rows 2, 3 and 4.
df.loc[2:4]

Unnamed: 0,name,score
2,Tim,85
3,John,75
4,Alex,95


#### 3 - Accessing rows by integer position with `iloc`

In [27]:
# Inspect the row labels; at this point they are the integers 0..6.
df.index

RangeIndex(start=0, stop=7, step=1)

In [30]:
# Position-based lookup: fetch the fourth row (position 3). While the labels
# are 0..6 this is the same row that df.loc[3] returns.
df.iloc[3]

name     John
score      75
Name: 3, dtype: object

In [31]:
# Build the string row labels "my_0" .. "my_6", one for each of the 7 rows,
# so the next cells can contrast label-based and position-based lookups.
index_list = [f"my_{i}" for i in range(7)]
index_list

['my_0', 'my_1', 'my_2', 'my_3', 'my_4', 'my_5', 'my_6']

In [33]:
# Replace the integer row labels with the string labels built above.
# This changes df in place; positions of the rows are unaffected.
df.index = index_list

In [34]:
# Display the frame to confirm the rows are now labelled my_0 .. my_6.
df

Unnamed: 0,name,score
my_0,Bill,90
my_1,Tom,80
my_2,Tim,85
my_3,John,75
my_4,Alex,95
my_5,Vanessa,60
my_6,Kate,65


In [35]:
# iloc ignores labels: position 3 is still John's row, now named 'my_3'.
df.iloc[3]

name     John
score      75
Name: my_3, dtype: object

In [36]:
# loc must now be given the string label to reach the same row.
df.loc['my_3']

name     John
score      75
Name: my_3, dtype: object

In [44]:
# A bare integer slice on a DataFrame selects ROWS by position with the end
# excluded, so this returns positions 2 and 3 (my_2, my_3).
df[2:4]

Unnamed: 0,name,score
my_2,Tim,85
my_3,John,75


In [46]:
# An open-ended slice selects every row.
df[:]

Unnamed: 0,name,score
my_0,Bill,90
my_1,Tom,80
my_2,Tim,85
my_3,John,75
my_4,Alex,95
my_5,Vanessa,60
my_6,Kate,65


#### 4 - Accessing multiple columns

In [22]:
# Recall which column labels are available before selecting several of them.
df.columns

Index(['name', 'score'], dtype='object')

In [23]:
# A single label inside [] returns one column as a Series.
# NOTE(review): the saved output shows integer row labels because this cell
# was executed (In [23]) before the index was relabelled (In [33]); a
# top-to-bottom run would show my_0 .. my_6 here.
df['name']

0       Bill
1        Tom
2        Tim
3       John
4       Alex
5    Vanessa
6       Kate
Name: name, dtype: object

In [24]:
# A LIST of labels inside [] returns a DataFrame holding just those columns,
# in the order given.
# NOTE(review): the saved output shows integer row labels because this cell
# was executed (In [24]) before the index was relabelled (In [33]).
df[['name','score']]

Unnamed: 0,name,score
0,Bill,90
1,Tom,80
2,Tim,85
3,John,75
4,Alex,95
5,Vanessa,60
6,Kate,65


### Let's recreate the full DataFrame

In [52]:
# Rebuild the original four-column dict: `data` was overwritten by the
# two-column Series example earlier in the notebook.
# NOTE(review): "Karete" looks like a typo for "Karate"; kept verbatim so the
# saved outputs stay in sync with the code.
names = ["Bill", "Tom", "Tim", "John", "Alex", "Vanessa", "Kate"]
scores = [90, 80, 85, 75, 95, 60, 65]
sports = ["Wrestling", "Football", "Skiing", "Swimming", "Tennis", "Karete", "Surfing"]
sexes = ["M", "M", "M", "M", "F", "F", "F"]
data = {"name": names, "score": scores, "sport": sports, "sex": sexes}

In [53]:
# Recreate df from the four-column dict; this also discards the my_* row
# labels set earlier and restores the default 0..6 labels.
df = pd.DataFrame(data)
df

Unnamed: 0,name,score,sport,sex
0,Bill,90,Wrestling,M
1,Tom,80,Football,M
2,Tim,85,Skiing,M
3,John,75,Swimming,M
4,Alex,95,Tennis,F
5,Vanessa,60,Karete,F
6,Kate,65,Surfing,F


### Addition and Deletion of Rows

In [54]:
# Two extra people, using the same four columns as df, to be appended below.
data1 = {
    "name": ["Rock", "Rone"],
    "score": [90, 80],
    "sport": ["Cricket", "Football"],
    "sex": ["M", "M"],
}
df1 = pd.DataFrame(data=data1)
df1

Unnamed: 0,name,score,sport,sex
0,Rock,90,Cricket,M
1,Rone,80,Football,M


#### 1 - Appending the rows of another DataFrame

In [58]:
# Stack df1's rows underneath df. Each frame keeps its own row labels, so the
# result contains the labels 0 and 1 twice.
# NOTE(review): df is reassigned from itself, so re-running this cell appends
# df1 again; restart the kernel and run top to bottom to reproduce the output.
df = pd.concat([df,df1])

In [59]:
# Display the combined frame; note the repeated row labels 0 and 1 at the end.
df

Unnamed: 0,name,score,sport,sex
0,Bill,90,Wrestling,M
1,Tom,80,Football,M
2,Tim,85,Skiing,M
3,John,75,Swimming,M
4,Alex,95,Tennis,F
5,Vanessa,60,Karete,F
6,Kate,65,Surfing,F
0,Rock,90,Cricket,M
1,Rone,80,Football,M


In [66]:
# One consecutive integer label per row: df.shape[0] is the number of rows.
new_index = range(0,df.shape[0])

In [68]:
# Overwrite the row labels with the fresh consecutive range so the duplicated
# 0 and 1 labels left by the concat disappear, then display the result.
df.index = new_index
df

Unnamed: 0,name,score,sport,sex
0,Bill,90,Wrestling,M
1,Tom,80,Football,M
2,Tim,85,Skiing,M
3,John,75,Swimming,M
4,Alex,95,Tennis,F
5,Vanessa,60,Karete,F
6,Kate,65,Surfing,F
7,Rock,90,Cricket,M
8,Rone,80,Football,M


#### 2 - Dropping a row

In [69]:
# Remove the row labelled 0. drop returns a new frame and leaves df itself
# untouched, which is why row 0 is still present in later cells.
df.drop(0)

Unnamed: 0,name,score,sport,sex
1,Tom,80,Football,M
2,Tim,85,Skiing,M
3,John,75,Swimming,M
4,Alex,95,Tennis,F
5,Vanessa,60,Karete,F
6,Kate,65,Surfing,F
7,Rock,90,Cricket,M
8,Rone,80,Football,M


#### Dropping multiple rows

In [73]:
# Pass a list of labels to remove several rows at once (here labels 0 and 2).
df.drop([0,2])

Unnamed: 0,name,score,sport,sex
1,Tom,80,Football,M
3,John,75,Swimming,M
4,Alex,95,Tennis,F
5,Vanessa,60,Karete,F
6,Kate,65,Surfing,F
7,Rock,90,Cricket,M
8,Rone,80,Football,M


#### 3 - Dropping a column

In [83]:
# axis=1 makes drop look for 'sex' among the column labels instead of the row
# labels; the result is a new frame without that column.
df.drop(['sex'],axis=1)

Unnamed: 0,name,score,sport
0,Bill,90,Wrestling
1,Tom,80,Football
2,Tim,85,Skiing
3,John,75,Swimming
4,Alex,95,Tennis
5,Vanessa,60,Karete
6,Kate,65,Surfing
7,Rock,90,Cricket
8,Rone,80,Football


#### Dropping multiple columns

In [84]:
# Several column labels can be removed in one call by listing them.
df.drop(['sport','sex'],axis=1)

Unnamed: 0,name,score
0,Bill,90
1,Tom,80
2,Tim,85
3,John,75
4,Alex,95
5,Vanessa,60
6,Kate,65
7,Rock,90
8,Rone,80


## DataFrame Indexing

### Let's create a DataFrame

In [85]:
# Preview the raw values: the 16 consecutive integers 0..15 in a flat array.
np.arange(16)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [86]:
# Fold the 16 values into a 4x4 grid, filled row by row.
np.arange(16).reshape(4,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [87]:
# Wrap a 4x4 grid of the integers 0..15 in a DataFrame whose rows are labelled
# by city and whose columns are labelled "one" .. "four".
# NOTE: from here on `data` is a DataFrame, no longer the dict used earlier.
city_labels = ["London", "Paris", "Berlin", "Istanbul"]
column_labels = ["one", "two", "three", "four"]
grid = np.arange(16).reshape(4, 4)
data = pd.DataFrame(grid, index=city_labels, columns=column_labels)
data

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7
Berlin,8,9,10,11
Istanbul,12,13,14,15


In [88]:
# Select the single column "two"; the result is a Series indexed by city.
data["two"]

London       1
Paris        5
Berlin       9
Istanbul    13
Name: two, dtype: int32

In [89]:
# Select several columns at once by passing a list of labels.
data[["one","two"]]

Unnamed: 0,one,two
London,0,1
Paris,4,5
Berlin,8,9
Istanbul,12,13


In [90]:
# An integer slice inside [] works on rows by position: the first three rows.
data[:3]

Unnamed: 0,one,two,three,four
London,0,1,2,3
Paris,4,5,6,7
Berlin,8,9,10,11


In [91]:
# Boolean filtering: the comparison yields one True/False per row, and only
# the rows where column "four" exceeds 5 are kept.
data[data["four"]>5]

Unnamed: 0,one,two,three,four
Paris,4,5,6,7
Berlin,8,9,10,11
Istanbul,12,13,14,15


In [92]:
# data<5 is a boolean frame of the same shape; assigning through it sets
# every cell whose value is below 5 to 0. This mutates data in place, so
# outputs displayed by earlier cells no longer match its current contents.
data[data<5]=0
data

Unnamed: 0,one,two,three,four
London,0,0,0,0
Paris,0,5,6,7
Berlin,8,9,10,11
Istanbul,12,13,14,15


## Selecting with iloc and loc

In [93]:
# Row at position 1 (Paris), returned as a Series indexed by column name.
data.iloc[1]

one      0
two      5
three    6
four     7
Name: Paris, dtype: int32

In [94]:
# Row position 1 restricted to the columns at positions 1, 2 and 3; a single
# row still comes back as a Series.
data.iloc[1,[1,2,3]]

two      5
three    6
four     7
Name: Paris, dtype: int32

In [95]:
# Lists on both axes: rows at positions 1 and 3, columns at positions 1-3.
# With several rows selected the result is a DataFrame.
data.iloc[[1,3],[1,2,3]]

Unnamed: 0,two,three,four
Paris,5,6,7
Istanbul,13,14,15


In [97]:
# Re-display the frame as a reference for the label-based lookups below.
data

Unnamed: 0,one,two,three,four
London,0,0,0,0
Paris,0,5,6,7
Berlin,8,9,10,11
Istanbul,12,13,14,15


In [96]:
# Label-based counterpart of iloc: the row labelled "Paris", limited to the
# columns labelled "one" and "two".
data.loc["Paris",["one","two"]]

one    0
two    5
Name: Paris, dtype: int32

In [22]:
# Label slice up to AND including "Paris" (loc slices keep the end label),
# taking only the column "four".
# NOTE(review): this cell's execution count (In [22]) is out of sequence with
# its neighbours; re-run the notebook top to bottom before sharing.
data.loc[:"Paris","four"]

London    0
Paris     7
Name: four, dtype: int32

In [98]:
# Small Series holding 0..4 under the string labels "a".."e", used to show
# positional access on a non-integer index.
toy_labels = list("abcde")
toy_data = pd.Series(np.arange(len(toy_labels)), index=toy_labels)
toy_data

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [99]:
# Fetch the last element by position. Use .iloc explicitly: with string
# labels, toy_data[-1] relies on the deprecated positional fallback of
# Series [] indexing (FutureWarning in recent pandas, label lookup later).
toy_data.iloc[-1]

4