# Indexing, Selection & Filtering

In [1]:
import pandas as pd
import numpy as np

# Demo dataset used by every cell below: one row per (city, year) observation
# with its sales and profit figures. Written row-wise so each record reads as
# a line of the resulting table.
df = pd.DataFrame(
    [
        ('Delhi', 2021, 100, 20),
        ('Mumbai', 2021, 150, 35),
        ('Delhi', 2022, 120, 25),
        ('Chennai', 2022, 90, 10),
        ('Mumbai', 2023, 160, 40),
    ],
    columns=['city', 'year', 'sales', 'profit'],
)

# Bare last expression -> rich display of the whole (small) frame.
df

Unnamed: 0,city,year,sales,profit
0,Delhi,2021,100,20
1,Mumbai,2021,150,35
2,Delhi,2022,120,25
3,Chennai,2022,90,10
4,Mumbai,2023,160,40


## Column selection

In [2]:
# Single brackets with one column label return that column as a Series
# (note the `Name: sales` footer in the output).
df['sales']

0    100
1    150
2    120
3     90
4    160
Name: sales, dtype: int64

In [3]:
# Passing a *list* of labels (double brackets) returns a DataFrame containing
# just those columns, in the order listed.
df[['city', 'profit']]

Unnamed: 0,city,profit
0,Delhi,20
1,Mumbai,35
2,Delhi,25
3,Chennai,10
4,Mumbai,40


In [4]:
# Attribute-style access: gives the same Series as df['sales'] here.
# NOTE: per the pandas indexing docs this shortcut only works when the column
# name is a valid Python identifier and does not clash with an existing
# DataFrame attribute/method, so bracket access is the safer general form.
df.sales

0    100
1    150
2    120
3     90
4    160
Name: sales, dtype: int64

In [5]:
# A one-element list keeps the result two-dimensional: a single-column
# DataFrame rather than a Series. head() then previews the leading rows; this
# frame only has five rows, so all of them are shown.
df[['sales']].head()

Unnamed: 0,sales
0,100
1,150
2,120
3,90
4,160


## Row selection and filtering

In [6]:
# .loc selects by index *label*. A single label returns that row as a Series
# indexed by the column names; dtype is object because the row mixes a string
# with integers.
df.loc[0]

city      Delhi
year       2021
sales       100
profit       20
Name: 0, dtype: object

In [7]:
# Label-based slice: with .loc BOTH endpoints are included, so this returns
# the rows labelled 1, 2 and 3.
df.loc[1:3]

Unnamed: 0,city,year,sales,profit
1,Mumbai,2021,150,35
2,Delhi,2022,120,25
3,Chennai,2022,90,10


In [8]:
# .iloc selects by integer *position*. With the default 0..n-1 index the first
# position also carries label 0, so the result matches df.loc[0].
df.iloc[0]

city      Delhi
year       2021
sales       100
profit       20
Name: 0, dtype: object

In [9]:
# Position-based slice: like ordinary Python slicing the end is EXCLUDED, so
# 1:4 yields positions 1, 2 and 3 -- the same rows as df.loc[1:3] above.
df.iloc[1:4]

Unnamed: 0,city,year,sales,profit
1,Mumbai,2021,150,35
2,Delhi,2022,120,25
3,Chennai,2022,90,10


In [10]:
# Boolean filtering: the comparison builds a True/False Series and indexing
# with it keeps only the rows where sales is strictly greater than 120.
df[df['sales'] > 120]

Unnamed: 0,city,year,sales,profit
1,Mumbai,2021,150,35
4,Mumbai,2023,160,40


In [11]:
# Combine conditions element-wise with & (and). Each comparison must be
# wrapped in parentheses because & binds more tightly than > and ==.
df[(df['sales'] > 100) & (df['city'] == 'Mumbai')]

Unnamed: 0,city,year,sales,profit
1,Mumbai,2021,150,35
4,Mumbai,2023,160,40


In [12]:
# query() takes the filter as a string in which column names are used
# directly; this returns the same rows as df[df['sales'] > 120].
df.query("sales > 120")

Unnamed: 0,city,year,sales,profit
1,Mumbai,2021,150,35
4,Mumbai,2023,160,40


In [13]:
# Inside a query string conditions can be joined with the word `and`; string
# values are quoted within the expression (single quotes inside the double-
# quoted query text).
df.query("city == 'Delhi' and year == 2022")

Unnamed: 0,city,year,sales,profit
2,Delhi,2022,120,25


## Index operations

In [14]:
# set_index returns a NEW frame that uses the 'city' column as the row index;
# `df` itself is not modified (later cells still use its 'city' column).
# The resulting index is not unique: 'Delhi' and 'Mumbai' each appear twice.
df_indexed = df.set_index('city')
df_indexed

Unnamed: 0_level_0,year,sales,profit
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Delhi,2021,100,20
Mumbai,2021,150,35
Delhi,2022,120,25
Chennai,2022,90,10
Mumbai,2023,160,40


In [15]:
# Because the 'Mumbai' label occurs more than once in the index, .loc returns
# every matching row as a DataFrame instead of a single-row Series.
df_indexed.loc['Mumbai']

Unnamed: 0_level_0,year,sales,profit
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mumbai,2021,150,35
Mumbai,2023,160,40


In [16]:
# reset_index moves the index ('city') back into an ordinary column and
# restores the default 0..n-1 integer index, reproducing the original layout.
df_reset = df_indexed.reset_index()
df_reset

Unnamed: 0,city,year,sales,profit
0,Delhi,2021,100,20
1,Mumbai,2021,150,35
2,Delhi,2022,120,25
3,Chennai,2022,90,10
4,Mumbai,2023,160,40


In [17]:
# Passing a list of columns builds a two-level (hierarchical) index:
# level 0 is 'city', level 1 is 'year'. Only 'sales' and 'profit' remain as
# data columns.
df_multi = df.set_index(['city', 'year'])
df_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,sales,profit
city,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Delhi,2021,100,20
Mumbai,2021,150,35
Delhi,2022,120,25
Chennai,2022,90,10
Mumbai,2023,160,40


In [18]:
# A tuple supplying a value for every index level pinpoints a single row,
# which comes back as a Series named by that (city, year) tuple.
df_multi.loc[('Delhi', 2022)]

sales     120
profit     25
Name: (Delhi, 2022), dtype: int64

In [19]:
# Partial key: giving only the first level ('city') returns all Mumbai rows as
# a DataFrame, with the matched level dropped so the result is indexed by
# 'year' alone.
df_multi.loc['Mumbai']

Unnamed: 0_level_0,sales,profit
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2021,150,35
2023,160,40
