# Data Filtering

<!--
Author: RSK World
Website: https://rskworld.in
Email: help@rskworld.in
Phone: +91 93305 39277
Description: Comprehensive guide to filtering data in Pandas DataFrames
-->

## Introduction

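This notebook covers several ways to filter rows of a Pandas DataFrame: conditional (boolean-mask) filtering, helpers such as `isin()` and `str.contains()`, the `query()` method, and more advanced techniques such as `nlargest()`, `nsmallest()`, `where()`, and `.loc`-based row and column selection.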



In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

import pandas as pd
import numpy as np

# Create sample DataFrame
# The employees are written as one record per person (easier to read and
# extend), then pivoted into the column -> values mapping expected by the
# DataFrame constructor.
column_names = ['Name', 'Age', 'Salary', 'Department', 'City', 'Experience']
employee_rows = [
    ('Alice', 25, 50000, 'IT', 'New York', 2),
    ('Bob', 30, 60000, 'HR', 'London', 5),
    ('Charlie', 35, 70000, 'IT', 'Tokyo', 8),
    ('David', 28, 55000, 'Finance', 'Paris', 3),
    ('Eve', 32, 65000, 'IT', 'Sydney', 6),
    ('Frank', 45, 80000, 'HR', 'Berlin', 12),
    ('Grace', 29, 58000, 'Finance', 'Mumbai', 4),
    ('Henry', 38, 72000, 'IT', 'London', 10),
]
df = pd.DataFrame({
    column: list(values)
    for column, values in zip(column_names, zip(*employee_rows))
})

# Show the full table once so later filter results can be compared against it.
print("Sample DataFrame:")
print(df)



## Basic Filtering


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Build each boolean mask once; a mask is a Series of True/False values with
# one entry per row, and indexing df with it keeps only the True rows.
salary_above_60k = df['Salary'] > 60000
in_it = df['Department'] == 'IT'
in_hr = df['Department'] == 'HR'

# Single numeric condition
print("=== Employees with Salary > 60000 ===")
print(df[salary_above_60k])

# Single string-equality condition
print("\n=== Employees in IT Department ===")
print(df[in_it])

# Combine masks with & (element-wise AND): both conditions must hold
print("\n=== IT employees with Salary > 60000 ===")
print(df[in_it & salary_above_60k])

# Combine masks with | (element-wise OR): either condition may hold
print("\n=== Employees in IT or HR ===")
print(df[in_it | in_hr])



In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Using isin() for multiple values: True where the department is any of the
# listed values, equivalent to chaining == comparisons with |.
print("=== Employees in IT or HR (using isin) ===")
print(df[df['Department'].isin(['IT', 'HR'])])

# Using ~ (NOT) operator: inverts a boolean mask element-wise.
print("\n=== Employees NOT in IT ===")
print(df[~df['Department'].isin(['IT'])])

# Using str.contains() for string filtering.
# regex=False treats 'a' as a literal substring rather than a regular
# expression, and na=False makes missing names count as "no match" instead of
# producing NaN entries in the mask (which would fail when used for indexing).
print("\n=== Employees with 'a' in their name (case-insensitive) ===")
print(df[df['Name'].str.contains('a', case=False, regex=False, na=False)])

# Filter by range: between() is inclusive on both ends by default, so this
# keeps ages 30 through 40.
print("\n=== Employees aged between 30 and 40 ===")
print(df[df['Age'].between(30, 40)])



## Using the `query()` Method


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# query() takes a boolean expression written as a string; column names are
# used directly inside the expression.
salary_expr = 'Salary > 60000'
it_and_salary_expr = 'Department == "IT" and Salary > 60000'
it_or_hr_expr = 'Department == "IT" or Department == "HR"'

# Single condition
print("=== Using query() - Salary > 60000 ===")
print(df.query(salary_expr))

# Two conditions joined with `and`
print("\n=== Query with AND condition ===")
print(df.query(it_and_salary_expr))

# Two conditions joined with `or`
print("\n=== Query with OR condition ===")
print(df.query(it_or_hr_expr))

# Python variables are referenced inside the expression with an @ prefix
min_salary = 60000
max_age = 35
thresholds_expr = 'Salary > @min_salary and Age <= @max_age'
print("\n=== Query with variables ===")
print(df.query(thresholds_expr))



## Advanced Filtering


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Top N rows ranked by a column (largest values first)
print("=== Top 3 highest salaries ===")
print(df.nlargest(n=3, columns='Salary'))

# Bottom N rows ranked by a column (smallest values first)
print("\n=== Bottom 3 lowest salaries ===")
print(df.nsmallest(n=3, columns='Salary'))

# where() keeps every row of the frame; rows that fail the condition are
# not dropped but have their values replaced with NaN.
salary_above_60k = df['Salary'] > 60000
print("\n=== Using where() - keep original structure ===")
print(df.where(salary_above_60k))

# .loc takes a row mask and a column list, so filtering and column selection
# happen in a single step.
it_rows = df['Department'] == 'IT'
report_columns = ['Name', 'Salary', 'Experience']
print("\n=== Filtered data with specific columns ===")
print(df.loc[it_rows, report_columns])

