# Data Filtering in Pandas DataFrames

## Importing Pandas and Creating a DataFrame

In [100]:
import pandas as pd

# Employee records used by the filtering examples, stored column-wise:
# one list per column, aligned by position.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[23, 35, 45, 25, 30],
    Gender=['Female', 'Male', 'Male', 'Male', 'Female'],
    Department=['HR', 'IT', 'Finance', 'IT', 'HR'],
    Salary=[50000, 60000, 80000, 55000, 62000],
)

# Each dict key becomes a column; rows get the default integer index 0..4.
df = pd.DataFrame(data=data)

# Plain-text dump of the whole five-row frame.
print(df)

      Name  Age  Gender Department  Salary
0    Alice   23  Female         HR   50000
1      Bob   35    Male         IT   60000
2  Charlie   45    Male    Finance   80000
3    David   25    Male         IT   55000
4      Eva   30  Female         HR   62000


## Single Condition Filtering

In [103]:
# Boolean-mask selection: keep only the rows whose Age exceeds 30.
filtered_df_single = df.loc[df['Age'].gt(30)]

# One print call; sep="\n" puts the label and the frame on separate lines.
print("Single condition filtering (Age > 30):", filtered_df_single, sep="\n")

Single condition filtering (Age > 30):
      Name  Age Gender Department  Salary
1      Bob   35   Male         IT   60000
2  Charlie   45   Male    Finance   80000


## Multiple Condition Filtering

In [106]:
# Two boolean masks combined element-wise with `&`: a row is kept only when
# its Age exceeds 30 and its Department equals 'IT'.
filtered_df_multi = df.loc[df['Age'].gt(30) & df['Department'].eq('IT')]

print(
    "Multiple condition filtering (Age > 30 and Department == 'IT'):",
    filtered_df_multi,
    sep="\n",
)

Multiple condition filtering (Age > 30 and Department == 'IT'):
  Name  Age Gender Department  Salary
1  Bob   35   Male         IT   60000


## Filtering Using query()

In [111]:
# DataFrame.query() takes the filter as a string expression in which column
# names are used directly: here Age above 30 and Gender equal to "Male".
filtered_df_query = df.query(expr='Age > 30 and Gender == "Male"')

print(
    "Query method filtering (Age > 30 and Gender == 'Male'):",
    filtered_df_query,
    sep="\n",
)

Query method filtering (Age > 30 and Gender == 'Male'):
      Name  Age Gender Department  Salary
1      Bob   35   Male         IT   60000
2  Charlie   45   Male    Finance   80000


## Filtering Using loc

In [116]:
# .loc takes a row selector and a column selector in one call.
# Rows: a boolean mask keeping Age above 30. Columns: the labels Name and Salary.
filtered_df_loc = df.loc[df['Age'].gt(30), ['Name', 'Salary']]

print(
    "Using loc method filtering (Age > 30 and selecting columns Name and Salary):",
    filtered_df_loc,
    sep="\n",
)

Using loc method filtering (Age > 30 and selecting columns Name and Salary):
      Name  Salary
1      Bob   60000
2  Charlie   80000


## Filtering by Row and Column Position Using iloc

In [126]:
# iloc[] selects rows and columns by integer position (0-based); the end of
# each slice is excluded.
#   1:4 -> row positions 1, 2, 3    (the 2nd to 4th rows: Bob, Charlie, David)
#   1:3 -> column positions 1, 2    (the 2nd and 3rd columns: Age, Gender)
filtered_df_iloc = df.iloc[1:4, 1:3]

# NOTE(review): the label below counts rows from 1 ("rows 2 to 4") but gives the
# columns as the raw slice bounds ("1 to 3"); only Age and Gender are returned.
print("Using iloc for selecting rows 2 to 4 and columns 1 to 3:")
print(filtered_df_iloc)

Using iloc for selecting rows 2 to 4 and columns 1 to 3:
   Age Gender
1   35   Male
2   45   Male
3   25   Male


## Filtering by Row Position and Column Names

In [122]:
# .loc is label-based, so row positions are first translated into labels:
# df.index[:3] yields the labels of the first three rows, which .loc then
# combines with the column names 'Name' and 'Salary'.
filtered_df_specific = df.loc[df.index[:3], ['Name', 'Salary']]

print(
    "Filtering first three rows and selecting 'Name' and 'Salary' columns:",
    filtered_df_specific,
    sep="\n",
)

Filtering first three rows and selecting 'Name' and 'Salary' columns:
      Name  Salary
0    Alice   50000
1      Bob   60000
2  Charlie   80000


In [130]:
# Second sample dataset: seven people, four columns.
# Note: this rebinds `data` and `df`, replacing the employee frame that the
# earlier cells filtered.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace'],
    Age=[24, 30, 22, 35, 28, 40, 25],
    Score=[88, 92, 85, 79, 94, 91, 87],
    City=['NY', 'LA', 'SF', 'CHI', 'HOU', 'MIA', 'SEA'],
)

df = pd.DataFrame(data=data)


def _print_section(title, selection):
    """Print a numbered heading, the selected data, then a blank-line separator."""
    print(title)
    print(selection)
    print("\n")


# All selections below use iloc: positions are 0-based and the end of a slice
# is excluded, so "second to fifth row" is written 1:5.

# 1. Row positions 0-4, every column.
_print_section("1. First 5 Rows:", df.iloc[:5])

# 2. Row positions 1-4, every column.
_print_section("2. Second to Fifth Row:", df.iloc[1:5])

# 3. Two integers select a single cell: row position 5, column position 0 (Name).
_print_section("3. Sixth Row, First Column:", df.iloc[5, 0])

# 4. Row slice plus a single column position gives a Series of names.
_print_section("4. Second to Fifth Row, First Column:", df.iloc[1:5, 0])

# 5. The frame has only four columns, so the :5 column slice is clipped to the
#    available width and every column is returned.
_print_section("5. Second to Fifth Row, First 5 Columns:", df.iloc[1:5, :5])

# 6. Row positions 2-6 with column positions 1-2 (Age and Score).
_print_section("6. Third to Seventh Row, 2nd and 3rd Column:", df.iloc[2:7, 1:3])

1. First 5 Rows:
      Name  Age  Score City
0    Alice   24     88   NY
1      Bob   30     92   LA
2  Charlie   22     85   SF
3    David   35     79  CHI
4      Eva   28     94  HOU


2. Second to Fifth Row:
      Name  Age  Score City
1      Bob   30     92   LA
2  Charlie   22     85   SF
3    David   35     79  CHI
4      Eva   28     94  HOU


3. Sixth Row, First Column:
Frank


4. Second to Fifth Row, First Column:
1        Bob
2    Charlie
3      David
4        Eva
Name: Name, dtype: object


5. Second to Fifth Row, First 5 Columns:
      Name  Age  Score City
1      Bob   30     92   LA
2  Charlie   22     85   SF
3    David   35     79  CHI
4      Eva   28     94  HOU


6. Third to Seventh Row, 2nd and 3rd Column:
   Age  Score
2   22     85
3   35     79
4   28     94
5   40     91
6   25     87


