## Conditional Selection and Boolean Indexing

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load Titanic dataset
# NOTE(review): `titanic` is not referenced by any other cell visible in this
# notebook; every example below uses the hand-built `df` or synthetic frames.
# seaborn's load_dataset() retrieves the example data from seaborn's online
# data repository (cached locally afterwards), so this line presumably needs
# network access on a first run — TODO confirm the dataset is still required.
titanic = sns.load_dataset('titanic')

## Basic Boolean Indexing

In [2]:
# Sample employee table used by the examples that follow.
# Records are written row-wise (one tuple per employee) and then pivoted
# into the column -> values mapping that pd.DataFrame consumes.
employee_columns = ['Name', 'Age', 'Salary', 'Department', 'Experience', 'Performance']
employee_records = [
    ('Alice', 25, 50000, 'IT', 2, 'Good'),
    ('Bob', 30, 60000, 'HR', 5, 'Excellent'),
    ('Charlie', 35, 75000, 'IT', 8, 'Good'),
    ('David', 40, 90000, 'Finance', 12, 'Excellent'),
    ('Eva', 22, 45000, 'HR', 1, 'Average'),
    ('Frank', 45, 95000, 'IT', 15, 'Excellent'),
    ('Grace', 28, 52000, 'Finance', 3, 'Good'),
    ('Henry', 33, 68000, 'HR', 7, 'Average'),
]

# zip(*records) transposes rows into per-column tuples.
data = {
    column: list(values)
    for column, values in zip(employee_columns, zip(*employee_records))
}

df = pd.DataFrame(data)
print("Original DataFrame:", df, sep="\n")
print("\n" + "=" * 70)

Original DataFrame:
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         IT           2        Good
1      Bob   30   60000         HR           5   Excellent
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
4      Eva   22   45000         HR           1     Average
5    Frank   45   95000         IT          15   Excellent
6    Grace   28   52000    Finance           3        Good
7    Henry   33   68000         HR           7     Average



## Example 1: Simple Single Condition

In [3]:
# Each filter applies exactly one boolean condition: build the mask first,
# then pass it to df[...] to keep only the rows where the mask is True.

# 1. Age strictly greater than 30
age_over_30 = df['Age'] > 30
older_than_30 = df[age_over_30]
print("1. Employees older than 30:", older_than_30, sep="\n")

# 2. Department equal to 'IT'
works_in_it = df['Department'] == 'IT'
it_employees = df[works_in_it]
print("\n2. Employees in IT Department:", it_employees, sep="\n")

# 3. Salary of at least 70,000 (the comparison is inclusive)
salary_at_least_70k = df['Salary'] >= 70000
high_salary = df[salary_at_least_70k]
print("\n3. Employees with salary >= $70,000:", high_salary, sep="\n")

# 4. Performance rating equal to 'Excellent'
rated_excellent = df['Performance'] == 'Excellent'
excellent_perf = df[rated_excellent]
print("\n4. Employees with Excellent Performance:", excellent_perf, sep="\n")

1. Employees older than 30:
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent
7    Henry   33   68000         HR           7     Average

2. Employees in IT Department:
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         IT           2        Good
2  Charlie   35   75000         IT           8        Good
5    Frank   45   95000         IT          15   Excellent

3. Employees with salary >= $70,000:
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent

4. Employees with Excellent Performance:
    Name  Age  Salary Department  Experience Performance
1    Bob   30   60000         HR    

## Example 2: Multiple Conditions (AND/OR)

In [4]:
# Single-column masks reused by the combinations below. pandas combines masks
# with the element-wise operators & (AND), | (OR) and ~ (NOT); inline
# comparisons must be parenthesised because those operators bind tighter
# than >, >=, <= and ==.
dept_it = df['Department'] == 'IT'
dept_finance = df['Department'] == 'Finance'
dept_hr = df['Department'] == 'HR'

# AND: in IT and older than 30
it_and_older = df[dept_it & (df['Age'] > 30)]
print("1. IT Department AND Age > 30:", it_and_older, sep="\n")

# OR: in IT or in Finance
it_or_finance = df[dept_it | dept_finance]
print("\n2. IT OR Finance Department:", it_or_finance, sep="\n")

# Mixed: the OR group is parenthesised so it is resolved before the AND
complex_condition = df[(dept_it | dept_finance) & (df['Salary'] > 60000)]
print("\n3. (IT OR Finance) AND Salary > $60,000:", complex_condition, sep="\n")

# Range: 25 <= Age <= 35, both ends included
age_range = df[(25 <= df['Age']) & (df['Age'] <= 35)]
print("\n4. Age between 25 and 35 (inclusive):", age_range, sep="\n")

# NOT: ~ flips every value of the mask
not_hr = df[~dept_hr]
print("\n5. NOT in HR Department:", not_hr, sep="\n")

1. IT Department AND Age > 30:
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
5    Frank   45   95000         IT          15   Excellent

2. IT OR Finance Department:
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         IT           2        Good
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent
6    Grace   28   52000    Finance           3        Good

3. (IT OR Finance) AND Salary > $60,000:
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent

4. Age between 25 and 35 (inclusive):
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         

## Example 3: Using isin() for Multiple Values

In [5]:
# isin() tests membership in a list of allowed values, replacing a chain of
# `==` comparisons joined with `|`.

# Rows whose department is one of the listed departments
wanted_departments = ['IT', 'Finance']
selected_depts = df.loc[df['Department'].isin(wanted_departments)]
print("1. Employees in IT or Finance (using isin):")
print(selected_depts)

# Rows whose performance rating is one of the listed ratings
wanted_ratings = ['Excellent', 'Good']
selected_perf = df.loc[df['Performance'].isin(wanted_ratings)]
print("\n2. Employees with Excellent or Good performance:")
print(selected_perf)

# isin() works for numeric columns as well
wanted_ages = [25, 30, 35, 40]
selected_ages = df.loc[df['Age'].isin(wanted_ages)]
print("\n3. Employees aged 25, 30, 35, or 40:")
print(selected_ages)

# Prefixing the membership mask with ~ keeps the rows that are NOT listed
excluded_departments = ['HR']
not_selected_depts = df.loc[~df['Department'].isin(excluded_departments)]
print("\n4. Employees NOT in HR (using isin with NOT):")
print(not_selected_depts)

1. Employees in IT or Finance (using isin):
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         IT           2        Good
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent
6    Grace   28   52000    Finance           3        Good

2. Employees with Excellent or Good performance:
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         IT           2        Good
1      Bob   30   60000         HR           5   Excellent
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent
6    Grace   28   52000    Finance           3        Good

3. Employees aged 25, 30, 35, or 40:
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         IT        

## Example 4: String Methods with Boolean Indexing

In [6]:
# The .str accessor applies string methods element-wise and returns a
# boolean Series that can be used directly as a row mask.
employee_names = df['Name']

# Prefix match on 'A'
name_starts_with_a = employee_names.str.startswith('A')
starts_with_a = df[name_starts_with_a]
print("1. Names starting with 'A':", starts_with_a, sep="\n")

# Substring match on 'ra', ignoring letter case
name_contains_ra = employee_names.str.contains('ra', case=False)
contains_ra = df[name_contains_ra]
print("\n2. Names containing 'ra' (case-insensitive):", contains_ra, sep="\n")

# Length test: more than 5 characters
name_is_long = employee_names.str.len() > 5
long_names = df[name_is_long]
print("\n3. Names longer than 5 characters:", long_names, sep="\n")

# Suffix match on 'e'
name_ends_with_e = employee_names.str.endswith('e')
ends_with_e = df[name_ends_with_e]
print("\n4. Names ending with 'e':", ends_with_e, sep="\n")

1. Names starting with 'A':
    Name  Age  Salary Department  Experience Performance
0  Alice   25   50000         IT           2        Good

2. Names containing 'ra' (case-insensitive):
    Name  Age  Salary Department  Experience Performance
5  Frank   45   95000         IT          15   Excellent
6  Grace   28   52000    Finance           3        Good

3. Names longer than 5 characters:
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good

4. Names ending with 'e':
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         IT           2        Good
2  Charlie   35   75000         IT           8        Good
6    Grace   28   52000    Finance           3        Good


## Example 5: Numerical Comparisons and Functions

In [7]:
# Thresholds here are computed from the data itself (quantiles, mean,
# standard deviation) rather than hard-coded.
salaries = df['Salary']

# 1. Top 30% earners: at or above the 70th percentile of salary
salary_threshold = salaries.quantile(0.7)
top_earners = df[salaries >= salary_threshold]
print("1. Top 30% Earners (Salary >= 70th percentile):")
print(top_earners)
print(f"Threshold: ${salary_threshold:,.0f}")

# 2. Experience strictly above the mean
avg_experience = df['Experience'].mean()
above_avg_exp = df[df['Experience'] > avg_experience]
print(f"\n2. Above Average Experience (> {avg_experience:.1f} years):")
print(above_avg_exp)

# 3. Salary inside [mean - std, mean + std]; between() includes both ends
salary_mean = salaries.mean()
salary_std = salaries.std()
band_low = salary_mean - salary_std
band_high = salary_mean + salary_std
within_one_std = df[salaries.between(band_low, band_high)]
print(f"\n3. Salary within 1 standard deviation of mean (${band_low:.0f} - ${band_high:.0f}):")
print(within_one_std)

# 4. Both tails: at or below the 1st quartile, or at or above the 3rd
q1 = salaries.quantile(0.25)
q3 = salaries.quantile(0.75)
in_bottom_quartile = salaries <= q1
in_top_quartile = salaries >= q3
extreme_salaries = df[in_bottom_quartile | in_top_quartile]
print("\n4. Extreme Salaries (bottom 25% or top 25%):")
print(f"Bottom 25%: <= ${q1:,.0f}, Top 25%: >= ${q3:,.0f}")
print(extreme_salaries)

1. Top 30% Earners (Salary >= 70th percentile):
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent
Threshold: $74,300

2. Above Average Experience (> 6.6 years):
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent
7    Henry   33   68000         HR           7     Average

3. Salary within 1 standard deviation of mean ($48263 - $85487):
      Name  Age  Salary Department  Experience Performance
0    Alice   25   50000         IT           2        Good
1      Bob   30   60000         HR           5   Excellent
2  Charlie   35   75000         IT           8        Good
6    Grace   28   52000    Finance           3        Go

## Example 6: Combining Multiple Conditions with Parentheses

In [8]:
# Business rules expressed as combinations of named masks.
print("Complex Business Rules:")

# Building blocks shared by the rules below
exp_over_5 = df['Experience'] > 5
in_it_or_finance = df['Department'].isin(['IT', 'Finance'])
rated_good_or_better = df['Performance'].isin(['Good', 'Excellent'])

# Rule 1: more than 5 years of experience, in IT or Finance, rated Good or Excellent
rule1 = df[exp_over_5 & in_it_or_finance & rated_good_or_better]
print("1. Senior employees (Exp > 5) in IT/Finance with Good+ performance:")
print(rule1)

# Rule 2: younger than 30, paid above the overall mean, rated Excellent
avg_salary = df['Salary'].mean()
under_30 = df['Age'] < 30
paid_above_average = df['Salary'] > avg_salary
rated_excellent_only = df['Performance'] == 'Excellent'
rule2 = df[under_30 & paid_above_average & rated_excellent_only]
print(f"\n2. Young high-potential (Age < 30, Salary > ${avg_salary:.0f}, Performance Excellent):")
print(rule2)

# Rule 3: experienced but paid below the mean of their own department.
# transform('mean') returns one value per row (that row's department mean),
# so it lines up with df's index and can be compared element-wise.
dept_avg_salary = df.groupby('Department')['Salary'].transform('mean')
paid_below_dept_average = df['Salary'] < dept_avg_salary
rule3 = df[exp_over_5 & paid_below_dept_average]
print("\n3. Experienced employees (Exp > 5) paid below department average:")
print(rule3)

Complex Business Rules:
1. Senior employees (Exp > 5) in IT/Finance with Good+ performance:
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent

2. Young high-potential (Age < 30, Salary > $66875, Performance Excellent):
Empty DataFrame
Columns: [Name, Age, Salary, Department, Experience, Performance]
Index: []

3. Experienced employees (Exp > 5) paid below department average:
Empty DataFrame
Columns: [Name, Age, Salary, Department, Experience, Performance]
Index: []


## Example 7: Boolean Indexing with query() Method

In [9]:
# query() takes the filter as a string expression: column names are written
# bare, `and` / `or` replace & / |, and @name reads a Python variable.
print("Using query() method:")

# 1. Two conditions joined with `and`
young_it_expr = 'Age < 30 and Department == "IT"'
young_it = df.query(young_it_expr)
print("1. Young IT employees (Age < 30):", young_it, sep="\n")

# 2. Thresholds supplied from Python variables through the @ prefix
min_salary = 60000
max_age = 35
threshold_expr = 'Salary >= @min_salary and Age <= @max_age'
query_result = df.query(threshold_expr)
print(f"\n2. Salary >= ${min_salary:,} and Age <= {max_age}:", query_result, sep="\n")

# 3. Parenthesised OR group combined with two further AND conditions
complex_expr = (
    '(Department == "IT" or Department == "Finance") '
    'and Experience > 3 and Performance != "Average"'
)
complex_query = df.query(complex_expr)
print("\n3. IT/Finance, Experience > 3, Performance not Average:", complex_query, sep="\n")

Using query() method:
1. Young IT employees (Age < 30):
    Name  Age  Salary Department  Experience Performance
0  Alice   25   50000         IT           2        Good

2. Salary >= $60,000 and Age <= 35:
      Name  Age  Salary Department  Experience Performance
1      Bob   30   60000         HR           5   Excellent
2  Charlie   35   75000         IT           8        Good
7    Henry   33   68000         HR           7     Average

3. IT/Finance, Experience > 3, Performance not Average:
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent


## Example 8: Real-World Business Scenarios

In [10]:
# Build a synthetic customer table and segment it with boolean masks.
# The global seed makes the random draws reproducible; the columns are drawn
# in the order listed, so reordering the dict entries would change the data.
np.random.seed(42)
n_customers = 1000

customer_data = {
    'customer_id': range(1001, 1001 + n_customers),
    'age': np.random.randint(18, 70, n_customers),
    'income': np.random.randint(20000, 150000, n_customers),
    'purchase_amount': np.random.exponential(100, n_customers),
    'num_purchases': np.random.poisson(5, n_customers),
    'days_since_last_purchase': np.random.randint(0, 365, n_customers),
    'location': np.random.choice(['Urban', 'Suburban', 'Rural'], n_customers),
    'membership_tier': np.random.choice(['Basic', 'Silver', 'Gold', 'Platinum'], n_customers, p=[0.5, 0.3, 0.15, 0.05]),
    'churn_risk': np.random.choice([0, 1], n_customers, p=[0.7, 0.3])
}

customers = pd.DataFrame(customer_data)

print("Customer Dataset (first 10 rows):")
print(customers.head(10))
print(f"\nTotal customers: {len(customers)}")

# Business scenario 1: High-value customers
# Spend has to be judged per customer. A cumsum() over the column is a running
# total across rows: it only reflects a row's position in the table and is
# above 1000 for nearly every row after the first few, so it filters nothing.
# NOTE(review): assumes purchase_amount is a per-purchase amount, so that
# purchase_amount * num_purchases approximates a customer's total spend —
# confirm; if it is already a lifetime total, compare it to 1000 directly.
estimated_total_spend = customers['purchase_amount'] * customers['num_purchases']
high_value = customers[
    (customers['income'] > 80000) &
    (customers['num_purchases'] > 8) &
    (estimated_total_spend > 1000)
]
print(f"\n1. High-value customers: {len(high_value)} ({len(high_value)/len(customers)*100:.1f}%)")

# Business scenario 2: At-risk customers for churn
# No purchase for more than 90 days, flagged churn_risk == 1, on a lower tier.
at_risk = customers[
    (customers['days_since_last_purchase'] > 90) &
    (customers['churn_risk'] == 1) &
    (customers['membership_tier'].isin(['Basic', 'Silver']))
]
print(f"\n2. At-risk customers (likely to churn): {len(at_risk)}")

# Business scenario 3: Target for premium upgrade
# Income above 60,000, at least 5 purchases, still on a lower tier, and a
# purchase within the last 30 days.
upgrade_targets = customers[
    (customers['income'] > 60000) &
    (customers['num_purchases'] >= 5) &
    (customers['membership_tier'].isin(['Basic', 'Silver'])) &
    (customers['days_since_last_purchase'] < 30)
]
print(f"\n3. Targets for premium upgrade: {len(upgrade_targets)}")

# Business scenario 4: Inactive customers needing reactivation
inactive = customers[
    (customers['days_since_last_purchase'] > 180) &
    (customers['num_purchases'] > 0)  # Had purchased before
]
print(f"\n4. Inactive customers needing reactivation: {len(inactive)}")

Customer Dataset (first 10 rows):
   customer_id  age  income  purchase_amount  num_purchases  \
0         1001   56  125186       236.994059              7   
1         1002   69   54674        84.704209              5   
2         1003   46   55854       108.504083              5   
3         1004   32   66271        78.477774              7   
4         1005   60   93688       199.873110              3   
5         1006   25   58518       132.392038              4   
6         1007   38  131076        73.659828              3   
7         1008   56   30267       202.617231              4   
8         1009   36  110825        28.141579              5   
9         1010   40   21062        17.119251              1   

   days_since_last_purchase  location membership_tier  churn_risk  
0                       122     Rural           Basic           1  
1                       217  Suburban           Basic           0  
2                        60  Suburban           Basic           0  


## Example 9: Boolean Indexing with loc[] and iloc[]

In [11]:
# .loc[row_mask, columns] filters rows and picks columns in one step, and is
# the supported way to assign into a filtered subset.
print("Using loc[] with boolean indexing:")

# 1. Row mask plus an explicit column list
it_paid_over_60k = (df['Department'] == 'IT') & (df['Salary'] > 60000)
it_high_salary = df.loc[it_paid_over_60k, ['Name', 'Age', 'Salary']]
print("1. IT employees with salary > $60,000 (specific columns):")
print(it_high_salary)

# 2. Conditional assignment on a copy, so df itself is left unchanged.
# Each pass writes Salary * rate into 'Bonus' for the rows with that rating;
# the right-hand Series is aligned on the index, so only the masked rows
# receive a value.
df_modified = df.copy()
bonus_rate_by_rating = {'Excellent': 0.1, 'Good': 0.05, 'Average': 0.02}
for rating, bonus_rate in bonus_rate_by_rating.items():
    has_rating = df_modified['Performance'] == rating
    df_modified.loc[has_rating, 'Bonus'] = df_modified['Salary'] * bonus_rate

print("\n2. DataFrame with calculated bonuses:")
print(df_modified[['Name', 'Performance', 'Salary', 'Bonus']])

# 3. Several conditions combined into one mask and passed to .loc
younger_than_35 = df['Age'] < 35
rated_good_plus = df['Performance'].isin(['Excellent', 'Good'])
exp_over_3 = df['Experience'] > 3
young_high_performers = df.loc[younger_than_35 & rated_good_plus & exp_over_3]
print("\n3. Young high performers (Age < 35, Good+ performance, Exp > 3):")
print(young_high_performers)

Using loc[] with boolean indexing:
1. IT employees with salary > $60,000 (specific columns):
      Name  Age  Salary
2  Charlie   35   75000
5    Frank   45   95000

2. DataFrame with calculated bonuses:
      Name Performance  Salary   Bonus
0    Alice        Good   50000  2500.0
1      Bob   Excellent   60000  6000.0
2  Charlie        Good   75000  3750.0
3    David   Excellent   90000  9000.0
4      Eva     Average   45000   900.0
5    Frank   Excellent   95000  9500.0
6    Grace        Good   52000  2600.0
7    Henry     Average   68000  1360.0

3. Young high performers (Age < 35, Good+ performance, Exp > 3):
  Name  Age  Salary Department  Experience Performance
1  Bob   30   60000         HR           5   Excellent


## Example 10: Advanced Boolean Operations

In [12]:
# Named boolean masks: compute each condition once and reuse it.
is_it = df['Department'] == 'IT'
is_senior = df['Experience'] > 5
is_high_performer = df['Performance'].isin(['Excellent', 'Good'])
is_high_salary = df['Salary'] > df['Salary'].median()

print("Boolean Masks and Operations:")

# Combine masks
senior_it = is_it & is_senior
print("1. Senior IT employees mask:")
print(senior_it)
print("\nSenior IT employees:")
print(df[senior_it])

# Use masks for complex logic: a candidate is either a high performer who
# also earns above the median salary, or a senior member of IT. The label
# below spells out both halves so that it matches the mask.
promotion_candidates = (is_high_performer & is_high_salary) | (is_senior & is_it)
print("\n2. Promotion candidates (High-salary high performers OR Senior IT):")
print(df[promotion_candidates])

# Count True values: summing a boolean Series counts its True entries
print("\n3. Statistics:")
print(f"IT employees: {is_it.sum()}")
print(f"Senior employees: {is_senior.sum()}")
print(f"High performers: {is_high_performer.sum()}")
print(f"Promotion candidates: {promotion_candidates.sum()}")

# Create new column based on boolean conditions.
# The assignments run top to bottom and a later one overwrites an earlier one
# for the same row, so the effective precedence is
# Junior > Veteran > Star > Regular.
# NOTE: this adds 'Employee_Type' to the shared df in place; cells re-run
# after this one will display the extra column.
df['Employee_Type'] = 'Regular'
df.loc[is_high_salary & is_high_performer, 'Employee_Type'] = 'Star'
df.loc[is_senior & ~is_high_performer, 'Employee_Type'] = 'Veteran'
df.loc[df['Age'] < 25, 'Employee_Type'] = 'Junior'

print("\n4. DataFrame with Employee Types:")
print(df[['Name', 'Age', 'Salary', 'Performance', 'Employee_Type']])

Boolean Masks and Operations:
1. Senior IT employees mask:
0    False
1    False
2     True
3    False
4    False
5     True
6    False
7    False
dtype: bool

Senior IT employees:
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
5    Frank   45   95000         IT          15   Excellent

2. Promotion candidates (High performers OR Senior IT):
      Name  Age  Salary Department  Experience Performance
2  Charlie   35   75000         IT           8        Good
3    David   40   90000    Finance          12   Excellent
5    Frank   45   95000         IT          15   Excellent

3. Statistics:
IT employees: 3
Senior employees: 4
High performers: 6
Promotion candidates: 3

4. DataFrame with Employee Types:
      Name  Age  Salary Performance Employee_Type
0    Alice   25   50000        Good       Regular
1      Bob   30   60000   Excellent       Regular
2  Charlie   35   75000        Good          Star
3    David   40   9

## Example 11: Performance Tips and Best Practices

In [13]:
# Performance comparison of different methods
import time

# Create large dataset for performance testing (seeded for reproducibility)
np.random.seed(42)
large_df = pd.DataFrame({
    'A': np.random.randn(1000000),
    'B': np.random.randn(1000000),
    'C': np.random.choice(['X', 'Y', 'Z'], 1000000),
    'D': np.random.randint(0, 100, 1000000)
})

print("Performance Comparison (1M rows):")

# Timing uses time.perf_counter(): a monotonic, high-resolution clock meant
# for measuring short intervals. time.time() is wall-clock time and can jump
# if the system clock is adjusted. Each method is timed once, so the numbers
# are indicative only; use %timeit for repeatable measurements.

# Method 1: Direct boolean indexing
start = time.perf_counter()
result1 = large_df[(large_df['A'] > 0) & (large_df['D'] > 50)]
time1 = time.perf_counter() - start
print(f"1. Direct boolean indexing: {time1:.4f} seconds")

# Method 2: Using query()
start = time.perf_counter()
result2 = large_df.query('A > 0 and D > 50')
time2 = time.perf_counter() - start
print(f"2. Using query(): {time2:.4f} seconds")

# Method 3: Pre-computed masks (building the masks is included in the timing)
start = time.perf_counter()
mask_a = large_df['A'] > 0
mask_d = large_df['D'] > 50
result3 = large_df[mask_a & mask_d]
time3 = time.perf_counter() - start
print(f"3. Pre-computed masks: {time3:.4f} seconds")

# All three approaches must select exactly the same rows
print(f"\nResults are identical: {result1.equals(result2) and result2.equals(result3)}")

# Best practices summary
print("\n" + "="*70)
print("BEST PRACTICES FOR BOOLEAN INDEXING:")
print("="*70)
print("1. Use parentheses for complex conditions: (cond1) & (cond2)")
print("2. For repeated conditions, pre-compute boolean masks")
print("3. Use isin() instead of multiple OR conditions")
print("4. Use query() for cleaner syntax with complex conditions")
print("5. Avoid chained indexing: df[df['A'] > 0]['B'] is bad")
print("6. Use loc[] when modifying filtered data")
print("7. For large datasets, consider numpy.where() for speed")
print("8. Use ~ for NOT operations instead of != where possible")

Performance Comparison (1M rows):
1. Direct boolean indexing: 0.0533 seconds
2. Using query(): 0.0495 seconds
3. Pre-computed masks: 0.0215 seconds

Results are identical: True

BEST PRACTICES FOR BOOLEAN INDEXING:
1. Use parentheses for complex conditions: (cond1) & (cond2)
2. For repeated conditions, pre-compute boolean masks
3. Use isin() instead of multiple OR conditions
4. Use query() for cleaner syntax with complex conditions
5. Avoid chained indexing: df[df['A'] > 0]['B'] is bad
6. Use loc[] when modifying filtered data
7. For large datasets, consider numpy.where() for speed
8. Use ~ for NOT operations instead of != where possible


## Summary of Key Operators

In [14]:
# Quick-reference table of the operators and methods used for boolean indexing.
print("="*70)
print("BOOLEAN INDEXING OPERATORS SUMMARY")
print("="*70)

operators = {
    '&': 'AND (use parentheses: (cond1) & (cond2))',
    '|': 'OR (use parentheses: (cond1) | (cond2))',
    '~': 'NOT (inverse of condition)',
    '==': 'Equals',
    '!=': 'Not equals',
    '>': 'Greater than',
    '<': 'Less than',
    '>=': 'Greater than or equal',
    '<=': 'Less than or equal',
    'isin()': 'Check if value is in list',
    'str.contains()': 'Check if string contains pattern',
    'str.startswith()': 'Check if string starts with pattern',
    'str.endswith()': 'Check if string ends with pattern',
    'isnull()': 'Check for null/missing values',
    'notnull()': 'Check for non-null values',
    'between()': 'Check if value is between two numbers',
    'query()': 'SQL-like query syntax'
}

# Pad every key to the longest one so the ':' separators line up. A fixed
# width of 15 is one short for 'str.startswith()' (16 characters) and pushes
# that row out of alignment.
op_width = max(len(op) for op in operators)
for op, desc in operators.items():
    print(f"{op:<{op_width}} : {desc}")

BOOLEAN INDEXING OPERATORS SUMMARY
&               : AND (use parentheses: (cond1) & (cond2))
|               : OR (use parentheses: (cond1) | (cond2))
~               : NOT (inverse of condition)
==              : Equals
!=              : Not equals
>               : Greater than
<               : Less than
>=              : Greater than or equal
<=              : Less than or equal
isin()          : Check if value is in list
str.contains()  : Check if string contains pattern
str.startswith() : Check if string starts with pattern
str.endswith()  : Check if string ends with pattern
isnull()        : Check for null/missing values
notnull()       : Check for non-null values
between()       : Check if value is between two numbers
query()         : SQL-like query syntax
