# Boolean Indexing with pandas

In [None]:
# Import necessary libraries
import pandas as pd

def create_people_dataframe():
    """
    Build the small example table of people used throughout this notebook.

    The data is written one person per row and then transposed into
    per-column lists before being handed to pandas.

    Returns:
    pandas.DataFrame: Ten rows with the columns "name", "age", "country",
                      "siblings" and "favourite_colour", in that order.
    """
    columns = ("name", "age", "country", "siblings", "favourite_colour")
    records = [
        ("Erika Schumacher", 22, "DE", 2, "Red"),
        ("Javi López", 50, "ES", 0, "Yellow"),
        ("Maria Rovira", 23, "ES", 4, "Yellow"),
        ("Ana Gromek", 29, "PL", 1, "Blue"),
        ("Shekhar Biswas", 44, "IN", 1, "Red"),
        ("Muriel Adams", 30, "FR", 2, "Yellow"),
        ("Saira Polom", 25, "IN", 3, "Blue"),
        ("Alex Kubiak", 71, "PL", 7, "Blue"),
        ("Kit Ching", 35, "UK", 0, "Red"),
        ("Dog Woof", 2, "XX", 9, "Gray"),
    ]
    # zip(*records) turns the rows into columns, giving a column -> list mapping.
    column_values = {
        column: list(values) for column, values in zip(columns, zip(*records))
    }
    return pd.DataFrame(column_values)

# Build the example dataset once so the following cells can reuse it
people = create_people_dataframe()

# Preview the top five rows as a quick check that the table looks right
people.head(5)

## Filtering Data Based on Conditions

In [None]:
# Example: keep only the rows where the favourite colour is "Yellow".
# The element-wise comparison gives a boolean Series (True where the row matches),
# which .loc then uses to pick the rows.
yellow_filter = people["favourite_colour"].eq("Yellow")
people.loc[yellow_filter]

### Note: A pandas Series is like a list with an index. It represents a single column within a DataFrame.

In [None]:
def filter_by_color(dataframe, color="Yellow"):
    """
    Return the rows whose "favourite_colour" value equals ``color``.

    Parameters:
    dataframe (pandas.DataFrame): Table to filter; it must have a
                                  "favourite_colour" column.
    color (str): Colour to keep, compared with ``==`` (exact match).

    Returns:
    pandas.DataFrame: The matching rows of ``dataframe``.
    """
    # Boolean mask: True for every row whose favourite colour matches.
    colour_matches = dataframe["favourite_colour"] == color
    return dataframe.loc[colour_matches]

# Run the filter with its default colour ("Yellow") and show the matching rows
filter_by_color(people, color="Yellow")


## Boolean Operators for Indexing

In [None]:

# Use | for OR, & for AND, and ~ for NOT. Wrap each comparison in parentheses,
# because |, & and ~ bind more tightly than comparison operators such as == and >.

def filter_people_excluding_country_with_siblings(dataframe, country="ES", min_siblings=3):
    """
    Select the people who are not from ``country`` and who have more than
    ``min_siblings`` siblings.

    Parameters:
    dataframe (pandas.DataFrame): Table to filter; it must have "country"
                                  and "siblings" columns.
    country (str): Country code to exclude.
    min_siblings (int): Threshold for the sibling count. The comparison is
                        strict, so a row with exactly ``min_siblings``
                        siblings is not selected.

    Returns:
    pandas.DataFrame: The rows of ``dataframe`` that satisfy both conditions.
    """
    # Build each condition as its own boolean Series, then combine them with &.
    outside_country = dataframe["country"] != country
    above_threshold = dataframe["siblings"] > min_siblings
    return dataframe[outside_country & above_threshold]

# Show everyone who is not from 'ES' and has more than 3 siblings
filter_people_excluding_country_with_siblings(people, country="ES", min_siblings=3)
