In [1]:
# Title: Group and Aggregate Data
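# Description: Group the Iris dataset by species (the target column) and compute per-species summary statistics for each feature: mean, median, observation count, and a combined mean/median/std/count table.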

In [2]:
import seaborn as sns
import pandas as pd

# Fetch the Iris measurements as a DataFrame through seaborn's dataset loader
iris = sns.load_dataset('iris')

# Divider printed after each report section except the last one
section_break = "\n" + "="*50 + "\n"

# Build the per-species grouping once; every summary below is derived from it
by_species = iris.groupby('species')

# Mean of every feature column within each species
average_features_per_species = by_species.mean()
print(
    "Average feature values per Iris species:",
    average_features_per_species,
    section_break,
    sep="\n",
)

# Grouping on several keys works the same way. If the data carried a second
# categorical column such as 'location', the call would look like:
#     iris.groupby(['species', 'location']).mean()

# Other reductions are available on the same grouping, e.g. the median ...
median_features_per_species = by_species.median()
print(
    "Median feature values per Iris species:",
    median_features_per_species,
    section_break,
    sep="\n",
)

# ... or the number of rows that fall into each group
count_per_species = by_species.size()
print(
    "Count of observations per Iris species:",
    count_per_species,
    section_break,
    sep="\n",
)

# .agg() accepts a list of reductions and applies all of them to every
# feature column, producing a two-level (feature, statistic) column index
summary_stats = ['mean', 'median', 'std', 'count']
aggregated_data = by_species.agg(summary_stats)
print("Aggregated statistics per Iris species:", aggregated_data, sep="\n")

Average feature values per Iris species:
            sepal_length  sepal_width  petal_length  petal_width
species                                                         
setosa             5.006        3.428         1.462        0.246
versicolor         5.936        2.770         4.260        1.326
virginica          6.588        2.974         5.552        2.026


Median feature values per Iris species:
            sepal_length  sepal_width  petal_length  petal_width
species                                                         
setosa               5.0          3.4          1.50          0.2
versicolor           5.9          2.8          4.35          1.3
virginica            6.5          3.0          5.55          2.0


Count of observations per Iris species:
species
setosa        50
versicolor    50
virginica     50
dtype: int64


Aggregated statistics per Iris species:
           sepal_length                        sepal_width                   \
                   mean median  