# Import data
Let's pull in the data and display it. We have 4 feature columns and one target column called `target`.

In [None]:
from sklearn.datasets import load_iris
import pandas as pd

# Load iris as pandas objects. NOTE: `as_frame` requires scikit-learn >= 0.23;
# without it `iris.data` is a NumPy array and the DataFrame steps below would
# not work as written.
iris = load_iris(as_frame=True)
# `assign` returns a new DataFrame (features + `target`), so the feature frame
# held by `iris` is left untouched.
df = iris.data.assign(target=iris.target)
df.head()

# High level Analysis
Let's check for nulls and examine the mean, median, etc. for the different features across the different targets.

In [None]:
from IPython.display import display

def _label_by_class_name(table):
    """Relabel a per-target table's index with iris class names (best-effort).

    Each index value is looked up in ``iris.target_names`` and the index is
    replaced in place. If ``iris`` is not defined or the lookup fails, the
    numeric labels are kept and a short note is printed, so the fallback is
    visible instead of being silently swallowed.

    Returns the same ``table`` object for convenient chaining.
    """
    try:
        table.index = table.index.map(lambda i: iris.target_names[i])
    except (NameError, AttributeError, LookupError, TypeError) as exc:
        print(f"Keeping numeric target labels ({type(exc).__name__}: {exc})")
    return table


# Check for missing values
print("Null values per column:")
print(df.isna().sum())

# Overall descriptive statistics (transposed: one row per column)
print("\nOverall descriptive statistics (features):")
display(df.describe().T)

# Select feature columns (everything except the label column)
features = df.columns.drop('target')

# Group once by target and reuse the grouping for both summaries below
by_target = df.groupby('target')[features]

# Aggregate stats by target, with class names as row labels when available
group_stats = _label_by_class_name(
    by_target.agg(['count', 'mean', 'median', 'std', 'min', 'max'])
)

print("\nAggregate statistics by target:")
display(group_stats)

# Interquartile range (IQR) by target: 75th percentile minus 25th percentile
iqr = _label_by_class_name(
    by_target.agg(lambda x: x.quantile(0.75) - x.quantile(0.25))
)

print("\nInterquartile range (IQR) by target:")
display(iqr)

# Plots
Let's examine how the data looks. Use Copilot to generate plots of the data, e.g. use PCA and colour by target.

# Optional: Modelling
Use a classification model from sklearn to try to predict the target from the features or, instead, discuss with Copilot in the chat which model you could use!