
# Task 1 — Exploring & Visualizing the Iris Dataset

**Objective:** Read, summarize, and visualize a simple dataset.

**Checklist:** shape/columns/head • scatter • histogram • box plot • brief insights.


In [1]:

# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# seaborn is allowed per task instructions (plots use matplotlib for portability in this environment)
import seaborn as sns

# Display settings: never truncate the column list when a DataFrame is rendered
pd.options.display.max_columns = None


ModuleNotFoundError: No module named 'pandas'

In [None]:

# Load dataset (bundled with scikit-learn, so no external download is needed)
from sklearn.datasets import load_iris

iris_bunch = load_iris(as_frame=True)
df = iris_bunch.frame.copy()

# Rename columns for readability. The last column of the frame is the integer
# class code ("target"); it is relabelled "species" here and decoded just below.
df.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "species"]

# Decode the class codes into species names. Left as integers, the label would be
# treated as a numeric feature by describe(), select_dtypes() and corr(), and the
# plot legends would show bare codes instead of names.
species_names = {code: str(name) for code, name in enumerate(iris_bunch.target_names)}
df["species"] = df["species"].map(species_names).astype("category")

df.head()


In [None]:

# Basic structure: dimensions, column names, first rows, and summary statistics
frame_shape = df.shape
column_names = df.columns.tolist()

print("Shape:", frame_shape)
print("\nColumns:", column_names)

# display() renders each argument in turn using the rich notebook representation
display(df.head(), df.describe(include='all'))


In [None]:

# Scatter plot: petal_length vs petal_width, one colour/legend entry per species
fig, ax = plt.subplots()
for species_value in df['species'].unique():
    # Rows belonging to the current species only
    subset = df.loc[df['species'] == species_value]
    ax.scatter(subset['petal_length'], subset['petal_width'], label=species_value, alpha=0.8)
ax.set_xlabel("Petal length (cm)")
ax.set_ylabel("Petal width (cm)")
ax.set_title("Petal length vs width by species")
ax.legend()
plt.show()


In [None]:

# Histogram: distribution of sepal_length across all samples (20 equal-width bins)
fig, ax = plt.subplots()
sepal_lengths = df['sepal_length']
ax.hist(sepal_lengths, bins=20, alpha=0.9)
ax.set_xlabel("Sepal length (cm)")
ax.set_ylabel("Count")
ax.set_title("Distribution of Sepal Length")
plt.show()


In [None]:

# Box plots: numeric features
# Keep only the measurement columns. "species" is dropped explicitly because, when it
# holds integer class codes, select_dtypes would otherwise treat the label as a feature
# and draw a meaningless fifth box. errors="ignore" keeps this safe if it is absent.
numeric_cols = df.select_dtypes(include=[np.number]).columns.drop("species", errors="ignore")

# Create the figure and axes together and hand the axes to pandas. Without ax=...,
# DataFrame.plot opens its own figure and a separately created plt.figure() is left
# empty, which shows up as a blank extra figure in the cell output.
fig, ax = plt.subplots()
df[numeric_cols].plot(kind='box', grid=True, ax=ax)
ax.set_title("Box plots of numeric features")
plt.show()


In [None]:

# Quick correlations: pairwise correlation matrix over the numeric columns only
correlation_matrix = df.corr(numeric_only=True)
correlation_matrix


In [None]:

# Conclusion (edit as needed): summary of what the plots above suggest
key_notes = """
Key notes:
• Petal measurements separate species clearly.
• Sepal features overlap more across species.
• No extreme outliers; ranges are tight.
"""
print(key_notes)
